source: trunk/UTIL/PYTHON/bibweb/ads.py @ 1186

Last change on this file since 1186 was 1179, checked in by aslmd, 11 years ago

BIBWEB python. various improvements: big LMD script, teams, no year without pubs, pub counts, verbose mode.

  • Property svn:executable set to *
File size: 7.8 KB
Line 
#-*- coding: utf8 -*-
import os
import re
import urllib
3
## -----------------------------------------------------------------
## Purpose: make a nice publication page with an ADS database link
## Author: Aymeric Spiga 19/05/2013; improvements 10-12/2013; counts 02/2014
## -----------------------------------------------------------------
## NB: uses BIBTEX2HTML https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html
## ... and of course NASA ADS http://adsabs.harvard.edu/
## -----------------------------------------------------------------
11
def printpubli(num):
    """Return a human-readable publication count such as "3 publications".

    num -- the number of publications; it is rendered with "%.0f", i.e.
           without decimals.

    The singular form "publication" is used only when num equals 1.
    """
    noun = "publication" if num == 1 else "publications"
    return "%.0f %s" % (num, noun)
16
def _read_text(path):
    """Return the whole content of the file at *path*."""
    with open(path, 'r') as handle:
        return handle.read()


def _write_text(path, text):
    """Write *text* followed by a newline to *path*, replacing any old content."""
    with open(path, 'w') as handle:
        handle.write(text + "\n")


def _count_lines(path):
    """Return the number of lines in the file at *path*."""
    with open(path, 'r') as handle:
        return sum(1 for _ in handle)


def _fetch_text(url):
    """Download *url* and return its body as text (Python 2 and Python 3)."""
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2
    response = urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()
    # Python 3 hands back bytes; Python 2 already returns a str.
    # NOTE(review): the payload is assumed to be UTF-8/ASCII BibTeX -- confirm.
    if not isinstance(data, str):
        data = data.decode('utf-8', 'replace')
    return data


def _insert_pdf_links(bibtext, addpdf):
    """Return *bibtext* with a 'localpdf' line added after each 'adsurl' line.

    The PDF is expected to carry the same name as the ADS reference, so the
    new line is the adsurl line with the ADS prefix replaced by *addpdf*,
    a '.pdf' suffix added and every '%' turned into '_'.
    """
    lines = []
    for line in bibtext.splitlines(True):
        lines.append(line)
        if 'adsurl' in line:
            pdfline = line.replace('adsurl', 'localpdf')
            pdfline = pdfline.replace('http://cdsads.u-strasbg.fr/abs/', addpdf)
            pdfline = pdfline.replace('},', '.pdf},')
            pdfline = pdfline.replace('%', '_')
            lines.append(pdfline)
    return ''.join(lines)


def makepage(authorref,
             bibstyle="-s custom -nokeys",
             listyear=None,
             customcond=None,
             embedded=False,
             linkads=None,
             title=None,
             retrieve=True,
             addpdf=None,
             addlink=None,
             printnum=False,
             verbose=True,
             target=None):
    """Build the publication page <authorref>.html from an ADS BibTeX export.

    The function works in the current directory and shells out to the
    external tools bib2bib and bibtex2html (their output is discarded).

    authorref  -- prefix of every generated file; also used in the default title
    bibstyle   -- options passed verbatim to bibtex2html
    listyear   -- list of years. With several years, one section per year is
                  produced. With a single year and no customcond, entries with
                  year >= that value are selected in one section.
                  None (default) behaves like [0]. The list is never modified.
    customcond -- extra bib2bib condition(s), e.g. "-c 'year>=2010'"
    embedded   -- if True, wrap the page with header.html and footer.html
    linkads    -- file holding the ADS query URL (default: <authorref>.link);
                  the BibTeX database is stored in <linkads>.bib
    title      -- HTML put at the top; None gives "<authorref>'s publications",
                  "" gives no title
    retrieve   -- if True, download the BibTeX database from ADS first
    addpdf     -- URL prefix of an online PDF repository; when set (and
                  retrieve is True) a 'localpdf' field is added to each entry
    addlink    -- extra HTML appended after the list of years
    printnum   -- if True, print publication counts (per section and total)
    verbose    -- if True, print progress information on stdout
    target     -- directory the results are moved to (created if needed)

    Raises IOError/OSError when an expected file (link file, header/footer,
    or an output of bib2bib / bibtex2html) is missing.
    """
    # Work on a private copy: the single-year case below overwrites the first
    # element, which must not leak into the caller's list (or into a default
    # shared between calls).
    years = [0] if listyear is None else list(listyear)

    ### HEADER
    page_top = _read_text('header.html') if embedded else ""
    if title is None:
        page_top += "<h2>" + authorref + "'s publications</h2>"
    else:
        page_top += title  # an empty title simply adds nothing

    ### if linkads is None, we set it to "authorref.link"
    if linkads is None:
        linkads = authorref + '.link'
    bibdb = linkads + '.bib'

    ### GET INFO FROM ADS
    if retrieve:
        if verbose:
            print("retrieving info from ADS")
        # the link file usually ends with a newline, which is not part of the URL
        bibtext = _fetch_text(_read_text(linkads).strip())
        ## fix problem with accents
        bibtext = bibtext.replace("{\\'e}", 'é')
        bibtext = bibtext.replace("{\\`e}", 'è')
        # Insert the PDF links once, BEFORE bib2bib splits the database:
        # doing it per year (after the split) left the first year without
        # links and duplicated the localpdf field for the following years.
        if addpdf is not None:
            bibtext = _insert_pdf_links(bibtext, addpdf)
        _write_text(bibdb, bibtext)

    ### if only one year and no customcond, make it useful. ask for years >= this value
    if len(years) == 1 and customcond is None:
        customcond = "-c 'year>=%s'" % (years[0],)
        years[0] = 99  # only used as a suffix for the intermediate file names

    # one titled section per year (and an index of years) unless the page is
    # a single custom selection
    by_year = customcond is None or len(years) > 1

    ### YEAR LOOP
    numpublitot = 0
    nonzeroyears = []
    sections = []
    for year in years:
        author = authorref + str(year)

        # 0. define condition
        #    several years: restrict to this year, on top of customcond if any
        #    otherwise: customcond alone
        if len(years) > 1:
            cond = "-c 'year=%s'" % (year,)
            if customcond is not None:
                cond = customcond + " " + cond
        else:
            cond = customcond

        # 1. select items ARTICLE in the big bib file
        #    put those in dedicated author.bib (entries) and author.txt (keys)
        cmd = ("bib2bib --quiet %s -c '$type=%s' -oc %s -ob %s %s"
               " >> /dev/null 2>> /dev/null"
               % (cond, '"ARTICLE"', author + '.txt', author + '.bib', bibdb))
        os.system(cmd)

        # count number of publications (both per year and increment total)
        numpubli = _count_lines(author + '.txt')
        if verbose:
            print("%s --> count: %.0f" % (author, numpubli))
        numpublitot += numpubli
        # record years with publications, skip the others
        if numpubli == 0:
            continue
        nonzeroyears.append(year)

        # 2. make the html page from the author.bib file
        header = ''
        if by_year:
            header = '<a name="%.0f"></a>' % (year,)
            header += "<h3>%.0f <a href=''>.</a> </h3>" % (year,)
        if printnum:
            header += "(" + printpubli(numpubli) + ")"
        if by_year and embedded:
            header += '<br>'
        # NOTE: the header is passed to the shell between double quotes, so
        # the double quotes around the anchor name are consumed by the shell.
        cmd = ("bibtex2html -q --both -m ads.tex %s"
               " -nf adsurl 'ADS link' -nf localpdf 'PDF version'"
               " -r -d --revkeys -nofooter --nodoc"
               " --header %s -nokeywords %s >> /dev/null 2>> /dev/null"
               % (bibstyle, '"' + header + '"', author + '.bib'))
        os.system(cmd)

        # 3. load page content and delete intermediate HTML file
        sections.append(_read_text(author + '.html'))
        os.remove(author + '.html')

    ## make a few corrections to the labels produced by bibtex2html
    body = ''.join(sections)
    body = body.replace('bib', 'Bibtex entry')
    # ... but restore the links to the *_bib.html pages renamed just above
    body = body.replace('Bibtex entry.html', 'bib.html')
    body = body.replace('DOI', 'Journal website')
    body = body.replace('<table>', '<table border="0" cellspacing="15">')
    body = body.replace(
        '<td align="right">',
        '<td align="center" width=17% style="font-size: 75%;">')

    body += '''<hr><p>Generated with
    <a href='https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html'>BibTeX2HTML</a>
    and <a href='http://adsabs.harvard.edu/'>NASA ADS</a>
    and a bit of <a href='http://www.python.org/'>Python</a></p>'''
    if embedded:
        body += _read_text('footer.html')

    ### TREAT HEADER PART DEPENDENT ON PREVIOUS LOOP
    ### -- total publications + list of years
    index = ""
    if by_year:
        if not nonzeroyears:
            index = "No publications found."
        else:
            index = "Year: "
            index += "".join("<a href='#%s'>%s</a>.  " % (year, year)
                             for year in nonzeroyears)
            if addlink is not None:
                index += "<br>" + addlink
            if printnum:
                index += "<p>(Total: " + printpubli(numpublitot) + ")</p>"

    ### PUT EVERYTHING TOGETHER AND CREATE A PAGE
    _write_text(authorref + '.html', page_top + index + body)

    ## move results to target directory and remove txt files
    if target is not None:
        target = target + "/"
        arg = (target,
               authorref + "*.html",
               target,
               authorref + "*.bib",
               authorref + "*.txt",
               target + linkads + ".bib",
               "*.css",
               target)
        # the shell is needed here: the patterns above rely on its globbing
        os.system("mkdir -p %s ; mv %s %s ; rm -rf %s %s ; mv %s ./ 2> /dev/null ; cp %s %s 2> /dev/null" % arg)
Note: See TracBrowser for help on using the repository browser.