source: trunk/UTIL/PYTHON/bibweb/ads.py @ 1205

Last change on this file since 1205 was 1191, checked in by aslmd, 11 years ago

BIBWEB : option to remove months. bug fix for accents.

  • Property svn:executable set to *
File size: 8.3 KB
Line 
1#-*- coding: utf8 -*-
2import os, re, urllib
3
4## -----------------------------------------------------------------
5## Purpose: make a nice publication page with an ADS database link
6## Author: Aymeric Spiga 19/05/2013 improvements 10-12/2013 counts 02/2014
7## -----------------------------------------------------------------
8## NB: uses BIBTEX2HTML https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html
9## ... and of course NASA ADS http://adsabs.harvard.edu/
10## -----------------------------------------------------------------
11
def printpubli(num):
    """Return a human-readable publication count.

    The number is rendered without decimals (``%.0f``) and followed by
    "publication" when ``num == 1`` and by "publications" otherwise.
    """
    noun = "publication" if num == 1 else "publications"
    return "%.0f %s" % (num, noun)
16
def _slurp(path):
    """Return the whole content of the text file `path` (file is closed)."""
    with open(path, 'r') as handle:
        return handle.read()


def _count_lines(path):
    """Return the number of lines in the text file `path`."""
    with open(path, 'r') as handle:
        return sum(1 for _ in handle)


def _without_month_fields(lines):
    """Return `lines` minus the BibTeX ``month = ...`` field lines.

    Only lines that start (after optional blanks) with ``month =`` are
    dropped, so a title that merely contains the word "month" is kept.
    """
    return [line for line in lines if not re.match(r"\s*month\s*=", line)]


def _with_pdf_links(lines, addpdf):
    """Return `lines` with a ``localpdf`` line added after each ``adsurl`` line.

    The new line is derived from the ``adsurl`` one: the field is renamed,
    the ADS abstract URL prefix is replaced by `addpdf`, ``.pdf`` is appended
    to the value and every ``%`` becomes ``_``.
    """
    result = []
    for line in lines:
        result.append(line)
        if 'adsurl' in line:
            pdfline = line.replace('adsurl', 'localpdf')
            pdfline = pdfline.replace('http://cdsads.u-strasbg.fr/abs/', addpdf)
            pdfline = pdfline.replace('},', '.pdf},')
            pdfline = pdfline.replace('%', '_')
            result.append(pdfline)
    return result


def makepage(authorref,
             bibstyle="-s custom -nokeys",
             listyear=None,
             customcond=None,
             embedded=False,
             linkads=None,
             title=None,
             retrieve=True,
             addpdf=None,
             addlink=None,
             printnum=False,
             verbose=True,
             includemonth=False,
             target=None):
    """Build the HTML publication page ``<authorref>.html``.

    The page is assembled from a BibTeX file (``<linkads>.bib``) with the
    external commands ``bib2bib`` and ``bibtex2html``, run through the shell.

    Parameters
    ----------
    authorref : str
        Prefix of every generated file (``<authorref>.html``,
        ``<authorref><year>.bib`` / ``.txt`` / ``.html``); also used in the
        default title and the default `linkads`.
    bibstyle : str
        Options inserted verbatim in the ``bibtex2html`` command line.
    listyear : list or None
        Years to process. ``None`` means ``[0]``. With a single year and no
        `customcond`, entries with ``year >= listyear[0]`` are selected and
        rendered as one list without year sections. Otherwise there is one
        section per year (``year = value``). The list is never modified.
    customcond : str or None
        ``bib2bib`` condition option(s), e.g. ``"-c '...'"``. With several
        years it is combined with the per-year condition.
    embedded : bool
        If true, ``header.html`` is put before the page, ``footer.html``
        after it (both read from the current directory) and a ``<br>``
        follows each year heading.
    linkads : str or None
        Path of the text file holding the ADS query URL. Defaults to
        ``authorref + '.link'``. The BibTeX file is ``linkads + '.bib'``.
    title : str or None
        HTML put at the top. ``None`` gives a default ``<h2>`` heading,
        ``""`` gives no title.
    retrieve : bool
        If true, download the BibTeX from the URL and (re)write
        ``<linkads>.bib``; otherwise the existing file is used as is.
    addpdf : str or None
        Base URL of a PDF repository. When retrieving, a ``localpdf`` field
        is added next to each ``adsurl`` field.
    addlink : str or None
        HTML appended after the year index (only when that index is shown).
    printnum : bool
        If true, show publication counts (per section and total).
    verbose : bool
        If true, print progress messages on standard output.
    includemonth : bool
        If false, ``month`` fields are removed from the retrieved BibTeX.
    target : str or None
        If set, directory to which the results are moved at the end.
    """
    # Work on a private copy: the single-year rule below overwrites element
    # 0, which must neither leak into later calls nor alter the caller's list.
    years = [0] if listyear is None else list(listyear)

    ### HEADER
    head = ""
    if embedded:
        head = _slurp('header.html')
    if title is None:
        head = head + "<h2>" + authorref + "'s publications</h2>"
    elif title != "":
        head = head + title

    ### if linkads is None, we set it to "<authorref>.link"
    if linkads is None:
        linkads = authorref + '.link'
    bibpath = linkads + '.bib'

    ### GET INFO FROM ADS
    if retrieve:
        if verbose:
            print("retrieving info from ADS")
        url = _slurp(linkads)
        # NOTE: urllib.urlopen is the Python 2 API.
        html = urllib.urlopen(url).read()
        ## fix problem with accents
        html = html.replace("{\\'e}", 'é').replace("{\\`e}", 'è')
        # one list item per line; joined back this is html + "\n"
        biblines = [piece + "\n" for piece in html.split("\n")]
        ## includemonth or not
        if not includemonth:
            biblines = _without_month_fields(biblines)
        # Insert the pdf links once, BEFORE any per-year extraction, so that
        # every year gets them and no entry gets duplicated fields.
        # The trick is to use the adsurl line and expect the pdf to have the
        # same name as the ADS reference; addpdf is the online repository.
        if addpdf is not None:
            biblines = _with_pdf_links(biblines, addpdf)
        with open(bibpath, 'w') as bibfile:
            bibfile.writelines(biblines)

    ### if only one year and no customcond, make it useful:
    ### ask for years >= this value (99 then only serves as a file suffix)
    if len(years) == 1 and customcond is None:
        customcond = "-c 'year>=%s'" % (years[0])
        years[0] = 99
    several = len(years) > 1
    sectioned = customcond is None or several

    ### YEAR LOOP
    numpublitot = 0
    nonzeroyears = []
    sections = []
    for year in years:

        author = authorref + str(year)

        # 0. define condition
        #    several years: "year=<year>", appended to customcond if any
        #    otherwise: customcond alone (always set at this point)
        if several:
            cond = "-c 'year=%s'" % (year)
            if customcond is not None:
                cond = customcond + " " + cond
        else:
            cond = customcond

        # 1. select items ARTICLE in the big bib file
        #    put those in a dedicated author.bib file (keys in author.txt)
        cmd = "bib2bib --quiet %s -c '$type=%s' -oc %s -ob %s %s >> /dev/null 2>> /dev/null" % (
            cond, '"ARTICLE"', author + '.txt', author + '.bib', bibpath)
        os.system(cmd)

        # count number of publications (both per year and increment total)
        numpubli = _count_lines(author + '.txt')
        if verbose:
            print("%s --> count: %.0f" % (author, numpubli))
        numpublitot += numpubli
        # record years with publications, skip the others
        if numpubli == 0:
            continue
        nonzeroyears.append(year)

        # 2. make the html page from the author.bib file
        if sectioned:
            header = '<a name="%.0f"></a>' % (year)
            header += "<h3>%.0f <a href=''>.</a> </h3>" % (year)
            if printnum:
                header += "(" + printpubli(numpubli) + ")"
            if embedded:
                header += '<br>'
        else:
            header = ''
            if printnum:
                header += "(" + printpubli(numpubli) + ")"
        # NOTE: the header is only wrapped in double quotes for the shell;
        # double quotes inside it are not escaped.
        header = '"' + header + '"'
        cmd = ("bibtex2html -q --both -m ads.tex %s"
               " -nf adsurl 'ADS link' -nf localpdf 'PDF version'"
               " -r -d --revkeys -nofooter --nodoc"
               " --header %s -nokeywords %s >> /dev/null 2>> /dev/null"
               % (bibstyle, header, author + '.bib'))
        os.system(cmd)

        # 3. load page content and delete intermediate HTML file
        sections.append(_slurp(author + '.html'))
        os.remove(author + '.html')

    ## make a few corrections
    body = "".join(sections)
    body = body.replace('bib', 'Bibtex entry')
    # undo the previous replacement inside links to the *_bib.html pages
    # ('.' is a regex wildcard here, as in the historical pattern)
    body = re.sub(r'Bibtex entry.html', 'bib.html', body)
    body = body.replace('DOI', 'Journal website')

    ## fix problem with accents
    body = body.replace('é', '&eacute;')
    body = body.replace('è', '&egrave;')

    body = body.replace('<table>', '<table border="0" cellspacing="15">')
    body = body.replace('<td align="right">',
                        '<td align="center" width=17% style="font-size: 75%;">')

    body += '''<hr><p>Generated with
    <a href='https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html'>BibTeX2HTML</a>
    and <a href='http://adsabs.harvard.edu/'>NASA ADS</a>
    and a bit of <a href='http://www.python.org/'>Python</a></p>'''
    if embedded:
        body += _slurp('footer.html')

    ### TREAT HEADER PART DEPENDENT ON PREVIOUS LOOP
    ### -- total publications + list of years
    index = ""
    if sectioned:
        if not nonzeroyears:
            index += "No publications found."
        else:
            index += "Year: "
            for year in nonzeroyears:
                index += "<a href='#" + str(year) + "'>" + str(year) + "</a>.  "
            if addlink is not None:
                index += "<br>" + addlink
            if printnum:
                index += "<p>(Total: " + printpubli(numpublitot) + ")</p>"

    ### PUT EVERYTHING TOGETHER AND CREATE A PAGE
    with open(authorref + '.html', 'w') as htmlmain:
        htmlmain.write(head + index + body + "\n")

    ## move results to target directory and remove txt files
    if target is not None:
        target = target + "/"
        arg = (target,
               authorref + "*.html",
               target,
               authorref + "*.bib",
               authorref + "*.txt",
               target + linkads + ".bib",
               "*.css",
               target)
        os.system("mkdir -p %s ; mv %s %s ; rm -rf %s %s ; mv %s ./ 2> /dev/null ; cp %s %s 2> /dev/null" % (arg))
Note: See TracBrowser for help on using the repository browser.