#-*- coding: utf8 -*-
import os, re, urllib
## -----------------------------------------------------------------
## Purpose: make a nice publication page with an ADS database link
## Author: Aymeric Spiga 19/05/2013 improvements 10-12/2013 counts 02/2014
## -----------------------------------------------------------------
## NB: uses BIBTEX2HTML https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html
## ... and of course NASA ADS http://adsabs.harvard.edu/
## -----------------------------------------------------------------
def printpubli(num):
    """Return a count label such as "1 publication" or "3 publications".

    The singular form is used only when num equals 1; every other value
    (including 0) gets the plural form. The number is rendered with the
    "%.0f" format, i.e. without decimals.
    """
    template = "%.0f publication" if num == 1 else "%.0f publications"
    return template % (num)
def makepage(authorref,
bibstyle = "-s custom -nokeys",
listyear = [0],
customcond = None,
embedded = False,
linkads = None,
title = None,
retrieve = True,
addpdf = None,
addlink = None,
printnum = False,
verbose = True,
includemonth = False,
target = None):
## Build <authorref>.html, a publication list generated from a BibTeX file
## with the external tools bib2bib and bibtex2html (both run through os.system).
## NOTE(review): this copy of the function has lost its indentation, and several
## string literals look truncated (HTML markup seems to be missing); the nesting
## described in the comments below is inferred -- confirm against a clean copy.
## Arguments:
##  authorref    -- prefix of every generated file (<authorref><year>.bib/.txt/.html
##                  and <authorref>.html); also used in the default title and in
##                  the default link file name
##  bibstyle     -- options inserted verbatim into the bibtex2html command line
##  listyear     -- years to loop over; a single value with no customcond is
##                  turned into the condition year>=value
##                  NOTE(review): listyear[0] is then overwritten with 99, which
##                  modifies the caller's list and the shared default [0]
##  customcond   -- bib2bib condition string(s) of the form "-c '...'", or None
##  embedded     -- if true, header.html is read first and footer.html appended last
##  linkads      -- file holding the URL to query; defaults to <authorref>.link
##                  (the BibTeX database is stored as <linkads>.bib)
##  title        -- None: default title built from authorref; "": no title;
##                  anything else is appended to the header as is
##  retrieve     -- if true, download the content of the URL into <linkads>.bib
##  addpdf       -- if not None, string substituted for the ADS abstract URL
##                  prefix to build a 'localpdf' field from each 'adsurl' line
##  addlink      -- if not None, text appended after the list of years
##  printnum     -- if true, add publication counts (per section and total)
##  verbose      -- if true, print progress messages
##  includemonth -- if false, drop every line containing "month" from <linkads>.bib
##  target       -- if not None, directory that receives the generated files
## No return statement is visible: all results are written to files.
## htmlcontent1 = header and title, htmlcontent2 = year index and total count,
## htmlcontent = per-year publication lists; they are concatenated at the end
htmlcontent1 = ""
htmlcontent2 = ""
htmlcontent = ""
### HEADER
if embedded:
htmlfile = open('header.html','r')
htmlcontent1 = htmlfile.read()
htmlfile.close()
#else:
## title: None -> default title built from authorref, "" -> nothing, else user title
## NOTE(review): the default-title literal below spans several lines in this copy;
## markup was presumably lost -- confirm
if title is None:
htmlcontent1 = htmlcontent1 + "
"+authorref+"'s publications
"
elif title == "":
pass
else:
htmlcontent1 = htmlcontent1 + title
### if linkads is None, we set it to authorref+".link"
if linkads is None:
linkads = authorref+'.link'
### GET INFO FROM ADS
## the link file contains the URL to fetch; the response is saved as <linkads>.bib
if retrieve:
if verbose: print "retrieving info from ADS"
linkfile = open(linkads,'r')
url = linkfile.read()
linkfile.close()
html = urllib.urlopen(url).read()
## fix problem with accents
## (LaTeX sequences {\'e} and {\`e} are replaced by the accented characters)
find = re.compile(r"{\\'e}")
html = find.sub('é',html)
find = re.compile(r"{\\`e}")
html = find.sub('è',html)
##
bibfile = open(linkads+'.bib','w')
print >> bibfile,html
bibfile.close()
## includemonth or not
## when not included, <linkads>.bib is rewritten without any line containing "month"
## NOTE(review): the file opened for reading here is never explicitly closed
if not includemonth:
existing = open(linkads+'.bib','r').readlines()
new = open(linkads+'.bib','w')
for lines in existing:
if "month" not in lines:
new.write(lines)
new.close()
### if only one year and no customcond, make it useful. ask for years >= this value
## the single entry is then replaced by 99, which is only used to name the files
## NOTE(review): this assignment mutates the list passed in (or the default list)
if len(listyear) == 1 and customcond is None:
customcond = "-c 'year>=%s'" % (listyear[0])
listyear[0] = 99
### YEAR LOOP
## numpublitot accumulates the total count; nonzeroyears lists years with entries
numpublitot = 0 ; nonzeroyears = []
for year in listyear:
## prefix of the per-year intermediate files (.txt, .bib, .html)
author = authorref+str(year)
# 0. define condition
# if not user-defined, make it simply year in each listyear instance
# if user-defined, then customcond will be the condition (possibly several)
if customcond is None and len(listyear) > 1: cond = "-c 'year=%s'" % (year)
elif len(listyear) > 1: cond = customcond + " -c 'year=%s'" % (year)
else: cond = customcond
# 1. select items ARTICLE in the big bib file
# put those in a dedicated author.bib file
# (placeholders, in order: condition, entry type, -oc file, -ob file, input bib file)
arg = \
cond,\
'"ARTICLE"',\
author+'.txt',\
author+'.bib',\
linkads+'.bib'
cmd = "bib2bib --quiet %s -c '$type=%s' -oc %s -ob %s %s >> /dev/null 2>> /dev/null" % (arg)
os.system(cmd)
# count number of publications (both per year and increment total)
# the count is the number of lines in the -oc output file
# (presumably one citation key per line -- see the bib2bib manual)
numpubli = len(open(author+'.txt', 'r').readlines())
if verbose: print "%s --> count: %.0f" % (author,numpubli)
numpublitot += numpubli
# record years with publications
if numpubli == 0: continue
else: nonzeroyears.append(year)
# modify the bib file to insert pdf links
# the trick is to use the line adsurl and expect pdf to have the same name as ADS reference
# ... then besides this, it is necessary to link pdfs or rename those
# ... the online repository is indicated by addpdf
# each 'adsurl' line is kept and followed by a copy turned into a 'localpdf' field:
# the ADS prefix becomes addpdf, '.pdf' is added before the closing '},' and '%' becomes '_'
# NOTE(review): if this section sits inside the year loop, <linkads>.bib is rewritten
# after author.bib was extracted and once more for every year -- confirm placement
if retrieve:
if addpdf is not None:
bibcontent = ''
for line in open(linkads+'.bib'):
bibcontent += line
if 'adsurl' in line:
line = line.replace('adsurl','localpdf')
line = line.replace('http://cdsads.u-strasbg.fr/abs/',addpdf)
line = line.replace('},','.pdf},')
line = line.replace('%','_')
bibcontent += line
bibfile2 = open('temp','w')
print >> bibfile2,bibcontent
bibfile2.close()
os.system('mv temp '+linkads+'.bib')
# 2. make the html page from the author.bib file
# a per-year header is built when several years are listed or no custom condition is given
# NOTE(review): as written, '' % (year) has no format specifier and would raise
# TypeError; the header literals below look truncated (markup lost) -- confirm
if customcond is None or len(listyear) > 1:
header = '' % (year)
header += "%.0f .
" % (year)
if printnum: header += "("+printpubli(numpubli)+")"
if embedded: header += '
'
else:
header = ''
if printnum: header += "("+printpubli(numpubli)+")"
# the header is wrapped in double quotes because it is passed on a shell command line
header = '"'+header+'"'
# (placeholders, in order: bibstyle options, --header value, input bib file)
arg = \
bibstyle,\
header,\
author+'.bib'
cmd = "bibtex2html -q \
--both \
-m ads.tex \
%s \
-nf adsurl 'ADS link' \
-nf localpdf 'PDF version' \
-r -d --revkeys \
-nofooter --nodoc \
--header %s -nokeywords \
%s >> /dev/null 2>> /dev/null" % (arg)
os.system(cmd)
# 3. load page content and delete intermediate HTML file
htmlfile = open(author+'.html','r')
htmlcontent = htmlcontent + htmlfile.read()
htmlfile.close()
os.system("rm -rf "+author+'.html')
## make a few corrections
## bibcontent = open(author+'.bib','r').read()
## bibcontent.replace('\grl','Yeah')
## every 'bib' becomes 'Bibtex entry'; the next substitution turns
## 'Bibtex entry.html' back into 'bib.html' so that file names stay valid
find = re.compile(r'bib')
htmlcontent = find.sub('Bibtex entry',htmlcontent)
find = re.compile(r'Bibtex entry.html')
htmlcontent = find.sub('bib.html',htmlcontent)
find = re.compile(r'DOI')
htmlcontent = find.sub('Journal website',htmlcontent)
## fix problem with accents
## NOTE(review): as written, both substitutions replace a character with itself;
## the replacements were presumably HTML entities -- confirm
find = re.compile(r'é')
htmlcontent = find.sub('é',htmlcontent)
find = re.compile(r'è')
htmlcontent = find.sub('è',htmlcontent)
#find = re.compile(r'.pdf')
#htmlcontent = find.sub('PDF version',htmlcontent)
## NOTE(review): both patterns below are empty in this copy; an empty pattern
## matches at every position, so the second substitution would insert ' | '
## between all characters -- the original patterns were presumably lost, confirm
find = re.compile(r'')
htmlcontent = find.sub('',htmlcontent)
find = re.compile(r'')
htmlcontent = find.sub(' | ',htmlcontent)
## credits appended after the publication lists
htmlcontent += '''
Generated with
BibTeX2HTML
and NASA ADS
and a bit of Python '''
if embedded:
htmlfile = open('footer.html','r')
htmlcontent += htmlfile.read()
htmlfile.close()
### TREAT HEADER PART DEPENDENT ON PREVIOUS LOOP
### -- total publications + list of years
## same condition as the one used for the per-year headers in the loop
if customcond is None or len(listyear) > 1:
if len(nonzeroyears) == 0:
htmlcontent2 += "No publications found."
else:
htmlcontent2 += "Year: "
## NOTE(review): the leading "" suggests link markup around the year was lost -- confirm
for year in nonzeroyears:
htmlcontent2 += ""+str(year)+". "
if addlink is not None: htmlcontent2 += " "+addlink
if printnum: htmlcontent2 += "(Total: "+printpubli(numpublitot)+") "
### PUT EVERYTHING TOGETHER AND CREATE A PAGE
## order: header/title, then year index and totals, then the per-year lists
htmlcontent = htmlcontent1 + htmlcontent2 + htmlcontent
htmlmain = open(authorref+'.html','w')
print >> htmlmain, htmlcontent
htmlmain.close()
## move results to target directory and remove txt files
## shell sequence: mkdir -p <target> ; mv <authorref>*.html <target> ;
## rm -rf <authorref>*.bib <authorref>*.txt ; mv <target><linkads>.bib ./ ;
## cp *.css <target>   (errors of the last two commands are discarded)
if target is not None:
target=target+"/"
arg = target,\
authorref+"*.html",\
target,\
authorref+"*.bib",\
authorref+"*.txt",\
target+linkads+".bib",\
"*.css",\
target
os.system( "mkdir -p %s ; mv %s %s ; rm -rf %s %s ; mv %s ./ 2> /dev/null ; cp %s %s 2> /dev/null" % (arg) )
|