Context Navigation

source: trunk/UTIL/PYTHON/bibweb/ads.py @ 1205

Last change on this file since 1205 was 1191, checked in by aslmd, 11 years ago
BIBWEB : option to remove months. bug fix for accents.
Property svn:executable set to ``*
File size: 8.3 KB

Rev	Line
[1179]	1	#-- coding: utf8 --
[967]	2	import os, re, urllib
	3
## -----------------------------------------------------------------
## Purpose: make a nice publication page with an ADS database link
## Author: Aymeric Spiga 19/05/2013 improvements 10-12/2013 counts 02/2014
## -----------------------------------------------------------------
## NB: uses BIBTEX2HTML https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html
## ... and of course NASA ADS http://adsabs.harvard.edu/
## -----------------------------------------------------------------
	11
def printpubli(num):
    """Return a human-readable publication count, e.g. "3 publications".

    The noun is singular only when ``num == 1``; every other value
    (including 0) gets the plural form.  The number is rendered with
    ``%.0f``, i.e. without decimals.
    """
    noun = "publication" if num == 1 else "publications"
    return "%.0f %s" % (num, noun)
	16
# Matches a BibTeX "month = ..." field line, and only that field (a plain
# substring test would also drop e.g. a title containing "monthly").
_MONTH_FIELD = re.compile(r'\s*month\s*=', re.IGNORECASE)


def _fetch_bibtex(linkads, verbose):
    """Download the bibliography whose URL is stored in file *linkads*.

    The result is written to ``linkads + '.bib'`` after converting the
    LaTeX sequences for e-acute / e-grave into the accented characters.
    """
    # Local import so the module keeps working on both Python 2 and 3.
    try:
        from urllib.request import urlopen  # Python 3
    except ImportError:
        from urllib import urlopen  # Python 2

    if verbose:
        print("retrieving info from ADS")
    with open(linkads, 'r') as linkfile:
        url = linkfile.read()
    response = urlopen(url.strip())
    try:
        html = response.read()
    finally:
        response.close()
    if not isinstance(html, str):
        # Python 3 returns bytes; the text replacements below need str.
        html = html.decode('utf-8', 'replace')
    ## fix problem with accents
    html = html.replace("{\\'e}", 'é')
    html = html.replace("{\\`e}", 'è')
    with open(linkads + '.bib', 'w') as bibfile:
        bibfile.write(html + "\n")


def _drop_month_fields(bibpath):
    """Rewrite *bibpath* without its ``month = ...`` field lines."""
    with open(bibpath, 'r') as bibfile:
        existing = bibfile.readlines()
    with open(bibpath, 'w') as bibfile:
        bibfile.writelines(line for line in existing
                           if not _MONTH_FIELD.match(line))


def _add_pdf_links(bibpath, addpdf):
    """Add a ``localpdf`` field after every ``adsurl`` line of *bibpath*.

    The PDF is expected to be named after the ADS reference and to live in
    the online repository *addpdf*: the new line is the adsurl line with the
    ADS prefix replaced by *addpdf*, ``.pdf`` appended and ``%`` turned
    into ``_``.
    """
    augmented = []
    with open(bibpath, 'r') as bibfile:
        for line in bibfile:
            augmented.append(line)
            if 'adsurl' in line:
                pdfline = line.replace('adsurl', 'localpdf')
                pdfline = pdfline.replace('http://cdsads.u-strasbg.fr/abs/', addpdf)
                pdfline = pdfline.replace('},', '.pdf},')
                pdfline = pdfline.replace('%', '_')
                augmented.append(pdfline)
    with open(bibpath, 'w') as bibfile:
        bibfile.write(''.join(augmented) + "\n")


def _polish_html(htmlcontent):
    """Apply the cosmetic text substitutions to the bibtex2html output.

    Order matters: 'bib' is first expanded everywhere, then the link
    targets ending in 'bib.html' are restored.
    """
    substitutions = (
        ('bib', 'Bibtex entry'),
        ('Bibtex entry.html', 'bib.html'),
        ('DOI', 'Journal website'),
        # NOTE(review): the captured listing replaced each accented letter
        # by itself (a no-op); HTML entities are presumably what was meant
        # -- confirm against the repository copy.
        ('é', '&eacute;'),
        ('è', '&egrave;'),
        ('<table>', '<table border="0" cellspacing="15">'),
        ('<td align="right">',
         '<td align="center" width=17% style="font-size: 75%;">'),
    )
    for old, new in substitutions:
        htmlcontent = htmlcontent.replace(old, new)
    return htmlcontent


def makepage(authorref,
             bibstyle="-s custom -nokeys",
             listyear=None,
             customcond=None,
             embedded=False,
             linkads=None,
             title=None,
             retrieve=True,
             addpdf=None,
             addlink=None,
             printnum=False,
             verbose=True,
             includemonth=False,
             target=None):
    """Build ``<authorref>.html``, a publication page made with bib2bib/bibtex2html.

    Parameters
    ----------
    authorref : prefix of every generated file and part of the default title.
    bibstyle : option string passed verbatim to ``bibtex2html``.
    listyear : list of years, one section per year.  With a single year and
        no ``customcond``, one section is produced for all entries with
        ``year >=`` that value.  ``None`` (default) behaves like ``[0]``.
        The list passed by the caller is never modified.
    customcond : extra ``bib2bib`` condition option(s), e.g. ``"-c '...'"``.
    embedded : wrap the page in ``header.html`` / ``footer.html`` read from
        the current directory.
    linkads : file containing the URL to download; defaults to
        ``authorref + '.link'``.  The bibliography is stored in
        ``linkads + '.bib'``.
    title : HTML placed at the top; ``None`` gives a default ``<h2>`` title,
        ``""`` gives no title.
    retrieve : download the bibliography; otherwise the existing
        ``linkads + '.bib'`` is reused as is.
    addpdf : online repository prefix used to add a ``localpdf`` field next
        to each ``adsurl`` field (only when ``retrieve`` is true).
    addlink : extra HTML appended after the list of years.
    printnum : show publication counts (per section and total).
    verbose : print progress messages.
    includemonth : keep the ``month`` fields of the downloaded entries.
    target : directory receiving the generated HTML files.

    Raises IOError/OSError when an expected file (link file, header/footer,
    bib2bib or bibtex2html output) cannot be opened.
    """
    # Work on a private copy: the single-year mode below overwrites an item.
    years = [0] if listyear is None else list(listyear)

    ### HEADER
    htmlcontent1 = ""
    if embedded:
        with open('header.html', 'r') as htmlfile:
            htmlcontent1 = htmlfile.read()
    if title is None:
        htmlcontent1 += "<h2>" + authorref + "'s publications</h2>"
    elif title != "":
        htmlcontent1 += title

    ### if linkads is None, we set it to "authorref.link"
    if linkads is None:
        linkads = authorref + '.link'
    bibpath = linkads + '.bib'

    ### GET INFO FROM ADS
    if retrieve:
        _fetch_bibtex(linkads, verbose)
        if not includemonth:
            _drop_month_fields(bibpath)
        # Done once, before any per-year extraction, so that every year gets
        # its PDF links and no entry receives the field more than once.
        if addpdf is not None:
            _add_pdf_links(bibpath, addpdf)

    ### if only one year and no customcond, make it useful. ask for years >= this value
    if len(years) == 1 and customcond is None:
        customcond = "-c 'year>=%s'" % (years[0],)
        years[0] = 99  # label used for the intermediate files of that single section
    multiyear = len(years) > 1
    by_year = customcond is None or multiyear

    ### YEAR LOOP
    numpublitot = 0
    nonzeroyears = []
    htmlcontent = ""
    for year in years:
        author = authorref + str(year)

        # 0. define condition
        # if not user-defined, make it simply year in each listyear instance
        # if user-defined, then customcond will be the condition (possibly several)
        if customcond is None and multiyear:
            cond = "-c 'year=%s'" % (year,)
        elif multiyear:
            cond = customcond + " -c 'year=%s'" % (year,)
        else:
            cond = customcond

        # 1. select items ARTICLE in the big bib file
        #    put those in a dedicated author.bib file
        arg = (cond, '"ARTICLE"', author + '.txt', author + '.bib', bibpath)
        cmd = "bib2bib --quiet %s -c '$type=%s' -oc %s -ob %s %s >> /dev/null 2>> /dev/null" % arg
        os.system(cmd)

        # count number of publications (both per year and increment total)
        with open(author + '.txt', 'r') as keyfile:
            numpubli = sum(1 for _ in keyfile)
        if verbose:
            print("%s --> count: %.0f" % (author, numpubli))
        numpublitot += numpubli
        # record years with publications
        if numpubli == 0:
            continue
        nonzeroyears.append(year)

        # 2. make the html page from the author.bib file
        if by_year:
            header = '<a name="%.0f"></a>' % (year)
            header += "<h3>%.0f <a href=''>.</a> </h3>" % (year)
            if printnum:
                header += "(" + printpubli(numpubli) + ")"
            if embedded:
                header += '<br>'
        else:
            header = ''
            if printnum:
                header += "(" + printpubli(numpubli) + ")"
        header = '"' + header + '"'
        arg = (bibstyle, header, author + '.bib')
        cmd = ("bibtex2html -q --both -m ads.tex %s "
               "-nf adsurl 'ADS link' -nf localpdf 'PDF version' "
               "-r -d --revkeys -nofooter --nodoc "
               "--header %s -nokeywords "
               "%s >> /dev/null 2>> /dev/null") % arg
        os.system(cmd)

        # 3. load page content and delete intermediate HTML file
        with open(author + '.html', 'r') as htmlfile:
            htmlcontent += htmlfile.read()
        os.remove(author + '.html')

    ## make a few corrections (each substitution is idempotent, so applying
    ## them once on the accumulated content is enough)
    htmlcontent = _polish_html(htmlcontent)

    htmlcontent += '''<hr><p>Generated with
<a href='https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html'>BibTeX2HTML</a>
and <a href='http://adsabs.harvard.edu/'>NASA ADS</a>
and a bit of <a href='http://www.python.org/'>Python</a></p>'''
    if embedded:
        with open('footer.html', 'r') as htmlfile:
            htmlcontent += htmlfile.read()

    ### TREAT HEADER PART DEPENDENT ON PREVIOUS LOOP
    ### -- total publications + list of years
    htmlcontent2 = ""
    if by_year:
        if not nonzeroyears:
            htmlcontent2 += "No publications found."
        else:
            htmlcontent2 += "Year: "
            for year in nonzeroyears:
                htmlcontent2 += "<a href='#" + str(year) + "'>" + str(year) + "</a>. "
        # NOTE(review): the nesting of the two lines below could not be
        # recovered from the flattened listing; they are assumed to belong
        # to the per-year header -- confirm.
        if addlink is not None:
            htmlcontent2 += "<br>" + addlink
        if printnum:
            htmlcontent2 += "<p>(Total: " + printpubli(numpublitot) + ")</p>"

    ### PUT EVERYTHING TOGETHER AND CREATE A PAGE
    with open(authorref + '.html', 'w') as htmlmain:
        htmlmain.write(htmlcontent1 + htmlcontent2 + htmlcontent + "\n")

    ## move results to target directory and remove txt files
    if target is not None:
        target = target + "/"
        arg = (target,
               authorref + "*.html",
               target,
               authorref + "*.bib",
               authorref + "*.txt",
               target + linkads + ".bib",
               "*.css",
               target)
        # NOTE(review): "rm -rf authorref*.bib" also matches the default
        # linkads + '.bib', and the following mv looks for it in target
        # where nothing put it -- command kept unchanged, intent to confirm.
        os.system("mkdir -p %s ; mv %s %s ; rm -rf %s %s ; mv %s ./ 2> /dev/null ; cp %s %s 2> /dev/null" % arg)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: