Context Navigation

source: trunk/UTIL/PYTHON/bibweb/ads.py @ 1288

Last change on this file since 1288 was 1191, checked in by aslmd, 12 years ago
BIBWEB : option to remove months. bug fix for accents.
Property svn:executable set to ``*
File size: 8.3 KB

Line
1	#-- coding: utf8 --
2	import os, re, urllib
3
4	## -----------------------------------------------------------------
5	## Purpose: make a nice publication page with an ADS database link
6	## Author: Aymeric Spiga 19/05/2013 improvements 10-12/2013 counts 02/2014
7	## -----------------------------------------------------------------
8	## NB: uses BIBTEX2HTML https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html
9	## ... and of course NASA ADS http://adsabs.harvard.edu/
10	## -----------------------------------------------------------------
11
def printpubli(num):
    """Return a human-readable publication count, e.g. "3 publications".

    The singular form "publication" is used only when ``num`` equals 1;
    the number itself is rendered without decimals.
    """
    template = "%.0f publication" if num == 1 else "%.0f publications"
    return template % num
16
# Matches a BibTeX "month = ..." field line (and nothing else that merely
# happens to contain the word "month", such as a title).
_MONTH_FIELD = re.compile(r'\s*month\s*=', re.IGNORECASE)


def _download_bibtex(linkads, includemonth):
    """Fetch the BibTeX export whose URL is stored in file `linkads`.

    The result is written to `linkads + '.bib'`. LaTeX-escaped e-acute and
    e-grave are turned into plain accented characters, and `month` fields
    are dropped unless `includemonth` is true.
    """
    with open(linkads, 'r') as linkfile:
        url = linkfile.read()
    # NOTE: urllib.urlopen is the Python 2 API, like the rest of this module.
    html = urllib.urlopen(url).read()
    ## fix problem with accents
    html = html.replace("{\\'e}", 'é')
    html = html.replace("{\\`e}", 'è')
    bibtext = html + "\n"
    ## includemonth or not
    if not includemonth:
        bibtext = "".join(line for line in bibtext.splitlines(True)
                          if not _MONTH_FIELD.match(line))
    with open(linkads + '.bib', 'w') as bibfile:
        bibfile.write(bibtext)


def _insert_pdf_links(bibpath, addpdf):
    """Add a `localpdf` field after each `adsurl` field of file `bibpath`.

    The trick is to reuse the adsurl line and expect the PDF to have the same
    name as the ADS reference ('%' replaced by '_', '.pdf' appended), stored
    in the online repository indicated by `addpdf`.
    """
    pieces = []
    with open(bibpath, 'r') as bibfile:
        for line in bibfile:
            pieces.append(line)
            if 'adsurl' in line:
                pdfline = line.replace('adsurl', 'localpdf')
                pdfline = pdfline.replace('http://cdsads.u-strasbg.fr/abs/', addpdf)
                pdfline = pdfline.replace('},', '.pdf},')
                pdfline = pdfline.replace('%', '_')
                pieces.append(pdfline)
    with open(bibpath, 'w') as bibfile:
        bibfile.write("".join(pieces))


def _polish_html(htmlcontent):
    """Apply cosmetic text substitutions to the HTML made by bibtex2html."""
    # rename the "bib" links, then restore the file names broken by the rename
    htmlcontent = htmlcontent.replace('bib', 'Bibtex entry')
    htmlcontent = htmlcontent.replace('Bibtex entry.html', 'bib.html')
    htmlcontent = htmlcontent.replace('DOI', 'Journal website')
    ## fix problem with accents: use HTML entities so that the page does not
    ## depend on the charset it is served with
    htmlcontent = htmlcontent.replace('é', '&eacute;')
    htmlcontent = htmlcontent.replace('è', '&egrave;')
    htmlcontent = htmlcontent.replace(
        '<table>', '<table border="0" cellspacing="15">')
    htmlcontent = htmlcontent.replace(
        '<td align="right">',
        '<td align="center" width=17% style="font-size: 75%;">')
    return htmlcontent


def makepage(authorref,
             bibstyle="-s custom -nokeys",
             listyear=None,
             customcond=None,
             embedded=False,
             linkads=None,
             title=None,
             retrieve=True,
             addpdf=None,
             addlink=None,
             printnum=False,
             verbose=True,
             includemonth=False,
             target=None):
    """Build the publication page `authorref + '.html'` from an ADS query.

    Relies on the external commands bib2bib and bibtex2html, run through the
    shell, and on intermediate files created in the current directory.

    Parameters:
      authorref    -- prefix of every generated file; also used in the
                      default page heading.
      bibstyle     -- options handed over to bibtex2html.
      listyear     -- list of years, one page section per year. With a single
                      year and no `customcond`, that year is a lower bound
                      (year>=value). Defaults to [0]. The list passed by the
                      caller is never modified.
      customcond   -- bib2bib condition option(s), e.g. "-c '...'".
      embedded     -- if true, wrap the page with the contents of
                      'header.html' and 'footer.html'.
      linkads      -- file holding the ADS query URL (default:
                      authorref + '.link'); the bibliography is kept in
                      linkads + '.bib'.
      title        -- HTML put at the top of the page; None gives a default
                      heading, "" gives no heading.
      retrieve     -- if true, download the bibliography from ADS; otherwise
                      reuse the existing linkads + '.bib'.
      addpdf       -- URL prefix of a PDF repository; when given (and
                      `retrieve` is true) a 'localpdf' field is added next to
                      each 'adsurl' field.
      addlink      -- extra HTML appended after the list of years.
      printnum     -- if true, print publication counts.
      verbose      -- if true, print progress messages.
      includemonth -- if false, `month` fields are removed from the
                      downloaded bibliography.
      target       -- directory receiving the results (created if needed).
    """
    # Work on a private copy: the single-year case below overwrites the first
    # element, which must affect neither the caller's list nor a shared default.
    listyear = [0] if listyear is None else list(listyear)

    htmlcontent1 = ""
    htmlcontent2 = ""
    htmlcontent = ""

    ### HEADER
    if embedded:
        with open('header.html', 'r') as htmlfile:
            htmlcontent1 = htmlfile.read()
    if title is None:
        htmlcontent1 = htmlcontent1 + "<h2>" + authorref + "'s publications</h2>"
    elif title != "":
        htmlcontent1 = htmlcontent1 + title

    ### if linkads is None, we set it to authorref + ".link"
    if linkads is None:
        linkads = authorref + '.link'

    ### GET INFO FROM ADS
    if retrieve:
        if verbose:
            print("retrieving info from ADS")
        _download_bibtex(linkads, includemonth)
        # Insert the PDF links once, BEFORE any per-year extraction, so that
        # every year gets them and none gets them twice.
        if addpdf is not None:
            _insert_pdf_links(linkads + '.bib', addpdf)

    ### if only one year and no customcond, make it useful. ask for years >= this value
    if len(listyear) == 1 and customcond is None:
        customcond = "-c 'year>=%s'" % (listyear[0])
        listyear[0] = 99

    multiyear = len(listyear) > 1
    # true when the page is organised in one section per year
    peryear = customcond is None or multiyear

    ### YEAR LOOP
    numpublitot = 0
    nonzeroyears = []
    for year in listyear:

        author = authorref + str(year)

        # 0. define condition
        #    if not user-defined, make it simply year in each listyear instance
        #    if user-defined, then customcond will be the condition (possibly several)
        if customcond is None and multiyear:
            cond = "-c 'year=%s'" % (year)
        elif multiyear:
            cond = customcond + " -c 'year=%s'" % (year)
        else:
            cond = customcond

        # 1. select items ARTICLE in the big bib file
        #    put those in a dedicated author.bib file
        arg = (cond,
               '"ARTICLE"',
               author + '.txt',
               author + '.bib',
               linkads + '.bib')
        cmd = "bib2bib --quiet %s -c '$type=%s' -oc %s -ob %s %s >> /dev/null 2>> /dev/null" % (arg)
        os.system(cmd)

        # count number of publications (both per year and increment total)
        with open(author + '.txt', 'r') as keyfile:
            numpubli = len(keyfile.readlines())
        if verbose:
            print("%s --> count: %.0f" % (author, numpubli))
        numpublitot += numpubli
        # record years with publications
        if numpubli == 0:
            continue
        nonzeroyears.append(year)

        # 2. make the html page from the author.bib file
        if peryear:
            header = '<a name="%.0f"></a>' % (year)
            header += "<h3>%.0f <a href=''>.</a> </h3>" % (year)
            if printnum:
                header += "(" + printpubli(numpubli) + ")"
            if embedded:
                header += '<br>'
        else:
            header = ''
            if printnum:
                header += "(" + printpubli(numpubli) + ")"

        header = '"' + header + '"'
        arg = (bibstyle,
               header,
               author + '.bib')
        cmd = ("bibtex2html -q "
               "--both "
               "-m ads.tex "
               "%s "
               "-nf adsurl 'ADS link' "
               "-nf localpdf 'PDF version' "
               "-r -d --revkeys "
               "-nofooter --nodoc "
               "--header %s -nokeywords "
               "%s >> /dev/null 2>> /dev/null") % (arg)
        os.system(cmd)

        # 3. load page content and delete intermediate HTML file
        with open(author + '.html', 'r') as htmlfile:
            htmlcontent = htmlcontent + htmlfile.read()
        os.remove(author + '.html')

    ## make a few corrections
    ## (must happen before the footer is appended: its text contains "bib")
    htmlcontent = _polish_html(htmlcontent)

    htmlcontent += ("<hr><p>Generated with\n"
                    "<a href='https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html'>BibTeX2HTML</a>\n"
                    "and <a href='http://adsabs.harvard.edu/'>NASA ADS</a>\n"
                    "and a bit of <a href='http://www.python.org/'>Python</a></p>")
    if embedded:
        with open('footer.html', 'r') as htmlfile:
            htmlcontent += htmlfile.read()

    ### TREAT HEADER PART DEPENDENT ON PREVIOUS LOOP
    ### -- total publications + list of years
    if peryear:
        if len(nonzeroyears) == 0:
            htmlcontent2 += "No publications found."
        else:
            htmlcontent2 += "Year: "
            for year in nonzeroyears:
                htmlcontent2 += "<a href='#" + str(year) + "'>" + str(year) + "</a>. "
        # NOTE(review): the nesting of the two lines below was reconstructed
        # from a listing without indentation -- confirm they belong to the
        # per-year case only.
        if addlink is not None:
            htmlcontent2 += "<br>" + addlink
        if printnum:
            htmlcontent2 += "<p>(Total: " + printpubli(numpublitot) + ")</p>"

    ### PUT EVERYTHING TOGETHER AND CREATE A PAGE
    htmlcontent = htmlcontent1 + htmlcontent2 + htmlcontent
    with open(authorref + '.html', 'w') as htmlmain:
        htmlmain.write(htmlcontent + "\n")

    ## move results to target directory and remove txt files
    if target is not None:
        target = target + "/"
        arg = (target,
               authorref + "*.html",
               target,
               authorref + "*.bib",
               authorref + "*.txt",
               target + linkads + ".bib",
               "*.css",
               target)
        os.system("mkdir -p %s ; mv %s %s ; rm -rf %s %s ; mv %s ./ 2> /dev/null ; cp %s %s 2> /dev/null" % (arg))

Note: See TracBrowser for help on using the repository browser.

Download in other formats: