Context Navigation

source: trunk/UTIL/PYTHON/bibweb/ads.py @ 1186

Last change on this file since 1186 was 1179, checked in by aslmd, 11 years ago
BIBWEB python. various improvements: big LMD script, teams, no year without pubs, pub counts, verbose mode.
Property svn:executable set to ``*
File size: 7.8 KB

Line
1	#-- coding: utf8 --
2	import os, re, urllib
3
4	## -----------------------------------------------------------------
5	## Purpose: make a nice publication page with an ADS database link
6	## Author: Aymeric Spiga 19/05/2013 improvements 10-12/2013 counts 02/2014
7	## -----------------------------------------------------------------
8	## NB: uses BIBTEX2HTML https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html
9	## ... and of course NASA ADS http://adsabs.harvard.edu/
10	## -----------------------------------------------------------------
11
def printpubli(num):
    """Return *num* formatted as a publication count.

    The number is rendered with ``%.0f`` (no decimals) and followed by
    "publication" when *num* equals 1, "publications" otherwise,
    e.g. ``printpubli(3) == "3 publications"``.
    """
    noun = "publication" if num == 1 else "publications"
    return "%.0f %s" % (num, noun)
16
def _read_text(path):
    """Return the whole content of the text file *path*."""
    with open(path, 'r') as handle:
        return handle.read()


def _write_text(path, text):
    """Write *text* followed by a newline to *path* (what ``print >> f, text`` did)."""
    with open(path, 'w') as handle:
        handle.write(text + "\n")


def _download_bibtex(linkads):
    """Fetch the URL stored in file *linkads* and save the reply as ``linkads + '.bib'``."""
    url = _read_text(linkads)
    html = urllib.urlopen(url).read()
    ## fix problem with accents
    html = html.replace("{\\'e}", 'é')
    html = html.replace("{\\`e}", 'è')
    _write_text(linkads + '.bib', html)


def _insert_pdf_links(bibpath, addpdf):
    """Add a ``localpdf`` line after every ``adsurl`` line of the bib file *bibpath*.

    The new line is the ``adsurl`` line with the ADS prefix replaced by
    *addpdf*, ``.pdf`` appended before the closing ``},`` and every ``%``
    turned into ``_``: the PDF is expected to be named after the ADS reference.
    """
    pieces = []
    with open(bibpath, 'r') as handle:
        for line in handle:
            pieces.append(line)
            if 'adsurl' in line:
                pdfline = line.replace('adsurl', 'localpdf')
                pdfline = pdfline.replace('http://cdsads.u-strasbg.fr/abs/', addpdf)
                pdfline = pdfline.replace('},', '.pdf},')
                pdfline = pdfline.replace('%', '_')
                pieces.append(pdfline)
    # write next to the original, then swap, so the bib file is never half-written
    _write_text(bibpath + '.tmp', ''.join(pieces))
    os.rename(bibpath + '.tmp', bibpath)


def _year_header(year, numpubli, per_year, printnum, embedded):
    """Build the HTML snippet handed to ``bibtex2html --header`` for one section."""
    count = "(" + printpubli(numpubli) + ")" if printnum else ""
    if not per_year:
        return count
    header = '<a name="%.0f"></a>' % (year)
    header += "<h3>%.0f <a href=''>.</a> </h3>" % (year)
    header += count
    if embedded:
        header += '<br>'
    return header


def makepage(authorref,
             bibstyle="-s custom -nokeys",
             listyear=None,
             customcond=None,
             embedded=False,
             linkads=None,
             title=None,
             retrieve=True,
             addpdf=None,
             addlink=None,
             printnum=False,
             verbose=True,
             target=None):
    """Build ``<authorref>.html``, a publication page made from an ADS BibTeX export.

    The external programs ``bib2bib`` and ``bibtex2html`` are run through the
    shell; ``bibtex2html`` is called with ``-m ads.tex``. All files are
    read and written relative to the current directory.

    Parameters
    ----------
    authorref : str
        Prefix of every generated file; also used in the default title.
    bibstyle : str
        Option string inserted verbatim in the ``bibtex2html`` command line.
    listyear : list, optional
        Years to list, one section per year. Defaults to ``[0]``. When it
        holds a single year and *customcond* is None, the page lists every
        entry with ``year >= listyear[0]`` in a single section instead.
        The list passed in is never modified.
    customcond : str, optional
        Extra ``bib2bib`` condition(s), e.g. ``"-c 'year>=2010'"``.
    embedded : bool
        If true, ``header.html`` is prepended and ``footer.html`` appended,
        and a ``<br>`` follows each year heading.
    linkads : str, optional
        Name of the file containing the ADS query URL (default
        ``authorref + '.link'``). The BibTeX data lives in ``linkads + '.bib'``.
    title : str, optional
        HTML placed at the top of the page. None gives a default ``<h2>``
        heading; an empty string gives no heading.
    retrieve : bool
        If true, download the BibTeX data; otherwise reuse the existing
        ``linkads + '.bib'``.
    addpdf : str, optional
        URL prefix of a PDF repository; when given (and *retrieve* is true),
        a ``localpdf`` field is added to each downloaded entry.
    addlink : str, optional
        HTML appended after the list of year links.
    printnum : bool
        If true, show publication counts (per section and total).
    verbose : bool
        If true, print progress information.
    target : str, optional
        Directory that receives the generated HTML files (created if needed).
    """
    # Work on a private copy: the original used a mutable default and wrote
    # into it, so state leaked between calls and into the caller's list.
    years = [0] if listyear is None else list(listyear)

    ### HEADER
    page_top = ""
    if embedded:
        page_top = _read_text('header.html')
    if title is None:
        page_top += "<h2>" + authorref + "'s publications</h2>"
    elif title != "":
        page_top += title

    ### if linkads is None, we set it to "authorref.link"
    if linkads is None:
        linkads = authorref + '.link'
    bigbib = linkads + '.bib'

    ### GET INFO FROM ADS
    if retrieve:
        if verbose:
            print("retrieving info from ADS")
        _download_bibtex(linkads)
        # NOTE(review): the PDF fields used to be inserted inside the year
        # loop, after bib2bib had already run for that year: the first listed
        # year got no PDF link and later years accumulated duplicate fields.
        # Doing it once here, before any selection, looks like the intent.
        if addpdf is not None:
            _insert_pdf_links(bigbib, addpdf)

    ### if only one year and no customcond, make it useful. ask for years >= this value
    if len(years) == 1 and customcond is None:
        customcond = "-c 'year>=%s'" % (years[0])
        years = [99]  # only used as a suffix for the intermediate file names

    # True when the page has one titled section per year
    per_year = customcond is None or len(years) > 1

    ### YEAR LOOP
    numpublitot = 0
    nonzeroyears = []
    sections = []
    for year in years:
        author = authorref + str(year)

        # 0. define condition
        #    several years: select on the year, on top of customcond if any
        #    single section: customcond alone is the condition
        yearcond = "-c 'year=%s'" % (year)
        if len(years) <= 1:
            cond = customcond
        elif customcond is None:
            cond = yearcond
        else:
            cond = customcond + " " + yearcond

        # 1. select items ARTICLE in the big bib file
        #    put those in a dedicated author.bib file (keys go to author.txt)
        cmd = "bib2bib --quiet %s -c '$type=%s' -oc %s -ob %s %s >> /dev/null 2>> /dev/null" % (
            cond, '"ARTICLE"', author + '.txt', author + '.bib', bigbib)
        os.system(cmd)

        # count number of publications: one key per line in author.txt
        with open(author + '.txt', 'r') as keyfile:
            numpubli = sum(1 for _ in keyfile)
        if verbose:
            print("%s --> count: %.0f" % (author, numpubli))
        numpublitot += numpubli
        # record only years with publications
        if numpubli == 0:
            continue
        nonzeroyears.append(year)

        # 2. make the html page from the author.bib file
        header = _year_header(year, numpubli, per_year, printnum, embedded)
        cmd = ("bibtex2html -q --both -m ads.tex %s "
               "-nf adsurl 'ADS link' -nf localpdf 'PDF version' "
               "-r -d --revkeys -nofooter --nodoc "
               "--header %s -nokeywords "
               "%s >> /dev/null 2>> /dev/null") % (
                   bibstyle, '"' + header + '"', author + '.bib')
        os.system(cmd)

        # 3. load page content and delete intermediate HTML file
        sections.append(_read_text(author + '.html'))
        os.remove(author + '.html')

    ## make a few corrections to the text generated by bibtex2html
    body = ''.join(sections)
    body = body.replace('bib', 'Bibtex entry')
    # the previous line also hit the "..._bib.html" links: restore them
    body = body.replace('Bibtex entry.html', 'bib.html')
    body = body.replace('DOI', 'Journal website')
    body = body.replace('<table>', '<table border="0" cellspacing="15">')
    body = body.replace('<td align="right">',
                        '<td align="center" width=17% style="font-size: 75%;">')

    body += '''<hr><p>Generated with
<a href='https://www.lri.fr/~filliatr/bibtex2html/doc/manual.html'>BibTeX2HTML</a>
and <a href='http://adsabs.harvard.edu/'>NASA ADS</a>
and a bit of <a href='http://www.python.org/'>Python</a></p>'''
    if embedded:
        body += _read_text('footer.html')

    ### TREAT HEADER PART DEPENDENT ON PREVIOUS LOOP
    ### -- total publications + list of years
    index = ""
    if per_year:
        if not nonzeroyears:
            index += "No publications found."
        else:
            index += "Year: "
            for year in nonzeroyears:
                index += "<a href='#" + str(year) + "'>" + str(year) + "</a>. "
        # NOTE(review): the listing this was rebuilt from had lost its
        # indentation; the next two lines are assumed to belong to the
        # per-year branch -- confirm against the repository.
        if addlink is not None:
            index += "<br>" + addlink
        if printnum:
            index += "<p>(Total: " + printpubli(numpublitot) + ")</p>"

    ### PUT EVERYTHING TOGETHER AND CREATE A PAGE
    _write_text(authorref + '.html', page_top + index + body)

    ## move results to target directory and remove txt files
    if target is not None:
        target = target + "/"
        arg = (target,
               authorref + "*.html",
               target,
               authorref + "*.bib",
               authorref + "*.txt",
               target + linkads + ".bib",
               "*.css",
               target)
        os.system("mkdir -p %s ; mv %s %s ; rm -rf %s %s ; mv %s ./ 2> /dev/null ; cp %s %s 2> /dev/null" % arg)

Note: See TracBrowser for help on using the repository browser.

Download in other formats: