Context Navigation

source: trunk/UTIL/PYTHON/powerlaw/previous_tests/plpva.py @ 1242

Last change on this file since 1242 was 1053, checked in by aslmd, 11 years ago
UTIL PYTHON. tools to study dust devils in LES.
File size: 16.6 KB

Line
1	from math import *
2	from random import *
3
4	# function [alpha, xmin, n]=plpva(x, xmin, varargin)
5	# PLPVA calculates the p-value for the given power-law fit to some data.
6	# Source: http://www.santafe.edu/~aaronc/powerlaws/
7	#
8	# PLPVA(x, xmin) takes data x and given lower cutoff for the power-law
9	# behavior xmin and computes the corresponding p-value for the
10	# Kolmogorov-Smirnov test, according to the method described in
11	# Clauset, Shalizi, Newman (2007).
12	# PLPVA automatically detects whether x is composed of real or integer
13	# values, and applies the appropriate method. For discrete data, if
14	# min(x) > 1000, PLPVA uses the continuous approximation, which is
15	# a reliable in this regime.
16	#
17	# The fitting procedure works as follows:
18	# 1) For each possible choice of x_min, we estimate alpha via the
19	# method of maximum likelihood, and calculate the Kolmogorov-Smirnov
20	# goodness-of-fit statistic D.
21	# 2) We then select as our estimate of x_min, the value that gives the
22	# minimum value D over all values of x_min.
23	#
24	# Note that this procedure gives no estimate of the uncertainty of the
25	# fitted parameters, nor of the validity of the fit.
26	#
27	# Example:
28	# x = [500,150,90,81,75,75,70,65,60,58,49,47,40]
29	# [p, gof] = plpva(x, 1);
30	#
31	# For more information, try 'type plpva'
32	#
33	# See also PLFIT, PLVAR
34
35	# Version 1.0.7 (2009 October)
36	# Copyright (C) 2008-2011 Aaron Clauset (Santa Fe Institute)
37
38	# Ported to Python by Joel Ornstein (2011 July)
39	#(joel_ornstein@hmc.edu)
40
41	# Distributed under GPL 2.0
42	# http://www.gnu.org/copyleft/gpl.html
43	# PLPVA comes with ABSOLUTELY NO WARRANTY
44	#
45	#
46	# The 'zeta' helper function is modified from the open-source library 'mpmath'
47	# mpmath: a Python library for arbitrary-precision floating-point arithmetic
48	# http://code.google.com/p/mpmath/
49	# version 0.17 (February 2011) by Fredrik Johansson and others
50	#
51
52	# Notes:
53	#
54	# 1. In order to implement the integer-based methods in Matlab, the numeric
55	# maximization of the log-likelihood function was used. This requires
56	# that we specify the range of scaling parameters considered. We set
57	# this range to be 1.50 to 3.50 at 0.01 intervals by default.
58	# This range can be set by the user like so,
59	#
60	# x = [500,150,90,81,75,75,70,65,60,58,49,47,40]
61	# a = plpva(x,1,'range',[1.50,3.50,0.01])
62	#
63	# 2. PLPVA can be told to limit the range of values considered as estimates
64	# for xmin in two ways. First, it can be instructed to sample these
65	# possible values like so,
66	#
67	# a = plpva(x,1,'sample',100);
68	#
69	# which uses 100 uniformly distributed values on the sorted list of
70	# unique values in the data set. Second, it can simply omit all
71	# candidates above a hard limit, like so
72	#
73	# a = plpva(x,1,'limit',3.4);
74	#
75	# Finally, it can be forced to use a fixed value, like so
76	#
77	# a = plpva(x,1,'xmin',1);
78	#
79	# In the case of discrete data, it rounds the limit to the nearest
80	# integer.
81	#
82	# 3. The default number of semiparametric repetitions of the fitting
83	# procedure is 1000. This number can be changed like so
84	#
85	# p = plpva(x, 1,'reps',10000);
86	#
87	# 4. To silence the textual output to the screen, do this
88	#
89	# p = plpva(x, 1,'reps',10000,'silent');
90	#
91
92	def plpva(x,xmin, *varargin):
93	vec = []
94	sample = []
95	xminx = []
96	limit = []
97	Bt = []
98	quiet = False
99
100
101	# parse command-line parameters trap for bad input
102	i=0
103	while i<len(varargin):
104	argok = 1
105	if type(varargin[i])==str:
106	if varargin[i]=='range':
107	Range = varargin[i+1]
108	if Range[1]>Range[0]:
109	argok=0
110	vec=[]
111	try:
112	vec=map(lambda X:X*float(Range[2])+Range[0],\
113	range(int((Range[1]-Range[0])/Range[2])))
114
115
116	except:
117	argok=0
118	vec=[]
119
120
121	if Range[0]>=Range[1]:
122	argok=0
123	vec=[]
124	i-=1
125
126	i+=1
127
128
129	elif varargin[i]== 'sample':
130	sample = varargin[i+1]
131	i = i + 1
132	elif varargin[i]== 'limit':
133	limit = varargin[i+1]
134	i = i + 1
135	elif varargin[i]== 'xmin':
136	xminx = varargin[i+1]
137	i = i + 1
138	elif varargin[i]== 'reps':
139	Bt = varargin[i+1]
140	i = i + 1
141	elif varargin[i]== 'silent': quiet = True
142
143	else: argok=0
144
145
146	if not argok:
147	print '(PLPVA) Ignoring invalid argument #',i+1
148
149	i = i+1
150
151	if vec!=[] and (type(vec)!=list or min(vec)<=1):
152	print '(PLPVA) Error: ''range'' argument must contain a vector or minimum <= 1. using default.\n'
153
154	vec = []
155
156	if sample!=[] and sample<2:
157	print'(PLPVA) Error: ''sample'' argument must be a positive integer > 1. using default.\n'
158	sample = []
159
160	if limit!=[] and limit<min(x):
161	print'(PLPVA) Error: ''limit'' argument must be a positive value >= 1. using default.\n'
162	limit = []
163
164	if xminx!=[] and xminx>=max(x):
165	print'(PLPVA) Error: ''xmin'' argument must be a positive value < max(x). using default behavior.\n'
166	xminx = []
167	if Bt!=[] and Bt<2:
168	print '(PLPVA) Error: ''reps'' argument must be a positive value > 1; using default.\n'
169	Bt = [];
170
171
172	# select method (discrete or continuous) for fitting
173	if reduce(lambda X,Y:X==True and floor(Y)==float(Y),x,True): f_dattype = 'INTS'
174	elif reduce(lambda X,Y:X==True and (type(Y)==int or type(Y)==float or type(Y)==long),x,True): f_dattype = 'REAL'
175	else: f_dattype = 'UNKN'
176
177	if f_dattype=='INTS' and min(x) > 1000 and len(x)>100:
178	f_dattype = 'REAL'
179
180	N=len(x)
181
182	if Bt==[]: Bt=1000
183	nof = []
184	if not quiet:
185	print 'Power-law Distribution, p-value calculation\n'
186	print ' Copyright 2007-2010 Aaron Clauset\n'
187	print ' Warning: This can be a slow calculation; please be patient.\n'
188	print ' n = ',len(x),'\n xmin = ',xmin,'\n reps = ',Bt
189	# estimate xmin and alpha, accordingly
190	if f_dattype== 'REAL':
191	# compute D for the empirical distribution
192	z = filter(lambda X:X>=xmin,x)
193	z.sort()
194	nz = len(z)
195	y = filter(lambda X:X<xmin,x)
196	ny = len(y)
197	alpha = 1 + float(nz) / sum(map(lambda X: log(float(X)/xmin),z))
198	cz = map(lambda X:X/float(nz),range(0,nz))
199	cf = map(lambda X:1-pow((xmin/float(X)),(alpha-1)),z)
200	gof = max( map(lambda X:abs(cz[X] - cf[X]),range(0,nz)))
201	pz = nz/float(N)
202
203	# compute distribution of gofs from semi-parametric bootstrap
204	# of entire data set with fit
205	for B in range(0,Bt):
206	# semi-parametric bootstrap of data
207	n1=0
208	for i in range(0,N):
209	if random()>pz: n1+=1
210
211	q1=[]
212	for i in range(0,n1):
213	q1.append(y[int(floor(ny*random()))])
214	n2 = N-n1;
215	q2=[]
216	for i in range (0,n2):
217	q2.append(random())
218	q2 = map(lambda X:xmin*pow(1.-X,(-1./(alpha-1.))),q2)
219
220	q = q1+q2
221	q.sort()
222
223
224
225	# estimate xmin and alpha via GoF-method
226	qmins = unique(q);
227	qmins.sort()
228	qmins = qmins[0:-1];
229
230	if xminx!=[]:
231
232	qmins = [min(filter(lambda X: X>=xminx,qmins))]
233
234
235	if limit!=[]:
236	qmins=filter(lambda X: X<=limit,qmins)
237	if qmins==[]: qmins=[min(q)]
238
239	if sample!=[]:
240	step = float(len(qmins))/(sample-1)
241	index_curr=0
242	new_qmins=[]
243	for i in range (0,sample):
244	if round(index_curr)==len(qmins): index_curr-=1
245	new_qmins.append(qmins[int(round(index_curr))])
246	index_curr+=step
247	qmins = unique(new_qmins)
248	qmins.sort()
249
250
251
252	dat = []
253
254	for qm in range(0,len(qmins)):
255	qmin = qmins[qm]
256	zq = filter(lambda X:X>=qmin,q)
257	nq = len(zq)
258
259	a = float(nq)/sum(map(lambda X: log(float(X)/qmin),zq))
260	cf = map(lambda X:1-pow((float(qmin)/X),a),zq)
261
262	dat.append( max( map(lambda X: abs(cf[X]-float(X)/nq),range(0,nq))))
263	if not quiet:
264	print '['+str(B+1)+']\tp = ',round(len(filter(lambda X:X>=gof,nof))/float(B+1),3)
265	nof.append(min(dat))
266	p=len(filter(lambda X:X>=gof,nof))/float(Bt+1)
267
268	elif f_dattype== 'INTS':
269	x=map(int,x)
270	if vec==[]:
271	for X in range(150,351):
272	vec.append(X/100.) # covers range of most practical
273	# scaling parameters
274	zvec = map(zeta, vec)
275	z = filter (lambda X:X>=xmin,x)
276	nz = len(z)
277	y = filter (lambda X:X<xmin,x)
278	ny = len(y)
279	xmax=max(z)
280	L = []
281	for k in range(0,len(vec)):
282	L.append(-vec[k]float(sum(map(log,z))) - float(nz)log(float(zvec[k]) - sum(map(lambda X:pow(float(X),-vec[k]),range(1,xmin)))))
283	I = L.index(max(L))
284	alpha=vec[I]
285	fit = reduce(lambda X,Y: X+[Y+X[-1]],\
286	(map(lambda X: pow(X,-alpha)/(float(zvec[I])-sum(map(lambda X: pow(X,-alpha),map(float,range(1,xmin))))),range(xmin,xmax+1))),[0])[1:]
287	cdi=[]
288	for XM in range(xmin,xmax+1):
289	cdi.append(len(filter(lambda X: floor(X)<=XM,z))/float(nz))
290
291	gof= max( map(lambda X: abs(fit[X] - cdi[X]),range(0,xmax-xmin+1)))
292	pz=nz/float(N)
293	mmax = 20*xmax
294	pdf=[]
295	for i in range(0,xmin):
296	pdf.append(0)
297	pdf += map(lambda X: pow(X,-alpha)/(float(zvec[I])-sum(map(lambda X: pow(X,-alpha),map(float,range(1,xmin))))),range(xmin,mmax+1))
298	cdf = reduce(lambda X,Y: X+[Y+X[-1]],pdf,[0])[1:]+[1]
299
300	# compute distribution of gofs from semi-parametric bootstrap
301	# of entire data set with fit
302	for B in range(0,Bt):
303	# semi-parametric bootstrap of data
304	n1=0
305	for i in range(0,N):
306	if random()>pz: n1+=1
307
308	q1=[]
309	for i in range(0,n1):
310	q1.append( y[int(floor(ny*random()))])
311	n2 = N-n1;
312	r2=[]
313	for i in range (0,n2):
314	r2.append(random())
315	r2.sort()
316	c=0
317	k=0
318	q2=[]
319	for i in range(xmin,mmax+2):
320	while c<n2 and r2[c]<=cdf[i]:c+=1
321	for k in range(k,c):q2.append(i)
322	k=c
323	if k>=n2: break
324
325	q = q1+q2
326	qmins = unique(q)
327	qmins.sort()
328	qmins = qmins[0:-1]
329
330	if xminx!=[]:
331
332	qmins = [min(filter(lambda X: X>=xminx,qmins))]
333
334
335	if limit!=[]:
336	qmins=filter(lambda X: X<=limit,qmins)
337	if qmins==[]: qmins=[min(q)]
338
339	if sample!=[]:
340	step = float(len(qmins))/(sample-1)
341	index_curr=0
342	new_qmins=[]
343	for i in range (0,sample):
344	if round(index_curr)==len(qmins): index_curr-=1
345	new_qmins.append(qmins[int(round(index_curr))])
346	index_curr+=step
347	qmins = unique(new_qmins)
348	qmins.sort()
349
350	qmax = max(q)
351	dat=[]
352	zq=q
353
354
355	for qm in range(0,len(qmins)):
356	qmin = qmins[qm]
357	zq = filter(lambda X:X>=qmin,zq)
358	nq = len(zq)
359	if nq>1:
360	L=[]
361	slogzq = sum(map(log,zq))
362	qminvec = range (1,qmin)
363	for k in range (1,len(vec)):
364	L.append(-vec[k]float(slogzq) - float(nq)log(float(zvec[k]) - sum(map(lambda X:pow(float(X),-vec[k]),qminvec))))
365
366	I = L.index(max(L))
367	fit = reduce(lambda X,Y: X+[Y+X[-1]],\
368	(map(lambda X: pow(X,-vec[I])/(float(zvec[I])-sum(map(lambda X: pow(X,-vec[I]),map(float,range(1,qmin))))),range(qmin,qmax+1))),[0])[1:]
369	cdi=[]
370	for QM in range(qmin,qmax+1):
371	cdi.append(len(filter(lambda X: floor(X)<=QM,zq))/float(nq))
372
373	dat.append(max( map(lambda X: abs(fit[X] - cdi[X]),range(0,qmax-qmin+1))))
374	else: dat[qm]=float('-inf')
375	if not quiet:
376	print '['+str(B+1)+']\tp = ',round(len(filter(lambda X:X>=gof,nof))/float(B+1),3)
377	nof.append(min(dat))
378
379	p=len(filter(lambda X:X>=gof,nof))/float(Bt)
380	else:
381	print '(PLPVA) Error: x must contain only reals or only integers.\n'
382	p = []
383	gof = []
384
385
386	return [p,gof]
387
388
389	# helper functions (unique and zeta)
390
391
392	def unique(seq):
393	# not order preserving
394	set = {}
395	map(set.__setitem__, seq, [])
396	return set.keys()
397
398	def _polyval(coeffs, x):
399	p = coeffs[0]
400	for c in coeffs[1:]:
401	p = c + x*p
402	return p
403
404	_zeta_int = [\
405	-0.5,
406	0.0,
407	1.6449340668482264365,1.2020569031595942854,1.0823232337111381915,
408	1.0369277551433699263,1.0173430619844491397,1.0083492773819228268,
409	1.0040773561979443394,1.0020083928260822144,1.0009945751278180853,
410	1.0004941886041194646,1.0002460865533080483,1.0001227133475784891,
411	1.0000612481350587048,1.0000305882363070205,1.0000152822594086519,
412	1.0000076371976378998,1.0000038172932649998,1.0000019082127165539,
413	1.0000009539620338728,1.0000004769329867878,1.0000002384505027277,
414	1.0000001192199259653,1.0000000596081890513,1.0000000298035035147,
415	1.0000000149015548284]
416
417	_zeta_P = [-3.50000000087575873, -0.701274355654678147,
418	-0.0672313458590012612, -0.00398731457954257841,
419	-0.000160948723019303141, -4.67633010038383371e-6,
420	-1.02078104417700585e-7, -1.68030037095896287e-9,
421	-1.85231868742346722e-11][::-1]
422
423	_zeta_Q = [1.00000000000000000, -0.936552848762465319,
424	-0.0588835413263763741, -0.00441498861482948666,
425	-0.000143416758067432622, -5.10691659585090782e-6,
426	-9.58813053268913799e-8, -1.72963791443181972e-9,
427	-1.83527919681474132e-11][::-1]
428
429	_zeta_1 = [3.03768838606128127e-10, -1.21924525236601262e-8,
430	2.01201845887608893e-7, -1.53917240683468381e-6,
431	-5.09890411005967954e-7, 0.000122464707271619326,
432	-0.000905721539353130232, -0.00239315326074843037,
433	0.084239750013159168, 0.418938517907442414, 0.500000001921884009]
434
435	_zeta_0 = [-3.46092485016748794e-10, -6.42610089468292485e-9,
436	1.76409071536679773e-7, -1.47141263991560698e-6, -6.38880222546167613e-7,
437	0.000122641099800668209, -0.000905894913516772796, -0.00239303348507992713,
438	0.0842396947501199816, 0.418938533204660256, 0.500000000000000052]
439
440	def zeta(s):
441	"""
442	Riemann zeta function, real argument
443	"""
444	if not isinstance(s, (float, int)):
445	try:
446	s = float(s)
447	except (ValueError, TypeError):
448	try:
449	s = complex(s)
450	if not s.imag:
451	return complex(zeta(s.real))
452	except (ValueError, TypeError):
453	pass
454	raise NotImplementedError
455	if s == 1:
456	raise ValueError("zeta(1) pole")
457	if s >= 27:
458	return 1.0 + 2.0(-s) + 3.0(-s)
459	n = int(s)
460	if n == s:
461	if n >= 0:
462	return _zeta_int[n]
463	if not (n % 2):
464	return 0.0
465	if s <= 0.0:
466	return 0
467	if s <= 2.0:
468	if s <= 1.0:
469	return _polyval(_zeta_0,s)/(s-1)
470	return _polyval(_zeta_1,s)/(s-1)
471	z = _polyval(_zeta_P,s) / _polyval(_zeta_Q,s)
472	return 1.0 + 2.0(-s) + 3.0(-s) + 4.0*(-s)z
473
474
475
476

Note: See TracBrowser for help on using the repository browser.

Download in other formats: