Context Navigation

source: trunk/UTIL/PYTHON/powerlaw/previous_tests/plvar.py @ 1062

Last change on this file since 1062 was 1053, checked in by aslmd, 12 years ago
UTIL PYTHON. tools to study dust devils in LES.
File size: 15.0 KB

Line
1	from math import *
2	from random import *
3
4	# function [alpha, xmin, n]=plvar(x, varargin)
5	# PLVAR estimates the uncertainty in the estimated power-law parameters.
6	# Source: http://www.santafe.edu/~aaronc/powerlaws/
7	#
8	# PLVAR(x) takes a vector of observations x and returns estimated
9	# uncertainties in the estimated power-law parameters, based on the
10	# nonparametric approach described in Clauset, Shalizi, Newman (2007).
11	# PLVAR automatically detects whether x is composed of real or integer
12	# values, and applies the appropriate method. For discrete data, if
13	# min(x) > 1000, PLVAR uses the continuous approximation, which is
14	# a reliable in this regime.
15	#
16	# The fitting procedure works as follows:
17	# 1) For each possible choice of x_min, we estimate alpha via the
18	# method of maximum likelihood, and calculate the Kolmogorov-Smirnov
19	# goodness-of-fit statistic D.
20	# 2) We then select as our estimate of x_min, the value that gives the
21	# minimum value D over all values of x_min.
22	#
23	# Note that this procedure gives no estimate of the validity of the fit.
24	#
25	# Example:
26	#
27	# x = [500,150,90,81,75,75,70,65,60,58,49,47,40]
28	# [alpha, xmin, ntail] = plvar(x);
29	#
30	# For more information, try 'type plvar'
31	#
32	# See also PLFIT, PLPVA
33
34
35	# Version 1.0.8 (2010 April)
36	# Copyright (C) 2008-2011 Aaron Clauset (Santa Fe Institute)
37
38	# Ported to Python by Joel Ornstein (2011 July)
39	#(joel_ornstein@hmc.edu)
40
41	# Distributed under GPL 2.0
42	# http://www.gnu.org/copyleft/gpl.html
43	# PLVAR comes with ABSOLUTELY NO WARRANTY
44	#
45	#
46	# The 'zeta' helper function is modified from the open-source library 'mpmath'
47	# mpmath: a Python library for arbitrary-precision floating-point arithmetic
48	# http://code.google.com/p/mpmath/
49	# version 0.17 (February 2011) by Fredrik Johansson and others
50	#
51
52	# Notes:
53	#
54	# 1. In order to implement the integer-based methods in Matlab, the numeric
55	# maximization of the log-likelihood function was used. This requires
56	# that we specify the range of scaling parameters considered. We set
57	# this range to be 1.50 to 3.50 at 0.01 intervals by default.
58	# This range can be set by the user like so,
59	#
60	# a = plvar(x,'range',[1.50,3.50,0.01])
61	#
62	#
63	# 2. PLVAR can be told to limit the range of values considered as estimates
64	# for xmin in three ways. First, it can be instructed to sample these
65	# possible values like so,
66	#
67	# a = plvar(x,'sample',100);
68	#
69	# which uses 100 uniformly distributed values on the sorted list of
70	# unique values in the data set. Second, it can simply omit all
71	# candidates above a hard limit, like so
72	#
73	# a = plvar(x,'limit',3.4);
74	#
75	# Finally, it can be forced to use a fixed value, like so
76	#
77	# a = plvar(x,'xmin',3.4);
78	#
79	# In the case of discrete data, it rounds the limit to the nearest
80	# integer.
81	#
82	# 3. The default number of nonparametric repetitions of the fitting
83	# procedure is 1000. This number can be changed like so
84	#
85	# a = plvar(x,'reps',10000);
86	#
87	# 4. To silence the textual output to the screen, do this
88	#
89	# p = plvar(x,'silent');
90	#
91	#
92
93	def plvar(x,*varargin):
94	vec = []
95	sample = []
96	xminx = []
97	limit = []
98	Bt = []
99	quiet = False
100
101
102	# parse command-line parameters trap for bad input
103	i=0
104	while i<len(varargin):
105	argok = 1
106	if type(varargin[i])==str:
107	if varargin[i]=='range':
108	Range = varargin[i+1]
109	if Range[1]>Range[0]:
110	argok=0
111	vec=[]
112	try:
113	vec=map(lambda X:X*float(Range[2])+Range[0],\
114	range(int((Range[1]-Range[0])/Range[2])))
115
116
117	except:
118	argok=0
119	vec=[]
120
121
122	if Range[0]>=Range[1]:
123	argok=0
124	vec=[]
125	i-=1
126
127	i+=1
128
129
130	elif varargin[i]== 'sample':
131	sample = varargin[i+1]
132	i = i + 1
133	elif varargin[i]== 'limit':
134	limit = varargin[i+1]
135	i = i + 1
136	elif varargin[i]== 'xmin':
137	xminx = varargin[i+1]
138	i = i + 1
139	elif varargin[i]== 'reps':
140	Bt = varargin[i+1]
141	i = i + 1
142	elif varargin[i]== 'silent': quiet = True
143
144	else: argok=0
145
146
147	if not argok:
148	print '(PLVAR) Ignoring invalid argument #',i+1
149
150	i = i+1
151
152	if vec!=[] and (type(vec)!=list or min(vec)<=1):
153	print '(PLVAR) Error: ''range'' argument must contain a vector or minimum <= 1. using default.\n'
154
155	vec = []
156
157	if sample!=[] and sample<2:
158	print'(PLVAR) Error: ''sample'' argument must be a positive integer > 1. using default.\n'
159	sample = []
160
161	if limit!=[] and limit<min(x):
162	print'(PLVAR) Error: ''limit'' argument must be a positive value >= 1. using default.\n'
163	limit = []
164
165	if xminx!=[] and xminx>=max(x):
166	print'(PLVAR) Error: ''xmin'' argument must be a positive value < max(x). using default behavior.\n'
167	xminx = []
168	if Bt!=[] and Bt<2:
169	print '(PLVAR) Error: ''reps'' argument must be a positive value > 1; using default.\n'
170	Bt = [];
171
172
173	# select method (discrete or continuous) for fitting
174	if reduce(lambda X,Y:X==True and floor(Y)==float(Y),x,True): f_dattype = 'INTS'
175	elif reduce(lambda X,Y:X==True and (type(Y)==int or type(Y)==float or type(Y)==long),x,True): f_dattype = 'REAL'
176	else: f_dattype = 'UNKN'
177
178	if f_dattype=='INTS' and min(x) > 1000 and len(x)>100:
179	f_dattype = 'REAL'
180
181	N=len(x)
182
183	if Bt==[]: Bt=1000
184	bofA = []
185	bofB = []
186	bofC = []
187	if not quiet:
188	print 'Power-law Distribution, parameter error calculation\n'
189	print ' Copyright 2007-2009 Aaron Clauset\n'
190	print ' Warning: This can be a slow calculation; please be patient.\n'
191	print ' n = ',len(x),'\n reps = ',Bt
192	# estimate xmin and alpha, accordingly
193	if f_dattype== 'REAL':
194
195	for B in range(0,Bt):
196	# bootstrap resample
197	y = []
198	for i in range(0,N):
199	y.append(x[int(floor(N*random()))])
200	ymins = unique(y)
201	ymins.sort()
202	ymins=ymins[0:-1]
203
204	if xminx!=[]:
205
206	ymins = [min(filter(lambda X: X>=xminx,ymins))]
207
208
209	if limit!=[]:
210	qmins=filter(lambda X: X<=limit,qmins)
211	if qmins==[]: qmins=[min(y)]
212
213	if sample!=[]:
214	step = float(len(ymins))/(sample-1)
215	index_curr=0
216	new_ymins=[]
217	for i in range (0,sample):
218	if round(index_curr)==len(ymins): index_curr-=1
219	new_ymins.append(ymins[int(round(index_curr))])
220	index_curr+=step
221	ymins = unique(new_ymins)
222	ymins.sort()
223
224	z = sorted(y)
225
226	dat = []
227
228	for xm in range(0,len(ymins)):
229	xmin = ymins[xm]
230	z = filter(lambda X:X>=xmin,z)
231	n = len(z)
232	# estimate alpha using direct MLE
233	a = float(n)/sum(map(lambda X: log(float(X)/xmin),z))
234	# compute KS statistic
235	cf = map(lambda X:1-pow((float(xmin)/X),a),z)
236
237	dat.append( max( map(lambda X: abs(cf[X]-float(X)/n),range(0,n))))
238	ymin = ymins[dat.index(min(dat))]
239	z = filter(lambda X: X>=ymin,y)
240	n = len(z)
241	alpha = 1+float(n)/sum(map(lambda X: log(float(X)/ymin),z))
242	bofA.append(n)
243	bofB.append(ymin)
244	bofC.append(alpha)
245	# store distribution of estimated parameter values
246	if not quiet:
247	print '['+str(B+1)+']\tntail = ',round(mean(bofA),3),' (',round(std(bofA),3),')','\txmin = ',\
248	round(mean(bofB),3),' (',round(std(bofB),3),')','\talpha = ',round(mean(bofC),3),' (',round(std(bofC),3),')'
249
250	n = std(bofA)
251	xmin = std(bofB)
252	alpha = std(bofC)
253
254	elif f_dattype== 'INTS':
255	x=map(int,x)
256	if vec==[]:
257	for X in range(150,351):
258	vec.append(X/100.) # covers range of most practical
259	# scaling parameters
260	zvec = map(zeta, vec)
261
262	for B in range(0,Bt):
263	# bootstrap resample
264	y = []
265	for i in range(0,N):
266	y.append(x[int(floor(N*random()))])
267	ymins = unique(y)
268	ymins.sort()
269	ymins=ymins[0:-1]
270
271	if xminx!=[]:
272
273	ymins = [min(filter(lambda X: X>=xminx,ymins))]
274
275
276	if limit!=[]:
277	qmins=filter(lambda X: X<=limit,qmins)
278	if qmins==[]: qmins=[min(y)]
279
280	if sample!=[]:
281	step = float(len(ymins))/(sample-1)
282	index_curr=0
283	new_ymins=[]
284	for i in range (0,sample):
285	if round(index_curr)==len(ymins): index_curr-=1
286	new_ymins.append(ymins[int(round(index_curr))])
287	index_curr+=step
288	ymins = unique(new_ymins)
289	ymins.sort()
290
291	ymax = max(y)
292	z = sorted(y)
293
294	datA = []
295	datB = []
296	for xm in range(0,len(ymins)):
297	xmin = ymins[xm]
298	z = filter(lambda X:X>=xmin,z)
299	n = len(z)
300	L = []
301	slogz = sum(map(log,z))
302	xminvec = range (1,xmin)
303	for k in range (1,len(vec)):
304	L.append(-vec[k]float(slogz) - float(n)log(float(zvec[k]) - sum(map(lambda X:pow(float(X),-vec[k]),xminvec))))
305
306	I = L.index(max(L))
307
308	# compute KS statistic
309	fit = reduce(lambda X,Y: X+[Y+X[-1]],\
310	(map(lambda X: pow(X,-vec[I])/(float(zvec[I])-sum(map(lambda X: pow(X,-vec[I]),map(float,range(1,xmin))))),range(xmin,ymax+1))),[0])[1:]
311	cdi=[]
312	for XM in range(xmin,ymax+1):
313	cdi.append(len(filter(lambda X: floor(X)<=XM,z))/float(n))
314
315	datA.append(max( map(lambda X: abs(fit[X] - cdi[X]),range(0,ymax-xmin+1))))
316	datB.append(vec[I])
317
318	I = datA.index(min(datA))
319	ymin = ymins[I]
320	n = len(filter(lambda X:X>=ymin,y))
321	alpha = datB[I]
322
323	bofA.append(n)
324	bofB.append(ymin)
325	bofC.append(alpha)
326	# store distribution of estimated parameter values
327	if not quiet:
328	print '['+str(B+1)+']\tntail = ',round(mean(bofA),3),' (',round(std(bofA),3),')','\txmin = ',\
329	round(mean(bofB),3),' (',round(std(bofB),3),')','\talpha = ',round(mean(bofC),3),' (',round(std(bofC),3),')'
330	n = std(bofA)
331	xmin = std(bofB)
332	alpha = std(bofC)
333
334	else:
335	print '(PLVAR) Error: x must contain only reals or only integers.\n'
336	n = []
337	xmin = []
338	alpha = []
339
340
341	return [alpha,xmin,n]
342
343
344	# helper functions (unique and zeta)
345	def mean(L):
346	try:
347	return float(sum(L))/len(L)
348	except:
349
350	return 0
351
352	def std(L):
353	try:
354	u = mean(L)
355	return sqrt((1./(len(L)-1))*sum(map(lambda X: pow(X-u,2),L)))
356	except:
357	return 0
358
359
360	def unique(seq):
361	# not order preserving
362	set = {}
363	map(set.__setitem__, seq, [])
364	return set.keys()
365
366	def _polyval(coeffs, x):
367	p = coeffs[0]
368	for c in coeffs[1:]:
369	p = c + x*p
370	return p
371
372	_zeta_int = [\
373	-0.5,
374	0.0,
375	1.6449340668482264365,1.2020569031595942854,1.0823232337111381915,
376	1.0369277551433699263,1.0173430619844491397,1.0083492773819228268,
377	1.0040773561979443394,1.0020083928260822144,1.0009945751278180853,
378	1.0004941886041194646,1.0002460865533080483,1.0001227133475784891,
379	1.0000612481350587048,1.0000305882363070205,1.0000152822594086519,
380	1.0000076371976378998,1.0000038172932649998,1.0000019082127165539,
381	1.0000009539620338728,1.0000004769329867878,1.0000002384505027277,
382	1.0000001192199259653,1.0000000596081890513,1.0000000298035035147,
383	1.0000000149015548284]
384
385	_zeta_P = [-3.50000000087575873, -0.701274355654678147,
386	-0.0672313458590012612, -0.00398731457954257841,
387	-0.000160948723019303141, -4.67633010038383371e-6,
388	-1.02078104417700585e-7, -1.68030037095896287e-9,
389	-1.85231868742346722e-11][::-1]
390
391	_zeta_Q = [1.00000000000000000, -0.936552848762465319,
392	-0.0588835413263763741, -0.00441498861482948666,
393	-0.000143416758067432622, -5.10691659585090782e-6,
394	-9.58813053268913799e-8, -1.72963791443181972e-9,
395	-1.83527919681474132e-11][::-1]
396
397	_zeta_1 = [3.03768838606128127e-10, -1.21924525236601262e-8,
398	2.01201845887608893e-7, -1.53917240683468381e-6,
399	-5.09890411005967954e-7, 0.000122464707271619326,
400	-0.000905721539353130232, -0.00239315326074843037,
401	0.084239750013159168, 0.418938517907442414, 0.500000001921884009]
402
403	_zeta_0 = [-3.46092485016748794e-10, -6.42610089468292485e-9,
404	1.76409071536679773e-7, -1.47141263991560698e-6, -6.38880222546167613e-7,
405	0.000122641099800668209, -0.000905894913516772796, -0.00239303348507992713,
406	0.0842396947501199816, 0.418938533204660256, 0.500000000000000052]
407
408	def zeta(s):
409	"""
410	Riemann zeta function, real argument
411	"""
412	if not isinstance(s, (float, int)):
413	try:
414	s = float(s)
415	except (ValueError, TypeError):
416	try:
417	s = complex(s)
418	if not s.imag:
419	return complex(zeta(s.real))
420	except (ValueError, TypeError):
421	pass
422	raise NotImplementedError
423	if s == 1:
424	raise ValueError("zeta(1) pole")
425	if s >= 27:
426	return 1.0 + 2.0(-s) + 3.0(-s)
427	n = int(s)
428	if n == s:
429	if n >= 0:
430	return _zeta_int[n]
431	if not (n % 2):
432	return 0.0
433	if s <= 0.0:
434	return 0
435	if s <= 2.0:
436	if s <= 1.0:
437	return _polyval(_zeta_0,s)/(s-1)
438	return _polyval(_zeta_1,s)/(s-1)
439	z = _polyval(_zeta_P,s) / _polyval(_zeta_Q,s)
440	return 1.0 + 2.0(-s) + 3.0(-s) + 4.0*(-s)z
441
442
443
444

Note: See TracBrowser for help on using the repository browser.

Download in other formats: