from math import *
from random import *

# function [alpha, xmin, n]=plvar(x, varargin)
# PLVAR estimates the uncertainty in the estimated power-law parameters.
#    Source: http://www.santafe.edu/~aaronc/powerlaws/
#
# PLVAR(x) takes a vector of observations x and returns estimated
# uncertainties in the estimated power-law parameters, based on the
# nonparametric approach described in Clauset, Shalizi, Newman (2007).
# PLVAR automatically detects whether x is composed of real or integer
# values, and applies the appropriate method. For discrete data, if
# min(x) > 1000, PLVAR uses the continuous approximation, which is
# reliable in this regime.
#
# The fitting procedure works as follows:
# 1) For each possible choice of x_min, we estimate alpha via the
#    method of maximum likelihood, and calculate the Kolmogorov-Smirnov
#    goodness-of-fit statistic D.
# 2) We then select as our estimate of x_min the value that gives the
#    minimum value of D over all values of x_min.
#
# Note that this procedure gives no estimate of the validity of the fit.
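#
# (Concretely, for continuous data the code below uses the maximum
#  likelihood estimate alpha = 1 + n / sum( ln(x_i / x_min) ) over the
#  n observations x_i >= x_min, and D is the maximum absolute difference
#  between the empirical CDF and the fitted CDF above x_min.)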
#
# Example:
#
#    x = [500,150,90,81,75,75,70,65,60,58,49,47,40]
#    alpha, xmin, ntail = plvar(x)
#
# For more information, see the notes below.
#
# See also PLFIT, PLPVA


# Version 1.0.8 (2010 April)
# Copyright (C) 2008-2011 Aaron Clauset (Santa Fe Institute)

# Ported to Python by Joel Ornstein (2011 July)
# (joel_ornstein@hmc.edu)

# Distributed under GPL 2.0
#    http://www.gnu.org/copyleft/gpl.html
# PLVAR comes with ABSOLUTELY NO WARRANTY
#
#
# The 'zeta' helper function is modified from the open-source library 'mpmath'
#    mpmath: a Python library for arbitrary-precision floating-point arithmetic
#    http://code.google.com/p/mpmath/
#    version 0.17 (February 2011) by Fredrik Johansson and others
#
# Notes:
#
# 1. In order to implement the integer-based methods in Matlab, the numeric
#    maximization of the log-likelihood function was used. This requires
#    that we specify the range of scaling parameters considered. We set
#    this range to be 1.50 to 3.50 at 0.01 intervals by default.
#    (The log-likelihood maximized over this grid is sketched just before
#    the function definition below.) This range can be set by the user
#    like so,
#
#       a = plvar(x,'range',[1.50,3.50,0.01])
#
#
# 2. PLVAR can be told to limit the range of values considered as estimates
#    for xmin in three ways. First, it can be instructed to sample these
#    possible values like so,
#
#       a = plvar(x,'sample',100);
#
#    which uses 100 uniformly distributed values on the sorted list of
#    unique values in the data set. Second, it can simply omit all
#    candidates above a hard limit, like so
#
#       a = plvar(x,'limit',3.4);
#
#    Finally, it can be forced to use a fixed value, like so
#
#       a = plvar(x,'xmin',3.4);
#
#    In the case of discrete data, it rounds the limit to the nearest
#    integer.
#
# 3. The default number of nonparametric repetitions of the fitting
#    procedure is 1000. This number can be changed like so
#
#       a = plvar(x,'reps',10000);
#
# 4. To silence the textual output to the screen, do this
#
#       p = plvar(x,'silent');
#
#

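# (For reference, the discrete log-likelihood that the grid search of Note 1
#  maximizes is, up to constants,
#      L(alpha) = -alpha*sum( ln x_i ) - n*ln( zeta(alpha) - sum_{k=1..xmin-1} k**(-alpha) ),
#  over the n observations x_i >= xmin; see the 'INTS' branch below.)
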
def plvar(x,*varargin):
    # option defaults; an empty list marks 'not set', mirroring the
    # MATLAB original's use of []
    vec    = []
    sample = []
    xminx  = []
    limit  = []
    Bt     = []
    quiet  = False

    # parse command-line parameters; trap for bad input
    i = 0
    while i<len(varargin):
        argok = 1
        if type(varargin[i])==str:
            if varargin[i]=='range':
                Range = varargin[i+1]
                try:
                    vec = map(lambda X: X*float(Range[2])+Range[0],
                              range(int((Range[1]-Range[0])/Range[2])))
                except:
                    argok = 0
                    vec   = []

                if Range[0]>=Range[1]:
                    argok = 0
                    vec   = []
                    i -= 1

                i += 1

            elif varargin[i]== 'sample':
                sample = varargin[i+1]
                i = i + 1
            elif varargin[i]== 'limit':
                limit = varargin[i+1]
                i = i + 1
            elif varargin[i]== 'xmin':
                xminx = varargin[i+1]
                i = i + 1
            elif varargin[i]== 'reps':
                Bt = varargin[i+1]
                i = i + 1
            elif varargin[i]== 'silent':
                quiet = True
            else:
                argok = 0

        if not argok:
            print '(PLVAR) Ignoring invalid argument #',i+1

        i = i+1

    if vec!=[] and (type(vec)!=list or min(vec)<=1):
        print "(PLVAR) Error: 'range' argument must contain a valid vector with minimum > 1; using default.\n"
        vec = []

    if sample!=[] and sample<2:
        print "(PLVAR) Error: 'sample' argument must be a positive integer > 1; using default.\n"
        sample = []

    if limit!=[] and limit<min(x):
        print "(PLVAR) Error: 'limit' argument must be a positive value >= min(x); using default.\n"
        limit = []

    if xminx!=[] and xminx>=max(x):
        print "(PLVAR) Error: 'xmin' argument must be a positive value < max(x); using default behavior.\n"
        xminx = []

    if Bt!=[] and Bt<2:
        print "(PLVAR) Error: 'reps' argument must be a positive value > 1; using default.\n"
        Bt = []


    # select method (discrete or continuous) for fitting
    if reduce(lambda X,Y: X==True and floor(Y)==float(Y),x,True): f_dattype = 'INTS'
    elif reduce(lambda X,Y: X==True and (type(Y)==int or type(Y)==float or type(Y)==long),x,True): f_dattype = 'REAL'
    else: f_dattype = 'UNKN'

    if f_dattype=='INTS' and min(x) > 1000 and len(x)>100:
        f_dattype = 'REAL'

    N = len(x)

    if Bt==[]: Bt = 1000
    bofA = []
    bofB = []
    bofC = []
    if not quiet:
        print 'Power-law Distribution, parameter error calculation\n'
        print '   Copyright 2007-2009 Aaron Clauset\n'
        print '   Warning: This can be a slow calculation; please be patient.\n'
        print '   n = ',len(x),'\n   reps = ',Bt
    # estimate xmin and alpha, accordingly
    if f_dattype== 'REAL':

        for B in range(0,Bt):
            # bootstrap resample (with replacement)
            y = []
            for i in range(0,N):
                y.append(x[int(floor(N*random()))])
            ymins = unique(y)
            ymins.sort()
            ymins = ymins[0:-1]

            if xminx!=[]:
                ymins = [min(filter(lambda X: X>=xminx,ymins))]

            if limit!=[]:
                ymins = filter(lambda X: X<=limit,ymins)
                if ymins==[]: ymins = [min(y)]

            if sample!=[]:
                # restrict the search to roughly 'sample' evenly spaced candidates
                step       = float(len(ymins))/(sample-1)
                index_curr = 0
                new_ymins  = []
                for i in range(0,sample):
                    if round(index_curr)==len(ymins): index_curr -= 1
                    new_ymins.append(ymins[int(round(index_curr))])
                    index_curr += step
                ymins = unique(new_ymins)
                ymins.sort()

            z   = sorted(y)
            dat = []

            for xm in range(0,len(ymins)):
                xmin = ymins[xm]
                z    = filter(lambda X: X>=xmin,z)
                n    = len(z)
                # estimate alpha using direct MLE
                a = float(n)/sum(map(lambda X: log(float(X)/xmin),z))
                # compute KS statistic
                cf = map(lambda X: 1-pow((float(xmin)/X),a),z)
                dat.append(max(map(lambda X: abs(cf[X]-float(X)/n),range(0,n))))

            ymin  = ymins[dat.index(min(dat))]
            z     = filter(lambda X: X>=ymin,y)
            n     = len(z)
            alpha = 1+float(n)/sum(map(lambda X: log(float(X)/ymin),z))

            # store distribution of estimated parameter values
            bofA.append(n)
            bofB.append(ymin)
            bofC.append(alpha)
            if not quiet:
                print '['+str(B+1)+']\tntail = ',round(mean(bofA),3),' (',round(std(bofA),3),')','\txmin = ',\
                      round(mean(bofB),3),' (',round(std(bofB),3),')','\talpha = ',round(mean(bofC),3),' (',round(std(bofC),3),')'

        # the reported uncertainties are the standard deviations of the
        # bootstrap distributions
        n     = std(bofA)
        xmin  = std(bofB)
        alpha = std(bofC)

    elif f_dattype== 'INTS':
        x = map(int,x)
        if vec==[]:
            for X in range(150,351):
                vec.append(X/100.)    # covers range of most practical
                                      # scaling parameters
        zvec = map(zeta, vec)

        for B in range(0,Bt):
            # bootstrap resample (with replacement)
            y = []
            for i in range(0,N):
                y.append(x[int(floor(N*random()))])
            ymins = unique(y)
            ymins.sort()
            ymins = ymins[0:-1]

            if xminx!=[]:
                ymins = [min(filter(lambda X: X>=xminx,ymins))]

            if limit!=[]:
                ymins = filter(lambda X: X<=limit,ymins)
                if ymins==[]: ymins = [min(y)]

            if sample!=[]:
                # restrict the search to roughly 'sample' evenly spaced candidates
                step       = float(len(ymins))/(sample-1)
                index_curr = 0
                new_ymins  = []
                for i in range(0,sample):
                    if round(index_curr)==len(ymins): index_curr -= 1
                    new_ymins.append(ymins[int(round(index_curr))])
                    index_curr += step
                ymins = unique(new_ymins)
                ymins.sort()

            ymax = max(y)
            z    = sorted(y)

            datA = []
            datB = []
            for xm in range(0,len(ymins)):
                xmin = ymins[xm]
                z    = filter(lambda X: X>=xmin,z)
                n    = len(z)
                # estimate alpha by maximizing the discrete log-likelihood
                # over the grid of candidate exponents in vec
                L = []
                slogz   = sum(map(log,z))
                xminvec = range(1,xmin)
                for k in range(0,len(vec)):
                    L.append(-vec[k]*float(slogz) - float(n)*log(float(zvec[k]) - sum(map(lambda X: pow(float(X),-vec[k]),xminvec))))
                I = L.index(max(L))

                # compute KS statistic
                fit = reduce(lambda X,Y: X+[Y+X[-1]],\
                      (map(lambda X: pow(X,-vec[I])/(float(zvec[I])-sum(map(lambda X: pow(X,-vec[I]),map(float,range(1,xmin))))),range(xmin,ymax+1))),[0])[1:]
                cdi = []
                for XM in range(xmin,ymax+1):
                    cdi.append(len(filter(lambda X: floor(X)<=XM,z))/float(n))

                datA.append(max(map(lambda X: abs(fit[X] - cdi[X]),range(0,ymax-xmin+1))))
                datB.append(vec[I])

            I     = datA.index(min(datA))
            ymin  = ymins[I]
            n     = len(filter(lambda X: X>=ymin,y))
            alpha = datB[I]

            # store distribution of estimated parameter values
            bofA.append(n)
            bofB.append(ymin)
            bofC.append(alpha)
            if not quiet:
                print '['+str(B+1)+']\tntail = ',round(mean(bofA),3),' (',round(std(bofA),3),')','\txmin = ',\
                      round(mean(bofB),3),' (',round(std(bofB),3),')','\talpha = ',round(mean(bofC),3),' (',round(std(bofC),3),')'

        # the reported uncertainties are the standard deviations of the
        # bootstrap distributions
        n     = std(bofA)
        xmin  = std(bofB)
        alpha = std(bofC)

    else:
        print '(PLVAR) Error: x must contain only reals or only integers.\n'
        n     = []
        xmin  = []
        alpha = []


    return [alpha,xmin,n]


# helper functions (mean, std, unique, and zeta)
def mean(L):
    try:
        return float(sum(L))/len(L)
    except:
        return 0

def std(L):
    try:
        u = mean(L)
        return sqrt((1./(len(L)-1))*sum(map(lambda X: pow(X-u,2),L)))
    except:
        return 0


def unique(seq):
    # return the distinct values in seq (not order preserving)
    keys = {}
    for item in seq:
        keys[item] = None
    return keys.keys()

def _polyval(coeffs, x):
    # evaluate a polynomial (highest-order coefficient first) by Horner's rule
    p = coeffs[0]
    for c in coeffs[1:]:
        p = c + x*p
    return p

_zeta_int = [\
-0.5,
0.0,
1.6449340668482264365,1.2020569031595942854,1.0823232337111381915,
1.0369277551433699263,1.0173430619844491397,1.0083492773819228268,
1.0040773561979443394,1.0020083928260822144,1.0009945751278180853,
1.0004941886041194646,1.0002460865533080483,1.0001227133475784891,
1.0000612481350587048,1.0000305882363070205,1.0000152822594086519,
1.0000076371976378998,1.0000038172932649998,1.0000019082127165539,
1.0000009539620338728,1.0000004769329867878,1.0000002384505027277,
1.0000001192199259653,1.0000000596081890513,1.0000000298035035147,
1.0000000149015548284]

_zeta_P = [-3.50000000087575873, -0.701274355654678147,
-0.0672313458590012612, -0.00398731457954257841,
-0.000160948723019303141, -4.67633010038383371e-6,
-1.02078104417700585e-7, -1.68030037095896287e-9,
-1.85231868742346722e-11][::-1]

_zeta_Q = [1.00000000000000000, -0.936552848762465319,
-0.0588835413263763741, -0.00441498861482948666,
-0.000143416758067432622, -5.10691659585090782e-6,
-9.58813053268913799e-8, -1.72963791443181972e-9,
-1.83527919681474132e-11][::-1]

_zeta_1 = [3.03768838606128127e-10, -1.21924525236601262e-8,
2.01201845887608893e-7, -1.53917240683468381e-6,
-5.09890411005967954e-7, 0.000122464707271619326,
-0.000905721539353130232, -0.00239315326074843037,
0.084239750013159168, 0.418938517907442414, 0.500000001921884009]

_zeta_0 = [-3.46092485016748794e-10, -6.42610089468292485e-9,
1.76409071536679773e-7, -1.47141263991560698e-6, -6.38880222546167613e-7,
0.000122641099800668209, -0.000905894913516772796, -0.00239303348507992713,
0.0842396947501199816, 0.418938533204660256, 0.500000000000000052]

def zeta(s):
    """
    Riemann zeta function, real argument
    """
    if not isinstance(s, (float, int)):
        try:
            s = float(s)
        except (ValueError, TypeError):
            try:
                s = complex(s)
                if not s.imag:
                    return complex(zeta(s.real))
            except (ValueError, TypeError):
                pass
            raise NotImplementedError
    if s == 1:
        raise ValueError("zeta(1) pole")
    if s >= 27:
        return 1.0 + 2.0**(-s) + 3.0**(-s)
    n = int(s)
    if n == s:
        if n >= 0:
            return _zeta_int[n]
        if not (n % 2):
            return 0.0
    if s <= 0.0:
        return 0
    if s <= 2.0:
        if s <= 1.0:
            return _polyval(_zeta_0,s)/(s-1)
        return _polyval(_zeta_1,s)/(s-1)
    z = _polyval(_zeta_P,s) / _polyval(_zeta_Q,s)
    return 1.0 + 2.0**(-s) + 3.0**(-s) + 4.0**(-s)*z
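

# Minimal usage sketch (not part of the original port). It draws synthetic
# continuous power-law data by inverse-transform sampling and prints the
# bootstrap standard deviations returned by plvar. The true_alpha/true_xmin
# values and the reduced 'reps'/'sample' settings are illustrative choices
# to keep the run time modest, not defaults of the routine.
if __name__ == '__main__':
    true_alpha = 2.5
    true_xmin  = 1.0
    # inverse CDF of the continuous power law: x = xmin * u**(-1/(alpha-1))
    data = [true_xmin * pow(1.0 - random(), -1.0/(true_alpha - 1.0)) for _ in range(500)]
    sd_alpha, sd_xmin, sd_ntail = plvar(data, 'reps', 50, 'sample', 50, 'silent')
    print('std(alpha) = %.3f, std(xmin) = %.3f, std(ntail) = %.1f' % (sd_alpha, sd_xmin, sd_ntail))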