'''patching scipy to fit distributions and expect method

This adds new methods to estimate continuous distribution parameters with some
fixed/frozen parameters. It also contains functions that calculate the expected
value of a function for any continuous or discrete distribution.

It temporarily also contains Bootstrap and Monte Carlo functions for testing the
distribution fit, but these are neither general nor verified.

Author: josef-pktd
License: Simplified BSD
'''

from statsmodels.compat.python import lmap
import numpy as np
from scipy import stats, optimize, integrate

########## patching scipy

#vonmises does not define finite bounds, because it is intended for circular
#support which does not define a proper pdf on the real line

stats.distributions.vonmises.a = -np.pi
stats.distributions.vonmises.b = np.pi

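#Illustrative check, not part of the original module: with the finite bounds
#attached above, the von Mises density integrates to (approximately) 1 over
#[-pi, pi], so generic code that relies on the support attributes a and b has a
#proper support to work with.
#
# >>> from scipy import integrate
# >>> integrate.quad(lambda xx: stats.vonmises.pdf(xx, 1.23), -np.pi, np.pi)[0]
# 1.0   # approximately
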
#the next 3 functions are for fit with some fixed parameters
#As they are written, they do not work as functions, only as methods

def _fitstart(self, x):
    '''example method, method of moments estimator as starting values

    Parameters
    ----------
    x : ndarray
        data for which the parameters are estimated

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    This example was written for the gamma distribution, but not verified
    with literature

    '''
    loc = np.min([x.min(), 0])
    a = 4/stats.skew(x)**2
    scale = np.std(x) / np.sqrt(a)
    return (a, loc, scale)

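#Worked example, a sketch not in the original module: the starting values above
#invert the gamma moment conditions skew = 2/sqrt(a) and var = a*scale**2.
#
# >>> np.random.seed(12345)
# >>> x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=1000)
# >>> a0 = 4 / stats.skew(x)**2                              # from skew = 2/sqrt(a)
# >>> (a0, np.min([x.min(), 0]), np.std(x) / np.sqrt(a0))    # rough (a, loc, scale)
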
def _fitstart_beta(self, x, fixed=None):
    '''method of moments estimator as starting values for beta distribution

    Parameters
    ----------
    x : ndarray
        data for which the parameters are estimated
    fixed : None or array_like
        sequence with fixed parameter values; entries that are np.nan indicate
        parameters to be estimated

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    References
    ----------
    for method of moments estimator for known loc and scale
    https://en.wikipedia.org/wiki/Beta_distribution#Parameter_estimation
    http://www.itl.nist.gov/div898/handbook/eda/section3/eda366h.htm
    NIST reference also includes reference to MLE in
    Johnson, Kotz, and Balakrishnan, Volume II, pages 221-235

    '''
    #todo: separate out this part to be used for other compact support distributions
    #      e.g. rdist, vonmises, and truncnorm
    #      but this might not work because it might still be distribution specific
    a, b = x.min(), x.max()
    eps = (b - a) * 0.01   #b >= a, so eps is nonnegative
    if fixed is None:
        #this part not checked with books
        loc = a - eps
        scale = (b - a) + 2*eps   #so the sample lies inside (loc, loc + scale)
    else:
        if np.isnan(fixed[-2]):
            #estimate loc
            loc = a - eps
        else:
            loc = fixed[-2]
        if np.isnan(fixed[-1]):
            #estimate scale
            scale = (b + eps) - loc
        else:
            scale = fixed[-1]

    #method of moments for known loc scale:
    scale = float(scale)
    xtrans = (x - loc)/scale
    xm = xtrans.mean()
    xv = xtrans.var()
    tmp = (xm*(1-xm)/xv - 1)
    p = xm * tmp
    q = (1 - xm) * tmp

    return (p, q, loc, scale)  #check return type and should fixed be returned ?

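#Worked example, a sketch not in the original module: with loc=0 and scale=1
#known, the beta moment conditions mean = p/(p+q) and
#var = p*q/((p+q)**2*(p+q+1)) invert to the p, q computed above.
#
# >>> np.random.seed(12345)
# >>> x = stats.beta.rvs(2.0, 5.0, size=2000)
# >>> m, v = x.mean(), x.var()
# >>> tmp = m*(1 - m)/v - 1
# >>> m*tmp, (1 - m)*tmp         # roughly (2.0, 5.0)
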
def _fitstart_poisson(self, x, fixed=None):
    '''maximum likelihood estimator as starting values for Poisson distribution

    Parameters
    ----------
    x : ndarray
        data for which the parameters are estimated
    fixed : None or array_like
        sequence with fixed parameter values; entries that are np.nan indicate
        parameters to be estimated

    Returns
    -------
    est : tuple
        preliminary estimates used as starting value for fitting, not
        necessarily a consistent estimator

    Notes
    -----
    This needs to be written and attached to each individual distribution

    References
    ----------
    MLE :
    https://en.wikipedia.org/wiki/Poisson_distribution#Maximum_likelihood

    '''
    #todo: separate out this part to be used for other compact support distributions
    #      e.g. rdist, vonmises, and truncnorm
    #      but this might not work because it might still be distribution specific
    a = x.min()
    eps = 0  # is this robust ?
    if fixed is None:
        #this part not checked with books
        loc = a - eps
    else:
        if np.isnan(fixed[-1]):
            #estimate loc
            loc = a - eps
        else:
            loc = fixed[-1]

    #MLE for standard (unshifted, if loc=0) Poisson distribution

    xtrans = (x - loc)
    lambd = xtrans.mean()
    #second derivative d loglike / d lambd, not used
    #dlldlambd = 1/lambd  # check

    return (lambd, loc)  #check return type and should fixed be returned ?

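#Usage sketch, not in the original module: with loc fixed at 0 the MLE of the
#Poisson rate is simply the sample mean of the (shifted) data.
#
# >>> np.random.seed(12345)
# >>> x = stats.poisson.rvs(3.4, size=500)
# >>> lambd_hat = (x - 0).mean()        # loc fixed at 0, estimate is ~3.4
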
def nnlf_fr(self, thetash, x, frmask):
    # new frozen version
    #    - sum (log pdf(x, theta), axis=0)
    #      where theta are the parameters (including loc and scale)
    #
    try:
        if frmask is not None:
            theta = frmask.copy()
            theta[np.isnan(frmask)] = thetash
        else:
            theta = thetash
        loc = theta[-2]
        scale = theta[-1]
        args = tuple(theta[:-2])
    except IndexError:
        raise ValueError("Not enough input arguments.")
    if not self._argcheck(*args) or scale <= 0:
        return np.inf
    x = np.array((x - loc) / scale)
    cond0 = (x <= self.a) | (x >= self.b)
    if np.any(cond0):
        return np.inf
    else:
        N = len(x)
        #raise ValueError
        return self._nnlf(x, *args) + N*np.log(scale)

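#Sketch of the frozen-mask mechanics used above (not in the original module):
#nan entries in frmask mark the free parameters; they are filled from the short
#parameter vector proposed by the optimizer before the likelihood is evaluated.
#
# >>> frmask = np.array([np.nan, 0.0, 1.0])   # shape free, loc=0 and scale=1 fixed
# >>> theta = frmask.copy()
# >>> theta[np.isnan(frmask)] = [2.5]         # candidate value from the optimizer
# >>> theta                                   # -> array([2.5, 0., 1.])
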
def fit_fr(self, data, *args, **kwds):
    '''estimate distribution parameters by MLE taking some parameters as fixed

    Parameters
    ----------
    data : ndarray, 1d
        data for which the distribution parameters are estimated
    args : list ? check
        starting values for optimization
    kwds :

      - 'frozen' : array_like
            values for frozen distribution parameters; for elements with
            np.nan, the corresponding parameter will be estimated

    Returns
    -------
    argest : ndarray
        estimated parameters


    Examples
    --------
    generate random sample

    >>> np.random.seed(12345)
    >>> x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200)

    estimate all parameters

    >>> stats.gamma.fit(x)
    array([ 2.0243194 , 0.20395655, 1.44411371])
    >>> stats.gamma.fit_fr(x, frozen=[np.nan, np.nan, np.nan])
    array([ 2.0243194 , 0.20395655, 1.44411371])

    keep loc fixed, estimate shape and scale parameters

    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, np.nan])
    array([ 2.45603985, 1.27333105])

    keep loc and scale fixed, estimate shape parameter

    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.0])
    array([ 3.00048828])
    >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, 1.2])
    array([ 2.57792969])

    estimate only scale parameter for fixed shape and loc

    >>> stats.gamma.fit_fr(x, frozen=[2.5, 0.0, np.nan])
    array([ 1.25087891])

    Notes
    -----
    self is an instance of a distribution class. This can be attached to
    scipy.stats.distributions.rv_continuous

    *Todo*

    * check if docstring is correct
    * more input checking, args is list ? might also apply to current fit method

    '''
    loc0, scale0 = lmap(kwds.get, ['loc', 'scale'], [0.0, 1.0])
    Narg = len(args)

    if Narg == 0 and hasattr(self, '_fitstart'):
        x0 = self._fitstart(data)
    elif Narg > self.numargs:
        raise ValueError("Too many input arguments.")
    else:
        args += (1.0,)*(self.numargs - Narg)
        # location and scale are at the end
        x0 = args + (loc0, scale0)

    if 'frozen' in kwds:
        frmask = np.array(kwds['frozen'])
        if len(frmask) != self.numargs + 2:
            raise ValueError("Incorrect number of frozen arguments.")
        else:
            # keep starting values for not frozen parameters
            for n in range(len(frmask)):
                # Troubleshooting ex_generic_mle_tdist
                if isinstance(frmask[n], np.ndarray) and frmask[n].size == 1:
                    frmask[n] = frmask[n].item()

            # If there were array elements, then frmask will be object-dtype,
            # in which case np.isnan will raise TypeError
            frmask = frmask.astype(np.float64)
            x0 = np.array(x0)[np.isnan(frmask)]
    else:
        frmask = None

    #print(x0)
    #print(frmask)
    return optimize.fmin(self.nnlf_fr, x0,
                         args=(np.ravel(data), frmask), disp=0)

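#For comparison, not part of the original module: newer scipy releases allow
#freezing parameters directly in ``fit`` through the floc/fscale/f0 keywords,
#which should give results close to fit_fr with the corresponding frozen mask.
#
# >>> np.random.seed(12345)
# >>> x = stats.gamma.rvs(2.5, loc=0, scale=1.2, size=200)
# >>> stats.gamma.fit(x, floc=0)                           # shape and scale, loc fixed
# >>> stats.gamma.fit_fr(x, frozen=[np.nan, 0.0, np.nan])
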
#The next two functions/methods calculate the expected value of an arbitrary
#function; for the continuous distributions integrate.quad is used, which might
#require continuity or smoothness in the function.


#TODO: add option for Monte Carlo integration

def expect(self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False):
    '''calculate expected value of a function with respect to the distribution

    location and scale only tested on a few examples

    Parameters
    ----------
    all parameters are keyword parameters
    fn : function (default: identity mapping)
        Function for which integral is calculated. Takes only one argument.
    args : tuple
        argument (parameters) of the distribution
    lb, ub : numbers
        lower and upper bound for integration, default is set to the support
        of the distribution
    conditional : bool (False)
        If true then the integral is corrected by the conditional probability
        of the integration interval. The return value is the expectation
        of the function, conditional on being in the given interval.

    Returns
    -------
    expected value : float

    Notes
    -----
    This function has not been checked for its behavior when the integral is
    not finite. The integration behavior is inherited from scipy.integrate.quad.

    '''
    if fn is None:
        def fun(x, *args):
            return x*self.pdf(x, loc=loc, scale=scale, *args)
    else:
        def fun(x, *args):
            return fn(x)*self.pdf(x, loc=loc, scale=scale, *args)
    if lb is None:
        lb = loc + self.a * scale   #(self.a - loc)/(1.0*scale)
    if ub is None:
        ub = loc + self.b * scale   #(self.b - loc)/(1.0*scale)
    if conditional:
        invfac = (self.sf(lb, loc=loc, scale=scale, *args)
                  - self.sf(ub, loc=loc, scale=scale, *args))
    else:
        invfac = 1.0
    return integrate.quad(fun, lb, ub,
                          args=args)[0]/invfac

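#Usage sketch, not in the original module: once expect is attached to
#rv_continuous (see the patching block below), the numerical expectation
#should match known moments.
#
# >>> stats.norm.expect(lambda x: x, loc=3, scale=2)      # ~3.0
# >>> stats.gamma.expect(lambda x: x**2, args=(2.5,))     # E[X**2] = a*(a+1) = 8.75
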
def expect_v2(self, fn=None, args=(), loc=0, scale=1, lb=None, ub=None, conditional=False):
    '''calculate expected value of a function with respect to the distribution

    location and scale only tested on a few examples

    Parameters
    ----------
    all parameters are keyword parameters
    fn : function (default: identity mapping)
        Function for which integral is calculated. Takes only one argument.
    args : tuple
        argument (parameters) of the distribution
    lb, ub : numbers
        lower and upper bound for integration, default is set using
        quantiles of the distribution, see Notes
    conditional : bool (False)
        If true then the integral is corrected by the conditional probability
        of the integration interval. The return value is the expectation
        of the function, conditional on being in the given interval.

    Returns
    -------
    expected value : float

    Notes
    -----
    This function has not been checked for its behavior when the integral is
    not finite. The integration behavior is inherited from scipy.integrate.quad.

    The default limits are lb = self.ppf(1e-9, *args), ub = self.ppf(1-1e-9, *args)

    For some heavy tailed distributions, 'alpha', 'cauchy', 'halfcauchy',
    'levy', 'levy_l', and for 'ncf', the default limits are not set correctly
    even when the expectation of the function is finite. In this case, the
    integration limits, lb and ub, should be chosen by the user. For example,
    for the ncf distribution, ub=1000 works in the examples.

    There are also problems with numerical integration in some other cases,
    for example if the distribution is very concentrated and the default limits
    are too large.

    '''
    #changes: 20100809
    #correction and refactoring how loc and scale are handled
    #uses now _pdf
    #needs more testing for distributions with bounded support, e.g. genpareto

    if fn is None:
        def fun(x, *args):
            return (loc + x*scale)*self._pdf(x, *args)
    else:
        def fun(x, *args):
            return fn(loc + x*scale)*self._pdf(x, *args)
    if lb is None:
        #lb = self.a
        try:
            lb = self.ppf(1e-9, *args)   #1e-14 quad fails for pareto
        except ValueError:
            lb = self.a
    else:
        lb = max(self.a, (lb - loc)/(1.0*scale))   #transform to standardized
    if ub is None:
        #ub = self.b
        try:
            ub = self.ppf(1 - 1e-9, *args)
        except ValueError:
            ub = self.b
    else:
        ub = min(self.b, (ub - loc)/(1.0*scale))
    if conditional:
        invfac = self._sf(lb, *args) - self._sf(ub, *args)
    else:
        invfac = 1.0
    return integrate.quad(fun, lb, ub,
                          args=args, limit=500)[0]/invfac

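#Usage sketch, not in the original module: expect_v2 is not attached to
#rv_continuous here, but it can be called as a plain function with the
#distribution instance as first argument, e.g. the conditional mean of a
#standard normal above zero, E[X | X > 0] = sqrt(2/pi) ~ 0.7979.
#
# >>> expect_v2(stats.norm, lb=0, conditional=True)
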
### for discrete distributions

#TODO: check that for a distribution with finite support the calculations are
#      done with one array summation (np.dot)

#based on _drv2_moment(self, n, *args), but streamlined
def expect_discrete(self, fn=None, args=(), loc=0, lb=None, ub=None,
                    conditional=False):
    '''calculate expected value of a function with respect to the distribution
    for discrete distribution

    Parameters
    ----------
    (self : distribution instance as defined in scipy stats)
    fn : function (default: identity mapping)
        Function for which integral is calculated. Takes only one argument.
    args : tuple
        argument (parameters) of the distribution

    optional keyword parameters
    lb, ub : numbers
        lower and upper bound for integration, default is set to the support
        of the distribution, lb and ub are inclusive (lb <= k <= ub)
    conditional : bool (False)
        If true then the expectation is corrected by the conditional
        probability of the integration interval. The return value is the
        expectation of the function, conditional on being in the given
        interval (k such that lb <= k <= ub).

    Returns
    -------
    expected value : float

    Notes
    -----
    * function is not vectorized
    * accuracy: uses self.moment_tol as stopping criterion
      for heavy tailed distributions, e.g. zipf(4), accuracy for
      mean, variance in example is only 1e-5,
      increasing precision (moment_tol) makes zipf very slow
    * suppnmin=100 internal parameter for minimum number of points to evaluate
      could be added as keyword parameter, to evaluate functions with
      non-monotonic shapes, points include integers in (-suppnmin, suppnmin)
    * uses maxcount=1000 to limit the number of points that are evaluated
      to break loop for infinite sums
      (a maximum of suppnmin+1000 positive plus suppnmin+1000 negative integers
      are evaluated)


    '''

    #moment_tol = 1e-12  # increase compared to self.moment_tol,
    # too slow for only small gain in precision for zipf

    #avoid endless loop with unbound integral, eg. var of zipf(2)
    maxcount = 1000
    suppnmin = 100  #minimum number of points to evaluate (+ and -)

    if fn is None:
        def fun(x):
            #loc and args from outer scope
            return (x + loc)*self._pmf(x, *args)
    else:
        def fun(x):
            #loc and args from outer scope
            return fn(x + loc)*self._pmf(x, *args)
    # used pmf because _pmf does not check support in randint
    # and there might be problems(?) with correct self.a, self.b at this stage
    # maybe not anymore, seems to work now with _pmf

    self._argcheck(*args)  # (re)generate scalar self.a and self.b
    if lb is None:
        lb = (self.a)
    else:
        lb = lb - loc

    if ub is None:
        ub = (self.b)
    else:
        ub = ub - loc
    if conditional:
        invfac = self.sf(lb, *args) - self.sf(ub+1, *args)
    else:
        invfac = 1.0

    tot = 0.0
    low, upp = self._ppf(0.001, *args), self._ppf(0.999, *args)
    low = max(min(-suppnmin, low), lb)
    upp = min(max(suppnmin, upp), ub)
    supp = np.arange(low, upp+1, self.inc)  #check limits
    #print('low, upp', low, upp)
    tot = np.sum(fun(supp))
    diff = 1e100
    pos = upp + self.inc
    count = 0

    #handle cases with infinite support

    while (pos <= ub) and (diff > self.moment_tol) and count <= maxcount:
        diff = fun(pos)
        tot += diff
        pos += self.inc
        count += 1

    if self.a < 0:  #handle case when self.a = -inf
        diff = 1e100
        pos = low - self.inc
        while (pos >= lb) and (diff > self.moment_tol) and count <= maxcount:
            diff = fun(pos)
            tot += diff
            pos -= self.inc
            count += 1
    if count > maxcount:
        # replace with proper warning
        print('sum did not converge')
    return tot/invfac

stats.distributions.rv_continuous.fit_fr = fit_fr
stats.distributions.rv_continuous.nnlf_fr = nnlf_fr
stats.distributions.rv_continuous.expect = expect
stats.distributions.rv_discrete.expect = expect_discrete
stats.distributions.beta_gen._fitstart = _fitstart_beta  #not tried out yet
stats.distributions.poisson_gen._fitstart = _fitstart_poisson  #not tried out yet

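#Usage sketch, not in the original module: with expect_discrete attached as
#rv_discrete.expect above, low-order Poisson moments are recovered by summation.
#
# >>> stats.poisson.expect(lambda k: k, args=(3.4,))               # mean, ~3.4
# >>> stats.poisson.expect(lambda k: (k - 3.4)**2, args=(3.4,))    # variance, ~3.4
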
########## end patching scipy

def distfitbootstrap(sample, distr, nrepl=100):
    '''run bootstrap for estimation of distribution parameters

    hard coded: only one shape parameter is allowed and estimated,
    loc=0 and scale=1 are fixed in the estimation

    Parameters
    ----------
    sample : ndarray
        original sample data for bootstrap
    distr : distribution instance with fit_fr method
    nrepl : int
        number of bootstrap replications

    Returns
    -------
    res : array (nrepl,)
        parameter estimates for all bootstrap replications

    '''
    nobs = len(sample)
    res = np.zeros(nrepl)
    for ii in range(nrepl):
        rvsind = np.random.randint(nobs, size=nobs)
        x = sample[rvsind]
        res[ii] = distr.fit_fr(x, frozen=[np.nan, 0.0, 1.0])
    return res

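#Usage sketch, not in the original module: bootstrap distribution of the gamma
#shape estimate with loc and scale frozen at 0 and 1.
#
# >>> np.random.seed(12345)
# >>> sample = stats.gamma.rvs(2.5, size=200)
# >>> bres = distfitbootstrap(sample, stats.gamma, nrepl=100)
# >>> bres.mean(), bres.std()
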
def distfitmc(sample, distr, nrepl=100, distkwds={}):
    '''run Monte Carlo for estimation of distribution parameters

    hard coded: only one shape parameter is allowed and estimated,
    loc=0 and scale=1 are fixed in the estimation

    Parameters
    ----------
    sample : ndarray
        original sample data, in Monte Carlo only used to get nobs
    distr : distribution instance with fit_fr method
    nrepl : int
        number of Monte Carlo replications

    Returns
    -------
    res : array (nrepl,)
        parameter estimates for all Monte Carlo replications

    '''
    arg = distkwds.pop('arg')
    nobs = len(sample)
    res = np.zeros(nrepl)
    for ii in range(nrepl):
        x = distr.rvs(arg, size=nobs, **distkwds)
        res[ii] = distr.fit_fr(x, frozen=[np.nan, 0.0, 1.0])
    return res

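#Usage sketch, not in the original module: Monte Carlo replications drawn at
#the true parameter; distkwds must contain 'arg' plus any loc/scale for rvs.
#
# >>> sample = stats.gamma.rvs(2.5, size=200)    # only its length is used
# >>> mcres = distfitmc(sample, stats.gamma, nrepl=100,
# ...                   distkwds=dict(arg=2.5, loc=0., scale=1.))
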
def printresults(sample, arg, bres, kind='bootstrap'):
    '''calculate and print Bootstrap or Monte Carlo results

    Parameters
    ----------
    sample : ndarray
        original sample data
    arg : float   (for general case will be array)
    bres : ndarray
        parameter estimates from Bootstrap or Monte Carlo run
    kind : {'bootstrap', 'montecarlo'}
        output is printed for Bootstrap (default) or Monte Carlo

    Returns
    -------
    None, currently only printing

    Notes
    -----
    still a bit a mess because it is used for both Bootstrap and Monte Carlo

    relies on the module-level variables nobs, nrepl and distr defined in the
    example script below

    made correction:
        reference point for bootstrap is estimated parameter

    not clear: I'm not doing any ddof adjustment in estimation of variance, do
    we need ddof>0 ?

    todo: return results and string instead of printing

    '''
    print('true parameter value')
    print(arg)
    print('MLE estimate of parameters using sample (nobs=%d)' % (nobs))
    argest = distr.fit_fr(sample, frozen=[np.nan, 0.0, 1.0])
    print(argest)
    if kind == 'bootstrap':
        #bootstrap compares to estimate from sample
        argorig = arg
        arg = argest

    print('%s distribution of parameter estimate (nrepl=%d)' % (kind, nrepl))
    print(f'mean = {bres.mean(0):f}, bias={bres.mean(0)-arg:f}')
    print('median', np.median(bres, axis=0))
    print('var and std', bres.var(0), np.sqrt(bres.var(0)))
    bmse = ((bres - arg)**2).mean(0)
    print('mse, rmse', bmse, np.sqrt(bmse))
    bressorted = np.sort(bres)
    print('%s confidence interval (90%% coverage)' % kind)
    print(bressorted[int(nrepl*0.05)], bressorted[int(nrepl*0.95)])
    print('%s confidence interval (90%% coverage) normal approximation' % kind)
    print(stats.norm.ppf(0.05, loc=bres.mean(), scale=bres.std()))
    print(stats.norm.isf(0.05, loc=bres.mean(), scale=bres.std()))
    print('Kolmogorov-Smirnov test for normality of %s distribution' % kind)
    print(' - estimated parameters, p-values not really correct')
    print(stats.kstest(bres, 'norm', (bres.mean(), bres.std())))


if __name__ == '__main__':

    examplecases = ['largenumber', 'bootstrap', 'montecarlo'][:]

    if 'largenumber' in examplecases:

        print('\nDistribution: vonmises')

        for nobs in [200]:  #[20000, 1000, 100]:
            x = stats.vonmises.rvs(1.23, loc=0, scale=1, size=nobs)
            print('\nnobs:', nobs)
            print('true parameter')
            print('1.23, loc=0, scale=1')
            print('unconstrained')
            print(stats.vonmises.fit(x))
            print(stats.vonmises.fit_fr(x, frozen=[np.nan, np.nan, np.nan]))
            print('with fixed loc and scale')
            print(stats.vonmises.fit_fr(x, frozen=[np.nan, 0.0, 1.0]))

        print('\nDistribution: gamma')
        distr = stats.gamma
        arg, loc, scale = 2.5, 0., 20.

        for nobs in [200]:  #[20000, 1000, 100]:
            x = distr.rvs(arg, loc=loc, scale=scale, size=nobs)
            print('\nnobs:', nobs)
            print('true parameter')
            print(f'{arg:f}, loc={loc:f}, scale={scale:f}')
            print('unconstrained')
            print(distr.fit(x))
            print(distr.fit_fr(x, frozen=[np.nan, np.nan, np.nan]))
            print('with fixed loc and scale')
            print(distr.fit_fr(x, frozen=[np.nan, 0.0, 1.0]))
            print('with fixed loc')
            print(distr.fit_fr(x, frozen=[np.nan, 0.0, np.nan]))


    ex = ['gamma', 'vonmises'][0]

    if ex == 'gamma':
        distr = stats.gamma
        arg, loc, scale = 2.5, 0., 1
    elif ex == 'vonmises':
        distr = stats.vonmises
        arg, loc, scale = 1.5, 0., 1
    else:
        raise ValueError('wrong example')

    nobs = 100
    nrepl = 1000

    sample = distr.rvs(arg, loc=loc, scale=scale, size=nobs)

    print('\nDistribution:', distr)
    if 'bootstrap' in examplecases:
        print('\nBootstrap')
        bres = distfitbootstrap(sample, distr, nrepl=nrepl)
        printresults(sample, arg, bres)

    if 'montecarlo' in examplecases:
        print('\nMonteCarlo')
        mcres = distfitmc(sample, distr, nrepl=nrepl,
                          distkwds=dict(arg=arg, loc=loc, scale=scale))
        printresults(sample, arg, mcres, kind='montecarlo')