383 lines
12 KiB
Python
383 lines
12 KiB
Python
'''Gradient/Jacobian of the normal and t loglikelihood

Uses the chain rule.

Normal distribution: derivative wrt mu, sigma and beta.

New version: loc-scale distributions, derivative wrt loc and scale.

Also includes the "standardized" t distribution (for use in GARCH).

TODO:
  * use sympy for the derivative of the loglikelihood wrt shape parameters;
    it works for df of the t distribution:
    dlog(gamma(a))/da = polygamma(0, a)  (check);
    polygamma is available in scipy.special
  * get the loc-scale example to work with mean = X*b
  * write some full unit test examples

Author: josef-pktd

'''
|
|
|
|
import numpy as np
|
|
from scipy import special
|
|
from scipy.special import gammaln
|
|
|
|
|
|
def norm_lls(y, params):
    '''Per-observation loglikelihood of the normal distribution.

    Parameters
    ----------
    y : ndarray, 1d
        observed values of the normally distributed random variable
    params : ndarray, (nobs, 2)
        mean and variance (mu, sigma2), one observation per row; the array
        is transposed and unpacked into its two components

    Returns
    -------
    lls : ndarray
        contribution of each observation to the loglikelihood
    '''
    loc, var = params.T
    sq_resid = (y - loc)**2
    return -0.5 * (np.log(2*np.pi) + np.log(var) + sq_resid/var)
|
|
|
|
def norm_lls_grad(y, params):
    '''Per-observation score of the normal loglikelihood.

    Parameters
    ----------
    y : ndarray, 1d
        observed values of the normally distributed random variable
    params : ndarray, (nobs, 2)
        mean and variance (mu, sigma2), one observation per row

    Returns
    -------
    grad : ndarray, (nobs, 2)
        first column: derivative of the loglikelihood wrt the mean mu;
        second column: derivative wrt the standard deviation sigma

    Notes
    -----
    The second column is the derivative with respect to sigma, not with
    respect to sigma**2, even though the function is parametrized by the
    variance sigma2 = sigma**2.
    '''
    loc, var = params.T
    resid = y - loc
    score_loc = resid / var
    score_scale = (resid**2 / var - 1) / np.sqrt(var)
    return np.column_stack((score_loc, score_scale))
|
|
|
|
|
|
def mean_grad(x, beta):
    '''Jacobian of the linear mean ``x*beta`` with respect to ``beta``.

    The mean is linear in ``beta``, so its Jacobian is ``x`` itself, which is
    returned unchanged (no copy). ``beta`` is not used.
    '''
    jac = x
    return jac
|
|
|
|
def normgrad(y, x, params):
    '''Jacobian of the normal loglikelihood of a linear model.

    The mean is ``x*beta`` and the variance ``sigma2`` is the same for all
    observations.

    Parameters
    ----------
    y : ndarray, 1d
        normally distributed random variable with mean x*beta, and variance
        sigma2
    x : ndarray, 2d
        explanatory variables, observation in rows, variables in columns
    params : array_like, (nvars + 1)
        array of coefficients and variance (beta, sigma2)

    Returns
    -------
    grad : ndarray, (nobs, nvars + 1)
        derivative of the loglikelihood for each observation wrt the
        coefficients beta in the first nvars columns, and wrt the scale
        (sigma) in the last column

    Notes
    -----
    The scale column is whatever ``norm_lls_grad`` returns in its second
    column, i.e. the derivative wrt sigma evaluated at sigma2.

    TODO: for heteroscedasticity need sigma to be a 1d array
    '''
    beta = params[:-1]
    # homoscedastic case: repeat the single variance as an (nobs, 1) column
    sigma2 = params[-1]*np.ones((len(y), 1))
    dmudbeta = mean_grad(x, beta)
    mu = np.dot(x, beta)
    params2 = np.column_stack((mu, sigma2))
    dllsdms = norm_lls_grad(y, params2)
    # chain rule for beta: dll/dbeta = dll/dmu * dmu/dbeta.
    # The last column has to be the scale score (second column of dllsdms);
    # slicing with [:, :1] here would repeat the mean score instead.
    grad = np.column_stack((dllsdms[:, :1]*dmudbeta, dllsdms[:, 1:]))
    return grad
|
|
|
|
|
|
|
|
def tstd_lls(y, params, df):
    '''Per-observation loglikelihood of the standardized t distribution.

    Parameters
    ----------
    y : ndarray, 1d
        observed values of the random variable
    params : ndarray, (nobs, 2)
        mean and variance (mu, sigma2), one observation per row
    df : int
        degrees of freedom of the t distribution

    Returns
    -------
    lls : ndarray
        contribution of each observation to the loglikelihood

    Notes
    -----
    Parametrized for garch: the density is rescaled with ``df - 2`` instead
    of ``df``, so that sigma2 is the variance of the distribution.
    '''
    loc, var = params.T
    nu = df * 1.0
    nu_adj = nu - 2
    lls = gammaln((nu + 1)/2.) - gammaln(nu/2.) - 0.5*np.log(nu_adj*np.pi)
    lls -= (nu + 1)/2. * np.log(1. + (y - loc)**2/nu_adj/var) + 0.5*np.log(var)
    return lls
|
|
|
|
def norm_dlldy(y):
    '''Derivative of the standard normal log pdf with respect to y.

    The log pdf is ``-0.5*y**2`` plus a constant, so the derivative is ``-y``.
    '''
    dlldy = -y
    return dlldy
|
|
|
|
|
|
def tstd_pdf(x, df):
    '''pdf of the standardized (not standard) t distribution, variance is one

    Parameters
    ----------
    x : array_like
        points at which the pdf is evaluated
    df : array_like
        degrees of freedom; broadcast against ``x``

    Returns
    -------
    Px : ndarray or numpy scalar
        pdf evaluated at ``x``
    '''
    r = np.asarray(df, dtype=float)
    norm_const = np.exp(special.gammaln((r + 1)/2.) - special.gammaln(r/2.))
    norm_const = norm_const / np.sqrt((r - 2)*np.pi)
    # Divide out of place: an in-place division fails when ``df`` is an array
    # that has to be broadcast up to the shape of ``x``.
    return norm_const / (1 + (x**2)/(r - 2))**((r + 1)/2.)
|
|
|
|
def ts_lls(y, params, df):
    '''t loglikelihood given observations, location mu and squared scale sigma2

    Parameters
    ----------
    y : ndarray, 1d
        observed values of the random variable
    params : ndarray, (nobs, 2)
        array of location and squared scale (mu, sigma2) with observations
        in rows
    df : int
        degrees of freedom of the t distribution

    Returns
    -------
    lls : ndarray
        contribution to loglikelihood for each observation

    Notes
    -----
    This is the usual loc-scale t distribution with ``scale = sqrt(sigma2)``,
    so sigma2 is *not* the variance; the variance is ``sigma2*df/(df-2)``.
    To obtain a t distribution with variance ``sigma**2``, as used for
    garch, the scale has to be set to ``sigma*np.sqrt((df-2.)/df)``, which
    is the parametrization used by ``tstd_lls``.
    '''
    mu, sigma2 = params.T
    df = df*1.0
    # normalizing constant of the t density
    lls = gammaln((df + 1)/2.) - gammaln(df/2.) - 0.5*np.log(df*np.pi)
    # kernel of the density plus the log-scale term of the loc-scale transform
    lls = lls - ((df + 1.)/2. * np.log(1. + (y - mu)**2/df/sigma2)
                 + 0.5*np.log(sigma2))
    return lls
|
|
|
|
|
|
def ts_dlldy(y, df):
    '''Derivative of the log pdf of the standard t distribution wrt y.

    Parameters
    ----------
    y : array_like
        points of the random variable at which the derivative is evaluated
    df : array_like
        degrees of freedom, the shape parameter of the t distribution

    Returns
    -------
    dlldy : ndarray
        derivative of the loglikelihood wrt the random variable, evaluated
        at the points given in y

    Notes
    -----
    Standard t distribution with mean 0 and scale 1; its variance is
    df/(df-2), not 1.
    '''
    nu = df * 1.
    # 1 + y**2/df is the base of the t kernel
    kernel_base = 1 + y**2 / nu
    return -(nu + 1) / nu / kernel_base * y
|
|
|
|
def tstd_dlldy(y, df):
    '''Derivative of the log pdf of the standardized t distribution wrt y.

    Parameters
    ----------
    y : array_like
        points of the random variable at which the derivative is evaluated
    df : array_like
        degrees of freedom, the shape parameter of the t distribution

    Returns
    -------
    dlldy : ndarray
        derivative of the loglikelihood wrt the random variable, evaluated
        at the points given in y

    Notes
    -----
    Parametrized for garch: the distribution is standardized to
    variance = 1, which replaces ``df`` by ``df - 2`` in the kernel.
    '''
    nu_adj = df - 2.
    kernel_base = 1 + y**2 / nu_adj
    return -(df + 1) / nu_adj / kernel_base * y
|
|
|
|
def locscale_grad(y, loc, scale, dlldy, *args):
    '''derivative of log-likelihood with respect to location and scale

    The loglikelihood of a loc-scale family is
    ``ll((y - loc)/scale) - log(scale)``; both derivatives follow from the
    chain rule applied to the standardized variable.

    Parameters
    ----------
    y : array_like
        data points of random variable at which loglike is evaluated
    loc : float or array_like
        location parameter of distribution, broadcast against ``y``
    scale : float or array_like
        scale parameter of distribution, broadcast against ``y``
    dlldy : function
        derivative of the loglikelihood function of the standardized
        distribution wrt. the random variable, called as
        ``dlldy(ystandardized, *args)``
    args : array_like
        shape parameters of log-likelihood function

    Returns
    -------
    dlldloc : ndarray
        derivative of loglikelihood wrt location evaluated at the
        points given in y
    dlldscale : ndarray
        derivative of loglikelihood wrt scale evaluated at the
        points given in y
    '''
    resid = y - loc
    # the score of the standardized variable is needed for both derivatives,
    # evaluate it only once
    score = dlldy(resid/scale, *args)
    dlldloc = -score / scale
    # -1/scale comes from the log(scale) Jacobian term of the density
    dlldscale = -1./scale - score * resid/scale**2
    return dlldloc, dlldscale
|
|
|
|
if __name__ == '__main__':
    # Ad-hoc checks of the analytical gradients against numerical
    # derivatives of the scipy.stats log pdfs. Set verbose to a true value
    # to also print the intermediate comparisons.
    from numpy.testing import assert_almost_equal
    from scipy import stats

    def _num_deriv(func, x0, dx, args=()):
        '''Central-difference first derivative of func wrt its first argument.

        Local replacement for ``scipy.misc.derivative(..., n=1, order=3)``,
        which has been removed from SciPy.
        '''
        upper = func(x0 + dx, *args)
        lower = func(x0 - dx, *args)
        return (upper - lower) / (2.0*dx)

    verbose = 0
    if verbose:
        sig = 0.1
        beta = np.ones(2)
        rvs = np.random.randn(10, 3)
        x = rvs[:, 1:]
        y = np.dot(x, beta) + sig*rvs[:, 0]

        params = [1, 1, 1]
        print(normgrad(y, x, params))

        dllfdbeta = (y - np.dot(x, beta))[:, None]*x   # for sigma = 1
        print(dllfdbeta)

        print(locscale_grad(y, np.dot(x, beta), 1, norm_dlldy))
        print(y - np.dot(x, beta))

    # The log pdf helpers differ only in which parameter comes first, because
    # the numerical derivative is taken wrt the first argument.
    def llt(y, loc, scale, df):
        return np.log(stats.t.pdf(y, df, loc=loc, scale=scale))

    def lltloc(loc, y, scale, df):
        return np.log(stats.t.pdf(y, df, loc=loc, scale=scale))

    def lltscale(scale, y, loc, df):
        return np.log(stats.t.pdf(y, df, loc=loc, scale=scale))

    def llnorm(y, loc, scale):
        return np.log(stats.norm.pdf(y, loc=loc, scale=scale))

    def llnormloc(loc, y, scale):
        return np.log(stats.norm.pdf(y, loc=loc, scale=scale))

    def llnormscale(scale, y, loc):
        return np.log(stats.norm.pdf(y, loc=loc, scale=scale))

    if verbose:
        print('\ngradient of t')
        print(_num_deriv(llt, 1, dx=1e-6, args=(0, 1, 10)))
        print('t ', locscale_grad(1, 0, 1, tstd_dlldy, 10))
        print('ts', locscale_grad(1, 0, 1, ts_dlldy, 10))
        print(_num_deriv(llt, 1.5, dx=1e-10, args=(0, 1, 20)))
        print('ts', locscale_grad(1.5, 0, 1, ts_dlldy, 20))
        print(_num_deriv(llt, 1.5, dx=1e-10, args=(0, 2, 20)))
        print('ts', locscale_grad(1.5, 0, 2, ts_dlldy, 20))
        print(_num_deriv(llt, 1.5, dx=1e-10, args=(1, 2, 20)))
        print('ts', locscale_grad(1.5, 1, 2, ts_dlldy, 20))
        print(_num_deriv(lltloc, 1, dx=1e-10, args=(1.5, 2, 20)))
        print(_num_deriv(lltscale, 2, dx=1e-10, args=(1.5, 1, 20)))
        y, loc, scale, df = 1.5, 1, 2, 20
        print('ts', locscale_grad(y, loc, scale, ts_dlldy, 20))
        print(_num_deriv(lltloc, loc, dx=1e-10, args=(y, scale, df)))
        print(_num_deriv(lltscale, scale, dx=1e-10, args=(y, loc, df)))

        print('\ngradient of norm')
        print(_num_deriv(llnorm, 1, dx=1e-6, args=(0, 1)))
        print(locscale_grad(1, 0, 1, norm_dlldy))
        y, loc, scale = 1.5, 1, 2
        print('ts', locscale_grad(y, loc, scale, norm_dlldy))
        print(_num_deriv(llnormloc, loc, dx=1e-10, args=(y, scale)))
        print(_num_deriv(llnormscale, scale, dx=1e-10, args=(y, loc)))
        y, loc, scale = 1.5, 0, 1
        print('ts', locscale_grad(y, loc, scale, norm_dlldy))
        print(_num_deriv(llnormloc, loc, dx=1e-10, args=(y, scale)))
        print(_num_deriv(llnormscale, scale, dx=1e-10, args=(y, loc)))
        #print('still something wrong with handling of scale and variance'
        #looks ok now
        print('\nloglike of t')
        print(tstd_lls(1, np.array([0, 1]), 100), llt(1, 0, 1, 100),
              'differently standardized')
        print(tstd_lls(1, np.array([0, 1]), 10), llt(1, 0, 1, 10),
              'differently standardized')
        print(ts_lls(1, np.array([0, 1]), 10), llt(1, 0, 1, 10))
        print(tstd_lls(1, np.array([0, 1.*10./8.]), 10), llt(1., 0, 1., 10))
        print(ts_lls(1, np.array([0, 1]), 100), llt(1, 0, 1, 100))

        print(tstd_lls(1, np.array([0, 1]), 10),
              llt(1, 0, 1.*np.sqrt(8/10.), 10))

    params = [(0, 1), (1., 1.), (0., 2.), (1., 2.)]
    yt = np.linspace(-2., 2., 11)

    # normal distribution: analytical loc/scale gradient vs numerical one
    for loc, scale in params:
        dlldlo = _num_deriv(llnormloc, loc, dx=1e-10, args=(yt, scale))
        dlldsc = _num_deriv(llnormscale, scale, dx=1e-10, args=(yt, loc))
        gr = locscale_grad(yt, loc, scale, norm_dlldy)
        assert_almost_equal(dlldlo, gr[0], 5, err_msg='deriv loc')
        assert_almost_equal(dlldsc, gr[1], 5, err_msg='deriv scale')

    # t distribution: gradients and both loglikelihood parametrizations
    for df in [3, 10, 100]:
        for loc, scale in params:
            dlldlo = _num_deriv(lltloc, loc, dx=1e-10, args=(yt, scale, df))
            dlldsc = _num_deriv(lltscale, scale, dx=1e-10, args=(yt, loc, df))
            gr = locscale_grad(yt, loc, scale, ts_dlldy, df)
            assert_almost_equal(dlldlo, gr[0], 4, err_msg='deriv loc')
            assert_almost_equal(dlldsc, gr[1], 4, err_msg='deriv scale')
            assert_almost_equal(ts_lls(yt, np.array([loc, scale**2]), df),
                                llt(yt, loc, scale, df), 5,
                                err_msg='loglike')
            # the standardized version matches scipy's t once the scale is
            # shrunk by sqrt((df-2)/df)
            assert_almost_equal(tstd_lls(yt, np.array([loc, scale**2]), df),
                                llt(yt, loc, scale*np.sqrt((df-2.)/df), df), 5,
                                err_msg='loglike')
|