AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/miscmodels/try_mlecov.py

'''Multivariate Normal Model with full covariance matrix

toeplitz structure is not exploited, need cholesky or inv for toeplitz

Author: josef-pktd
'''

import numpy as np
from scipy import linalg
from scipy.linalg import toeplitz

from statsmodels.base.model import GenericLikelihoodModel
from statsmodels.datasets import sunspots
from statsmodels.tsa.arima_process import (
    ArmaProcess,
    arma_acovf,
    arma_generate_sample,
)


def mvn_loglike_sum(x, sigma):
    '''loglike multivariate normal

    copied from GLS and adjusted names
    not sure why this differes from mvn_loglike
    '''
    nobs = len(x)
    nobs2 = nobs / 2.0
    SSR = (x**2).sum()
    llf = -np.log(SSR) * nobs2      # concentrated likelihood
    llf -= (1+np.log(np.pi/nobs2))*nobs2  # with likelihood constant
    if np.any(sigma) and sigma.ndim == 2:
    #FIXME: robust-enough check?  unneeded if _det_sigma gets defined
        llf -= .5*np.log(np.linalg.det(sigma))
    return llf

def mvn_loglike(x, sigma):
    '''loglike multivariate normal

    assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)

    brute force from formula
    no checking of correct inputs
    use of inv and log-det should be replace with something more efficient
    '''
    #see numpy thread
    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
    sigmainv = linalg.inv(sigma)
    logdetsigma = np.log(np.linalg.det(sigma))
    nobs = len(x)

    llf = - np.dot(x, np.dot(sigmainv, x))
    llf -= nobs * np.log(2 * np.pi)
    llf -= logdetsigma
    llf *= 0.5
    return llf

def mvn_loglike_chol(x, sigma):
    '''loglike multivariate normal

    assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)

    brute force from formula
    no checking of correct inputs
    use of inv and log-det should be replace with something more efficient
    '''
    #see numpy thread
    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)
    sigmainv = np.linalg.inv(sigma)
    cholsigmainv = np.linalg.cholesky(sigmainv).T
    x_whitened = np.dot(cholsigmainv, x)

    logdetsigma = np.log(np.linalg.det(sigma))
    nobs = len(x)
    from scipy import stats
    print('scipy.stats')
    print(np.log(stats.norm.pdf(x_whitened)).sum())

    llf = - np.dot(x_whitened.T, x_whitened)
    llf -= nobs * np.log(2 * np.pi)
    llf -= logdetsigma
    llf *= 0.5
    return llf, logdetsigma, 2 * np.sum(np.log(np.diagonal(cholsigmainv)))
#0.5 * np.dot(x_whitened.T, x_whitened) + nobs * np.log(2 * np.pi) + logdetsigma)

def mvn_nloglike_obs(x, sigma):
    '''loglike multivariate normal

    assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)

    brute force from formula
    no checking of correct inputs
    use of inv and log-det should be replace with something more efficient
    '''
    #see numpy thread
    #Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)

    #Still wasteful to calculate pinv first
    sigmainv = np.linalg.inv(sigma)
    cholsigmainv = np.linalg.cholesky(sigmainv).T
    #2 * np.sum(np.log(np.diagonal(np.linalg.cholesky(A)))) #Dag mailinglist
    # logdet not needed ???
    #logdetsigma = 2 * np.sum(np.log(np.diagonal(cholsigmainv)))
    x_whitened = np.dot(cholsigmainv, x)

    #sigmainv = linalg.cholesky(sigma)
    logdetsigma = np.log(np.linalg.det(sigma))

    sigma2 = 1. # error variance is included in sigma

    llike  =  0.5 * (np.log(sigma2) - 2.* np.log(np.diagonal(cholsigmainv))
                          + (x_whitened**2)/sigma2
                          +  np.log(2*np.pi))

    return llike


def invertibleroots(ma):
    proc = ArmaProcess(ma=ma)
    return proc.invertroots(retnew=False)


def getpoly(self, params):
    ar = np.r_[[1], -params[:self.nar]]
    ma = np.r_[[1], params[-self.nma:]]
    import numpy.polynomial as poly
    return poly.Polynomial(ar), poly.Polynomial(ma)

class MLEGLS(GenericLikelihoodModel):
    '''ARMA model with exact loglikelhood for short time series

    Inverts (nobs, nobs) matrix, use only for nobs <= 200 or so.

    This class is a pattern for small sample GLS-like models. Intended use
    for loglikelihood of initial observations for ARMA.


    TODO:
    This might be missing the error variance. Does it assume error is
       distributed N(0,1)
    Maybe extend to mean handling, or assume it is already removed.
    '''


    def _params2cov(self, params, nobs):
        '''get autocovariance matrix from ARMA regression parameter

        ar parameters are assumed to have rhs parameterization

        '''
        ar = np.r_[[1], -params[:self.nar]]
        ma = np.r_[[1], params[-self.nma:]]
        #print('ar', ar
        #print('ma', ma
        #print('nobs', nobs
        autocov = arma_acovf(ar, ma, nobs=nobs)
        #print('arma_acovf(%r, %r, nobs=%d)' % (ar, ma, nobs)
        #print(autocov.shape
        #something is strange  fixed in aram_acovf
        autocov = autocov[:nobs]
        sigma = toeplitz(autocov)
        return sigma

    def loglike(self, params):
        sig = self._params2cov(params[:-1], self.nobs)
        sig = sig * params[-1]**2
        loglik = mvn_loglike(self.endog, sig)
        return loglik

    def fit_invertible(self, *args, **kwds):
        res = self.fit(*args, **kwds)
        ma = np.r_[[1], res.params[self.nar: self.nar+self.nma]]
        mainv, wasinvertible = invertibleroots(ma)
        if not wasinvertible:
            start_params = res.params.copy()
            start_params[self.nar: self.nar+self.nma] = mainv[1:]
            #need to add args kwds
            res = self.fit(start_params=start_params)
        return res


if __name__ == '__main__':
    nobs = 50
    ar = [1.0, -0.8, 0.1]
    ma = [1.0,  0.1,  0.2]
    #ma = [1]
    np.random.seed(9875789)
    y = arma_generate_sample(ar,ma,nobs,2)
    y -= y.mean() #I have not checked treatment of mean yet, so remove
    mod = MLEGLS(y)
    mod.nar, mod.nma = 2, 2   #needs to be added, no init method
    mod.nobs = len(y)
    res = mod.fit(start_params=[0.1, -0.8, 0.2, 0.1, 1.])
    print('DGP', ar, ma)
    print(res.params)
    from statsmodels.regression import yule_walker
    print(yule_walker(y, 2))
    #resi = mod.fit_invertible(start_params=[0.1,0,0.2,0, 0.5])
    #print(resi.params

    arpoly, mapoly = getpoly(mod, res.params[:-1])

    data = sunspots.load()
    #ys = data.endog[-100:]
##    ys = data.endog[12:]-data.endog[:-12]
##    ys -= ys.mean()
##    mods = MLEGLS(ys)
##    mods.nar, mods.nma = 13, 1   #needs to be added, no init method
##    mods.nobs = len(ys)
##    ress = mods.fit(start_params=np.r_[0.4, np.zeros(12), [0.2, 5.]],maxiter=200)
##    print(ress.params
##    import matplotlib.pyplot as plt
##    plt.plot(data.endog[1])
##    #plt.show()

    sigma = mod._params2cov(res.params[:-1], nobs) * res.params[-1]**2
    print(mvn_loglike(y, sigma))
    llo = mvn_nloglike_obs(y, sigma)
    print(llo.sum(), llo.shape)
    print(mvn_loglike_chol(y, sigma))
    print(mvn_loglike_sum(y, sigma))
lab 1 is done 2024-10-02 22:15:59 +04:00			`'''Multivariate Normal Model with full covariance matrix`

			`toeplitz structure is not exploited, need cholesky or inv for toeplitz`

			`Author: josef-pktd`
			`'''`

			`import numpy as np`
			`from scipy import linalg`
			`from scipy.linalg import toeplitz`

			`from statsmodels.base.model import GenericLikelihoodModel`
			`from statsmodels.datasets import sunspots`
			`from statsmodels.tsa.arima_process import (`
			`ArmaProcess,`
			`arma_acovf,`
			`arma_generate_sample,`
			`)`


			`def mvn_loglike_sum(x, sigma):`
			`'''loglike multivariate normal`

			`copied from GLS and adjusted names`
			`not sure why this differes from mvn_loglike`
			`'''`
			`nobs = len(x)`
			`nobs2 = nobs / 2.0`
			`SSR = (x**2).sum()`
			`llf = -np.log(SSR) * nobs2 # concentrated likelihood`
			`llf -= (1+np.log(np.pi/nobs2))*nobs2 # with likelihood constant`
			`if np.any(sigma) and sigma.ndim == 2:`
			`#FIXME: robust-enough check? unneeded if _det_sigma gets defined`
			`llf -= .5*np.log(np.linalg.det(sigma))`
			`return llf`

			`def mvn_loglike(x, sigma):`
			`'''loglike multivariate normal`

			`assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)`

			`brute force from formula`
			`no checking of correct inputs`
			`use of inv and log-det should be replace with something more efficient`
			`'''`
			`#see numpy thread`
			`#Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)`
			`sigmainv = linalg.inv(sigma)`
			`logdetsigma = np.log(np.linalg.det(sigma))`
			`nobs = len(x)`

			`llf = - np.dot(x, np.dot(sigmainv, x))`
			`llf -= nobs * np.log(2 * np.pi)`
			`llf -= logdetsigma`
			`llf *= 0.5`
			`return llf`

			`def mvn_loglike_chol(x, sigma):`
			`'''loglike multivariate normal`

			`assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)`

			`brute force from formula`
			`no checking of correct inputs`
			`use of inv and log-det should be replace with something more efficient`
			`'''`
			`#see numpy thread`
			`#Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)`
			`sigmainv = np.linalg.inv(sigma)`
			`cholsigmainv = np.linalg.cholesky(sigmainv).T`
			`x_whitened = np.dot(cholsigmainv, x)`

			`logdetsigma = np.log(np.linalg.det(sigma))`
			`nobs = len(x)`
			`from scipy import stats`
			`print('scipy.stats')`
			`print(np.log(stats.norm.pdf(x_whitened)).sum())`

			`llf = - np.dot(x_whitened.T, x_whitened)`
			`llf -= nobs * np.log(2 * np.pi)`
			`llf -= logdetsigma`
			`llf *= 0.5`
			`return llf, logdetsigma, 2 * np.sum(np.log(np.diagonal(cholsigmainv)))`
			`#0.5 * np.dot(x_whitened.T, x_whitened) + nobs * np.log(2 * np.pi) + logdetsigma)`

			`def mvn_nloglike_obs(x, sigma):`
			`'''loglike multivariate normal`

			`assumes x is 1d, (nobs,) and sigma is 2d (nobs, nobs)`

			`brute force from formula`
			`no checking of correct inputs`
			`use of inv and log-det should be replace with something more efficient`
			`'''`
			`#see numpy thread`
			`#Sturla: sqmahal = (cx*cho_solve(cho_factor(S),cx.T).T).sum(axis=1)`

			`#Still wasteful to calculate pinv first`
			`sigmainv = np.linalg.inv(sigma)`
			`cholsigmainv = np.linalg.cholesky(sigmainv).T`
			`#2 * np.sum(np.log(np.diagonal(np.linalg.cholesky(A)))) #Dag mailinglist`
			`# logdet not needed ???`
			`#logdetsigma = 2 * np.sum(np.log(np.diagonal(cholsigmainv)))`
			`x_whitened = np.dot(cholsigmainv, x)`

			`#sigmainv = linalg.cholesky(sigma)`
			`logdetsigma = np.log(np.linalg.det(sigma))`

			`sigma2 = 1. # error variance is included in sigma`

			`llike = 0.5 * (np.log(sigma2) - 2.* np.log(np.diagonal(cholsigmainv))`
			`+ (x_whitened**2)/sigma2`
			`+ np.log(2*np.pi))`

			`return llike`


			`def invertibleroots(ma):`
			`proc = ArmaProcess(ma=ma)`
			`return proc.invertroots(retnew=False)`


			`def getpoly(self, params):`
			`ar = np.r_[[1], -params[:self.nar]]`
			`ma = np.r_[[1], params[-self.nma:]]`
			`import numpy.polynomial as poly`
			`return poly.Polynomial(ar), poly.Polynomial(ma)`

			`class MLEGLS(GenericLikelihoodModel):`
			`'''ARMA model with exact loglikelhood for short time series`

			`Inverts (nobs, nobs) matrix, use only for nobs <= 200 or so.`

			`This class is a pattern for small sample GLS-like models. Intended use`
			`for loglikelihood of initial observations for ARMA.`



			`TODO:`
			`This might be missing the error variance. Does it assume error is`
			`distributed N(0,1)`
			`Maybe extend to mean handling, or assume it is already removed.`
			`'''`


			`def _params2cov(self, params, nobs):`
			`'''get autocovariance matrix from ARMA regression parameter`

			`ar parameters are assumed to have rhs parameterization`

			`'''`
			`ar = np.r_[[1], -params[:self.nar]]`
			`ma = np.r_[[1], params[-self.nma:]]`
			`#print('ar', ar`
			`#print('ma', ma`
			`#print('nobs', nobs`
			`autocov = arma_acovf(ar, ma, nobs=nobs)`
			`#print('arma_acovf(%r, %r, nobs=%d)' % (ar, ma, nobs)`
			`#print(autocov.shape`
			`#something is strange fixed in aram_acovf`
			`autocov = autocov[:nobs]`
			`sigma = toeplitz(autocov)`
			`return sigma`

			`def loglike(self, params):`
			`sig = self._params2cov(params[:-1], self.nobs)`
			`sig = sig * params[-1]**2`
			`loglik = mvn_loglike(self.endog, sig)`
			`return loglik`

			`def fit_invertible(self, args, *kwds):`
			`res = self.fit(args, *kwds)`
			`ma = np.r_[[1], res.params[self.nar: self.nar+self.nma]]`
			`mainv, wasinvertible = invertibleroots(ma)`
			`if not wasinvertible:`
			`start_params = res.params.copy()`
			`start_params[self.nar: self.nar+self.nma] = mainv[1:]`
			`#need to add args kwds`
			`res = self.fit(start_params=start_params)`
			`return res`



			`if __name__ == '__main__':`
			`nobs = 50`
			`ar = [1.0, -0.8, 0.1]`
			`ma = [1.0, 0.1, 0.2]`
			`#ma = [1]`
			`np.random.seed(9875789)`
			`y = arma_generate_sample(ar,ma,nobs,2)`
			`y -= y.mean() #I have not checked treatment of mean yet, so remove`
			`mod = MLEGLS(y)`
			`mod.nar, mod.nma = 2, 2 #needs to be added, no init method`
			`mod.nobs = len(y)`
			`res = mod.fit(start_params=[0.1, -0.8, 0.2, 0.1, 1.])`
			`print('DGP', ar, ma)`
			`print(res.params)`
			`from statsmodels.regression import yule_walker`
			`print(yule_walker(y, 2))`
			`#resi = mod.fit_invertible(start_params=[0.1,0,0.2,0, 0.5])`
			`#print(resi.params`

			`arpoly, mapoly = getpoly(mod, res.params[:-1])`

			`data = sunspots.load()`
			`#ys = data.endog[-100:]`
			`## ys = data.endog[12:]-data.endog[:-12]`
			`## ys -= ys.mean()`
			`## mods = MLEGLS(ys)`
			`## mods.nar, mods.nma = 13, 1 #needs to be added, no init method`
			`## mods.nobs = len(ys)`
			`## ress = mods.fit(start_params=np.r_[0.4, np.zeros(12), [0.2, 5.]],maxiter=200)`
			`## print(ress.params`
			`## import matplotlib.pyplot as plt`
			`## plt.plot(data.endog[1])`
			`## #plt.show()`

			`sigma = mod._params2cov(res.params[:-1], nobs) * res.params[-1]**2`
			`print(mvn_loglike(y, sigma))`
			`llo = mvn_nloglike_obs(y, sigma)`
			`print(llo.sum(), llo.shape)`
			`print(mvn_loglike_chol(y, sigma))`
			`print(mvn_loglike_sum(y, sigma))`