AIM-PIbd-32-Kurbanova-A-A/aimenv/Lib/site-packages/statsmodels/sandbox/tools/tools_pca.py

"""Principal Component Analysis


Created on Tue Sep 29 20:11:23 2009
Author: josef-pktd

TODO : add class for better reuse of results
"""

import numpy as np


def pca(data, keepdim=0, normalize=0, demean=True):
    '''principal components with eigenvector decomposition
    similar to princomp in matlab

    Parameters
    ----------
    data : array_like, 2d
        data with observations by rows and variables in columns.
        The input is copied and never modified; integer or boolean data
        is converted to float before the mean is subtracted.
    keepdim : int
        number of eigenvectors to keep
        if keepdim is zero, negative, or not smaller than the number of
        variables, then all eigenvectors are included
    normalize : bool
        if true, then eigenvectors are divided by the sqrt of eigenvalues
    demean : bool
        if true, then the column mean is subtracted from the data

    Returns
    -------
    xreduced : ndarray, 2d, (nobs, nvars)
        np.dot(factors, evecs.T) plus the subtracted column mean, i.e. the
        projection of the data x on the kept eigenvectors
    factors : ndarray, 2d, (nobs, nfactors)
        factor matrix, given by np.dot(x, evecs)
    evals : ndarray, 1d, (nfactors,)
        eigenvalues of the covariance matrix in descending order
    evecs : ndarray, 2d, (nvars, nfactors)
        eigenvectors in columns, rescaled if normalize is true

    Notes
    -----
    If normalize is true, both factors and xreduced are computed from the
    rescaled eigenvectors, so xreduced is then no longer a reconstruction
    of the data.

    See Also
    --------
    pcasvd : principal component analysis using svd

    '''
    # np.array copies, so the caller's data is left untouched
    x = np.array(data)
    if not np.issubdtype(x.dtype, np.inexact):
        # the in-place demeaning below cannot store a float result in an
        # integer or boolean array, so switch to float first
        x = x.astype(float)

    if demean:
        m = x.mean(0)
    else:
        m = np.zeros(x.shape[1])
    x -= m

    # Covariance matrix, variables in columns
    xcov = np.cov(x, rowvar=0)

    # Compute eigenvalues and sort into descending order
    evals, evecs = np.linalg.eig(xcov)
    indices = np.argsort(evals)[::-1]
    evecs = evecs[:, indices]
    evals = evals[indices]

    if 0 < keepdim < x.shape[1]:
        evecs = evecs[:, :keepdim]
        evals = evals[:keepdim]

    if normalize:
        # broadcasting divides column i of evecs by sqrt(evals[i])
        evecs = evecs / np.sqrt(evals)

    # get factor matrix
    factors = np.dot(x, evecs)
    # map the factors back to the space of the variables and add the mean
    xreduced = np.dot(factors, evecs.T) + m
    return xreduced, factors, evals, evecs


def pcasvd(data, keepdim=0, demean=True):
    '''principal components with svd

    Parameters
    ----------
    data : array_like, 2d
        data with observations by rows and variables in columns.
        The input is copied and never modified; integer or boolean data
        is converted to float before the mean is subtracted.
    keepdim : int
        number of eigenvectors to keep
        if keepdim is zero, then all eigenvectors are included
    demean : bool
        if true, then the column mean is subtracted from the data

    Returns
    -------
    xreduced : ndarray, 2d, (nobs, nvars)
        projection of the data x on the kept eigenvectors, with the column
        mean added back. If keepdim is zero, this is a copy of the data.
    factors : ndarray, 2d, (nobs, nfactors)
        factor matrix, given by np.dot(x, evecs)
    evals : ndarray, 1d
        squared singular values divided by (nobs - 1), truncated to at
        most keepdim entries
    evecs : ndarray, 2d, (nvars, nfactors)
        left singular vectors of the transposed, demeaned data, in columns

    See Also
    --------
    pca : principal component analysis using eigenvector decomposition

    Notes
    -----
    This does not have yet the normalize option of pca.

    '''
    # np.array copies, so the caller's data is left untouched
    x = np.array(data)
    if not np.issubdtype(x.dtype, np.inexact):
        # the in-place demeaning below cannot store a float result in an
        # integer or boolean array, so switch to float first
        x = x.astype(float)
    nobs, nvars = x.shape

    if demean:
        m = x.mean(0)
    else:
        m = 0
    x -= m

    # Only U and s are used. The full decomposition would also build an
    # unused (nobs, nobs) matrix of right singular vectors, so it is only
    # requested when it is needed to get a square (nvars, nvars) U.
    U, s, _ = np.linalg.svd(x.T, full_matrices=nvars > nobs)
    factors = np.dot(x, U)  # princomps

    if keepdim:
        xreduced = np.dot(factors[:, :keepdim], U[:, :keepdim].T) + m
    else:
        # all components are kept: hand back a fresh copy of the data
        # instead of an alias to the caller's own object
        xreduced = np.array(data, dtype=x.dtype)
        keepdim = nvars

    # s = sqrt of unscaled evals, U = evecs
    # dividing by nobs - 1 is the default normalization of np.cov, which
    # pca uses for the covariance matrix
    evals = s**2 / (nobs - 1)
    return xreduced, factors[:, :keepdim], evals[:keepdim], U[:, :keepdim]


# Names exported by ``from <module> import *``: the two PCA entry points.
__all__ = ['pca', 'pcasvd']
lab 1 is done 2024-10-02 22:15:59 +04:00			`"""Principal Component Analysis`


			`Created on Tue Sep 29 20:11:23 2009`
			`Author: josef-pktd`

			`TODO : add class for better reuse of results`
			`"""`

			`import numpy as np`


			`def pca(data, keepdim=0, normalize=0, demean=True):`
			`'''principal components with eigenvector decomposition`
			`similar to princomp in matlab`

			`Parameters`
			`----------`
			`data : ndarray, 2d`
			`data with observations by rows and variables in columns`
			`keepdim : int`
			`number of eigenvectors to keep`
			`if keepdim is zero, then all eigenvectors are included`
			`normalize : bool`
			`if true, then eigenvectors are normalized by sqrt of eigenvalues`
			`demean : bool`
			`if true, then the column mean is subtracted from the data`

			`Returns`
			`-------`
			`xreduced : ndarray, 2d, (nobs, nvars)`
			`projection of the data x on the kept eigenvectors`
			`factors : ndarray, 2d, (nobs, nfactors)`
			`factor matrix, given by np.dot(x, evecs)`
			`evals : ndarray, 2d, (nobs, nfactors)`
			`eigenvalues`
			`evecs : ndarray, 2d, (nobs, nfactors)`
			`eigenvectors, normalized if normalize is true`

			`Notes`
			`-----`

			`See Also`
			`--------`
			`pcasvd : principal component analysis using svd`

			`'''`
			`x = np.array(data)`
			`#make copy so original does not change, maybe not necessary anymore`
			`if demean:`
			`m = x.mean(0)`
			`else:`
			`m = np.zeros(x.shape[1])`
			`x -= m`

			`# Covariance matrix`
			`xcov = np.cov(x, rowvar=0)`

			`# Compute eigenvalues and sort into descending order`
			`evals, evecs = np.linalg.eig(xcov)`
			`indices = np.argsort(evals)`
			`indices = indices[::-1]`
			`evecs = evecs[:,indices]`
			`evals = evals[indices]`

			`if keepdim > 0 and keepdim < x.shape[1]:`
			`evecs = evecs[:,:keepdim]`
			`evals = evals[:keepdim]`

			`if normalize:`
			`#for i in range(shape(evecs)[1]):`
			`# evecs[:,i] / linalg.norm(evecs[:,i]) * sqrt(evals[i])`
			`evecs = evecs/np.sqrt(evals) #np.sqrt(np.dot(evecs.T, evecs) * evals)`

			`# get factor matrix`
			`#x = np.dot(evecs.T, x.T)`
			`factors = np.dot(x, evecs)`
			`# get original data from reduced number of components`
			`#xreduced = np.dot(evecs.T, factors) + m`
			`#print x.shape, factors.shape, evecs.shape, m.shape`
			`xreduced = np.dot(factors, evecs.T) + m`
			`return xreduced, factors, evals, evecs`



			`def pcasvd(data, keepdim=0, demean=True):`
			`'''principal components with svd`

			`Parameters`
			`----------`
			`data : ndarray, 2d`
			`data with observations by rows and variables in columns`
			`keepdim : int`
			`number of eigenvectors to keep`
			`if keepdim is zero, then all eigenvectors are included`
			`demean : bool`
			`if true, then the column mean is subtracted from the data`

			`Returns`
			`-------`
			`xreduced : ndarray, 2d, (nobs, nvars)`
			`projection of the data x on the kept eigenvectors`
			`factors : ndarray, 2d, (nobs, nfactors)`
			`factor matrix, given by np.dot(x, evecs)`
			`evals : ndarray, 2d, (nobs, nfactors)`
			`eigenvalues`
			`evecs : ndarray, 2d, (nobs, nfactors)`
			`eigenvectors, normalized if normalize is true`

			`See Also`
			`--------`
			`pca : principal component analysis using eigenvector decomposition`

			`Notes`
			`-----`
			`This does not have yet the normalize option of pca.`

			`'''`
			`nobs, nvars = data.shape`
			`#print nobs, nvars, keepdim`
			`x = np.array(data)`
			`#make copy so original does not change`
			`if demean:`
			`m = x.mean(0)`
			`else:`
			`m = 0`
			`## if keepdim == 0:`
			`## keepdim = nvars`
			`## "print reassigning keepdim to max", keepdim`
			`x -= m`
			`U, s, v = np.linalg.svd(x.T, full_matrices=1)`
			`factors = np.dot(U.T, x.T).T #princomps`
			`if keepdim:`
			`xreduced = np.dot(factors[:,:keepdim], U[:,:keepdim].T) + m`
			`else:`
			`xreduced = data`
			`keepdim = nvars`
			`"print reassigning keepdim to max", keepdim`

			`# s = evals, U = evecs`
			`# no idea why denominator for s is with minus 1`
			`evals = s**2/(x.shape[0]-1)`
			`#print keepdim`
			`return xreduced, factors[:,:keepdim], evals[:keepdim], U[:,:keepdim] #, v`


			`__all__ = ['pca', 'pcasvd']`