148 lines
4.0 KiB
Python
148 lines
4.0 KiB
Python
|
"""Principal Component Analysis
|
||
|
|
||
|
|
||
|
Created on Tue Sep 29 20:11:23 2009
|
||
|
Author: josef-pktd
|
||
|
|
||
|
TODO : add class for better reuse of results
|
||
|
"""
|
||
|
|
||
|
import numpy as np
|
||
|
|
||
|
|
||
|
def pca(data, keepdim=0, normalize=0, demean=True):
    '''principal components with eigenvector decomposition

    similar to princomp in matlab

    Parameters
    ----------
    data : array_like, 2d
        data with observations by rows and variables in columns.
        The input is never modified; integer data is accepted.
    keepdim : int
        number of eigenvectors to keep
        if keepdim is zero (or not smaller than the number of variables),
        then all eigenvectors are included
    normalize : bool
        if true, then eigenvectors are normalized by sqrt of eigenvalues
    demean : bool
        if true, then the column mean is subtracted from the data

    Returns
    -------
    xreduced : ndarray, 2d, (nobs, nvars)
        projection of the data x on the kept eigenvectors, with the column
        mean added back
    factors : ndarray, 2d, (nobs, nfactors)
        factor matrix, given by np.dot(x, evecs)
    evals : ndarray, 1d, (nfactors,)
        eigenvalues of the covariance matrix in descending order
    evecs : ndarray, 2d, (nvars, nfactors)
        eigenvectors in columns, ordered like evals, normalized if
        normalize is true

    Notes
    -----
    The covariance matrix is symmetric, so the decomposition uses
    ``np.linalg.eigh``, which always returns real eigenvalues. The sign of
    each eigenvector (and hence of each factor column) is arbitrary.

    When normalize is true the scaled eigenvectors are also used to build
    xreduced, so xreduced is then not an orthogonal projection of the data.

    See Also
    --------
    pcasvd : principal component analysis using svd

    '''
    x = np.asarray(data)
    if demean:
        m = x.mean(0)
    else:
        m = np.zeros(x.shape[1])
    # Out-of-place subtraction: leaves the caller's array untouched and
    # upcasts integer input to float instead of raising a casting error.
    x = x - m

    # Covariance matrix; np.cov collapses a single variable to a 0-d array,
    # so force it back to 2d for the eigen-decomposition.
    xcov = np.atleast_2d(np.cov(x, rowvar=False))

    # eigh returns eigenvalues in ascending order; flip to descending.
    evals, evecs = np.linalg.eigh(xcov)
    evals = evals[::-1]
    evecs = evecs[:, ::-1]

    if 0 < keepdim < x.shape[1]:
        evecs = evecs[:, :keepdim]
        evals = evals[:keepdim]

    if normalize:
        # scale each eigenvector column by 1/sqrt of its eigenvalue
        evecs = evecs / np.sqrt(evals)

    # factor matrix (principal component scores)
    factors = np.dot(x, evecs)
    # map the factors back to the original variable space
    xreduced = np.dot(factors, evecs.T) + m
    return xreduced, factors, evals, evecs
|
||
|
|
||
|
|
||
|
|
||
|
def pcasvd(data, keepdim=0, demean=True):
    '''principal components with svd

    Parameters
    ----------
    data : array_like, 2d
        data with observations by rows and variables in columns.
        The input is never modified; integer data and nested sequences
        are accepted.
    keepdim : int
        number of eigenvectors to keep
        if keepdim is zero, then all eigenvectors are included
    demean : bool
        if true, then the column mean is subtracted from the data

    Returns
    -------
    xreduced : ndarray, 2d, (nobs, nvars)
        projection of the data x on the kept eigenvectors, with the column
        mean added back; a copy of the data if keepdim is zero
    factors : ndarray, 2d, (nobs, nfactors)
        factor matrix, given by np.dot(x, evecs)
    evals : ndarray, 1d, (nfactors,)
        eigenvalues of the covariance matrix in descending order
    evecs : ndarray, 2d, (nvars, nfactors)
        eigenvectors (left singular vectors of x.T) in columns

    See Also
    --------
    pca : principal component analysis using eigenvector decomposition

    Notes
    -----
    This does not have yet the normalize option of pca.

    The sign of each eigenvector (and hence of each factor column) is
    arbitrary.

    '''
    x = np.asarray(data)
    nobs, nvars = x.shape
    if demean:
        m = x.mean(0)
    else:
        m = 0
    # Out-of-place subtraction: leaves the caller's array untouched and
    # upcasts integer input to float instead of raising a casting error.
    x = x - m

    # Only the left singular vectors of x.T are used. The full decomposition
    # would also build an (nobs, nobs) right factor, so request the reduced
    # form whenever it leaves U with its full (nvars, nvars) shape.
    U, s, _ = np.linalg.svd(x.T, full_matrices=nobs < nvars)
    factors = np.dot(x, U)  # principal component scores

    if keepdim:
        xreduced = np.dot(factors[:, :keepdim], U[:, :keepdim].T) + m
    else:
        # all components kept: the reconstruction is the data itself;
        # return a copy so the result does not alias the caller's input
        xreduced = np.array(data)
        keepdim = nvars

    # s = singular values, U = evecs
    # Dividing by nobs - 1 matches the default normalization of np.cov
    # (ddof=1), so evals are the eigenvalues of the sample covariance matrix.
    evals = s**2 / (nobs - 1)
    return xreduced, factors[:, :keepdim], evals[:keepdim], U[:, :keepdim]
|
||
|
|
||
|
|
||
|
# Names exported by ``from <module> import *``.
__all__ = ['pca', 'pcasvd']
|