"""Generate a random process with panel structure Created on Sat Dec 17 22:15:27 2011 Author: Josef Perktold Notes ----- * written with unbalanced panels in mind, but not flexible enough yet * need more shortcuts and options for balanced panel * need to add random intercept or coefficients * only one-way (repeated measures) so far """ import numpy as np from . import correlation_structures as cs class PanelSample: '''data generating process for panel with within correlation allows various within correlation structures, but no random intercept yet Parameters ---------- nobs : int total number of observations k_vars : int number of explanatory variables to create in exog, including constant n_groups int number of groups in balanced sample exog : None or ndarray default is None, in which case a exog is created within : bool If True (default), then the exog vary within a group. If False, then only variation across groups is used. TODO: this option needs more work corr_structure : ndarray or ?? Default is np.eye. corr_args : tuple arguments for the corr_structure scale : float scale of noise, standard deviation of normal distribution seed : None or int If seed is given, then this is used to create the random numbers for the sample. Notes ----- The behavior for panel robust covariance estimators seems to differ by a large amount by whether exog have mostly within group or across group variation. I do not understand why this should be the case from the theory, and this would warrant more investigation. This is just used in one example so far and needs more usage to see what will be useful to add. ''' def __init__(self, nobs, k_vars, n_groups, exog=None, within=True, corr_structure=np.eye, corr_args=(), scale=1, seed=None): nobs_i = nobs//n_groups nobs = nobs_i * n_groups #make balanced self.nobs = nobs self.nobs_i = nobs_i self.n_groups = n_groups self.k_vars = k_vars self.corr_structure = corr_structure self.groups = np.repeat(np.arange(n_groups), nobs_i) self.group_indices = np.arange(n_groups+1) * nobs_i #check +1 if exog is None: if within: #t = np.tile(np.linspace(-1,1,nobs_i), n_groups) t = np.tile(np.linspace(0, 2, nobs_i), n_groups) #rs2 = np.random.RandomState(9876) #t = 1 + 0.3 * rs2.randn(nobs_i * n_groups) #mix within and across variation #t += np.repeat(np.linspace(-1,1,nobs_i), n_groups) else: #no within group variation, t = np.repeat(np.linspace(-1,1,nobs_i), n_groups) exog = t[:,None]**np.arange(k_vars) self.exog = exog #self.y_true = exog.sum(1) #all coefficients equal 1, #moved to make random coefficients #initialize self.y_true = None self.beta = None if seed is None: seed = np.random.randint(0, 999999) self.seed = seed self.random_state = np.random.RandomState(seed) #this makes overwriting difficult, move to method? self.std = scale * np.ones(nobs_i) corr = self.corr_structure(nobs_i, *corr_args) self.cov = cs.corr2cov(corr, self.std) self.group_means = np.zeros(n_groups) def get_y_true(self): if self.beta is None: self.y_true = self.exog.sum(1) else: self.y_true = np.dot(self.exog, self.beta) def generate_panel(self): ''' generate endog for a random panel dataset with within correlation ''' random = self.random_state if self.y_true is None: self.get_y_true() nobs_i = self.nobs_i n_groups = self.n_groups use_balanced = True if use_balanced: #much faster for balanced case noise = self.random_state.multivariate_normal(np.zeros(nobs_i), self.cov, size=n_groups).ravel() #need to add self.group_means noise += np.repeat(self.group_means, nobs_i) else: noise = np.empty(self.nobs, np.float64) noise.fill(np.nan) for ii in range(self.n_groups): #print ii, idx, idxupp = self.group_indices[ii:ii+2] #print idx, idxupp mean_i = self.group_means[ii] noise[idx:idxupp] = self.random_state.multivariate_normal( mean_i * np.ones(self.nobs_i), self.cov) endog = self.y_true + noise return endog