"""Longley dataset""" from statsmodels.datasets import utils as du __docformat__ = 'restructuredtext' COPYRIGHT = """This is public domain.""" TITLE = __doc__ SOURCE = """ The classic 1967 Longley Data http://www.itl.nist.gov/div898/strd/lls/data/Longley.shtml :: Longley, J.W. (1967) "An Appraisal of Least Squares Programs for the Electronic Comptuer from the Point of View of the User." Journal of the American Statistical Association. 62.319, 819-41. """ DESCRSHORT = """""" DESCRLONG = """The Longley dataset contains various US macroeconomic variables that are known to be highly collinear. It has been used to appraise the accuracy of least squares routines.""" NOTE = """:: Number of Observations - 16 Number of Variables - 6 Variable name definitions:: TOTEMP - Total Employment GNPDEFL - GNP deflator GNP - GNP UNEMP - Number of unemployed ARMED - Size of armed forces POP - Population YEAR - Year (1947 - 1962) """ def load(): """ Load the Longley data and return a Dataset class. Returns ------- Dataset See DATASET_PROPOSAL.txt for more information. """ return load_pandas() def load_pandas(): """ Load the Longley data and return a Dataset class. Returns ------- Dataset See DATASET_PROPOSAL.txt for more information. """ data = _get_data() return du.process_pandas(data, endog_idx=0) def _get_data(): data = du.load_csv(__file__, 'longley.csv') data = data.iloc[:, [1, 2, 3, 4, 5, 6, 7]].astype(float) return data