74 lines
1.6 KiB
Python
74 lines
1.6 KiB
Python
"""Longley dataset"""
|
|
from statsmodels.datasets import utils as du
|
|
|
|
__docformat__ = 'restructuredtext'

# Module-level metadata strings describing the Longley dataset.  The text
# uses reStructuredText markup (see ``__docformat__`` above), so the bodies
# of the ``::`` literal blocks below must stay indented.
COPYRIGHT = """This is public domain."""
# The title simply reuses the module docstring.
TITLE = __doc__
# Provenance and bibliographic citation for the data.
SOURCE = """
The classic 1967 Longley Data

http://www.itl.nist.gov/div898/strd/lls/data/Longley.shtml

::

    Longley, J.W. (1967) "An Appraisal of Least Squares Programs for the
        Electronic Computer from the Point of View of the User." Journal of
        the American Statistical Association. 62.319, 819-41.
"""

# Intentionally empty: no short description is provided for this dataset.
DESCRSHORT = """"""

DESCRLONG = """The Longley dataset contains various US macroeconomic
variables that are known to be highly collinear. It has been used to appraise
the accuracy of least squares routines."""

# NOTE(review): the text below says "Number of Variables - 6" but lists seven
# names; presumably the count excludes the endogenous TOTEMP -- confirm.
NOTE = """::

    Number of Observations - 16

    Number of Variables - 6

    Variable name definitions::

            TOTEMP - Total Employment
            GNPDEFL - GNP deflator
            GNP - GNP
            UNEMP - Number of unemployed
            ARMED - Size of armed forces
            POP - Population
            YEAR - Year (1947 - 1962)
"""
|
|
|
|
|
|
|
|
def load():
    """
    Load the Longley data and return a Dataset class.

    This is a thin wrapper: it forwards to ``load_pandas`` and hands back
    whatever that function produces, unchanged.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    dataset = load_pandas()
    return dataset
|
|
|
|
|
|
def load_pandas():
    """
    Load the Longley data and return a Dataset class.

    The table produced by ``_get_data`` is passed to ``du.process_pandas``
    with ``endog_idx=0``.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    # endog_idx=0: presumably marks the first remaining column as the
    # endogenous variable -- confirm against du.process_pandas.
    return du.process_pandas(_get_data(), endog_idx=0)
|
|
|
|
|
|
def _get_data():
    """
    Read ``longley.csv`` and return its data columns as floats.

    The file is loaded through ``du.load_csv``, which is given this module's
    ``__file__`` (presumably to locate the CSV next to the module -- confirm).
    Columns at positions 1 through 7 are kept, so position 0 is dropped, and
    the selection is cast to ``float``.
    """
    raw = du.load_csv(__file__, 'longley.csv')
    # Positional column selection; column 0 is deliberately left out.
    kept_columns = [1, 2, 3, 4, 5, 6, 7]
    return raw.iloc[:, kept_columns].astype(float)
|