61 lines
1.5 KiB
Python
61 lines
1.5 KiB
Python
|
"""Yearly sunspots data 1700-2008"""
|
||
|
from statsmodels.datasets import utils as du
|
||
|
|
||
|
__docformat__ = 'restructuredtext'
|
||
|
|
||
|
COPYRIGHT = """This data is public domain."""
|
||
|
TITLE = __doc__
|
||
|
SOURCE = """
|
||
|
http://www.ngdc.noaa.gov/stp/solar/solarda3.html
|
||
|
|
||
|
The original dataset contains monthly data on sunspot activity in the file
|
||
|
./src/sunspots_yearly.dat. There is also sunspots_monthly.dat.
|
||
|
"""
|
||
|
|
||
|
DESCRSHORT = """Yearly (1700-2008) data on sunspots from the National
|
||
|
Geophysical Data Center."""
|
||
|
|
||
|
DESCRLONG = DESCRSHORT
|
||
|
|
||
|
NOTE = """::
|
||
|
|
||
|
Number of Observations - 309 (Annual 1700 - 2008)
|
||
|
Number of Variables - 1
|
||
|
Variable name definitions::
|
||
|
|
||
|
SUNACTIVITY - Number of sunspots for each year
|
||
|
|
||
|
The data file contains a 'YEAR' variable that is not returned by load.
|
||
|
"""
|
||
|
|
||
|
|
||
|
def load_pandas():
|
||
|
data = _get_data()
|
||
|
# TODO: time series
|
||
|
endog = data.set_index(data.YEAR).SUNACTIVITY
|
||
|
dataset = du.Dataset(data=data, names=list(data.columns),
|
||
|
endog=endog, endog_name='volume')
|
||
|
return dataset
|
||
|
|
||
|
|
||
|
def load():
|
||
|
"""
|
||
|
Load the yearly sunspot data and returns a data class.
|
||
|
|
||
|
Returns
|
||
|
-------
|
||
|
Dataset
|
||
|
See DATASET_PROPOSAL.txt for more information.
|
||
|
|
||
|
Notes
|
||
|
-----
|
||
|
This dataset only contains data for one variable, so the attributes
|
||
|
data, raw_data, and endog are all the same variable. There is no exog
|
||
|
attribute defined.
|
||
|
"""
|
||
|
return load_pandas()
|
||
|
|
||
|
|
||
|
def _get_data():
|
||
|
return du.load_csv(__file__, 'sunspots.csv').astype(float)
|