120 lines
3.6 KiB
Python
120 lines
3.6 KiB
Python
"""American National Election Survey 1996"""
|
|
from numpy import log
|
|
|
|
from statsmodels.datasets import utils as du
|
|
|
|
__docformat__ = 'restructuredtext'
|
|
|
|
COPYRIGHT = """This is public domain."""
|
|
TITLE = __doc__
|
|
SOURCE = """
|
|
http://www.electionstudies.org/
|
|
|
|
The American National Election Studies.
|
|
"""
|
|
|
|
DESCRSHORT = """This data is a subset of the American National Election Studies of 1996."""
|
|
|
|
DESCRLONG = DESCRSHORT
|
|
|
|
# Human-readable codebook for the dataset, written as a reStructuredText
# literal block.  Each entry is "name - description"; indented entries list
# the coded values of the variable above them.  ``logpopul`` is not a raw
# variable: it is derived from ``popul`` as log(popul + .1).
NOTE = """::

    Number of observations - 944
    Number of variables - 10

    Variables name definitions::

            popul - Census place population in 1000s
            TVnews - Number of times per week that respondent watches TV news.
            PID - Party identification of respondent.
                0 - Strong Democrat
                1 - Weak Democrat
                2 - Independent-Democrat
                3 - Independent-Independent
                4 - Independent-Republican
                5 - Weak Republican
                6 - Strong Republican
            age - Age of respondent.
            educ - Education level of respondent
                1 - 1-8 grades
                2 - Some high school
                3 - High school graduate
                4 - Some college
                5 - College degree
                6 - Master's degree
                7 - PhD
            income - Income of household
                1 - None or less than $2,999
                2 - $3,000-$4,999
                3 - $5,000-$6,999
                4 - $7,000-$8,999
                5 - $9,000-$9,999
                6 - $10,000-$10,999
                7 - $11,000-$11,999
                8 - $12,000-$12,999
                9 - $13,000-$13,999
                10 - $14,000-$14,999
                11 - $15,000-$16,999
                12 - $17,000-$19,999
                13 - $20,000-$21,999
                14 - $22,000-$24,999
                15 - $25,000-$29,999
                16 - $30,000-$34,999
                17 - $35,000-$39,999
                18 - $40,000-$44,999
                19 - $45,000-$49,999
                20 - $50,000-$59,999
                21 - $60,000-$74,999
                22 - $75,000-$89,999
                23 - $90,000-$104,999
                24 - $105,000 and over
            vote - Expected vote
                0 - Clinton
                1 - Dole
            The following 3 variables all take the values:
                1 - Extremely liberal
                2 - Liberal
                3 - Slightly liberal
                4 - Moderate
                5 - Slightly conservative
                6 - Conservative
                7 - Extremely Conservative
            selfLR - Respondent's self-reported political leanings from "Left"
                to "Right".
            ClinLR - Respondent's impression of Bill Clinton's political
                leanings from "Left" to "Right".
            DoleLR - Respondent's impression of Bob Dole's political leanings
                from "Left" to "Right".
            logpopul - log(popul + .1)
"""
|
|
|
|
|
|
def load_pandas():
    """
    Load the anes96 data and return it as a Dataset instance.

    The frame produced by ``_get_data`` is handed to ``du.process_pandas``,
    which receives the positions of the endogenous column and of the
    exogenous columns.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    # Column positions within the frame returned by _get_data().
    # NOTE(review): position 10 is presumably the derived ``logpopul`` column
    # that _get_data() appends last -- confirm against the CSV column order.
    endog_position = 5
    exog_positions = [10, 2, 6, 7, 8]

    frame = _get_data()
    return du.process_pandas(
        frame,
        endog_idx=endog_position,
        exog_idx=exog_positions,
    )
|
|
|
|
|
|
def load():
    """
    Load the anes96 data and return it as a Dataset instance.

    This is a thin alias: all of the work is delegated to ``load_pandas``.

    Returns
    -------
    Dataset
        See DATASET_PROPOSAL.txt for more information.
    """
    dataset = load_pandas()
    return dataset
|
|
|
|
|
|
def _get_data():
    """
    Read ``anes96.csv`` and return it as an all-float frame.

    The file is loaded through ``du.load_csv`` (given this module's
    ``__file__`` and a whitespace regex separator), the column names are
    cleaned with ``du.strip_column_names``, and a derived ``logpopul``
    column equal to ``log(popul + .1)`` is added before everything is cast
    to ``float``.

    Returns
    -------
    The loaded data with the extra ``logpopul`` column, cast to float.
    """
    raw = du.load_csv(__file__, 'anes96.csv', sep=r'\s')
    frame = du.strip_column_names(raw)

    # The small offset is added to ``popul`` before taking the logarithm.
    shifted_population = frame['popul'] + .1
    frame['logpopul'] = log(shifted_population)

    return frame.astype(float)
|