{
"cells": [
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_living | \n",
" sqft_lot | \n",
" floors | \n",
" waterfront | \n",
" view | \n",
" condition | \n",
" grade | \n",
" sqft_above | \n",
" sqft_basement | \n",
" yr_built | \n",
" yr_renovated | \n",
" zipcode | \n",
" lat | \n",
" long | \n",
" sqft_living15 | \n",
" sqft_lot15 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20141013T000000 | \n",
" 221900.0 | \n",
" 3 | \n",
" 1.00 | \n",
" 1180 | \n",
" 5650 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 1180 | \n",
" 0 | \n",
" 1955 | \n",
" 0 | \n",
" 98178 | \n",
" 47.5112 | \n",
" -122.257 | \n",
" 1340 | \n",
" 5650 | \n",
"
\n",
" \n",
" 1 | \n",
" 20141209T000000 | \n",
" 538000.0 | \n",
" 3 | \n",
" 2.25 | \n",
" 2570 | \n",
" 7242 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 2170 | \n",
" 400 | \n",
" 1951 | \n",
" 1991 | \n",
" 98125 | \n",
" 47.7210 | \n",
" -122.319 | \n",
" 1690 | \n",
" 7639 | \n",
"
\n",
" \n",
" 2 | \n",
" 20150225T000000 | \n",
" 180000.0 | \n",
" 2 | \n",
" 1.00 | \n",
" 770 | \n",
" 10000 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 6 | \n",
" 770 | \n",
" 0 | \n",
" 1933 | \n",
" 0 | \n",
" 98028 | \n",
" 47.7379 | \n",
" -122.233 | \n",
" 2720 | \n",
" 8062 | \n",
"
\n",
" \n",
" 3 | \n",
" 20141209T000000 | \n",
" 604000.0 | \n",
" 4 | \n",
" 3.00 | \n",
" 1960 | \n",
" 5000 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 5 | \n",
" 7 | \n",
" 1050 | \n",
" 910 | \n",
" 1965 | \n",
" 0 | \n",
" 98136 | \n",
" 47.5208 | \n",
" -122.393 | \n",
" 1360 | \n",
" 5000 | \n",
"
\n",
" \n",
" 4 | \n",
" 20150218T000000 | \n",
" 510000.0 | \n",
" 3 | \n",
" 2.00 | \n",
" 1680 | \n",
" 8080 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 1680 | \n",
" 0 | \n",
" 1987 | \n",
" 0 | \n",
" 98074 | \n",
" 47.6168 | \n",
" -122.045 | \n",
" 1800 | \n",
" 7503 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 21608 | \n",
" 20140521T000000 | \n",
" 360000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1530 | \n",
" 1131 | \n",
" 3.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 1530 | \n",
" 0 | \n",
" 2009 | \n",
" 0 | \n",
" 98103 | \n",
" 47.6993 | \n",
" -122.346 | \n",
" 1530 | \n",
" 1509 | \n",
"
\n",
" \n",
" 21609 | \n",
" 20150223T000000 | \n",
" 400000.0 | \n",
" 4 | \n",
" 2.50 | \n",
" 2310 | \n",
" 5813 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 2310 | \n",
" 0 | \n",
" 2014 | \n",
" 0 | \n",
" 98146 | \n",
" 47.5107 | \n",
" -122.362 | \n",
" 1830 | \n",
" 7200 | \n",
"
\n",
" \n",
" 21610 | \n",
" 20140623T000000 | \n",
" 402101.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1020 | \n",
" 1350 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 1020 | \n",
" 0 | \n",
" 2009 | \n",
" 0 | \n",
" 98144 | \n",
" 47.5944 | \n",
" -122.299 | \n",
" 1020 | \n",
" 2007 | \n",
"
\n",
" \n",
" 21611 | \n",
" 20150116T000000 | \n",
" 400000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1600 | \n",
" 2388 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 1600 | \n",
" 0 | \n",
" 2004 | \n",
" 0 | \n",
" 98027 | \n",
" 47.5345 | \n",
" -122.069 | \n",
" 1410 | \n",
" 1287 | \n",
"
\n",
" \n",
" 21612 | \n",
" 20141015T000000 | \n",
" 325000.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1020 | \n",
" 1076 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 1020 | \n",
" 0 | \n",
" 2008 | \n",
" 0 | \n",
" 98144 | \n",
" 47.5941 | \n",
" -122.299 | \n",
" 1020 | \n",
" 1357 | \n",
"
\n",
" \n",
"
\n",
"
21613 rows × 20 columns
\n",
"
"
],
"text/plain": [
" date price bedrooms bathrooms sqft_living sqft_lot \\\n",
"0 20141013T000000 221900.0 3 1.00 1180 5650 \n",
"1 20141209T000000 538000.0 3 2.25 2570 7242 \n",
"2 20150225T000000 180000.0 2 1.00 770 10000 \n",
"3 20141209T000000 604000.0 4 3.00 1960 5000 \n",
"4 20150218T000000 510000.0 3 2.00 1680 8080 \n",
"... ... ... ... ... ... ... \n",
"21608 20140521T000000 360000.0 3 2.50 1530 1131 \n",
"21609 20150223T000000 400000.0 4 2.50 2310 5813 \n",
"21610 20140623T000000 402101.0 2 0.75 1020 1350 \n",
"21611 20150116T000000 400000.0 3 2.50 1600 2388 \n",
"21612 20141015T000000 325000.0 2 0.75 1020 1076 \n",
"\n",
" floors waterfront view condition grade sqft_above sqft_basement \\\n",
"0 1.0 0 0 3 7 1180 0 \n",
"1 2.0 0 0 3 7 2170 400 \n",
"2 1.0 0 0 3 6 770 0 \n",
"3 1.0 0 0 5 7 1050 910 \n",
"4 1.0 0 0 3 8 1680 0 \n",
"... ... ... ... ... ... ... ... \n",
"21608 3.0 0 0 3 8 1530 0 \n",
"21609 2.0 0 0 3 8 2310 0 \n",
"21610 2.0 0 0 3 7 1020 0 \n",
"21611 2.0 0 0 3 8 1600 0 \n",
"21612 2.0 0 0 3 7 1020 0 \n",
"\n",
" yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
"0 1955 0 98178 47.5112 -122.257 1340 \n",
"1 1951 1991 98125 47.7210 -122.319 1690 \n",
"2 1933 0 98028 47.7379 -122.233 2720 \n",
"3 1965 0 98136 47.5208 -122.393 1360 \n",
"4 1987 0 98074 47.6168 -122.045 1800 \n",
"... ... ... ... ... ... ... \n",
"21608 2009 0 98103 47.6993 -122.346 1530 \n",
"21609 2014 0 98146 47.5107 -122.362 1830 \n",
"21610 2009 0 98144 47.5944 -122.299 1020 \n",
"21611 2004 0 98027 47.5345 -122.069 1410 \n",
"21612 2008 0 98144 47.5941 -122.299 1020 \n",
"\n",
" sqft_lot15 \n",
"0 5650 \n",
"1 7639 \n",
"2 8062 \n",
"3 5000 \n",
"4 7503 \n",
"... ... \n",
"21608 1509 \n",
"21609 7200 \n",
"21610 2007 \n",
"21611 1287 \n",
"21612 1357 \n",
"\n",
"[21613 rows x 20 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load data/kc_house_data.csv. The `id` column is read as the index and then\n",
"# discarded, so the frame ends up with a default 0..n-1 index and no `id` column.\n",
"house = (\n",
"    pd.read_csv(\"data/kc_house_data.csv\", index_col=\"id\")\n",
"    .reset_index(drop=True)\n",
")\n",
"\n",
"house"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built_1901 | \n",
" yr_built_1902 | \n",
" yr_built_1903 | \n",
" yr_built_1904 | \n",
" yr_built_1905 | \n",
" yr_built_1906 | \n",
" yr_built_1907 | \n",
" yr_built_1908 | \n",
" yr_built_1909 | \n",
" yr_built_1910 | \n",
" ... | \n",
" price_4489000.0 | \n",
" price_4500000.0 | \n",
" price_4668000.0 | \n",
" price_5110800.0 | \n",
" price_5300000.0 | \n",
" price_5350000.0 | \n",
" price_5570000.0 | \n",
" price_6885000.0 | \n",
" price_7062500.0 | \n",
" price_7700000.0 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 21608 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21609 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21610 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21611 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21612 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
21613 rows × 4142 columns
\n",
"
"
],
"text/plain": [
" yr_built_1901 yr_built_1902 yr_built_1903 yr_built_1904 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
" yr_built_1905 yr_built_1906 yr_built_1907 yr_built_1908 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
" yr_built_1909 yr_built_1910 ... price_4489000.0 price_4500000.0 \\\n",
"0 0.0 0.0 ... 0.0 0.0 \n",
"1 0.0 0.0 ... 0.0 0.0 \n",
"2 0.0 0.0 ... 0.0 0.0 \n",
"3 0.0 0.0 ... 0.0 0.0 \n",
"4 0.0 0.0 ... 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"21608 0.0 0.0 ... 0.0 0.0 \n",
"21609 0.0 0.0 ... 0.0 0.0 \n",
"21610 0.0 0.0 ... 0.0 0.0 \n",
"21611 0.0 0.0 ... 0.0 0.0 \n",
"21612 0.0 0.0 ... 0.0 0.0 \n",
"\n",
" price_4668000.0 price_5110800.0 price_5300000.0 price_5350000.0 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
" price_5570000.0 price_6885000.0 price_7062500.0 price_7700000.0 \n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
"[21613 rows x 4142 columns]"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"# Columns to one-hot encode, named once so fitting and feature naming cannot drift apart.\n",
"# NOTE(review): `price` is continuous, so it contributes one indicator column per\n",
"# distinct value (thousands of columns) - confirm this is intended.\n",
"onehot_columns = [\"yr_built\", \"price\"]\n",
"\n",
"# drop=\"first\" omits the first category of each feature;\n",
"# sparse_output=False makes fit_transform return a dense array.\n",
"encoder = OneHotEncoder(sparse_output=False, drop=\"first\")\n",
"\n",
"encoded_values = encoder.fit_transform(house[onehot_columns])\n",
"\n",
"encoded_columns = encoder.get_feature_names_out(onehot_columns)\n",
"\n",
"# Reuse house's index so a later column-wise pd.concat with `house` aligns\n",
"# row-for-row even if `house` does not carry a default RangeIndex.\n",
"encoded_values_df = pd.DataFrame(\n",
"    encoded_values, columns=encoded_columns, index=house.index\n",
")\n",
"\n",
"encoded_values_df"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_living | \n",
" sqft_lot | \n",
" floors | \n",
" waterfront | \n",
" view | \n",
" condition | \n",
" ... | \n",
" price_4489000.0 | \n",
" price_4500000.0 | \n",
" price_4668000.0 | \n",
" price_5110800.0 | \n",
" price_5300000.0 | \n",
" price_5350000.0 | \n",
" price_5570000.0 | \n",
" price_6885000.0 | \n",
" price_7062500.0 | \n",
" price_7700000.0 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20141013T000000 | \n",
" 221900.0 | \n",
" 3 | \n",
" 1.00 | \n",
" 1180 | \n",
" 5650 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 20141209T000000 | \n",
" 538000.0 | \n",
" 3 | \n",
" 2.25 | \n",
" 2570 | \n",
" 7242 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 20150225T000000 | \n",
" 180000.0 | \n",
" 2 | \n",
" 1.00 | \n",
" 770 | \n",
" 10000 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 20141209T000000 | \n",
" 604000.0 | \n",
" 4 | \n",
" 3.00 | \n",
" 1960 | \n",
" 5000 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 5 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 20150218T000000 | \n",
" 510000.0 | \n",
" 3 | \n",
" 2.00 | \n",
" 1680 | \n",
" 8080 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 21608 | \n",
" 20140521T000000 | \n",
" 360000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1530 | \n",
" 1131 | \n",
" 3.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21609 | \n",
" 20150223T000000 | \n",
" 400000.0 | \n",
" 4 | \n",
" 2.50 | \n",
" 2310 | \n",
" 5813 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21610 | \n",
" 20140623T000000 | \n",
" 402101.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1020 | \n",
" 1350 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21611 | \n",
" 20150116T000000 | \n",
" 400000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1600 | \n",
" 2388 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21612 | \n",
" 20141015T000000 | \n",
" 325000.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1020 | \n",
" 1076 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
21613 rows × 4162 columns
\n",
"
"
],
"text/plain": [
" date price bedrooms bathrooms sqft_living sqft_lot \\\n",
"0 20141013T000000 221900.0 3 1.00 1180 5650 \n",
"1 20141209T000000 538000.0 3 2.25 2570 7242 \n",
"2 20150225T000000 180000.0 2 1.00 770 10000 \n",
"3 20141209T000000 604000.0 4 3.00 1960 5000 \n",
"4 20150218T000000 510000.0 3 2.00 1680 8080 \n",
"... ... ... ... ... ... ... \n",
"21608 20140521T000000 360000.0 3 2.50 1530 1131 \n",
"21609 20150223T000000 400000.0 4 2.50 2310 5813 \n",
"21610 20140623T000000 402101.0 2 0.75 1020 1350 \n",
"21611 20150116T000000 400000.0 3 2.50 1600 2388 \n",
"21612 20141015T000000 325000.0 2 0.75 1020 1076 \n",
"\n",
" floors waterfront view condition ... price_4489000.0 \\\n",
"0 1.0 0 0 3 ... 0.0 \n",
"1 2.0 0 0 3 ... 0.0 \n",
"2 1.0 0 0 3 ... 0.0 \n",
"3 1.0 0 0 5 ... 0.0 \n",
"4 1.0 0 0 3 ... 0.0 \n",
"... ... ... ... ... ... ... \n",
"21608 3.0 0 0 3 ... 0.0 \n",
"21609 2.0 0 0 3 ... 0.0 \n",
"21610 2.0 0 0 3 ... 0.0 \n",
"21611 2.0 0 0 3 ... 0.0 \n",
"21612 2.0 0 0 3 ... 0.0 \n",
"\n",
" price_4500000.0 price_4668000.0 price_5110800.0 price_5300000.0 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
" price_5350000.0 price_5570000.0 price_6885000.0 price_7062500.0 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
" price_7700000.0 \n",
"0 0.0 \n",
"1 0.0 \n",
"2 0.0 \n",
"3 0.0 \n",
"4 0.0 \n",
"... ... \n",
"21608 0.0 \n",
"21609 0.0 \n",
"21610 0.0 \n",
"21611 0.0 \n",
"21612 0.0 \n",
"\n",
"[21613 rows x 4162 columns]"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop any one-hot columns left over from an earlier run of this cell before\n",
"# appending, so re-running it does not pile up duplicate columns.\n",
"# errors=\"ignore\" makes the drop a no-op on the first run.\n",
"house = pd.concat(\n",
"    [\n",
"        house.drop(columns=encoded_values_df.columns, errors=\"ignore\"),\n",
"        encoded_values_df,\n",
"    ],\n",
"    axis=1,\n",
")\n",
"\n",
"house"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Names for the yr_built bins, ordered from oldest to newest.\n",
"labels = [\"old\", \"middle\", \"new\"]\n",
"# One bin per label.\n",
"num_bins = len(labels)"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([1900. , 1938.33333333, 1976.66666667, 2015. ]),\n",
" array([ 3067, 8120, 10426]))"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Equal-width binning: np.histogram splits the data's min..max range into\n",
"# num_bins equal-width bins. Missing years (if any) are replaced by the median first.\n",
"yr_built_filled = house[\"yr_built\"].fillna(house[\"yr_built\"].median())\n",
"hist1, bins1 = np.histogram(yr_built_filled, bins=num_bins)\n",
"\n",
"# Show the bin edges first, then the per-bin counts.\n",
"bins1, hist1"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" (1900.0, 1938.333] | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" (1900.0, 1938.333] | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" NaN | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" (1976.667, 2015.0] | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" (1900.0, 1938.333] | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" (1900.0, 1938.333] | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" (1938.333, 1976.667] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 (1938.333, 1976.667]\n",
"1 1951 (1938.333, 1976.667]\n",
"2 1933 (1900.0, 1938.333]\n",
"3 1965 (1938.333, 1976.667]\n",
"4 1987 (1976.667, 2015.0]\n",
"5 2001 (1976.667, 2015.0]\n",
"6 1995 (1976.667, 2015.0]\n",
"7 1963 (1938.333, 1976.667]\n",
"8 1960 (1938.333, 1976.667]\n",
"9 2003 (1976.667, 2015.0]\n",
"10 1965 (1938.333, 1976.667]\n",
"11 1942 (1938.333, 1976.667]\n",
"12 1927 (1900.0, 1938.333]\n",
"13 1977 (1976.667, 2015.0]\n",
"14 1900 NaN\n",
"15 1979 (1976.667, 2015.0]\n",
"16 1994 (1976.667, 2015.0]\n",
"17 1916 (1900.0, 1938.333]\n",
"18 1921 (1900.0, 1938.333]\n",
"19 1969 (1938.333, 1976.667]"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# include_lowest=True closes the first interval on the left. Without it a year\n",
"# equal to bins1[0] (the data minimum) falls outside every right-closed bin\n",
"# and is returned as NaN.\n",
"pd.concat(\n",
"    [\n",
"        house[\"yr_built\"],\n",
"        pd.cut(house[\"yr_built\"], list(bins1), include_lowest=True),\n",
"    ],\n",
"    axis=1,\n",
").head(20)"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" middle | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" middle | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" old | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" middle | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" new | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" new | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" new | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" middle | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" middle | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" new | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" middle | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" middle | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" old | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" new | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" NaN | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" new | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" new | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" old | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" old | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" middle | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 middle\n",
"1 1951 middle\n",
"2 1933 old\n",
"3 1965 middle\n",
"4 1987 new\n",
"5 2001 new\n",
"6 1995 new\n",
"7 1963 middle\n",
"8 1960 middle\n",
"9 2003 new\n",
"10 1965 middle\n",
"11 1942 middle\n",
"12 1927 old\n",
"13 1977 new\n",
"14 1900 NaN\n",
"15 1979 new\n",
"16 1994 new\n",
"17 1916 old\n",
"18 1921 old\n",
"19 1969 middle"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same equal-width bins, shown with the names from `labels` instead of intervals.\n",
"# include_lowest=True keeps a year equal to bins1[0] (the data minimum) in the\n",
"# first bin instead of turning it into NaN.\n",
"pd.concat(\n",
"    [\n",
"        house[\"yr_built\"],\n",
"        pd.cut(house[\"yr_built\"], list(bins1), labels=labels, include_lowest=True),\n",
"    ],\n",
"    axis=1,\n",
").head(20)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([1899., 1928., 1957., 1986., 2015.]),\n",
" array([2403, 4230, 6914, 8028, 38]))"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Five evenly spaced edges from 1899 to 2015, i.e. four bins.\n",
"bins2 = np.linspace(1899, 2015, 5)\n",
"# right=True makes each bin (left, right], matching pd.cut's default. With the\n",
"# default right=False a value equal to the last edge (2015) lands in an extra\n",
"# bin past the end, giving five counts for four bins.\n",
"tmp_bins2 = np.digitize(\n",
"    house[\"yr_built\"].fillna(house[\"yr_built\"].median()), bins2, right=True\n",
")\n",
"# digitize indices start at 1 for the first bin, hence the -1.\n",
"# minlength keeps one count per bin even when trailing bins are empty.\n",
"hist2 = np.bincount(tmp_bins2 - 1, minlength=len(bins2) - 1)\n",
"bins2, hist2"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" (1928.0, 1957.0] | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" (1928.0, 1957.0] | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" (1928.0, 1957.0] | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" (1986.0, 2015.0] | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" (1986.0, 2015.0] | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" (1986.0, 2015.0] | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" (1986.0, 2015.0] | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" (1928.0, 1957.0] | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" (1899.0, 1928.0] | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" (1899.0, 1928.0] | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" (1986.0, 2015.0] | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" (1899.0, 1928.0] | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" (1899.0, 1928.0] | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" (1957.0, 1986.0] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 (1928.0, 1957.0]\n",
"1 1951 (1928.0, 1957.0]\n",
"2 1933 (1928.0, 1957.0]\n",
"3 1965 (1957.0, 1986.0]\n",
"4 1987 (1986.0, 2015.0]\n",
"5 2001 (1986.0, 2015.0]\n",
"6 1995 (1986.0, 2015.0]\n",
"7 1963 (1957.0, 1986.0]\n",
"8 1960 (1957.0, 1986.0]\n",
"9 2003 (1986.0, 2015.0]\n",
"10 1965 (1957.0, 1986.0]\n",
"11 1942 (1928.0, 1957.0]\n",
"12 1927 (1899.0, 1928.0]\n",
"13 1977 (1957.0, 1986.0]\n",
"14 1900 (1899.0, 1928.0]\n",
"15 1979 (1957.0, 1986.0]\n",
"16 1994 (1986.0, 2015.0]\n",
"17 1916 (1899.0, 1928.0]\n",
"18 1921 (1899.0, 1928.0]\n",
"19 1969 (1957.0, 1986.0]"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bin yr_built with the bins2 edges and show the first 20 rows next to the raw years.\n",
"yr_built_cut2 = pd.cut(house[\"yr_built\"], bins=list(bins2))\n",
"pd.concat([house[\"yr_built\"], yr_built_cut2], axis=1).head(20)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(array([1899, 1957, 2001, 2015]), array([ 6633, 10439, 4541]))"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Hand-picked, unequal-width bin edges instead of an equal-width split.\n",
"# Missing years (if any) are replaced by the median before counting.\n",
"yr_built_filled = house[\"yr_built\"].fillna(house[\"yr_built\"].median())\n",
"hist3, bins3 = np.histogram(yr_built_filled, bins=[1899, 1957, 2001, 2015])\n",
"\n",
"# Show the bin edges first, then the per-bin counts.\n",
"bins3, hist3"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" (2001, 2015] | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" (1899, 1957] | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" (1957, 2001] | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 (1899, 1957]\n",
"1 1951 (1899, 1957]\n",
"2 1933 (1899, 1957]\n",
"3 1965 (1957, 2001]\n",
"4 1987 (1957, 2001]\n",
"5 2001 (1957, 2001]\n",
"6 1995 (1957, 2001]\n",
"7 1963 (1957, 2001]\n",
"8 1960 (1957, 2001]\n",
"9 2003 (2001, 2015]\n",
"10 1965 (1957, 2001]\n",
"11 1942 (1899, 1957]\n",
"12 1927 (1899, 1957]\n",
"13 1977 (1957, 2001]\n",
"14 1900 (1899, 1957]\n",
"15 1979 (1957, 2001]\n",
"16 1994 (1957, 2001]\n",
"17 1916 (1899, 1957]\n",
"18 1921 (1899, 1957]\n",
"19 1969 (1957, 2001]"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Bin yr_built with the bins3 edges and show the first 20 rows next to the raw years.\n",
"yr_built_cut3 = pd.cut(house[\"yr_built\"], bins=list(bins3))\n",
"pd.concat([house[\"yr_built\"], yr_built_cut3], axis=1).head(20)"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" old | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" old | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" old | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" middle | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" middle | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" middle | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" middle | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" middle | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" middle | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" new | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" middle | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" old | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" old | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" middle | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" old | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" middle | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" middle | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" old | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" old | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" middle | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 old\n",
"1 1951 old\n",
"2 1933 old\n",
"3 1965 middle\n",
"4 1987 middle\n",
"5 2001 middle\n",
"6 1995 middle\n",
"7 1963 middle\n",
"8 1960 middle\n",
"9 2003 new\n",
"10 1965 middle\n",
"11 1942 old\n",
"12 1927 old\n",
"13 1977 middle\n",
"14 1900 old\n",
"15 1979 middle\n",
"16 1994 middle\n",
"17 1916 old\n",
"18 1921 old\n",
"19 1969 middle"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Same bins3 edges, shown with the names from `labels` instead of intervals.\n",
"yr_built_named3 = pd.cut(house[\"yr_built\"], bins=list(bins3), labels=labels)\n",
"pd.concat([house[\"yr_built\"], yr_built_named3], axis=1).head(20)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" 0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" 0 | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" 1 | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" 2 | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" 2 | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" 1 | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" 1 | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" 2 | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" 1 | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" 0 | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" 0 | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" 1 | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" 0 | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" 1 | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" 2 | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" 0 | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" 0 | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 0\n",
"1 1951 0\n",
"2 1933 0\n",
"3 1965 1\n",
"4 1987 1\n",
"5 2001 2\n",
"6 1995 2\n",
"7 1963 1\n",
"8 1960 1\n",
"9 2003 2\n",
"10 1965 1\n",
"11 1942 0\n",
"12 1927 0\n",
"13 1977 1\n",
"14 1900 0\n",
"15 1979 1\n",
"16 1994 2\n",
"17 1916 0\n",
"18 1921 0\n",
"19 1969 1"
]
},
"execution_count": 26,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [house[\"yr_built\"], pd.qcut(house[\"yr_built\"], q=3, labels=False)], axis=1\n",
").head(20)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" yr_built | \n",
" yr_built | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1955 | \n",
" old | \n",
"
\n",
" \n",
" 1 | \n",
" 1951 | \n",
" old | \n",
"
\n",
" \n",
" 2 | \n",
" 1933 | \n",
" old | \n",
"
\n",
" \n",
" 3 | \n",
" 1965 | \n",
" middle | \n",
"
\n",
" \n",
" 4 | \n",
" 1987 | \n",
" middle | \n",
"
\n",
" \n",
" 5 | \n",
" 2001 | \n",
" new | \n",
"
\n",
" \n",
" 6 | \n",
" 1995 | \n",
" new | \n",
"
\n",
" \n",
" 7 | \n",
" 1963 | \n",
" middle | \n",
"
\n",
" \n",
" 8 | \n",
" 1960 | \n",
" middle | \n",
"
\n",
" \n",
" 9 | \n",
" 2003 | \n",
" new | \n",
"
\n",
" \n",
" 10 | \n",
" 1965 | \n",
" middle | \n",
"
\n",
" \n",
" 11 | \n",
" 1942 | \n",
" old | \n",
"
\n",
" \n",
" 12 | \n",
" 1927 | \n",
" old | \n",
"
\n",
" \n",
" 13 | \n",
" 1977 | \n",
" middle | \n",
"
\n",
" \n",
" 14 | \n",
" 1900 | \n",
" old | \n",
"
\n",
" \n",
" 15 | \n",
" 1979 | \n",
" middle | \n",
"
\n",
" \n",
" 16 | \n",
" 1994 | \n",
" new | \n",
"
\n",
" \n",
" 17 | \n",
" 1916 | \n",
" old | \n",
"
\n",
" \n",
" 18 | \n",
" 1921 | \n",
" old | \n",
"
\n",
" \n",
" 19 | \n",
" 1969 | \n",
" middle | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" yr_built yr_built\n",
"0 1955 old\n",
"1 1951 old\n",
"2 1933 old\n",
"3 1965 middle\n",
"4 1987 middle\n",
"5 2001 new\n",
"6 1995 new\n",
"7 1963 middle\n",
"8 1960 middle\n",
"9 2003 new\n",
"10 1965 middle\n",
"11 1942 old\n",
"12 1927 old\n",
"13 1977 middle\n",
"14 1900 old\n",
"15 1979 middle\n",
"16 1994 new\n",
"17 1916 old\n",
"18 1921 old\n",
"19 1969 middle"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [house[\"yr_built\"], pd.qcut(house[\"yr_built\"], q=3, labels=labels)], axis=1\n",
").head(20)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Пример конструирования признаков на основе существующих"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_living | \n",
" sqft_lot | \n",
" floors | \n",
" grade | \n",
" sqft_above | \n",
" sqft_basement | \n",
" yr_built | \n",
" yr_renovated | \n",
" zipcode | \n",
" lat | \n",
" long | \n",
" sqft_living15 | \n",
" sqft_lot15 | \n",
" Price_category | \n",
" Renovated_flag | \n",
" Zipcode_area | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20141013T000000 | \n",
" 221900.0 | \n",
" 3 | \n",
" 1.00 | \n",
" 1180 | \n",
" 5650 | \n",
" 1.0 | \n",
" 7 | \n",
" 1180 | \n",
" 0 | \n",
" 1955 | \n",
" 0 | \n",
" 98178 | \n",
" 47.5112 | \n",
" -122.257 | \n",
" 1340 | \n",
" 5650 | \n",
" Low | \n",
" 0 | \n",
" 981 | \n",
"
\n",
" \n",
" 1 | \n",
" 20141209T000000 | \n",
" 538000.0 | \n",
" 3 | \n",
" 2.25 | \n",
" 2570 | \n",
" 7242 | \n",
" 2.0 | \n",
" 7 | \n",
" 2170 | \n",
" 400 | \n",
" 1951 | \n",
" 1991 | \n",
" 98125 | \n",
" 47.7210 | \n",
" -122.319 | \n",
" 1690 | \n",
" 7639 | \n",
" Medium | \n",
" 1 | \n",
" 981 | \n",
"
\n",
" \n",
" 2 | \n",
" 20150225T000000 | \n",
" 180000.0 | \n",
" 2 | \n",
" 1.00 | \n",
" 770 | \n",
" 10000 | \n",
" 1.0 | \n",
" 6 | \n",
" 770 | \n",
" 0 | \n",
" 1933 | \n",
" 0 | \n",
" 98028 | \n",
" 47.7379 | \n",
" -122.233 | \n",
" 2720 | \n",
" 8062 | \n",
" Low | \n",
" 0 | \n",
" 980 | \n",
"
\n",
" \n",
" 3 | \n",
" 20141209T000000 | \n",
" 604000.0 | \n",
" 4 | \n",
" 3.00 | \n",
" 1960 | \n",
" 5000 | \n",
" 1.0 | \n",
" 7 | \n",
" 1050 | \n",
" 910 | \n",
" 1965 | \n",
" 0 | \n",
" 98136 | \n",
" 47.5208 | \n",
" -122.393 | \n",
" 1360 | \n",
" 5000 | \n",
" High | \n",
" 0 | \n",
" 981 | \n",
"
\n",
" \n",
" 4 | \n",
" 20150218T000000 | \n",
" 510000.0 | \n",
" 3 | \n",
" 2.00 | \n",
" 1680 | \n",
" 8080 | \n",
" 1.0 | \n",
" 8 | \n",
" 1680 | \n",
" 0 | \n",
" 1987 | \n",
" 0 | \n",
" 98074 | \n",
" 47.6168 | \n",
" -122.045 | \n",
" 1800 | \n",
" 7503 | \n",
" Medium | \n",
" 0 | \n",
" 980 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 21608 | \n",
" 20140521T000000 | \n",
" 360000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1530 | \n",
" 1131 | \n",
" 3.0 | \n",
" 8 | \n",
" 1530 | \n",
" 0 | \n",
" 2009 | \n",
" 0 | \n",
" 98103 | \n",
" 47.6993 | \n",
" -122.346 | \n",
" 1530 | \n",
" 1509 | \n",
" Low | \n",
" 0 | \n",
" 981 | \n",
"
\n",
" \n",
" 21609 | \n",
" 20150223T000000 | \n",
" 400000.0 | \n",
" 4 | \n",
" 2.50 | \n",
" 2310 | \n",
" 5813 | \n",
" 2.0 | \n",
" 8 | \n",
" 2310 | \n",
" 0 | \n",
" 2014 | \n",
" 0 | \n",
" 98146 | \n",
" 47.5107 | \n",
" -122.362 | \n",
" 1830 | \n",
" 7200 | \n",
" Medium | \n",
" 0 | \n",
" 981 | \n",
"
\n",
" \n",
" 21610 | \n",
" 20140623T000000 | \n",
" 402101.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1020 | \n",
" 1350 | \n",
" 2.0 | \n",
" 7 | \n",
" 1020 | \n",
" 0 | \n",
" 2009 | \n",
" 0 | \n",
" 98144 | \n",
" 47.5944 | \n",
" -122.299 | \n",
" 1020 | \n",
" 2007 | \n",
" Medium | \n",
" 0 | \n",
" 981 | \n",
"
\n",
" \n",
" 21611 | \n",
" 20150116T000000 | \n",
" 400000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1600 | \n",
" 2388 | \n",
" 2.0 | \n",
" 8 | \n",
" 1600 | \n",
" 0 | \n",
" 2004 | \n",
" 0 | \n",
" 98027 | \n",
" 47.5345 | \n",
" -122.069 | \n",
" 1410 | \n",
" 1287 | \n",
" Medium | \n",
" 0 | \n",
" 980 | \n",
"
\n",
" \n",
" 21612 | \n",
" 20141015T000000 | \n",
" 325000.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1020 | \n",
" 1076 | \n",
" 2.0 | \n",
" 7 | \n",
" 1020 | \n",
" 0 | \n",
" 2008 | \n",
" 0 | \n",
" 98144 | \n",
" 47.5941 | \n",
" -122.299 | \n",
" 1020 | \n",
" 1357 | \n",
" Low | \n",
" 0 | \n",
" 981 | \n",
"
\n",
" \n",
"
\n",
"
21613 rows × 20 columns
\n",
"
"
],
"text/plain": [
" date price bedrooms bathrooms sqft_living sqft_lot \\\n",
"0 20141013T000000 221900.0 3 1.00 1180 5650 \n",
"1 20141209T000000 538000.0 3 2.25 2570 7242 \n",
"2 20150225T000000 180000.0 2 1.00 770 10000 \n",
"3 20141209T000000 604000.0 4 3.00 1960 5000 \n",
"4 20150218T000000 510000.0 3 2.00 1680 8080 \n",
"... ... ... ... ... ... ... \n",
"21608 20140521T000000 360000.0 3 2.50 1530 1131 \n",
"21609 20150223T000000 400000.0 4 2.50 2310 5813 \n",
"21610 20140623T000000 402101.0 2 0.75 1020 1350 \n",
"21611 20150116T000000 400000.0 3 2.50 1600 2388 \n",
"21612 20141015T000000 325000.0 2 0.75 1020 1076 \n",
"\n",
" floors grade sqft_above sqft_basement yr_built yr_renovated \\\n",
"0 1.0 7 1180 0 1955 0 \n",
"1 2.0 7 2170 400 1951 1991 \n",
"2 1.0 6 770 0 1933 0 \n",
"3 1.0 7 1050 910 1965 0 \n",
"4 1.0 8 1680 0 1987 0 \n",
"... ... ... ... ... ... ... \n",
"21608 3.0 8 1530 0 2009 0 \n",
"21609 2.0 8 2310 0 2014 0 \n",
"21610 2.0 7 1020 0 2009 0 \n",
"21611 2.0 8 1600 0 2004 0 \n",
"21612 2.0 7 1020 0 2008 0 \n",
"\n",
" zipcode lat long sqft_living15 sqft_lot15 Price_category \\\n",
"0 98178 47.5112 -122.257 1340 5650 Low \n",
"1 98125 47.7210 -122.319 1690 7639 Medium \n",
"2 98028 47.7379 -122.233 2720 8062 Low \n",
"3 98136 47.5208 -122.393 1360 5000 High \n",
"4 98074 47.6168 -122.045 1800 7503 Medium \n",
"... ... ... ... ... ... ... \n",
"21608 98103 47.6993 -122.346 1530 1509 Low \n",
"21609 98146 47.5107 -122.362 1830 7200 Medium \n",
"21610 98144 47.5944 -122.299 1020 2007 Medium \n",
"21611 98027 47.5345 -122.069 1410 1287 Medium \n",
"21612 98144 47.5941 -122.299 1020 1357 Low \n",
"\n",
" Renovated_flag Zipcode_area \n",
"0 0 981 \n",
"1 1 981 \n",
"2 0 980 \n",
"3 0 981 \n",
"4 0 980 \n",
"... ... ... \n",
"21608 0 981 \n",
"21609 0 981 \n",
"21610 0 981 \n",
"21611 0 980 \n",
"21612 0 981 \n",
"\n",
"[21613 rows x 20 columns]"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"house_cleaned = house.drop([\"waterfront\", \"view\", \"condition\"], axis=1, errors=\"ignore\")\n",
"\n",
"house_cleaned = house_cleaned.dropna()\n",
"\n",
"# Признак \"Price_category\": разделение домов на категории цен\n",
"house_cleaned[\"Price_category\"] = pd.qcut(\n",
" house_cleaned[\"price\"], q=3, labels=[\"Low\", \"Medium\", \"High\"]\n",
")\n",
"\n",
"# Признак \"Renovated_flag\": 1, если дом был отремонтирован, иначе 0\n",
"house_cleaned[\"Renovated_flag\"] = house_cleaned[\"yr_renovated\"].apply(\n",
" lambda x: 1 if x > 0 else 0\n",
")\n",
"\n",
"# Признак \"Zipcode_area\": используем первые три цифры из почтового индекса\n",
"house_cleaned[\"Zipcode_area\"] = house_cleaned[\"zipcode\"].apply(lambda x: str(x)[:3])\n",
"\n",
"house_cleaned"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" price bedrooms bathrooms sqft_living sqft_lot floors waterfront \\\n",
"id \n",
"0 221900.0 3 1.00 1180 5650 1.0 0 \n",
"1 538000.0 3 2.25 2570 7242 2.0 0 \n",
"2 180000.0 2 1.00 770 10000 1.0 0 \n",
"3 604000.0 4 3.00 1960 5000 1.0 0 \n",
"4 510000.0 3 2.00 1680 8080 1.0 0 \n",
"\n",
" view condition grade ... yr_renovated zipcode lat long \\\n",
"id ... \n",
"0 0 3 7 ... 0 98178 47.5112 -122.257 \n",
"1 0 3 7 ... 1991 98125 47.7210 -122.319 \n",
"2 0 3 6 ... 0 98028 47.7379 -122.233 \n",
"3 0 5 7 ... 0 98136 47.5208 -122.393 \n",
"4 0 3 8 ... 0 98074 47.6168 -122.045 \n",
"\n",
" sqft_living15 sqft_lot15 HOUR(date) MONTH(date) WEEKDAY(date) \\\n",
"id \n",
"0 1340 5650 0 10 0 \n",
"1 1690 7639 0 12 1 \n",
"2 2720 8062 0 2 2 \n",
"3 1360 5000 0 12 1 \n",
"4 1800 7503 0 2 2 \n",
"\n",
" YEAR(date) \n",
"id \n",
"0 2014 \n",
"1 2014 \n",
"2 2015 \n",
"3 2014 \n",
"4 2015 \n",
"\n",
"[5 rows x 23 columns]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Mai\\mai\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
" warnings.warn(\n",
"c:\\Mai\\mai\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\dfs.py:321: UnusedPrimitiveWarning: Some specified primitives were not used during DFS:\n",
" agg_primitives: ['count', 'mean', 'mode', 'sum']\n",
"This may be caused by a using a value of max_depth that is too small, not setting interesting values, or it may indicate no compatible columns for the primitive were found in the data. If the DFS call contained multiple instances of a primitive in the list above, none of them were used.\n",
" warnings.warn(warning_msg, UnusedPrimitiveWarning)\n"
]
}
],
"source": [
"import featuretools as ft\n",
"from woodwork.logical_types import Categorical, Datetime\n",
"\n",
"import featuretools as ft\n",
"from woodwork.logical_types import Categorical, Datetime\n",
"import pandas as pd\n",
"\n",
"# Загрузка данных\n",
"df = pd.read_csv(\"data/kc_house_data.csv\")\n",
"\n",
"# Убедимся, что есть уникальный идентификатор для каждой строки (если нет, создаем)\n",
"df[\"id\"] = range(len(df))\n",
"\n",
"# Создаем EntitySet для данных о домах\n",
"es = ft.EntitySet(id=\"house_sales\")\n",
"\n",
"# Добавляем основной DataFrame в EntitySet с указанием типов данных\n",
"es = es.add_dataframe(\n",
" dataframe_name=\"houses\",\n",
" dataframe=df,\n",
" index=\"id\", # Уникальный идентификатор для домов\n",
" logical_types={\n",
" \"date\": Datetime,\n",
" \"zipcode\": Categorical,\n",
" \"condition\": Categorical,\n",
" \"grade\": Categorical,\n",
" \"view\": Categorical,\n",
" \"waterfront\": Categorical,\n",
" },\n",
")\n",
"ft.primitives.list_primitives()\n",
"# Автоматическое конструирование признаков с применением корректных примитивов\n",
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"houses\", # Название основной таблицы\n",
" agg_primitives=[\"mean\", \"count\", \"mode\", \"sum\"], # Агрегирующие примитивы\n",
" trans_primitives=[\n",
" \"year\",\n",
" \"month\",\n",
" \"weekday\",\n",
" \"hour\",\n",
" ], # Корректные трансформационные примитивы\n",
" max_depth=2, # Максимальная глубина для генерации признаков\n",
")\n",
"\n",
"# Просмотр полученной feature_matrix\n",
"print(feature_matrix.head())"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAAGdCAYAAADAAnMpAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAlr0lEQVR4nO3de3CU5aHH8d9CNpsEsuFmbhBoFAzhEiAgEi4Fi9yHMbZknNQx2IO2x4EeOFjspFcj6jJKDnKGFkFr4znTlBZa0hmLkogEpCE2QdIJFNMDVcLRJIhKliS6vJA9f1i27smF3WTxScL3M5OJ++zzvu+TzLzZr+8uuzav1+sVAACAIf1MLwAAANzciBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYFWZ6AYFobW3VBx98oOjoaNlsNtPLAQAAAfB6vbp06ZISExPVr1/H1z96RYx88MEHSkpKMr0MAADQBefOndOIESM6vL9XxEh0dLSkz38Yp9NpeDUAQsmyLBUXF2vhwoWy2+2mlwMghNxut5KSknyP4x3pFTFy7akZp9NJjAB9jGVZioqKktPpJEaAPup6L7HgBawAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMCooGLk8ccfl81m8/saO3Zsp9vs3r1bY8eOVUREhCZOnKh9+/Z1a8EAAKBvCfrKyPjx41VXV+f7OnLkSIdzy8rKlJ2drVWrVun48ePKzMxUZmamTpw40a1FAwCAviPoGAkLC1N8fLzva9iwYR3O3bp1qxYvXqwNGzYoNTVVGzduVHp6urZt29atRQMAgL4j6A/K+5//+R8lJiYqIiJCGRkZcrlcGjlyZLtzjx49qvXr1/uNLVq0SEVFRZ0ew+PxyOPx+G673W5Jn3+glmVZwS4ZQIi1tLSopqYmJPtq+tSjsuozih5UroGRjm7vLyUlRVFRUSFYGYDuCvQxO6gYufPOO1VQUKCUlBTV1dUpLy9Pc+bM0YkTJ9r9eOD6+nrFxcX5jcXFxam+vr7T47hcLuXl5bUZLy4u5o8M0AOcOXNGjz76aEj3+UyI9pOfn6/bbrstRHsD0B0tLS0BzQsqRpYsWeL777S0NN15550aNWqUfvvb32rVqlXBrbATubm5fldU3G63kpKStHDhQjmdzpAdB0DXtLS0aPbs2SHZ19/qGrVh71/17L3jdHtCTLf3x5URoOe49szG9QT9NM0XDRo0SLfffrtOnz7d7v3x8fFqaGjwG2toaFB8fHyn+3U4HHI42l6utdvtstvtXV8wgJCIiYnR9OnTQ7Kv8LMfyXH0siZMTtfkUUNDsk8APUOgj9ndep+RpqYmnTlzRgkJCe3en5GRoQMHDviNlZSUKCMjozuHBQAAfUhQMfK9731Phw4d0nvvvaeysjLde++96t+/v7KzsyVJOTk5ys3N9c1fu3atXnvtNeXn5+udd97R448/rsrKSq1Zsya0PwUAAOi1gnqa5n//93+VnZ2tjz76SLfccotmz56t8vJy3XLLLZKk2tpa9ev3z76ZOXOmCgsL9aMf/Ug/+MEPNGbMGBUVFWnChAmh/SkAAECvFVSM7Nq1q9P7S0tL24xlZWUpKysrqEUBAICbB59NAwAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKO6FSObNm2SzWbTunXrOpxTUFAgm83m9xUREdGdwwIAgD4krKsbVlRUaMeOHUpLS7vuXKfTqZqaGt9tm83W1cMCAIA+pk
tXRpqamnT//ffrhRde0ODBg68732azKT4+3vcVFxfXlcMCAIA+qEtXRlavXq1ly5bp7rvv1pNPPnnd+U1NTRo1apRaW1uVnp6up59+WuPHj+9wvsfjkcfj8d12u92SJMuyZFlWV5YMoIe6cuWK7zvnN9C3BHpOBx0ju3bt0ttvv62KioqA5qekpOill15SWlqaGhsbtXnzZs2cOVMnT57UiBEj2t3G5XIpLy+vzXhxcbGioqKCXTKAHuxckySFqby8XO+fML0aAKHU0tIS0Dyb1+v1BrrTc+fOadq0aSopKfG9VmTevHmaPHmynnvuuYD2YVmWUlNTlZ2drY0bN7Y7p70rI0lJSbpw4YKcTmegywXQC/yl9mOteKFSex6epkkjh5heDoAQcrvdGjZsmBobGzt9/A7qysixY8d0/vx5paen+8auXr2qw4cPa9u2bfJ4POrfv3+n+7Db7ZoyZYpOnz7d4RyHwyGHw9Hutna7PZglA+jhwsLCfN85v4G+JdBzOqgYmT9/vqqrq/3GvvWtb2ns2LH6/ve/f90QkT6Pl+rqai1dujSYQwMAgD4qqBiJjo7WhAkT/MYGDBigoUOH+sZzcnI0fPhwuVwuSdITTzyhGTNmaPTo0bp48aKeffZZnT17Vg899FCIfgQAANCbdfl9RjpSW1urfv3++S+GP/nkEz388MOqr6/X4MGDNXXqVJWVlWncuHGhPjQAAOiFgnoBqylut1sxMTHXfQEMgN6n6uxHytxerqJHZmjyqKGmlwMghAJ9/OazaQAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGBUt2Jk06ZNstlsWrduXafzdu/erbFjxyoiIkITJ07Uvn37unNYAADQh3Q5RioqKrRjxw6lpaV1Oq+srEzZ2dlatWqVjh8/rszMTGVmZurEiRNdPTQAAOhDuhQjTU1Nuv/++/XCCy9o8ODBnc7dunWrFi9erA0bNig1NVUbN25Uenq6tm3b1qUFAwCAviWsKxutXr1ay5Yt0913360nn3yy07lHjx7V+vXr/cYWLVqkoqKiDrfxeDzyeDy+2263W5JkWZYsy+rKkgH8w3sfNavZc9X0Mnz+Vt/o972nGODor68MHWB6GUCvFuhjdtAxsmvXLr399tuqqKgIaH59fb3i4uL8xuLi4lRfX9/hNi6XS3l5eW3Gi4uLFRUVFdyCAfic/1R6qqpL/w9ywz2295TpJbTxw8lXFBtpehVA79XS0hLQvKD+Kp07d05r165VSUmJIiIiurSwQOTm5vpdTXG73UpKStLChQvldDpv2HGBvu7kB26pqlybV0zU6Ft6xv/1N3/m0WtvVmjxnDs0IMJhejmSpNMfNut7e6p1R8ZsjU/kbw7QVdee2bieoGLk2LFjOn/+vNLT031jV69e1eHDh7Vt2zZ5PB7179/fb5v4+Hg1NDT4jTU0NCg+Pr7D4zgcDjkcbf8o2e122e32YJYM4AvCwj4/5ccmxGjC8BjDq/mcZVm68I40/dZbesz5fe33FBYW1mPWBPRGgZ4/Qb2Adf78+aqurlZVVZXva9q0abr//vtVVVXVJkQkKSMjQwcOHPAbKykpUUZGRjCHBgAAfVRQV0aio6M1YcIEv7EBAwZo6NChvvGcnBwNHz5cLpdLkrR27VrNnTtX+fn5WrZsmXbt2qXKykrt3LkzRD8CAADozUL+Dqy1tbWqq6vz3Z45c6YKCwu1c+dOTZo0SXv27FFRUVGbqAEAADenbr+svrS0tNPbkpSVlaWsrKzuHgoAAPRBfDYNAAAwihgBAABGESMAAM
AoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjAoqRrZv3660tDQ5nU45nU5lZGTo1Vdf7XB+QUGBbDab31dERES3Fw0AAPqOsGAmjxgxQps2bdKYMWPk9Xr18ssv65577tHx48c1fvz4drdxOp2qqanx3bbZbN1bMQAA6FOCipHly5f73X7qqae0fft2lZeXdxgjNptN8fHxXV8hAADo04KKkS+6evWqdu/erebmZmVkZHQ4r6mpSaNGjVJra6vS09P19NNPdxgu13g8Hnk8Ht9tt9stSbIsS5ZldXXJwE3vypUrvu895Vy6to6esh6pZ/6egN4o0PMn6Biprq5WRkaGPvvsMw0cOFB79+7VuHHj2p2bkpKil156SWlpaWpsbNTmzZs1c+ZMnTx5UiNGjOjwGC6XS3l5eW3Gi4uLFRUVFeySAfzDuSZJCtORI0d0dqDp1fgrKSkxvQSfnvx7AnqTlpaWgObZvF6vN5gdX758WbW1tWpsbNSePXv04osv6tChQx0GyRdZlqXU1FRlZ2dr48aNHc5r78pIUlKSLly4IKfTGcxyAXzByQ/cytxerqJHZmh8Ys84lyzLUklJiRYsWCC73W56OZJ65u8J6I3cbreGDRumxsbGTh+/g74yEh4ertGjR0uSpk6dqoqKCm3dulU7duy47rZ2u11TpkzR6dOnO53ncDjkcDja3b6n/LECeqOwsDDf9552LvWk87sn/56A3iTQ86fb7zPS2trqdxWjM1evXlV1dbUSEhK6e1gAANBHBHVlJDc3V0uWLNHIkSN16dIlFRYWqrS0VPv375ck5eTkaPjw4XK5XJKkJ554QjNmzNDo0aN18eJFPfvsszp79qweeuih0P8kAACgVwoqRs6fP6+cnBzV1dUpJiZGaWlp2r9/vxYsWCBJqq2tVb9+/7zY8sknn+jhhx9WfX29Bg8erKlTp6qsrCyg15cAAICbQ1Ax8otf/KLT+0tLS/1ub9myRVu2bAl6UQAA4ObBZ9MAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAqKBiZPv27UpLS5PT6ZTT6VRGRoZeffXVTrfZvXu3xo4dq4iICE2cOFH79u3r1oIBAEDfElSMjBgxQps2bdKxY8dUWVmpr33ta7rnnnt08uTJdueXlZUpOztbq1at0vHjx5WZmanMzEydOHEiJIsHAAC9X1Axsnz5ci1dulRjxozR7bffrqeeekoDBw5UeXl5u/O3bt2qxYsXa8OGDUpNTdXGjRuVnp6ubdu2hWTxAACg9wvr6oZXr17V7t271dzcrIyMjHbnHD16VOvXr/cbW7RokYqKijrdt8fjkcfj8d12u92SJMuyZFlWV5cM3PQufeqRLcyt/X+r0OlPBphejiTp08uXVfbhB2qpLlNkeLjp5UiSzn3yqWxhbl25coW/OUA3BHr+BB0j1dXVysjI0GeffaaBAwdq7969GjduXLtz6+vrFRcX5zcWFxen+vr6To/hcrmUl5fXZry4uFhRUVHBLhnAPxxtsMk+6C0V1B6Qak2v5gvsUmm16UX4sw+ar4qjUTobaXolQO/V0tIS0LygYyQlJUVVVVVqbGzUnj17tHLlSh06dKjDIOmK3NxcvysqbrdbSU
lJWrhwoZxOZ8iOA9xsZjRfVnL1cMVEr1BEWH/Ty5Eknf2oSVsPvqu1dyVr1NCBppfjkxB9i9ITR5leBtCrXXtm43qCjpHw8HCNHj1akjR16lRVVFRo69at2rFjR5u58fHxamho8BtraGhQfHx8p8dwOBxyOBxtxu12u+x2e7BLBvAPcYPs+s6cqaaX4afq7Efa8tllzf1KuiaPGmp6OQBCKNDH7G6/z0hra6vf6zu+KCMjQwcOHPAbKykp6fA1JgAA4OYT1JWR3NxcLVmyRCNHjtSlS5dUWFio0tJS7d+/X5KUk5Oj4cOHy+VySZLWrl2ruXPnKj8/X8uWLdOuXbtUWVmpnTt3hv4nAQAAvVJQMXL+/Hnl5OSorq5OMTExSktL0/79+7VgwQJJUm1trfr1++fFlpkzZ6qwsFA/+tGP9IMf/EBjxoxRUVGRJkyYENqfAgAA9FpBxcgvfvGLTu8vLS1tM5aVlaWsrKygFgUAAG4efDYNAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjAoqRlwul+644w5FR0crNjZWmZmZqqmp6XSbgoIC2Ww2v6+IiIhuLRoAAPQdQcXIoUOHtHr1apWXl6ukpESWZWnhwoVqbm7udDun06m6ujrf19mzZ7u1aAAA0HeEBTP5tdde87tdUFCg2NhYHTt2TF/96lc73M5msyk+Pr5rKwQAAH1aUDHy/zU2NkqShgwZ0um8pqYmjRo1Sq2trUpPT9fTTz+t8ePHdzjf4/HI4/H4brvdbkmSZVmyLKs7SwbQw1y5csX3nfMb6FsCPae7HCOtra1at26dZs2apQkTJnQ4LyUlRS+99JLS0tLU2NiozZs3a+bMmTp58qRGjBjR7jYul0t5eXltxouLixUVFdXVJQPogc41SVKYysvL9f4J06sBEEotLS0BzbN5vV5vVw7wyCOP6NVXX9WRI0c6jIr2WJal1NRUZWdna+PGje3Oae/KSFJSki5cuCCn09mV5QLoof5S+7FWvFCpPQ9P06SRnV9lBdC7uN1uDRs2TI2NjZ0+fnfpysiaNWv0yiuv6PDhw0GFiCTZ7XZNmTJFp0+f7nCOw+GQw+Fod1u73R70egH0XGFhYb7vnN9A3xLoOR3Uv6bxer1as2aN9u7dqzfeeEPJyclBL+zq1auqrq5WQkJC0NsCAIC+J6grI6tXr1ZhYaH+8Ic/KDo6WvX19ZKkmJgYRUZGSpJycnI0fPhwuVwuSdITTzyhGTNmaPTo0bp48aKeffZZnT17Vg899FCIfxQAANAbBRUj27dvlyTNmzfPb/yXv/ylHnzwQUlSbW2t+vX75wWXTz75RA8//LDq6+s1ePBgTZ06VWVlZRo3blz3Vg4AAPqEoGIkkNe6lpaW+t3esmWLtmzZEtSiAADAzYPPpgEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRQcWIy+XSHXfcoejoaMXGxiozM1M1NTXX3W737t0aO3asIiIiNHHiRO3bt6/LCwYAAH1LUDFy6NAhrV69WuXl5SopKZFlWVq4cKGam5s73KasrEzZ2dlatWqVjh8/rszMTGVmZurEiRPdXjwAAOj9bF6v19vVjT/88E
PFxsbq0KFD+upXv9runPvuu0/Nzc165ZVXfGMzZszQ5MmT9fzzzwd0HLfbrZiYGDU2NsrpdHZ1uQB6oKqzHylze7mKHpmhyaOGml4OgBAK9PE7rDsHaWxslCQNGTKkwzlHjx7V+vXr/cYWLVqkoqKiDrfxeDzyeDy+2263W5JkWZYsy+rGigGEQktLS0BP0Qbib3WN8tSf1omqcF1uiOn2/lJSUhQVFRWClQHorkAfs7scI62trVq3bp1mzZqlCRMmdDivvr5ecXFxfmNxcXGqr6/vcBuXy6W8vLw248XFxfyRAXqAM2fO6NFHHw3pPh94OTT7yc/P12233RaanQHolpaWloDmdTlGVq9erRMnTujIkSNd3UWHcnNz/a6muN1uJSUlaeHChTxNA/QALS0tmj17dkj21fSpR/vfrNCiOXdoYKSj2/vjygjQc1x7ZuN6uhQja9as0SuvvKLDhw9rxIgRnc6Nj49XQ0OD31hDQ4Pi4+M73MbhcMjhaPtHyW63y263d2XJAEIoJiZG06dPD8m+LMvSpYsfa87MGZzfQB8T6Dkd1L+m8Xq9WrNmjfbu3as33nhDycnJ190mIyNDBw4c8BsrKSlRRkZGMIcGAAB9VFBXRlavXq3CwkL94Q9/UHR0tO91HzExMYqMjJQk5eTkaPjw4XK5XJKktWvXau7cucrPz9eyZcu0a9cuVVZWaufOnSH+UQAAQG8U1JWR7du3q7GxUfPmzVNCQoLv6ze/+Y1vTm1trerq6ny3Z86cqcLCQu3cuVOTJk3Snj17VFRU1OmLXgEAwM0jqCsjgbwlSWlpaZuxrKwsZWVlBXMoAABwk+CzaQAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGAUMQIAAIwiRgAAgFHECAAAMIoYAQAARhEjAADAKGIEAAAYRYwAAACjiBEAAGBU0DFy+PBhLV++XImJibLZbCoqKup0fmlpqWw2W5uv+vr6rq4ZAAD0IUHHSHNzsyZNmqSf/exnQW1XU1Ojuro631dsbGywhwYAAH1QWLAbLFmyREuWLAn6QLGxsRo0aFDQ2wEAgL4t6BjpqsmTJ8vj8WjChAl6/PHHNWvWrA7nejweeTwe32232y1JsixLlmXd8LUC+PJcO6c5t4G+J9Dz+obHSEJCgp5//nlNmzZNHo9HL774oubNm6e33npL6enp7W7jcrmUl5fXZry4uFhRUVE3eskADCgpKTG9BAAh1tLSEtA8m9fr9Xb1IDabTXv37lVmZmZQ282dO1cjR47Uf//3f7d7f3tXRpKSknThwgU5nc6uLhdAD2RZlkpKSrRgwQLZ7XbTywEQQm63W8OGDVNjY2Onj99f2tM0XzR9+nQdOXKkw/sdDoccDkebcbvdzh8roI/i/Ab6nkDPaSPvM1JVVaWEhAQThwYAAD1M0FdGmpqadPr0ad/td999V1VVVRoyZIhGjhyp3Nxcvf/++/qv//ovSdJzzz2n5ORkjR8/Xp999plefPFFvfHGGyouLg7dTwEAAHqtoGOksrJSd911l+/2+vXrJUkrV65UQUGB6urqVFtb67v/8uXLevTRR/X+++8rKipKaWlpev311/32AQAAbl7degHrl8XtdismJua6L4AB0PtYlqV9+/Zp6dKlvGYE6GMCffzms2kAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIw
AAwChiBAAAGEWMAAAAo4gRAMb8+te/Vnh4uDIzMxUeHq5f//rXppcEwICgY+Tw4cNavny5EhMTZbPZVFRUdN1tSktLlZ6eLofDodGjR6ugoKALSwXQl9hsNn3zm9/0G/vmN78pm81maEUATAk6RpqbmzVp0iT97Gc/C2j+u+++q2XLlumuu+5SVVWV1q1bp4ceekj79+8PerEA+ob/Hxy33nprp/cD6NuCjpElS5boySef1L333hvQ/Oeff17JycnKz89Xamqq1qxZoxUrVmjLli1BLxZA7/fFp2LefPNNXb58Wf/xH/+hy5cv680332x3HoC+LexGH+Do0aO6++67/cYWLVqkdevWdbiNx+ORx+Px3Xa73ZIky7JkWdYNWSeAL8cXn5q58847fee0ZVm68847/eatWLHiS18fgNAJ9DH7hsdIfX294uLi/Mbi4uLkdrv16aefKjIyss02LpdLeXl5bcaLi4sVFRV1w9YK4Mtz6623at++fb7bJSUlkqSkpCSdO3dOkvzuB9D7tLS0BDTvhsdIV+Tm5mr9+vW+2263W0lJSVq4cKGcTqfBlQEIlb///e9aunSpLMtSSUmJFixYILvdrszMTN+cpUuXmlsggG679szG9dzwGImPj1dDQ4PfWENDg5xOZ7tXRSTJ4XDI4XC0Gbfb7bLb7TdknQC+HIWFhb6nat566y3fUzN2u11vvfWW3zzOd6B3C/QcvuHvM5KRkaEDBw74jZWUlCgjI+NGHxpAD5Sdne377zlz5ig8PFzf/e53FR4erjlz5rQ7D0DfFnSMNDU1qaqqSlVVVZI+/6e7VVVVqq2tlfT5Uyw5OTm++f/6r/+qv//973rsscf0zjvv6Oc//7l++9vf6t///d9D8xMA6HW8Xq/f7WuvEenofgB9W9AxUllZqSlTpmjKlCmSpPXr12vKlCn6yU9+Ikmqq6vzhYkkJScn649//KNKSko0adIk5efn68UXX9SiRYtC9CMA6I28Xq8KCwv9xgoLCwkR4CZk8/aCM9/tdismJkaNjY28gBXoYyzL0r59+7R06VJeIwL0MYE+fvPZNAAAwChiBAAAGEWMAAAAo4gRAABgFDECAACMIkYAAIBRxAgAADCKGAEAAEYRIwAAwKgb/qm9oXDtTWID/ShiAL2HZVlqaWmR2+3mHViBPuba4/b13uy9V8TIpUuXJElJSUmGVwIAAIJ16dIlxcTEdHh/r/hsmtbWVn3wwQeKjo6WzWYzvRwAIeR2u5WUlKRz587x2VNAH+P1enXp0iUlJiaqX7+OXxnSK2IEQN/FB2EC4AWsAADAKGIEAAAYRYwAMMrhcOinP/2pHA6H6aUAMITXjAAAAKO4MgIAAIwiRgAAgFHECAAAMIoYAWDMV77yFT333HO+2zabTUVFRZ1u8+CDDyozM/OGrgvAl6tXvB08gJtDXV2dBg8eLEl67733lJycrOPHj2vy5Mm+OVu3br3u51wA6F2IEQA9Rnx8/HXndPb5FgB6J56mASDp88+AeuaZZzR69Gg5HA6NHDlSTz31lCSpurpaX/va1xQZGamhQ4fq29/+tpqamnzbXnvqZPPmzUpISNDQoUO1evVqWZblm3P+/HktX75ckZGRSk5O1q9+9as2a/ji0zTJycmSpClTpshms2nevHl+x7rG4/Ho3/7t3xQbG6uIiAjNnj1bFRUVvvtLS0tls9l04MABTZs2TVFRUZo5c6ZqampC9asD0E3ECABJUm5urjZt2qQf//jH+utf/6rCwkLFxcWpublZixYt0uDBg1VRUaHdu3fr9ddf15o1a/y2P3jwoM6cOaODBw/q5ZdfVkFBgQoKCnz3P/jggzp37pwOHjyoPXv26Oc//7nOnz/f4Xr+/Oc/S5Jef/111dXV6fe//3278x577DH97ne/08svv6y3335bo0eP1qJFi/Txxx/7zfvhD3+o/Px8VVZWKiwsTP/yL//Sxd8UgJDzArjpud1ur8Ph8L7wwg
tt7tu5c6d38ODB3qamJt/YH//4R2+/fv289fX1Xq/X6125cqV31KhR3itXrvjmZGVlee+77z6v1+v11tTUeCV5//znP/vuP3XqlFeSd8uWLb4xSd69e/d6vV6v99133/VK8h4/ftxvPStXrvTec889Xq/X621qavLa7Xbvr371K9/9ly9f9iYmJnqfeeYZr9fr9R48eNAryfv666/7rV+S99NPPw3itwTgRuHKCACdOnVKHo9H8+fPb/e+SZMmacCAAb6xWbNmqbW11e+pjvHjx6t///6+2wkJCb4rH6dOnVJYWJimTp3qu3/s2LEaNGhQt9Z95swZWZalWbNm+cbsdrumT5+uU6dO+c1NS0vzW5ukTq/MAPjyECMAFBkZ2e192O12v9s2m02tra3d3m+ofHF9NptNknrU+oCbGTECQGPGjFFkZKQOHDjQ5r7U1FT95S9/UXNzs2/sT3/6k/r166eUlJSA9j927FhduXJFx44d843V1NTo4sWLHW4THh4uSbp69WqHc2677TaFh4frT3/6k2/MsixVVFRo3LhxAa0NgHn8014AioiI0Pe//3099thjCg8P16xZs/Thhx/q5MmTuv/++/XTn/5UK1eu1OOPP64PP/xQ3/3ud/XAAw8oLi4uoP2npKRo8eLF+s53vqPt27crLCxM69at6/SKTGxsrCIjI/Xaa69pxIgRioiIaPPPegcMGKBHHnlEGzZs0JAhQzRy5Eg988wzamlp0apVq7r1OwHw5eHKCABJ0o9//GM9+uij+slPfqLU1FTdd999On/+vKKiorR//359/PHHuuOOO7RixQrNnz9f27ZtC2r/v/zlL5WYmKi5c+fq61//ur797W8rNja2w/lhYWH6z//8T+3YsUOJiYm655572p23adMmfeMb39ADDzyg9PR0nT59Wvv37/e9eRqAns/m9fJWhgAAwByujAAAAKOIEQAAYBQxAgAAjCJGAACAUcQIAAAwihgBAABGESMAAMAoYgQAABhFjAAAAKOIEQAAYBQxAgAAjCJGAACAUf8HxjIn7/9H4JcAAAAASUVORK5CYII=",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"house.boxplot(column=\"condition\")"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" price | \n",
" ConditionClip | \n",
"
\n",
" \n",
" \n",
" \n",
" 36 | \n",
" 20140528T000000 | \n",
" 550000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 380 | \n",
" 20140916T000000 | \n",
" 270000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 397 | \n",
" 20140623T000000 | \n",
" 365000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 1442 | \n",
" 20141107T000000 | \n",
" 352950.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 1734 | \n",
" 20150102T000000 | \n",
" 252000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 2223 | \n",
" 20150316T000000 | \n",
" 535000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 3004 | \n",
" 20141231T000000 | \n",
" 441000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 3202 | \n",
" 20140509T000000 | \n",
" 255000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 3975 | \n",
" 20150511T000000 | \n",
" 210000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 4651 | \n",
" 20141002T000000 | \n",
" 125000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 7376 | \n",
" 20141107T000000 | \n",
" 295000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 7636 | \n",
" 20150120T000000 | \n",
" 190000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 12306 | \n",
" 20150128T000000 | \n",
" 196000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 12453 | \n",
" 20150402T000000 | \n",
" 305000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 12668 | \n",
" 20140729T000000 | \n",
" 227000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 13628 | \n",
" 20140716T000000 | \n",
" 105500.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 13629 | \n",
" 20150316T000000 | \n",
" 445000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 14987 | \n",
" 20141202T000000 | \n",
" 432500.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 15293 | \n",
" 20140506T000000 | \n",
" 78000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 15337 | \n",
" 20140630T000000 | \n",
" 235000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 15371 | \n",
" 20150114T000000 | \n",
" 658000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 15712 | \n",
" 20140724T000000 | \n",
" 150000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 16198 | \n",
" 20150324T000000 | \n",
" 81000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 16893 | \n",
" 20141210T000000 | \n",
" 125000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 16942 | \n",
" 20140611T000000 | \n",
" 427000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 17805 | \n",
" 20150501T000000 | \n",
" 380000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 18332 | \n",
" 20140924T000000 | \n",
" 130000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 18645 | \n",
" 20141216T000000 | \n",
" 575000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 18876 | \n",
" 20150211T000000 | \n",
" 1500000.0 | \n",
" 2 | \n",
"
\n",
" \n",
" 19452 | \n",
" 20140926T000000 | \n",
" 142000.0 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date price ConditionClip\n",
"36 20140528T000000 550000.0 2\n",
"380 20140916T000000 270000.0 2\n",
"397 20140623T000000 365000.0 2\n",
"1442 20141107T000000 352950.0 2\n",
"1734 20150102T000000 252000.0 2\n",
"2223 20150316T000000 535000.0 2\n",
"3004 20141231T000000 441000.0 2\n",
"3202 20140509T000000 255000.0 2\n",
"3975 20150511T000000 210000.0 2\n",
"4651 20141002T000000 125000.0 2\n",
"7376 20141107T000000 295000.0 2\n",
"7636 20150120T000000 190000.0 2\n",
"12306 20150128T000000 196000.0 2\n",
"12453 20150402T000000 305000.0 2\n",
"12668 20140729T000000 227000.0 2\n",
"13628 20140716T000000 105500.0 2\n",
"13629 20150316T000000 445000.0 2\n",
"14987 20141202T000000 432500.0 2\n",
"15293 20140506T000000 78000.0 2\n",
"15337 20140630T000000 235000.0 2\n",
"15371 20150114T000000 658000.0 2\n",
"15712 20140724T000000 150000.0 2\n",
"16198 20150324T000000 81000.0 2\n",
"16893 20141210T000000 125000.0 2\n",
"16942 20140611T000000 427000.0 2\n",
"17805 20150501T000000 380000.0 2\n",
"18332 20140924T000000 130000.0 2\n",
"18645 20141216T000000 575000.0 2\n",
"18876 20150211T000000 1500000.0 2\n",
"19452 20140926T000000 142000.0 2"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"house_norm = house.copy()\n",
"\n",
"house_norm[\"ConditionClip\"] = house[\"condition\"].clip(2, 5)\n",
"\n",
"house_norm[house_norm[\"condition\"] < 2][[\"date\", \"price\", \"ConditionClip\"]]"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"5.0\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" condition | \n",
" ConditionWinsorize | \n",
"
\n",
" \n",
" \n",
" \n",
" 36 | \n",
" 20140528T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 380 | \n",
" 20140916T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 397 | \n",
" 20140623T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 1442 | \n",
" 20141107T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 1734 | \n",
" 20150102T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 2223 | \n",
" 20150316T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 3004 | \n",
" 20141231T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 3202 | \n",
" 20140509T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 3975 | \n",
" 20150511T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 4651 | \n",
" 20141002T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 7376 | \n",
" 20141107T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 7636 | \n",
" 20150120T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 12306 | \n",
" 20150128T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 12453 | \n",
" 20150402T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 12668 | \n",
" 20140729T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 13628 | \n",
" 20140716T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 13629 | \n",
" 20150316T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 14987 | \n",
" 20141202T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 15293 | \n",
" 20140506T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 15337 | \n",
" 20140630T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 15371 | \n",
" 20150114T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 15712 | \n",
" 20140724T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 16198 | \n",
" 20150324T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 16893 | \n",
" 20141210T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 16942 | \n",
" 20140611T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 17805 | \n",
" 20150501T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 18332 | \n",
" 20140924T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 18645 | \n",
" 20141216T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 18876 | \n",
" 20150211T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
" 19452 | \n",
" 20140926T000000 | \n",
" 1 | \n",
" 3 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date condition ConditionWinsorize\n",
"36 20140528T000000 1 3\n",
"380 20140916T000000 1 3\n",
"397 20140623T000000 1 3\n",
"1442 20141107T000000 1 3\n",
"1734 20150102T000000 1 3\n",
"2223 20150316T000000 1 3\n",
"3004 20141231T000000 1 3\n",
"3202 20140509T000000 1 3\n",
"3975 20150511T000000 1 3\n",
"4651 20141002T000000 1 3\n",
"7376 20141107T000000 1 3\n",
"7636 20150120T000000 1 3\n",
"12306 20150128T000000 1 3\n",
"12453 20150402T000000 1 3\n",
"12668 20140729T000000 1 3\n",
"13628 20140716T000000 1 3\n",
"13629 20150316T000000 1 3\n",
"14987 20141202T000000 1 3\n",
"15293 20140506T000000 1 3\n",
"15337 20140630T000000 1 3\n",
"15371 20150114T000000 1 3\n",
"15712 20140724T000000 1 3\n",
"16198 20150324T000000 1 3\n",
"16893 20141210T000000 1 3\n",
"16942 20140611T000000 1 3\n",
"17805 20150501T000000 1 3\n",
"18332 20140924T000000 1 3\n",
"18645 20141216T000000 1 3\n",
"18876 20150211T000000 1 3\n",
"19452 20140926T000000 1 3"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from scipy.stats.mstats import winsorize\n",
"\n",
"# 95th percentile of the raw condition scores, printed for reference.\n",
"print(house_norm[\"condition\"].quantile(q=0.95))\n",
"\n",
"# Replace missing scores with the column mean before winsorizing.\n",
"condition_filled = house_norm[\"condition\"].fillna(house_norm[\"condition\"].mean())\n",
"\n",
"# limits=(0.01, 0.05): the lowest 1% and the highest 5% of the values are\n",
"# replaced by the nearest value that is kept.\n",
"house_norm[\"ConditionWinsorize\"] = winsorize(\n",
"    condition_filled, limits=(0.01, 0.05), inplace=False\n",
")\n",
"\n",
"# Inspect what happened to the rows with the lowest original score.\n",
"lowest_scores = house_norm[\"condition\"] < 2\n",
"house_norm.loc[lowest_scores, [\"date\", \"condition\", \"ConditionWinsorize\"]]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
" condition | \n",
" ConditionNorm | \n",
" ConditionClipNorm | \n",
" ConditionWinsorizeNorm | \n",
" ConditionWinsorizeNorm2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 221900.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 538000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 180000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 604000.0 | \n",
" 5 | \n",
" 1.00 | \n",
" 1.000000 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 510000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 5 | \n",
" 1225000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 6 | \n",
" 257500.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 7 | \n",
" 291850.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 8 | \n",
" 229500.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 9 | \n",
" 323000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 10 | \n",
" 662500.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 11 | \n",
" 468000.0 | \n",
" 4 | \n",
" 0.75 | \n",
" 0.666667 | \n",
" 0.5 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 12 | \n",
" 310000.0 | \n",
" 4 | \n",
" 0.75 | \n",
" 0.666667 | \n",
" 0.5 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 13 | \n",
" 400000.0 | \n",
" 4 | \n",
" 0.75 | \n",
" 0.666667 | \n",
" 0.5 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 14 | \n",
" 530000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 15 | \n",
" 650000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 16 | \n",
" 395000.0 | \n",
" 3 | \n",
" 0.50 | \n",
" 0.333333 | \n",
" 0.0 | \n",
" -1.0 | \n",
"
\n",
" \n",
" 17 | \n",
" 485000.0 | \n",
" 4 | \n",
" 0.75 | \n",
" 0.666667 | \n",
" 0.5 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 18 | \n",
" 189000.0 | \n",
" 4 | \n",
" 0.75 | \n",
" 0.666667 | \n",
" 0.5 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 19 | \n",
" 230000.0 | \n",
" 4 | \n",
" 0.75 | \n",
" 0.666667 | \n",
" 0.5 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" price condition ConditionNorm ConditionClipNorm \\\n",
"0 221900.0 3 0.50 0.333333 \n",
"1 538000.0 3 0.50 0.333333 \n",
"2 180000.0 3 0.50 0.333333 \n",
"3 604000.0 5 1.00 1.000000 \n",
"4 510000.0 3 0.50 0.333333 \n",
"5 1225000.0 3 0.50 0.333333 \n",
"6 257500.0 3 0.50 0.333333 \n",
"7 291850.0 3 0.50 0.333333 \n",
"8 229500.0 3 0.50 0.333333 \n",
"9 323000.0 3 0.50 0.333333 \n",
"10 662500.0 3 0.50 0.333333 \n",
"11 468000.0 4 0.75 0.666667 \n",
"12 310000.0 4 0.75 0.666667 \n",
"13 400000.0 4 0.75 0.666667 \n",
"14 530000.0 3 0.50 0.333333 \n",
"15 650000.0 3 0.50 0.333333 \n",
"16 395000.0 3 0.50 0.333333 \n",
"17 485000.0 4 0.75 0.666667 \n",
"18 189000.0 4 0.75 0.666667 \n",
"19 230000.0 4 0.75 0.666667 \n",
"\n",
" ConditionWinsorizeNorm ConditionWinsorizeNorm2 \n",
"0 0.0 -1.0 \n",
"1 0.0 -1.0 \n",
"2 0.0 -1.0 \n",
"3 1.0 1.0 \n",
"4 0.0 -1.0 \n",
"5 0.0 -1.0 \n",
"6 0.0 -1.0 \n",
"7 0.0 -1.0 \n",
"8 0.0 -1.0 \n",
"9 0.0 -1.0 \n",
"10 0.0 -1.0 \n",
"11 0.5 0.0 \n",
"12 0.5 0.0 \n",
"13 0.5 0.0 \n",
"14 0.0 -1.0 \n",
"15 0.0 -1.0 \n",
"16 0.0 -1.0 \n",
"17 0.5 0.0 \n",
"18 0.5 0.0 \n",
"19 0.5 0.0 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn import preprocessing\n",
"\n",
"\n",
"def _rescale(scaler, column):\n",
"    \"\"\"Fit `scaler` on one `house_norm` column and return the scaled values as a 1-D array.\"\"\"\n",
"    # scikit-learn scalers expect a 2-D (n_samples, n_features) input.\n",
"    as_matrix = house_norm[column].to_numpy().reshape(-1, 1)\n",
"    scaled = scaler.fit_transform(as_matrix)\n",
"    # Flatten back to the shape of a single column so it can be assigned directly.\n",
"    return scaled.reshape(house_norm[\"condition\"].shape)\n",
"\n",
"\n",
"# The first scaler uses the default [0, 1] target range; the second maps onto [-1, 1].\n",
"min_max_scaler = preprocessing.MinMaxScaler()\n",
"min_max_scaler_2 = preprocessing.MinMaxScaler(feature_range=(-1, 1))\n",
"\n",
"house_norm[\"ConditionNorm\"] = _rescale(min_max_scaler, \"condition\")\n",
"house_norm[\"ConditionClipNorm\"] = _rescale(min_max_scaler, \"ConditionClip\")\n",
"house_norm[\"ConditionWinsorizeNorm\"] = _rescale(min_max_scaler, \"ConditionWinsorize\")\n",
"house_norm[\"ConditionWinsorizeNorm2\"] = _rescale(min_max_scaler_2, \"ConditionWinsorize\")\n",
"\n",
"# Compare the raw score with each rescaled variant.\n",
"shown_columns = [\n",
"    \"price\",\n",
"    \"condition\",\n",
"    \"ConditionNorm\",\n",
"    \"ConditionClipNorm\",\n",
"    \"ConditionWinsorizeNorm\",\n",
"    \"ConditionWinsorizeNorm2\",\n",
"]\n",
"house_norm[shown_columns].head(20)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" price | \n",
" condition | \n",
" ConditionStand | \n",
" ConditionClipStand | \n",
" ConditionWinsorizeStand | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 221900.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 1 | \n",
" 538000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 2 | \n",
" 180000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 3 | \n",
" 604000.0 | \n",
" 5 | \n",
" 2.444294 | \n",
" 2.457597 | \n",
" 2.494726 | \n",
"
\n",
" \n",
" 4 | \n",
" 510000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 5 | \n",
" 1225000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 6 | \n",
" 257500.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 7 | \n",
" 291850.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 8 | \n",
" 229500.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 9 | \n",
" 323000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 10 | \n",
" 662500.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 11 | \n",
" 468000.0 | \n",
" 4 | \n",
" 0.907554 | \n",
" 0.911143 | \n",
" 0.915622 | \n",
"
\n",
" \n",
" 12 | \n",
" 310000.0 | \n",
" 4 | \n",
" 0.907554 | \n",
" 0.911143 | \n",
" 0.915622 | \n",
"
\n",
" \n",
" 13 | \n",
" 400000.0 | \n",
" 4 | \n",
" 0.907554 | \n",
" 0.911143 | \n",
" 0.915622 | \n",
"
\n",
" \n",
" 14 | \n",
" 530000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 15 | \n",
" 650000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 16 | \n",
" 395000.0 | \n",
" 3 | \n",
" -0.629187 | \n",
" -0.635310 | \n",
" -0.663482 | \n",
"
\n",
" \n",
" 17 | \n",
" 485000.0 | \n",
" 4 | \n",
" 0.907554 | \n",
" 0.911143 | \n",
" 0.915622 | \n",
"
\n",
" \n",
" 18 | \n",
" 189000.0 | \n",
" 4 | \n",
" 0.907554 | \n",
" 0.911143 | \n",
" 0.915622 | \n",
"
\n",
" \n",
" 19 | \n",
" 230000.0 | \n",
" 4 | \n",
" 0.907554 | \n",
" 0.911143 | \n",
" 0.915622 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" price condition ConditionStand ConditionClipStand \\\n",
"0 221900.0 3 -0.629187 -0.635310 \n",
"1 538000.0 3 -0.629187 -0.635310 \n",
"2 180000.0 3 -0.629187 -0.635310 \n",
"3 604000.0 5 2.444294 2.457597 \n",
"4 510000.0 3 -0.629187 -0.635310 \n",
"5 1225000.0 3 -0.629187 -0.635310 \n",
"6 257500.0 3 -0.629187 -0.635310 \n",
"7 291850.0 3 -0.629187 -0.635310 \n",
"8 229500.0 3 -0.629187 -0.635310 \n",
"9 323000.0 3 -0.629187 -0.635310 \n",
"10 662500.0 3 -0.629187 -0.635310 \n",
"11 468000.0 4 0.907554 0.911143 \n",
"12 310000.0 4 0.907554 0.911143 \n",
"13 400000.0 4 0.907554 0.911143 \n",
"14 530000.0 3 -0.629187 -0.635310 \n",
"15 650000.0 3 -0.629187 -0.635310 \n",
"16 395000.0 3 -0.629187 -0.635310 \n",
"17 485000.0 4 0.907554 0.911143 \n",
"18 189000.0 4 0.907554 0.911143 \n",
"19 230000.0 4 0.907554 0.911143 \n",
"\n",
" ConditionWinsorizeStand \n",
"0 -0.663482 \n",
"1 -0.663482 \n",
"2 -0.663482 \n",
"3 2.494726 \n",
"4 -0.663482 \n",
"5 -0.663482 \n",
"6 -0.663482 \n",
"7 -0.663482 \n",
"8 -0.663482 \n",
"9 -0.663482 \n",
"10 -0.663482 \n",
"11 0.915622 \n",
"12 0.915622 \n",
"13 0.915622 \n",
"14 -0.663482 \n",
"15 -0.663482 \n",
"16 -0.663482 \n",
"17 0.915622 \n",
"18 0.915622 \n",
"19 0.915622 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn import preprocessing\n",
"\n",
"\n",
"def _standardize(column):\n",
"    \"\"\"Return one `house_norm` column standardized to zero mean and unit variance (1-D array).\"\"\"\n",
"    # A scaler is created per call; the original refit one shared scaler for every\n",
"    # column, so the computed values are the same.\n",
"    standard_scaler = preprocessing.StandardScaler()\n",
"    # scikit-learn scalers expect a 2-D (n_samples, n_features) input.\n",
"    as_matrix = house_norm[column].to_numpy().reshape(-1, 1)\n",
"    # Flatten the (n, 1) result so it can be assigned as a single column.\n",
"    return standard_scaler.fit_transform(as_matrix).ravel()\n",
"\n",
"\n",
"house_norm[\"ConditionStand\"] = _standardize(\"condition\")\n",
"house_norm[\"ConditionClipStand\"] = _standardize(\"ConditionClip\")\n",
"house_norm[\"ConditionWinsorizeStand\"] = _standardize(\"ConditionWinsorize\")\n",
"\n",
"# Compare the raw score with each standardized variant.\n",
"house_norm[\n",
"    [\n",
"        \"price\",\n",
"        \"condition\",\n",
"        \"ConditionStand\",\n",
"        \"ConditionClipStand\",\n",
"        \"ConditionWinsorizeStand\",\n",
"    ]\n",
"].head(20)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}