{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Загрузка данных"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_lot | \n",
" floors | \n",
" waterfront | \n",
" view | \n",
" condition | \n",
" grade | \n",
" ... | \n",
" zipcode_98146 | \n",
" zipcode_98148 | \n",
" zipcode_98155 | \n",
" zipcode_98166 | \n",
" zipcode_98168 | \n",
" zipcode_98177 | \n",
" zipcode_98178 | \n",
" zipcode_98188 | \n",
" zipcode_98198 | \n",
" zipcode_98199 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 7129300520 | \n",
" 221900.0 | \n",
" 3 | \n",
" 1.00 | \n",
" 5650 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 6414100192 | \n",
" 538000.0 | \n",
" 3 | \n",
" 2.25 | \n",
" 7242 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 5631500400 | \n",
" 180000.0 | \n",
" 2 | \n",
" 1.00 | \n",
" 10000 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 6 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 2487200875 | \n",
" 604000.0 | \n",
" 4 | \n",
" 3.00 | \n",
" 5000 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 5 | \n",
" 7 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 1954400510 | \n",
" 510000.0 | \n",
" 3 | \n",
" 2.00 | \n",
" 8080 | \n",
" 1.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 21608 | \n",
" 263000018 | \n",
" 360000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 1131 | \n",
" 3.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21609 | \n",
" 6600060120 | \n",
" 400000.0 | \n",
" 4 | \n",
" 2.50 | \n",
" 5813 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" ... | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21610 | \n",
" 1523300141 | \n",
" 402101.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1350 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21611 | \n",
" 291310100 | \n",
" 400000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 2388 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21612 | \n",
" 1523300157 | \n",
" 325000.0 | \n",
" 2 | \n",
" 0.75 | \n",
" 1076 | \n",
" 2.0 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
21613 rows × 4594 columns
\n",
"
"
],
"text/plain": [
" id price bedrooms bathrooms sqft_lot floors \\\n",
"0 7129300520 221900.0 3 1.00 5650 1.0 \n",
"1 6414100192 538000.0 3 2.25 7242 2.0 \n",
"2 5631500400 180000.0 2 1.00 10000 1.0 \n",
"3 2487200875 604000.0 4 3.00 5000 1.0 \n",
"4 1954400510 510000.0 3 2.00 8080 1.0 \n",
"... ... ... ... ... ... ... \n",
"21608 263000018 360000.0 3 2.50 1131 3.0 \n",
"21609 6600060120 400000.0 4 2.50 5813 2.0 \n",
"21610 1523300141 402101.0 2 0.75 1350 2.0 \n",
"21611 291310100 400000.0 3 2.50 2388 2.0 \n",
"21612 1523300157 325000.0 2 0.75 1076 2.0 \n",
"\n",
" waterfront view condition grade ... zipcode_98146 zipcode_98148 \\\n",
"0 0 0 3 7 ... 0.0 0.0 \n",
"1 0 0 3 7 ... 0.0 0.0 \n",
"2 0 0 3 6 ... 0.0 0.0 \n",
"3 0 0 5 7 ... 0.0 0.0 \n",
"4 0 0 3 8 ... 0.0 0.0 \n",
"... ... ... ... ... ... ... ... \n",
"21608 0 0 3 8 ... 0.0 0.0 \n",
"21609 0 0 3 8 ... 1.0 0.0 \n",
"21610 0 0 3 7 ... 0.0 0.0 \n",
"21611 0 0 3 8 ... 0.0 0.0 \n",
"21612 0 0 3 7 ... 0.0 0.0 \n",
"\n",
" zipcode_98155 zipcode_98166 zipcode_98168 zipcode_98177 \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
" zipcode_98178 zipcode_98188 zipcode_98198 zipcode_98199 \n",
"0 1.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"21608 0.0 0.0 0.0 0.0 \n",
"21609 0.0 0.0 0.0 0.0 \n",
"21610 0.0 0.0 0.0 0.0 \n",
"21611 0.0 0.0 0.0 0.0 \n",
"21612 0.0 0.0 0.0 0.0 \n",
"\n",
"[21613 rows x 4594 columns]"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"import numpy as np # type: ignore\n",
"\n",
"from sklearn import set_config\n",
"\n",
"# Make every scikit-learn transformer return a pandas DataFrame (with feature\n",
"# names) instead of a bare NumPy array.\n",
"set_config(transform_output=\"pandas\")\n",
"\n",
"random_state = 9\n",
"\n",
"df = pd.read_csv(\"data/kc_house_data.csv\", index_col=False)\n",
"\n",
"# Only the categorical columns are one-hot encoded. \"price\" is a continuous\n",
"# numeric feature: encoding it creates one indicator column per distinct sale\n",
"# price, while the numeric \"price\" column stays in the frame anyway.\n",
"categorical_columns = [\"date\", \"yr_built\", \"zipcode\"]\n",
"\n",
"# drop=\"first\" removes one indicator per encoded column.\n",
"encoder = OneHotEncoder(sparse_output=False, drop=\"first\")\n",
"\n",
"encoded_values = encoder.fit_transform(df[categorical_columns])\n",
"\n",
"encoded_columns = encoder.get_feature_names_out(categorical_columns)\n",
"\n",
"encoded_values_df = pd.DataFrame(encoded_values, columns=encoded_columns)\n",
"\n",
"# Append the indicator columns to the original frame (aligned on the row index).\n",
"df = pd.concat([df, encoded_values_df], axis=1)\n",
"\n",
"# Drop the raw categorical columns (now represented by their indicators)\n",
"# together with the other columns that are not used as features.\n",
"df = df.drop(\n",
"    [\n",
"        \"yr_built\",\n",
"        \"date\",\n",
"        \"lat\",\n",
"        \"sqft_living15\",\n",
"        \"sqft_lot15\",\n",
"        \"zipcode\",\n",
"        \"sqft_basement\",\n",
"        \"sqft_above\",\n",
"        \"sqft_living\",\n",
"    ],\n",
"    axis=1,\n",
")\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Формирование выборок"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'X_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_lot | \n",
" waterfront | \n",
" view | \n",
" condition | \n",
" grade | \n",
" yr_renovated | \n",
" ... | \n",
" zipcode_98146 | \n",
" zipcode_98148 | \n",
" zipcode_98155 | \n",
" zipcode_98166 | \n",
" zipcode_98168 | \n",
" zipcode_98177 | \n",
" zipcode_98178 | \n",
" zipcode_98188 | \n",
" zipcode_98198 | \n",
" zipcode_98199 | \n",
"
\n",
" \n",
" \n",
" \n",
" 20358 | \n",
" 9265880170 | \n",
" 550000.0 | \n",
" 4 | \n",
" 2.50 | \n",
" 5954 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 10171 | \n",
" 98030400 | \n",
" 790000.0 | \n",
" 4 | \n",
" 3.50 | \n",
" 6098 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 10 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21214 | \n",
" 3343903611 | \n",
" 615000.0 | \n",
" 5 | \n",
" 3.25 | \n",
" 7069 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 9 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 8181 | \n",
" 1139000215 | \n",
" 416000.0 | \n",
" 2 | \n",
" 1.75 | \n",
" 7560 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 15657 | \n",
" 7893805650 | \n",
" 475000.0 | \n",
" 5 | \n",
" 2.00 | \n",
" 10200 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 6 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 14527 | \n",
" 3888100133 | \n",
" 360000.0 | \n",
" 3 | \n",
" 1.00 | \n",
" 10988 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 13392 | \n",
" 7137800085 | \n",
" 185000.0 | \n",
" 3 | \n",
" 1.75 | \n",
" 9085 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 15693 | \n",
" 2402100675 | \n",
" 645000.0 | \n",
" 3 | \n",
" 3.75 | \n",
" 6000 | \n",
" 0 | \n",
" 0 | \n",
" 5 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 19716 | \n",
" 3943600070 | \n",
" 400000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 4788 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 7612 | \n",
" 6679000720 | \n",
" 296000.0 | \n",
" 3 | \n",
" 2.50 | \n",
" 5845 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
17290 rows × 4593 columns
\n",
"
"
],
"text/plain": [
" id price bedrooms bathrooms sqft_lot waterfront view \\\n",
"20358 9265880170 550000.0 4 2.50 5954 0 0 \n",
"10171 98030400 790000.0 4 3.50 6098 0 0 \n",
"21214 3343903611 615000.0 5 3.25 7069 0 0 \n",
"8181 1139000215 416000.0 2 1.75 7560 0 0 \n",
"15657 7893805650 475000.0 5 2.00 10200 0 0 \n",
"... ... ... ... ... ... ... ... \n",
"14527 3888100133 360000.0 3 1.00 10988 0 0 \n",
"13392 7137800085 185000.0 3 1.75 9085 0 0 \n",
"15693 2402100675 645000.0 3 3.75 6000 0 0 \n",
"19716 3943600070 400000.0 3 2.50 4788 0 0 \n",
"7612 6679000720 296000.0 3 2.50 5845 0 0 \n",
"\n",
" condition grade yr_renovated ... zipcode_98146 zipcode_98148 \\\n",
"20358 3 8 0 ... 0.0 0.0 \n",
"10171 3 10 0 ... 0.0 0.0 \n",
"21214 3 9 0 ... 0.0 0.0 \n",
"8181 4 7 0 ... 0.0 0.0 \n",
"15657 3 6 0 ... 0.0 0.0 \n",
"... ... ... ... ... ... ... \n",
"14527 3 7 0 ... 0.0 0.0 \n",
"13392 3 7 0 ... 0.0 0.0 \n",
"15693 5 7 0 ... 0.0 0.0 \n",
"19716 3 8 0 ... 0.0 0.0 \n",
"7612 3 7 0 ... 0.0 0.0 \n",
"\n",
" zipcode_98155 zipcode_98166 zipcode_98168 zipcode_98177 \\\n",
"20358 0.0 0.0 0.0 0.0 \n",
"10171 0.0 0.0 0.0 0.0 \n",
"21214 0.0 0.0 0.0 0.0 \n",
"8181 0.0 0.0 0.0 0.0 \n",
"15657 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"14527 0.0 0.0 0.0 0.0 \n",
"13392 0.0 0.0 0.0 0.0 \n",
"15693 0.0 0.0 0.0 0.0 \n",
"19716 0.0 0.0 0.0 0.0 \n",
"7612 0.0 0.0 0.0 0.0 \n",
"\n",
" zipcode_98178 zipcode_98188 zipcode_98198 zipcode_98199 \n",
"20358 0.0 0.0 0.0 0.0 \n",
"10171 0.0 0.0 0.0 0.0 \n",
"21214 0.0 0.0 0.0 0.0 \n",
"8181 0.0 0.0 0.0 0.0 \n",
"15657 0.0 0.0 1.0 0.0 \n",
"... ... ... ... ... \n",
"14527 0.0 0.0 0.0 0.0 \n",
"13392 0.0 0.0 0.0 0.0 \n",
"15693 0.0 0.0 0.0 0.0 \n",
"19716 0.0 0.0 0.0 0.0 \n",
"7612 0.0 0.0 0.0 0.0 \n",
"\n",
"[17290 rows x 4593 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" floors | \n",
"
\n",
" \n",
" \n",
" \n",
" 20358 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 10171 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 21214 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 8181 | \n",
" 1.5 | \n",
"
\n",
" \n",
" 15657 | \n",
" 1.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 14527 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 13392 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 15693 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 19716 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 7612 | \n",
" 2.0 | \n",
"
\n",
" \n",
"
\n",
"
17290 rows × 1 columns
\n",
"
"
],
"text/plain": [
" floors\n",
"20358 2.0\n",
"10171 2.0\n",
"21214 2.0\n",
"8181 1.5\n",
"15657 1.0\n",
"... ...\n",
"14527 1.0\n",
"13392 1.0\n",
"15693 2.0\n",
"19716 2.0\n",
"7612 2.0\n",
"\n",
"[17290 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'X_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_lot | \n",
" waterfront | \n",
" view | \n",
" condition | \n",
" grade | \n",
" yr_renovated | \n",
" ... | \n",
" zipcode_98146 | \n",
" zipcode_98148 | \n",
" zipcode_98155 | \n",
" zipcode_98166 | \n",
" zipcode_98168 | \n",
" zipcode_98177 | \n",
" zipcode_98178 | \n",
" zipcode_98188 | \n",
" zipcode_98198 | \n",
" zipcode_98199 | \n",
"
\n",
" \n",
" \n",
" \n",
" 12435 | \n",
" 1217000340 | \n",
" 340000.0 | \n",
" 3 | \n",
" 1.00 | \n",
" 8100 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 19037 | \n",
" 984200690 | \n",
" 299000.0 | \n",
" 5 | \n",
" 2.50 | \n",
" 9360 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 6609 | \n",
" 7971300020 | \n",
" 800000.0 | \n",
" 5 | \n",
" 2.00 | \n",
" 10960 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 11869 | \n",
" 1498303905 | \n",
" 615000.0 | \n",
" 4 | \n",
" 1.50 | \n",
" 3240 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 8 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 13414 | \n",
" 4402700230 | \n",
" 352500.0 | \n",
" 3 | \n",
" 1.50 | \n",
" 7680 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 12113 | \n",
" 3861500340 | \n",
" 279900.0 | \n",
" 3 | \n",
" 1.75 | \n",
" 6620 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 5776 | \n",
" 1870400470 | \n",
" 637800.0 | \n",
" 4 | \n",
" 1.75 | \n",
" 4750 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 8671 | \n",
" 4022900571 | \n",
" 385000.0 | \n",
" 5 | \n",
" 2.00 | \n",
" 11750 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 21551 | \n",
" 1561750040 | \n",
" 1375000.0 | \n",
" 5 | \n",
" 4.50 | \n",
" 13405 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 11 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 6634 | \n",
" 1853200190 | \n",
" 612000.0 | \n",
" 4 | \n",
" 2.50 | \n",
" 5974 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
4323 rows × 4593 columns
\n",
"
"
],
"text/plain": [
" id price bedrooms bathrooms sqft_lot waterfront view \\\n",
"12435 1217000340 340000.0 3 1.00 8100 0 0 \n",
"19037 984200690 299000.0 5 2.50 9360 0 0 \n",
"6609 7971300020 800000.0 5 2.00 10960 0 0 \n",
"11869 1498303905 615000.0 4 1.50 3240 0 0 \n",
"13414 4402700230 352500.0 3 1.50 7680 0 0 \n",
"... ... ... ... ... ... ... ... \n",
"12113 3861500340 279900.0 3 1.75 6620 0 0 \n",
"5776 1870400470 637800.0 4 1.75 4750 0 0 \n",
"8671 4022900571 385000.0 5 2.00 11750 0 0 \n",
"21551 1561750040 1375000.0 5 4.50 13405 0 0 \n",
"6634 1853200190 612000.0 4 2.50 5974 0 0 \n",
"\n",
" condition grade yr_renovated ... zipcode_98146 zipcode_98148 \\\n",
"12435 4 7 0 ... 0.0 0.0 \n",
"19037 4 7 0 ... 0.0 0.0 \n",
"6609 4 7 0 ... 0.0 0.0 \n",
"11869 4 8 0 ... 0.0 0.0 \n",
"13414 3 7 0 ... 0.0 0.0 \n",
"... ... ... ... ... ... ... \n",
"12113 3 7 0 ... 0.0 0.0 \n",
"5776 4 7 0 ... 0.0 0.0 \n",
"8671 3 7 0 ... 0.0 0.0 \n",
"21551 3 11 0 ... 0.0 0.0 \n",
"6634 3 8 0 ... 0.0 0.0 \n",
"\n",
" zipcode_98155 zipcode_98166 zipcode_98168 zipcode_98177 \\\n",
"12435 0.0 1.0 0.0 0.0 \n",
"19037 0.0 0.0 0.0 0.0 \n",
"6609 0.0 0.0 0.0 0.0 \n",
"11869 0.0 0.0 0.0 0.0 \n",
"13414 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"12113 0.0 0.0 0.0 0.0 \n",
"5776 0.0 0.0 0.0 0.0 \n",
"8671 1.0 0.0 0.0 0.0 \n",
"21551 0.0 0.0 0.0 0.0 \n",
"6634 0.0 0.0 0.0 0.0 \n",
"\n",
" zipcode_98178 zipcode_98188 zipcode_98198 zipcode_98199 \n",
"12435 0.0 0.0 0.0 0.0 \n",
"19037 0.0 0.0 0.0 0.0 \n",
"6609 0.0 0.0 0.0 0.0 \n",
"11869 0.0 0.0 0.0 0.0 \n",
"13414 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"12113 0.0 0.0 0.0 0.0 \n",
"5776 0.0 0.0 0.0 0.0 \n",
"8671 0.0 0.0 0.0 0.0 \n",
"21551 0.0 0.0 0.0 0.0 \n",
"6634 0.0 0.0 0.0 0.0 \n",
"\n",
"[4323 rows x 4593 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" floors | \n",
"
\n",
" \n",
" \n",
" \n",
" 12435 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 19037 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 6609 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 11869 | \n",
" 1.5 | \n",
"
\n",
" \n",
" 13414 | \n",
" 1.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 12113 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 5776 | \n",
" 1.5 | \n",
"
\n",
" \n",
" 8671 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 21551 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 6634 | \n",
" 2.0 | \n",
"
\n",
" \n",
"
\n",
"
4323 rows × 1 columns
\n",
"
"
],
"text/plain": [
" floors\n",
"12435 1.0\n",
"19037 1.0\n",
"6609 1.0\n",
"11869 1.5\n",
"13414 1.0\n",
"... ...\n",
"12113 1.0\n",
"5776 1.5\n",
"8671 1.0\n",
"21551 2.0\n",
"6634 2.0\n",
"\n",
"[4323 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from utils import split_stratified_into_train_val_test\n",
"\n",
"# 80/20 train/test split stratified on \"floors\"; no rows are requested for\n",
"# the validation part (frac_val=0).\n",
"split_parts = split_stratified_into_train_val_test(\n",
"    df,\n",
"    stratify_colname=\"floors\",\n",
"    frac_train=0.80,\n",
"    frac_val=0,\n",
"    frac_test=0.20,\n",
"    random_state=random_state,\n",
")\n",
"X_train, X_val, X_test, y_train, y_val, y_test = split_parts\n",
"\n",
"# Remove the target column from the feature frames.\n",
"X_train = X_train.drop(columns=[\"floors\"])\n",
"X_test = X_test.drop(columns=[\"floors\"])\n",
"\n",
"# Show every partition preceded by its label.\n",
"for label, frame in (\n",
"    (\"X_train\", X_train),\n",
"    (\"y_train\", y_train),\n",
"    (\"X_test\", X_test),\n",
"    (\"y_test\", y_test),\n",
"):\n",
"    display(label, frame)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
"\n",
"random_state = 9\n",
"\n",
"# Linear regressions fitted on expanded features. The intercept is disabled\n",
"# in the regressor itself (fit_intercept=False).\n",
"polynomial_regression = make_pipeline(\n",
"    PolynomialFeatures(degree=2),\n",
"    linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
")\n",
"interaction_regression = make_pipeline(\n",
"    PolynomialFeatures(interaction_only=True),\n",
"    linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
")\n",
"\n",
"# Candidate regressors, listed in the order in which they are evaluated.\n",
"candidate_estimators = {\n",
"    \"linear\": linear_model.LinearRegression(n_jobs=-1),\n",
"    \"linear_poly\": polynomial_regression,\n",
"    \"linear_interact\": interaction_regression,\n",
"    \"ridge\": linear_model.RidgeCV(),\n",
"    \"decision_tree\": tree.DecisionTreeRegressor(\n",
"        max_depth=7, random_state=random_state\n",
"    ),\n",
"    \"knn\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1),\n",
"    \"random_forest\": ensemble.RandomForestRegressor(\n",
"        max_depth=7, random_state=random_state, n_jobs=-1\n",
"    ),\n",
"    \"mlp\": neural_network.MLPRegressor(\n",
"        activation=\"tanh\",\n",
"        hidden_layer_sizes=(3,),\n",
"        max_iter=500,\n",
"        early_stopping=True,\n",
"        random_state=random_state,\n",
"    ),\n",
"}\n",
"\n",
"# Each estimator is wrapped in its own dict under the \"model\" key so that\n",
"# further per-model entries can be stored next to it.\n",
"models = {\n",
"    name: {\"model\": estimator} for name, estimator in candidate_estimators.items()\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение функции для стандартизации значений в столбцах \"price\" и \"bedrooms\" для MLP"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"from sklearn import preprocessing\n",
"\n",
"\n",
"# Shared scaler instance used by std_q.\n",
"stndart_scaler = preprocessing.StandardScaler()\n",
"\n",
"\n",
"def std_q(df: DataFrame, fit: bool = True) -> DataFrame:\n",
"    \"\"\"Return a copy of ``df`` with \"price\" and \"bedrooms\" standardized.\n",
"\n",
"    The input frame is not modified, so frames that are reused elsewhere\n",
"    (for PCA or for display) keep their original values.\n",
"\n",
"    Args:\n",
"        df: Frame that contains numeric \"price\" and \"bedrooms\" columns.\n",
"        fit: If True (default), re-fit ``stndart_scaler`` on ``df`` before\n",
"            transforming. If False, reuse the statistics of the previous fit,\n",
"            e.g. to scale a test frame with training statistics.\n",
"\n",
"    Returns:\n",
"        A new DataFrame equal to ``df`` except that \"price\" and \"bedrooms\"\n",
"        hold the standardized values.\n",
"    \"\"\"\n",
"    scaled_columns = [\"price\", \"bedrooms\"]\n",
"    raw_values = df[scaled_columns].to_numpy(dtype=float)\n",
"    # StandardScaler standardizes every column independently, so one call on\n",
"    # both columns gives the same values as scaling them one by one.\n",
"    scale = stndart_scaler.fit_transform if fit else stndart_scaler.transform\n",
"    # np.asarray: the scaler may return a DataFrame when scikit-learn is\n",
"    # configured for pandas output.\n",
"    scaled_values = np.asarray(scale(raw_values))\n",
"    result = df.copy()\n",
"    for position, column in enumerate(scaled_columns):\n",
"        result[column] = scaled_values[:, position]\n",
"    return result"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение и оценка моделей с помощью различных алгоритмов"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n",
"Model: mlp\n"
]
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"import math\n",
"from pandas import DataFrame\n",
"from sklearn import metrics\n",
"\n",
"# Reduce dimensionality with PCA before fitting most of the models.\n",
"pca = PCA(n_components=100)  # Adjust based on memory constraints\n",
"\n",
"# The projection is fitted on the training part and reused for the test part.\n",
"X_train_reduced = pca.fit_transform(X_train)\n",
"X_test_reduced = pca.transform(X_test)\n",
"\n",
"for model_name, model_entry in models.items():\n",
"    print(f\"Model: {model_name}\")\n",
"\n",
"    if model_name == \"mlp\":\n",
"        # The MLP is fitted on the full (non-PCA) frames passed through std_q.\n",
"        # NOTE(review): std_q is called separately on the train and test\n",
"        # frames - confirm that this is the intended scaling.\n",
"        X_train_model, X_test_model = std_q(X_train), std_q(X_test)\n",
"    else:\n",
"        X_train_model, X_test_model = X_train_reduced, X_test_reduced\n",
"\n",
"    fitted_model = model_entry[\"model\"].fit(X_train_model, y_train.values.ravel())\n",
"    y_train_pred = fitted_model.predict(X_train_model)\n",
"    y_test_pred = fitted_model.predict(X_test_model)\n",
"\n",
"    # Keep the fitted estimator and its predictions next to the model entry.\n",
"    model_entry[\"fitted\"] = fitted_model\n",
"    model_entry[\"train_preds\"] = y_train_pred\n",
"    model_entry[\"preds\"] = y_test_pred\n",
"\n",
"    # Scores; \"RMAE_test\" is the square root of the mean absolute error.\n",
"    train_mse = metrics.mean_squared_error(y_train, y_train_pred)\n",
"    model_entry[\"RMSE_train\"] = math.sqrt(train_mse)\n",
"    test_mse = metrics.mean_squared_error(y_test, y_test_pred)\n",
"    model_entry[\"RMSE_test\"] = math.sqrt(test_mse)\n",
"    test_mae = metrics.mean_absolute_error(y_test, y_test_pred)\n",
"    model_entry[\"RMAE_test\"] = math.sqrt(test_mae)\n",
"    model_entry[\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод результатов оценки"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" RMSE_train | \n",
" RMSE_test | \n",
" RMAE_test | \n",
" R2_test | \n",
"
\n",
" \n",
" \n",
" \n",
" random_forest | \n",
" 0.345951 | \n",
" 0.377087 | \n",
" 0.535564 | \n",
" 0.513150 | \n",
"
\n",
" \n",
" linear | \n",
" 0.395435 | \n",
" 0.394567 | \n",
" 0.556776 | \n",
" 0.466967 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 0.370941 | \n",
" 0.419609 | \n",
" 0.546781 | \n",
" 0.397161 | \n",
"
\n",
" \n",
" knn | \n",
" 0.407775 | \n",
" 0.480928 | \n",
" 0.599105 | \n",
" 0.208099 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.542558 | \n",
" 0.545098 | \n",
" 0.700044 | \n",
" -0.017329 | \n",
"
\n",
" \n",
" linear_interact | \n",
" 0.689103 | \n",
" 0.688884 | \n",
" 0.681739 | \n",
" -0.624815 | \n",
"
\n",
" \n",
" linear_poly | \n",
" 4.966961 | \n",
" 5.119087 | \n",
" 1.798987 | \n",
" -88.721546 | \n",
"
\n",
" \n",
" ridge | \n",
" 76554.119536 | \n",
" 77087.654118 | \n",
" 261.058309 | \n",
" -20346113161.492393 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One row per model, restricted to the score columns.\n",
"metric_columns = [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\")[metric_columns]\n",
"\n",
"# Order by test RMSE and colour the two groups of score columns separately.\n",
"ranked_styler = reg_metrics.sort_values(by=\"RMSE_test\").style\n",
"ranked_styler = ranked_styler.background_gradient(\n",
"    cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
")\n",
"ranked_styler.background_gradient(\n",
"    cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод реального и \"спрогнозированного\" результата для обучающей и тестовой выборок"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение лучшей модели"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'random_forest'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# The best model is the first row label after sorting by test RMSE.\n",
"ranked_by_rmse = reg_metrics.sort_values(by=\"RMSE_test\")\n",
"best_model = str(ranked_by_rmse.index[0])\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для обучающей выборки"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" price | \n",
" bedrooms | \n",
" bathrooms | \n",
" sqft_lot | \n",
" waterfront | \n",
" view | \n",
" condition | \n",
" grade | \n",
" yr_renovated | \n",
" ... | \n",
" zipcode_98155 | \n",
" zipcode_98166 | \n",
" zipcode_98168 | \n",
" zipcode_98177 | \n",
" zipcode_98178 | \n",
" zipcode_98188 | \n",
" zipcode_98198 | \n",
" zipcode_98199 | \n",
" floors | \n",
" DensityPred | \n",
"
\n",
" \n",
" \n",
" \n",
" 20358 | \n",
" 9265880170 | \n",
" 0.026934 | \n",
" 0.674642 | \n",
" 2.50 | \n",
" 5954 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 8 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 1.763424 | \n",
"
\n",
" \n",
" 10171 | \n",
" 98030400 | \n",
" 0.684474 | \n",
" 0.674642 | \n",
" 3.50 | \n",
" 6098 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 10 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 1.951525 | \n",
"
\n",
" \n",
" 21214 | \n",
" 3343903611 | \n",
" 0.205017 | \n",
" 1.742826 | \n",
" 3.25 | \n",
" 7069 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 9 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 2.0 | \n",
" 1.802658 | \n",
"
\n",
" \n",
" 8181 | \n",
" 1139000215 | \n",
" -0.340193 | \n",
" -1.461725 | \n",
" 1.75 | \n",
" 7560 | \n",
" 0 | \n",
" 0 | \n",
" 4 | \n",
" 7 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.5 | \n",
" 1.183226 | \n",
"
\n",
" \n",
" 15657 | \n",
" 7893805650 | \n",
" -0.178548 | \n",
" 1.742826 | \n",
" 2.00 | \n",
" 10200 | \n",
" 0 | \n",
" 0 | \n",
" 3 | \n",
" 6 | \n",
" 0 | \n",
" ... | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 0.0 | \n",
" 1.0 | \n",
" 1.201648 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 4595 columns
\n",
"
"
],
"text/plain": [
" id price bedrooms bathrooms sqft_lot waterfront view \\\n",
"20358 9265880170 0.026934 0.674642 2.50 5954 0 0 \n",
"10171 98030400 0.684474 0.674642 3.50 6098 0 0 \n",
"21214 3343903611 0.205017 1.742826 3.25 7069 0 0 \n",
"8181 1139000215 -0.340193 -1.461725 1.75 7560 0 0 \n",
"15657 7893805650 -0.178548 1.742826 2.00 10200 0 0 \n",
"\n",
" condition grade yr_renovated ... zipcode_98155 zipcode_98166 \\\n",
"20358 3 8 0 ... 0.0 0.0 \n",
"10171 3 10 0 ... 0.0 0.0 \n",
"21214 3 9 0 ... 0.0 0.0 \n",
"8181 4 7 0 ... 0.0 0.0 \n",
"15657 3 6 0 ... 0.0 0.0 \n",
"\n",
" zipcode_98168 zipcode_98177 zipcode_98178 zipcode_98188 \\\n",
"20358 0.0 0.0 0.0 0.0 \n",
"10171 0.0 0.0 0.0 0.0 \n",
"21214 0.0 0.0 0.0 0.0 \n",
"8181 0.0 0.0 0.0 0.0 \n",
"15657 0.0 0.0 0.0 0.0 \n",
"\n",
" zipcode_98198 zipcode_98199 floors DensityPred \n",
"20358 0.0 0.0 2.0 1.763424 \n",
"10171 0.0 0.0 2.0 1.951525 \n",
"21214 0.0 0.0 2.0 1.802658 \n",
"8181 0.0 0.0 1.5 1.183226 \n",
"15657 1.0 0.0 1.0 1.201648 \n",
"\n",
"[5 rows x 4595 columns]"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First five training rows: features, the true \"floors\" value and the best\n",
"# model's prediction, aligned on the training row index. The prediction\n",
"# column uses the same \"FloorsPred\" label as the test-set preview.\n",
"train_predictions = pd.Series(\n",
"    models[best_model][\"train_preds\"],\n",
"    index=y_train.index,\n",
"    name=\"FloorsPred\",\n",
")\n",
"pd.concat([X_train, y_train, train_predictions], axis=1).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для тестовой выборки"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
" DensityPred | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.05696 | \n",
" 1.057040 | \n",
"
\n",
" \n",
" 1 | \n",
" 55 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.04158 | \n",
" 1.041341 | \n",
"
\n",
" \n",
" 2 | \n",
" 25 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.08438 | \n",
" 1.084063 | \n",
"
\n",
" \n",
" 3 | \n",
" 30 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.08112 | \n",
" 1.080764 | \n",
"
\n",
" \n",
" 4 | \n",
" 35 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.07781 | \n",
" 1.077444 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density DensityPred\n",
"0 30 0.00 0.0 1.05696 1.057040\n",
"1 55 0.00 0.0 1.04158 1.041341\n",
"2 25 0.05 0.0 1.08438 1.084063\n",
"3 30 0.05 0.0 1.08112 1.080764\n",
"4 35 0.05 0.0 1.07781 1.077444"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# First five test rows: features, the true \"floors\" value and the best\n",
"# model's prediction, aligned on the test row index.\n",
"test_predictions = pd.Series(\n",
"    models[best_model][\"preds\"],\n",
"    index=y_test.index,\n",
"    name=\"FloorsPred\",\n",
")\n",
"pd.concat([X_test, y_test, test_predictions], axis=1).head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}