MII/lec4_reg.ipynb

3720 lines
129 KiB
Plaintext
Raw Normal View History

2024-11-15 23:06:57 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Загрузка данных"
]
},
{
"cell_type": "code",
"execution_count": 94,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod_year</th>\n",
" <th>Category</th>\n",
" <th>Leather_interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" <th>distance_nokm</th>\n",
" <th>distance_norm</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45654403</th>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>186005 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" <td>186005</td>\n",
" <td>186005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44731507</th>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>3</td>\n",
" <td>192000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" <td>192000</td>\n",
" <td>192000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45774419</th>\n",
" <td>8467</td>\n",
" <td>0</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2006</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>200000 km</td>\n",
" <td>4.0</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" <td>200000</td>\n",
" <td>200000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45769185</th>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>168966 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" <td>168966</td>\n",
" <td>168966</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809263</th>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2014</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>91901 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" <td>91901</td>\n",
" <td>91901</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45798355</th>\n",
" <td>8467</td>\n",
" <td>0</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>CLK 200</td>\n",
" <td>1999</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>CNG</td>\n",
" <td>2.0 Turbo</td>\n",
" <td>300000 km</td>\n",
" <td>4.0</td>\n",
" <td>Manual</td>\n",
" <td>Rear</td>\n",
" <td>02-Mar</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>5</td>\n",
" <td>300000</td>\n",
" <td>300000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45778856</th>\n",
" <td>15681</td>\n",
" <td>831</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2011</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>161600 km</td>\n",
" <td>4.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>8</td>\n",
" <td>161600</td>\n",
" <td>161600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45804997</th>\n",
" <td>26108</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Tucson</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>116365 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" <td>116365</td>\n",
" <td>116365</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793526</th>\n",
" <td>5331</td>\n",
" <td>1288</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Captiva</td>\n",
" <td>2007</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>51258 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>4</td>\n",
" <td>51258</td>\n",
" <td>51258</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45813273</th>\n",
" <td>470</td>\n",
" <td>753</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2012</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.4</td>\n",
" <td>186923 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" <td>186923</td>\n",
" <td>186923</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19237 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Prod_year Category \\\n",
"ID \n",
"45654403 13328 1399 LEXUS RX 450 2010 Jeep \n",
"44731507 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
"45774419 8467 0 HONDA FIT 2006 Hatchback \n",
"45769185 3607 862 FORD Escape 2011 Jeep \n",
"45809263 11726 446 HONDA FIT 2014 Hatchback \n",
"... ... ... ... ... ... ... \n",
"45798355 8467 0 MERCEDES-BENZ CLK 200 1999 Coupe \n",
"45778856 15681 831 HYUNDAI Sonata 2011 Sedan \n",
"45804997 26108 836 HYUNDAI Tucson 2010 Jeep \n",
"45793526 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
"45813273 470 753 HYUNDAI Sonata 2012 Sedan \n",
"\n",
" Leather_interior Fuel type Engine volume Mileage Cylinders \\\n",
"ID \n",
"45654403 Yes Hybrid 3.5 186005 km 6.0 \n",
"44731507 No Petrol 3 192000 km 6.0 \n",
"45774419 No Petrol 1.3 200000 km 4.0 \n",
"45769185 Yes Hybrid 2.5 168966 km 4.0 \n",
"45809263 Yes Petrol 1.3 91901 km 4.0 \n",
"... ... ... ... ... ... \n",
"45798355 Yes CNG 2.0 Turbo 300000 km 4.0 \n",
"45778856 Yes Petrol 2.4 161600 km 4.0 \n",
"45804997 Yes Diesel 2 116365 km 4.0 \n",
"45793526 Yes Diesel 2 51258 km 4.0 \n",
"45813273 Yes Hybrid 2.4 186923 km 4.0 \n",
"\n",
" Gear box type Drive wheels Doors Wheel Color \\\n",
"ID \n",
"45654403 Automatic 4x4 04-May Left wheel Silver \n",
"44731507 Tiptronic 4x4 04-May Left wheel Black \n",
"45774419 Variator Front 04-May Right-hand drive Black \n",
"45769185 Automatic 4x4 04-May Left wheel White \n",
"45809263 Automatic Front 04-May Left wheel Silver \n",
"... ... ... ... ... ... \n",
"45798355 Manual Rear 02-Mar Left wheel Silver \n",
"45778856 Tiptronic Front 04-May Left wheel Red \n",
"45804997 Automatic Front 04-May Left wheel Grey \n",
"45793526 Automatic Front 04-May Left wheel Black \n",
"45813273 Automatic Front 04-May Left wheel White \n",
"\n",
" Airbags distance_nokm distance_norm \n",
"ID \n",
"45654403 12 186005 186005 \n",
"44731507 8 192000 192000 \n",
"45774419 2 200000 200000 \n",
"45769185 0 168966 168966 \n",
"45809263 4 91901 91901 \n",
"... ... ... ... \n",
"45798355 5 300000 300000 \n",
"45778856 8 161600 161600 \n",
"45804997 4 116365 116365 \n",
"45793526 4 51258 51258 \n",
"45813273 12 186923 186923 \n",
"\n",
"[19237 rows x 19 columns]"
]
},
"execution_count": 94,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"from sklearn import set_config\n",
"\n",
"# Ask scikit-learn transformers to return pandas DataFrames.\n",
"set_config(transform_output=\"pandas\")\n",
"\n",
"random_state = 9\n",
"\n",
"# Upper bound (in km) applied to mileage when building `distance_norm`.\n",
"MAX_MILEAGE_KM = 350000\n",
"\n",
"df = pd.read_csv(\"data/car_price_prediction.csv\", index_col=\"ID\")\n",
"\n",
"# Mileage is stored as text such as \"186005 km\": strip the unit suffix and parse the number.\n",
"df[\"distance_nokm\"] = pd.to_numeric(df[\"Mileage\"].str.replace(\" km\", \"\", regex=False))\n",
"\n",
"# Clip mileage into [0, MAX_MILEAGE_KM] so extreme values do not dominate later scaling.\n",
"df[\"distance_norm\"] = df[\"distance_nokm\"].clip(0, MAX_MILEAGE_KM)\n",
"df.boxplot(column=\"distance_norm\")\n",
"\n",
"# Every \"-\" in Levy is replaced with \"0\" before parsing\n",
"# (presumably \"-\" marks a missing levy - confirm against the dataset description).\n",
"df[\"Levy\"] = pd.to_numeric(df[\"Levy\"].str.replace(\"-\", \"0\", regex=False))\n",
"\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"# StandardScaler is imported from its public module, sklearn.preprocessing.\n",
"from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
"\n",
"# NOTE(review): TitanicFeatures is not referenced by any active code in this cell; it looks like a\n",
"# leftover from a Titanic notebook. Kept in case a later cell uses it - confirm and remove.\n",
"from transformers import TitanicFeatures\n",
"\n",
"\n",
"# Columns removed at the end of the pipeline.\n",
"# NOTE(review): Price is not listed here, so it passes through; the stored X_train / X_test outputs\n",
"# of this notebook show a Price column among the features - confirm the target is split off before fitting.\n",
"columns_to_drop = [\n",
"    \"Doors\",\n",
"    \"Mileage\",\n",
"    \"Levy\",\n",
"    \"Fuel type\",\n",
"    \"Drive wheels\",\n",
"    \"Engine volume\",\n",
"    \"Wheel\",\n",
"    \"distance_nokm\",\n",
"    \"Model\",\n",
"    \"Cylinders\",\n",
"]\n",
"\n",
"# Columns that are imputed and standardised.\n",
"num_columns = [\n",
"    \"Airbags\",\n",
"    \"distance_norm\",\n",
"]\n",
"\n",
"# Columns that are imputed and one-hot encoded.\n",
"cat_columns = [\n",
"    \"Color\",\n",
"    \"Gear box type\",\n",
"    \"Leather_interior\",\n",
"    \"Manufacturer\",\n",
"    \"Category\",\n",
"]\n",
"\n",
"# Numeric branch: fill missing values with the column median, then standardise.\n",
"num_imputer = SimpleImputer(strategy=\"median\")\n",
"num_scaler = StandardScaler()\n",
"preprocessing_num = Pipeline(\n",
"    [\n",
"        (\"imputer\", num_imputer),\n",
"        (\"scaler\", num_scaler),\n",
"    ]\n",
")\n",
"\n",
"# Categorical branch: fill missing values with the constant \"unknown\", then one-hot encode.\n",
"# The first category of each column is dropped; categories unseen during fit are ignored at transform time.\n",
"cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
"cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n",
"preprocessing_cat = Pipeline(\n",
"    [\n",
"        (\"imputer\", cat_imputer),\n",
"        (\"encoder\", cat_encoder),\n",
"    ]\n",
")\n",
"\n",
"# Apply each branch to its columns; every column not listed is passed through unchanged.\n",
"# The step-name strings are kept exactly as originally spelled.\n",
"features_preprocessing = ColumnTransformer(\n",
"    verbose_feature_names_out=False,\n",
"    transformers=[\n",
"        (\"prepocessing_num\", preprocessing_num, num_columns),\n",
"        (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
"    ],\n",
"    remainder=\"passthrough\",\n",
")\n",
"\n",
"# Remove the columns in columns_to_drop and keep everything else.\n",
"drop_columns = ColumnTransformer(\n",
"    verbose_feature_names_out=False,\n",
"    transformers=[\n",
"        (\"drop_columns\", \"drop\", columns_to_drop),\n",
"    ],\n",
"    remainder=\"passthrough\",\n",
")\n",
"\n",
"# Full preprocessing: transform the features first, then drop the unused columns.\n",
"pipeline_end = Pipeline(\n",
"    [\n",
"        (\"features_preprocessing\", features_preprocessing),\n",
"        (\"drop_columns\", drop_columns),\n",
"    ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [],
"source": [
"# Fit the preprocessing pipeline on the current frame and transform it in one pass.\n",
"preprocessing_result = pipeline_end.fit_transform(df)\n",
"\n",
"# Rebuild `df` from the result, labelling columns with the names reported by the fitted pipeline.\n",
"# NOTE(review): this rebinds `df`, so re-running this cell alone would feed the already-transformed\n",
"# frame back into the pipeline; re-run the loading cell first.\n",
"df = pd.DataFrame(preprocessing_result, columns=pipeline_end.get_feature_names_out())\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Формирование выборок"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'X_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" <th>distance_norm</th>\n",
" <th>Color_Black</th>\n",
" <th>Color_Blue</th>\n",
" <th>Color_Brown</th>\n",
" <th>Color_Carnelian red</th>\n",
" <th>Color_Golden</th>\n",
" <th>Color_Green</th>\n",
" <th>Color_Grey</th>\n",
" <th>Color_Orange</th>\n",
" <th>...</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>Category_Universal</th>\n",
" <th>Price</th>\n",
" <th>Prod_year</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45664431</th>\n",
" <td>-1.523737</td>\n",
" <td>0.012512</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1019</td>\n",
" <td>2013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45186186</th>\n",
" <td>1.254005</td>\n",
" <td>-0.768336</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>14113</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45757605</th>\n",
" <td>-0.134866</td>\n",
" <td>0.093264</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>15994</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761763</th>\n",
" <td>-1.060780</td>\n",
" <td>-0.935158</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>24891</td>\n",
" <td>2011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45762505</th>\n",
" <td>1.254005</td>\n",
" <td>0.496836</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>706</td>\n",
" <td>2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45738958</th>\n",
" <td>-0.134866</td>\n",
" <td>1.046858</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>7840</td>\n",
" <td>2005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761444</th>\n",
" <td>1.254005</td>\n",
" <td>-0.288173</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>37633</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809946</th>\n",
" <td>-0.597823</td>\n",
" <td>-0.567004</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5209</td>\n",
" <td>2013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45789900</th>\n",
" <td>0.328091</td>\n",
" <td>-0.568642</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>14426</td>\n",
" <td>2006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788440</th>\n",
" <td>-0.597823</td>\n",
" <td>0.037171</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>12544</td>\n",
" <td>2011</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15389 rows × 97 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
"ID \n",
"45664431 -1.523737 0.012512 1.0 0.0 0.0 \n",
"45186186 1.254005 -0.768336 1.0 0.0 0.0 \n",
"45757605 -0.134866 0.093264 0.0 1.0 0.0 \n",
"45761763 -1.060780 -0.935158 1.0 0.0 0.0 \n",
"45762505 1.254005 0.496836 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"45738958 -0.134866 1.046858 0.0 0.0 0.0 \n",
"45761444 1.254005 -0.288173 1.0 0.0 0.0 \n",
"45809946 -0.597823 -0.567004 1.0 0.0 0.0 \n",
"45789900 0.328091 -0.568642 1.0 0.0 0.0 \n",
"45788440 -0.597823 0.037171 0.0 0.0 0.0 \n",
"\n",
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 0.0 \n",
"45186186 0.0 0.0 0.0 0.0 \n",
"45757605 0.0 0.0 0.0 0.0 \n",
"45761763 0.0 0.0 0.0 0.0 \n",
"45762505 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"45738958 0.0 0.0 0.0 0.0 \n",
"45761444 0.0 0.0 0.0 0.0 \n",
"45809946 0.0 0.0 0.0 0.0 \n",
"45789900 0.0 0.0 0.0 0.0 \n",
"45788440 0.0 0.0 0.0 0.0 \n",
"\n",
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
"ID ... \n",
"45664431 0.0 ... 0.0 1.0 \n",
"45186186 0.0 ... 0.0 0.0 \n",
"45757605 0.0 ... 0.0 0.0 \n",
"45761763 0.0 ... 1.0 0.0 \n",
"45762505 0.0 ... 0.0 0.0 \n",
"... ... ... ... ... \n",
"45738958 0.0 ... 0.0 0.0 \n",
"45761444 0.0 ... 0.0 0.0 \n",
"45809946 0.0 ... 1.0 0.0 \n",
"45789900 0.0 ... 0.0 1.0 \n",
"45788440 0.0 ... 1.0 0.0 \n",
"\n",
" Category_Limousine Category_Microbus Category_Minivan \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 \n",
"45186186 0.0 0.0 0.0 \n",
"45757605 0.0 0.0 0.0 \n",
"45761763 0.0 0.0 0.0 \n",
"45762505 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"45738958 0.0 0.0 0.0 \n",
"45761444 0.0 0.0 0.0 \n",
"45809946 0.0 0.0 0.0 \n",
"45789900 0.0 0.0 0.0 \n",
"45788440 0.0 0.0 0.0 \n",
"\n",
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 1019 \n",
"45186186 0.0 1.0 0.0 14113 \n",
"45757605 0.0 1.0 0.0 15994 \n",
"45761763 0.0 0.0 0.0 24891 \n",
"45762505 0.0 1.0 0.0 706 \n",
"... ... ... ... ... \n",
"45738958 0.0 1.0 0.0 7840 \n",
"45761444 0.0 1.0 0.0 37633 \n",
"45809946 0.0 0.0 0.0 5209 \n",
"45789900 0.0 0.0 0.0 14426 \n",
"45788440 0.0 0.0 0.0 12544 \n",
"\n",
" Prod_year \n",
"ID \n",
"45664431 2013 \n",
"45186186 2010 \n",
"45757605 2010 \n",
"45761763 2011 \n",
"45762505 2015 \n",
"... ... \n",
"45738958 2005 \n",
"45761444 2010 \n",
"45809946 2013 \n",
"45789900 2006 \n",
"45788440 2011 \n",
"\n",
"[15389 rows x 97 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45664431</th>\n",
" <td>1019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45186186</th>\n",
" <td>14113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45757605</th>\n",
" <td>15994</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761763</th>\n",
" <td>24891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45762505</th>\n",
" <td>706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45738958</th>\n",
" <td>7840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761444</th>\n",
" <td>37633</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809946</th>\n",
" <td>5209</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45789900</th>\n",
" <td>14426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788440</th>\n",
" <td>12544</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15389 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Price\n",
"ID \n",
"45664431 1019\n",
"45186186 14113\n",
"45757605 15994\n",
"45761763 24891\n",
"45762505 706\n",
"... ...\n",
"45738958 7840\n",
"45761444 37633\n",
"45809946 5209\n",
"45789900 14426\n",
"45788440 12544\n",
"\n",
"[15389 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'X_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" <th>distance_norm</th>\n",
" <th>Color_Black</th>\n",
" <th>Color_Blue</th>\n",
" <th>Color_Brown</th>\n",
" <th>Color_Carnelian red</th>\n",
" <th>Color_Golden</th>\n",
" <th>Color_Green</th>\n",
" <th>Color_Grey</th>\n",
" <th>Color_Orange</th>\n",
" <th>...</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>Category_Universal</th>\n",
" <th>Price</th>\n",
" <th>Prod_year</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41976837</th>\n",
" <td>0.328091</td>\n",
" <td>-1.184551</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>90006</td>\n",
" <td>2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793567</th>\n",
" <td>-0.597823</td>\n",
" <td>-0.719669</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7850</td>\n",
" <td>2009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45812786</th>\n",
" <td>-1.060780</td>\n",
" <td>1.832171</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2352</td>\n",
" <td>1998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45808317</th>\n",
" <td>1.254005</td>\n",
" <td>-0.453739</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1333</td>\n",
" <td>2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809653</th>\n",
" <td>-0.597823</td>\n",
" <td>-1.437871</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>62493</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793612</th>\n",
" <td>-1.060780</td>\n",
" <td>-0.018923</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>51746</td>\n",
" <td>2014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45567237</th>\n",
" <td>-1.523737</td>\n",
" <td>0.242249</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>784</td>\n",
" <td>2014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45802935</th>\n",
" <td>-0.597823</td>\n",
" <td>0.299701</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>41221</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45102093</th>\n",
" <td>-0.597823</td>\n",
" <td>0.279496</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>10976</td>\n",
" <td>2003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45733630</th>\n",
" <td>-0.597823</td>\n",
" <td>-1.055951</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>55343</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3848 rows × 97 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
"ID \n",
"41976837 0.328091 -1.184551 0.0 0.0 0.0 \n",
"45793567 -0.597823 -0.719669 1.0 0.0 0.0 \n",
"45812786 -1.060780 1.832171 0.0 0.0 0.0 \n",
"45808317 1.254005 -0.453739 0.0 0.0 0.0 \n",
"45809653 -0.597823 -1.437871 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"45793612 -1.060780 -0.018923 0.0 0.0 0.0 \n",
"45567237 -1.523737 0.242249 0.0 0.0 0.0 \n",
"45802935 -0.597823 0.299701 0.0 1.0 0.0 \n",
"45102093 -0.597823 0.279496 1.0 0.0 0.0 \n",
"45733630 -0.597823 -1.055951 0.0 0.0 0.0 \n",
"\n",
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 0.0 \n",
"45793567 0.0 0.0 0.0 0.0 \n",
"45812786 0.0 0.0 0.0 0.0 \n",
"45808317 0.0 0.0 0.0 0.0 \n",
"45809653 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"45793612 0.0 0.0 0.0 0.0 \n",
"45567237 0.0 0.0 0.0 0.0 \n",
"45802935 0.0 0.0 0.0 0.0 \n",
"45102093 0.0 0.0 0.0 0.0 \n",
"45733630 0.0 0.0 0.0 0.0 \n",
"\n",
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
"ID ... \n",
"41976837 0.0 ... 0.0 1.0 \n",
"45793567 0.0 ... 0.0 1.0 \n",
"45812786 0.0 ... 1.0 0.0 \n",
"45808317 0.0 ... 0.0 0.0 \n",
"45809653 0.0 ... 0.0 0.0 \n",
"... ... ... ... ... \n",
"45793612 0.0 ... 0.0 0.0 \n",
"45567237 0.0 ... 0.0 1.0 \n",
"45802935 0.0 ... 0.0 1.0 \n",
"45102093 0.0 ... 0.0 1.0 \n",
"45733630 0.0 ... 0.0 0.0 \n",
"\n",
" Category_Limousine Category_Microbus Category_Minivan \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 \n",
"45793567 0.0 0.0 0.0 \n",
"45812786 0.0 0.0 0.0 \n",
"45808317 0.0 0.0 0.0 \n",
"45809653 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"45793612 0.0 1.0 0.0 \n",
"45567237 0.0 0.0 0.0 \n",
"45802935 0.0 0.0 0.0 \n",
"45102093 0.0 0.0 0.0 \n",
"45733630 0.0 0.0 0.0 \n",
"\n",
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 90006 \n",
"45793567 0.0 0.0 0.0 7850 \n",
"45812786 0.0 0.0 0.0 2352 \n",
"45808317 0.0 1.0 0.0 1333 \n",
"45809653 0.0 0.0 0.0 62493 \n",
"... ... ... ... ... \n",
"45793612 0.0 0.0 0.0 51746 \n",
"45567237 0.0 0.0 0.0 784 \n",
"45802935 0.0 0.0 0.0 41221 \n",
"45102093 0.0 0.0 0.0 10976 \n",
"45733630 0.0 1.0 0.0 55343 \n",
"\n",
" Prod_year \n",
"ID \n",
"41976837 2015 \n",
"45793567 2009 \n",
"45812786 1998 \n",
"45808317 2015 \n",
"45809653 2018 \n",
"... ... \n",
"45793612 2014 \n",
"45567237 2014 \n",
"45802935 2016 \n",
"45102093 2003 \n",
"45733630 2018 \n",
"\n",
"[3848 rows x 97 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41976837</th>\n",
" <td>90006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793567</th>\n",
" <td>7850</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45812786</th>\n",
" <td>2352</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45808317</th>\n",
" <td>1333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809653</th>\n",
" <td>62493</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793612</th>\n",
" <td>51746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45567237</th>\n",
" <td>784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45802935</th>\n",
" <td>41221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45102093</th>\n",
" <td>10976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45733630</th>\n",
" <td>55343</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3848 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Price\n",
"ID \n",
"41976837 90006\n",
"45793567 7850\n",
"45812786 2352\n",
"45808317 1333\n",
"45809653 62493\n",
"... ...\n",
"45793612 51746\n",
"45567237 784\n",
"45802935 41221\n",
"45102093 10976\n",
"45733630 55343\n",
"\n",
"[3848 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from utils import split_stratified_into_train_val_test\n",
"\n",
"# Keyword arguments for the split: an 80/20 train/test division with no\n",
"# validation part, passing \"Airbags\" as the stratification column.\n",
"split_options = dict(\n",
"    target_colname=\"Price\",\n",
"    stratify_colname=\"Airbags\",\n",
"    frac_train=0.80,\n",
"    frac_val=0,\n",
"    frac_test=0.20,\n",
"    random_state=random_state,\n",
")\n",
"X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
"    df, **split_options\n",
")\n",
"\n",
"# Show every train/test frame under its own label.\n",
"for label, frame in (\n",
"    (\"X_train\", X_train),\n",
"    (\"y_train\", y_train),\n",
"    (\"X_test\", X_test),\n",
"    (\"y_test\", y_test),\n",
"):\n",
"    display(label, frame)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" <th>distance_norm</th>\n",
" <th>Color_Black</th>\n",
" <th>Color_Blue</th>\n",
" <th>Color_Brown</th>\n",
" <th>Color_Carnelian red</th>\n",
" <th>Color_Golden</th>\n",
" <th>Color_Green</th>\n",
" <th>Color_Grey</th>\n",
" <th>Color_Orange</th>\n",
" <th>...</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>Category_Universal</th>\n",
" <th>Price</th>\n",
" <th>Prod_year</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41976837</th>\n",
" <td>0.328091</td>\n",
" <td>-1.184551</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>90006</td>\n",
" <td>2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793567</th>\n",
" <td>-0.597823</td>\n",
" <td>-0.719669</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7850</td>\n",
" <td>2009</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45812786</th>\n",
" <td>-1.060780</td>\n",
" <td>1.832171</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2352</td>\n",
" <td>1998</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45808317</th>\n",
" <td>1.254005</td>\n",
" <td>-0.453739</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1333</td>\n",
" <td>2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809653</th>\n",
" <td>-0.597823</td>\n",
" <td>-1.437871</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>62493</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793612</th>\n",
" <td>-1.060780</td>\n",
" <td>-0.018923</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>51746</td>\n",
" <td>2014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45567237</th>\n",
" <td>-1.523737</td>\n",
" <td>0.242249</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>784</td>\n",
" <td>2014</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45802935</th>\n",
" <td>-0.597823</td>\n",
" <td>0.299701</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>41221</td>\n",
" <td>2016</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45102093</th>\n",
" <td>-0.597823</td>\n",
" <td>0.279496</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>10976</td>\n",
" <td>2003</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45733630</th>\n",
" <td>-0.597823</td>\n",
" <td>-1.055951</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>55343</td>\n",
" <td>2018</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3848 rows × 97 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
"ID \n",
"41976837 0.328091 -1.184551 0.0 0.0 0.0 \n",
"45793567 -0.597823 -0.719669 1.0 0.0 0.0 \n",
"45812786 -1.060780 1.832171 0.0 0.0 0.0 \n",
"45808317 1.254005 -0.453739 0.0 0.0 0.0 \n",
"45809653 -0.597823 -1.437871 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"45793612 -1.060780 -0.018923 0.0 0.0 0.0 \n",
"45567237 -1.523737 0.242249 0.0 0.0 0.0 \n",
"45802935 -0.597823 0.299701 0.0 1.0 0.0 \n",
"45102093 -0.597823 0.279496 1.0 0.0 0.0 \n",
"45733630 -0.597823 -1.055951 0.0 0.0 0.0 \n",
"\n",
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 0.0 \n",
"45793567 0.0 0.0 0.0 0.0 \n",
"45812786 0.0 0.0 0.0 0.0 \n",
"45808317 0.0 0.0 0.0 0.0 \n",
"45809653 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"45793612 0.0 0.0 0.0 0.0 \n",
"45567237 0.0 0.0 0.0 0.0 \n",
"45802935 0.0 0.0 0.0 0.0 \n",
"45102093 0.0 0.0 0.0 0.0 \n",
"45733630 0.0 0.0 0.0 0.0 \n",
"\n",
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
"ID ... \n",
"41976837 0.0 ... 0.0 1.0 \n",
"45793567 0.0 ... 0.0 1.0 \n",
"45812786 0.0 ... 1.0 0.0 \n",
"45808317 0.0 ... 0.0 0.0 \n",
"45809653 0.0 ... 0.0 0.0 \n",
"... ... ... ... ... \n",
"45793612 0.0 ... 0.0 0.0 \n",
"45567237 0.0 ... 0.0 1.0 \n",
"45802935 0.0 ... 0.0 1.0 \n",
"45102093 0.0 ... 0.0 1.0 \n",
"45733630 0.0 ... 0.0 0.0 \n",
"\n",
" Category_Limousine Category_Microbus Category_Minivan \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 \n",
"45793567 0.0 0.0 0.0 \n",
"45812786 0.0 0.0 0.0 \n",
"45808317 0.0 0.0 0.0 \n",
"45809653 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"45793612 0.0 1.0 0.0 \n",
"45567237 0.0 0.0 0.0 \n",
"45802935 0.0 0.0 0.0 \n",
"45102093 0.0 0.0 0.0 \n",
"45733630 0.0 0.0 0.0 \n",
"\n",
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 90006 \n",
"45793567 0.0 0.0 0.0 7850 \n",
"45812786 0.0 0.0 0.0 2352 \n",
"45808317 0.0 1.0 0.0 1333 \n",
"45809653 0.0 0.0 0.0 62493 \n",
"... ... ... ... ... \n",
"45793612 0.0 0.0 0.0 51746 \n",
"45567237 0.0 0.0 0.0 784 \n",
"45802935 0.0 0.0 0.0 41221 \n",
"45102093 0.0 0.0 0.0 10976 \n",
"45733630 0.0 1.0 0.0 55343 \n",
"\n",
" Prod_year \n",
"ID \n",
"41976837 2015 \n",
"45793567 2009 \n",
"45812786 1998 \n",
"45808317 2015 \n",
"45809653 2018 \n",
"... ... \n",
"45793612 2014 \n",
"45567237 2014 \n",
"45802935 2016 \n",
"45102093 2003 \n",
"45733630 2018 \n",
"\n",
"[3848 rows x 97 columns]"
]
},
"execution_count": 98,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_test"
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" <th>distance_norm</th>\n",
" <th>Color_Black</th>\n",
" <th>Color_Blue</th>\n",
" <th>Color_Brown</th>\n",
" <th>Color_Carnelian red</th>\n",
" <th>Color_Golden</th>\n",
" <th>Color_Green</th>\n",
" <th>Color_Grey</th>\n",
" <th>Color_Orange</th>\n",
" <th>...</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>Category_Universal</th>\n",
" <th>Price</th>\n",
" <th>Prod_year</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45664431</th>\n",
" <td>-1.523737</td>\n",
" <td>0.012512</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1019</td>\n",
" <td>2013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45186186</th>\n",
" <td>1.254005</td>\n",
" <td>-0.768336</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>14113</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45757605</th>\n",
" <td>-0.134866</td>\n",
" <td>0.093264</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>15994</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761763</th>\n",
" <td>-1.060780</td>\n",
" <td>-0.935158</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>24891</td>\n",
" <td>2011</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45762505</th>\n",
" <td>1.254005</td>\n",
" <td>0.496836</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>706</td>\n",
" <td>2015</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45738958</th>\n",
" <td>-0.134866</td>\n",
" <td>1.046858</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>7840</td>\n",
" <td>2005</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761444</th>\n",
" <td>1.254005</td>\n",
" <td>-0.288173</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>37633</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809946</th>\n",
" <td>-0.597823</td>\n",
" <td>-0.567004</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>5209</td>\n",
" <td>2013</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45789900</th>\n",
" <td>0.328091</td>\n",
" <td>-0.568642</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>14426</td>\n",
" <td>2006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788440</th>\n",
" <td>-0.597823</td>\n",
" <td>0.037171</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>12544</td>\n",
" <td>2011</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15389 rows × 97 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
"ID \n",
"45664431 -1.523737 0.012512 1.0 0.0 0.0 \n",
"45186186 1.254005 -0.768336 1.0 0.0 0.0 \n",
"45757605 -0.134866 0.093264 0.0 1.0 0.0 \n",
"45761763 -1.060780 -0.935158 1.0 0.0 0.0 \n",
"45762505 1.254005 0.496836 0.0 0.0 0.0 \n",
"... ... ... ... ... ... \n",
"45738958 -0.134866 1.046858 0.0 0.0 0.0 \n",
"45761444 1.254005 -0.288173 1.0 0.0 0.0 \n",
"45809946 -0.597823 -0.567004 1.0 0.0 0.0 \n",
"45789900 0.328091 -0.568642 1.0 0.0 0.0 \n",
"45788440 -0.597823 0.037171 0.0 0.0 0.0 \n",
"\n",
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 0.0 \n",
"45186186 0.0 0.0 0.0 0.0 \n",
"45757605 0.0 0.0 0.0 0.0 \n",
"45761763 0.0 0.0 0.0 0.0 \n",
"45762505 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"45738958 0.0 0.0 0.0 0.0 \n",
"45761444 0.0 0.0 0.0 0.0 \n",
"45809946 0.0 0.0 0.0 0.0 \n",
"45789900 0.0 0.0 0.0 0.0 \n",
"45788440 0.0 0.0 0.0 0.0 \n",
"\n",
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
"ID ... \n",
"45664431 0.0 ... 0.0 1.0 \n",
"45186186 0.0 ... 0.0 0.0 \n",
"45757605 0.0 ... 0.0 0.0 \n",
"45761763 0.0 ... 1.0 0.0 \n",
"45762505 0.0 ... 0.0 0.0 \n",
"... ... ... ... ... \n",
"45738958 0.0 ... 0.0 0.0 \n",
"45761444 0.0 ... 0.0 0.0 \n",
"45809946 0.0 ... 1.0 0.0 \n",
"45789900 0.0 ... 0.0 1.0 \n",
"45788440 0.0 ... 1.0 0.0 \n",
"\n",
" Category_Limousine Category_Microbus Category_Minivan \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 \n",
"45186186 0.0 0.0 0.0 \n",
"45757605 0.0 0.0 0.0 \n",
"45761763 0.0 0.0 0.0 \n",
"45762505 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"45738958 0.0 0.0 0.0 \n",
"45761444 0.0 0.0 0.0 \n",
"45809946 0.0 0.0 0.0 \n",
"45789900 0.0 0.0 0.0 \n",
"45788440 0.0 0.0 0.0 \n",
"\n",
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 1019 \n",
"45186186 0.0 1.0 0.0 14113 \n",
"45757605 0.0 1.0 0.0 15994 \n",
"45761763 0.0 0.0 0.0 24891 \n",
"45762505 0.0 1.0 0.0 706 \n",
"... ... ... ... ... \n",
"45738958 0.0 1.0 0.0 7840 \n",
"45761444 0.0 1.0 0.0 37633 \n",
"45809946 0.0 0.0 0.0 5209 \n",
"45789900 0.0 0.0 0.0 14426 \n",
"45788440 0.0 0.0 0.0 12544 \n",
"\n",
" Prod_year \n",
"ID \n",
"45664431 2013 \n",
"45186186 2010 \n",
"45757605 2010 \n",
"45761763 2011 \n",
"45762505 2015 \n",
"... ... \n",
"45738958 2005 \n",
"45761444 2010 \n",
"45809946 2013 \n",
"45789900 2006 \n",
"45788440 2011 \n",
"\n",
"[15389 rows x 97 columns]"
]
},
"execution_count": 99,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"X_train"
]
},
{
"cell_type": "code",
"execution_count": 100,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41976837</th>\n",
" <td>90006</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793567</th>\n",
" <td>7850</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45812786</th>\n",
" <td>2352</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45808317</th>\n",
" <td>1333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809653</th>\n",
" <td>62493</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793612</th>\n",
" <td>51746</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45567237</th>\n",
" <td>784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45802935</th>\n",
" <td>41221</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45102093</th>\n",
" <td>10976</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45733630</th>\n",
" <td>55343</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3848 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Price\n",
"ID \n",
"41976837 90006\n",
"45793567 7850\n",
"45812786 2352\n",
"45808317 1333\n",
"45809653 62493\n",
"... ...\n",
"45793612 51746\n",
"45567237 784\n",
"45802935 41221\n",
"45102093 10976\n",
"45733630 55343\n",
"\n",
"[3848 rows x 1 columns]"
]
},
"execution_count": 100,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_test"
]
},
{
"cell_type": "code",
"execution_count": 101,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45664431</th>\n",
" <td>1019</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45186186</th>\n",
" <td>14113</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45757605</th>\n",
" <td>15994</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761763</th>\n",
" <td>24891</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45762505</th>\n",
" <td>706</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45738958</th>\n",
" <td>7840</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761444</th>\n",
" <td>37633</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809946</th>\n",
" <td>5209</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45789900</th>\n",
" <td>14426</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788440</th>\n",
" <td>12544</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15389 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Price\n",
"ID \n",
"45664431 1019\n",
"45186186 14113\n",
"45757605 15994\n",
"45761763 24891\n",
"45762505 706\n",
"... ...\n",
"45738958 7840\n",
"45761444 37633\n",
"45809946 5209\n",
"45789900 14426\n",
"45788440 12544\n",
"\n",
"[15389 rows x 1 columns]"
]
},
"execution_count": 101,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
"\n",
"random_state = 9\n",
"\n",
"# Candidate regressors, keyed by the short name used to look them up in `models`.\n",
"estimators = {\n",
"    # The MLP entry is currently switched off:\n",
"    # \"mlp\": neural_network.MLPRegressor(\n",
"    #     activation=\"tanh\",\n",
"    #     hidden_layer_sizes=(3,),\n",
"    #     max_iter=500,\n",
"    #     early_stopping=True,\n",
"    #     random_state=random_state,\n",
"    # ),\n",
"    \"linear\": linear_model.LinearRegression(n_jobs=-1),\n",
"    \"linear_poly\": make_pipeline(\n",
"        PolynomialFeatures(degree=2),\n",
"        linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
"    ),\n",
"    \"linear_interact\": make_pipeline(\n",
"        PolynomialFeatures(interaction_only=True),\n",
"        linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
"    ),\n",
"    \"ridge\": linear_model.RidgeCV(),\n",
"    \"decision_tree\": tree.DecisionTreeRegressor(\n",
"        max_depth=7, random_state=random_state\n",
"    ),\n",
"    \"knn\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1),\n",
"    \"random_forest\": ensemble.RandomForestRegressor(\n",
"        max_depth=7, random_state=random_state, n_jobs=-1\n",
"    ),\n",
"}\n",
"\n",
"# Every registry entry starts out as {\"model\": <unfitted estimator>}.\n",
"models = {name: {\"model\": estimator} for name, estimator in estimators.items()}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение функции для стандартизации значений в столбце \"distance_norm\" для MLP"
]
},
{
"cell_type": "code",
"execution_count": 103,
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"from sklearn import preprocessing\n",
"import numpy as np\n",
"\n",
"stndart_scaler = preprocessing.StandardScaler()\n",
"\n",
"\n",
"def std_temp(df: DataFrame) -> DataFrame:\n",
"    \"\"\"Standardize the \"distance_norm\" column of ``df`` in place and return ``df``.\n",
"\n",
"    The shared ``stndart_scaler`` is re-fitted on every call, so each frame\n",
"    passed in is scaled with its own statistics.\n",
"    \"\"\"\n",
"    # A pandas Series has no ``reshape``; go through the underlying NumPy array\n",
"    # to build the (n_samples, 1) column the scaler expects.\n",
"    column = df[\"distance_norm\"].values.reshape(-1, 1)\n",
"    # Flatten the scaled (n_samples, 1) result back to one value per row.\n",
"    df[\"distance_norm\"] = np.array(stndart_scaler.fit_transform(column)).ravel()\n",
"    return df\n",
"\n",
"\n",
"def std_temp2(df: DataFrame) -> DataFrame:\n",
"    \"\"\"Standardize the \"distance_norm\" column of ``df`` in place and return ``df``.\n",
"\n",
"    The shared ``stndart_scaler`` is re-fitted on every call.\n",
"    \"\"\"\n",
"    # The scaler wants 2-D input, so expose the column as an (n_samples, 1) array.\n",
"    distance_column = df[\"distance_norm\"].values.reshape(-1, 1)\n",
"    df[\"distance_norm\"] = stndart_scaler.fit_transform(distance_column)\n",
"    return df\n",
"\n",
"\n",
"def std_temp3(df: DataFrame) -> DataFrame:\n",
"    \"\"\"Fill missing \"distance_norm\" values with the column mean, then standardize.\n",
"\n",
"    ``df`` is modified in place and returned. The shared ``stndart_scaler`` is\n",
"    re-fitted on every call.\n",
"    \"\"\"\n",
"    # Assign the filled column back instead of calling\n",
"    # ``df[...].fillna(..., inplace=True)``: that chained in-place form is\n",
"    # deprecated in pandas 2.x and no longer updates ``df`` under Copy-on-Write.\n",
"    distance = df[\"distance_norm\"]\n",
"    df[\"distance_norm\"] = distance.fillna(distance.mean())\n",
"\n",
"    # The scaler wants 2-D input, so expose the column as an (n_samples, 1) array.\n",
"    df[\"distance_norm\"] = stndart_scaler.fit_transform(\n",
"        df[\"distance_norm\"].values.reshape(-1, 1)\n",
"    )\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 104,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Airbags 0\n",
"distance_norm 0\n",
"Color_Black 0\n",
"Color_Blue 0\n",
"Color_Brown 0\n",
" ..\n",
"Category_Pickup 0\n",
"Category_Sedan 0\n",
"Category_Universal 0\n",
"Price 0\n",
"Prod_year 0\n",
"Length: 97, dtype: int64\n",
"Airbags 0\n",
"distance_norm 0\n",
"Color_Black 0\n",
"Color_Blue 0\n",
"Color_Brown 0\n",
" ..\n",
"Category_Pickup 0\n",
"Category_Sedan 0\n",
"Category_Universal 0\n",
"Price 0\n",
"Prod_year 0\n",
"Length: 97, dtype: int64\n"
]
}
],
"source": [
"# Per-column count of missing values, first for the train and then the test features.\n",
"for features in (X_train, X_test):\n",
"    print(features.isnull().sum())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение и оценка моделей с помощью различных алгоритмов"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n"
]
}
],
"source": [
"import math\n",
"from pandas import DataFrame\n",
"from sklearn import metrics\n",
"\n",
"\n",
"\n",
"\n",
"\n",
"# The split leaves the target inside the feature frames (the displayed\n",
"# X_train / X_test still carry a \"Price\" column), so every model was fitted\n",
"# on its own answer and scored RMSE 0 / R2 1.0. Drop the target column(s)\n",
"# once, before any model is fitted; errors=\"ignore\" keeps this cell safe to\n",
"# re-run after the column is already gone.\n",
"X_train: DataFrame = X_train.drop(columns=y_train.columns, errors=\"ignore\")\n",
"X_test: DataFrame = X_test.drop(columns=y_test.columns, errors=\"ignore\")\n",
"\n",
"for model_name, model_info in models.items():\n",
"    print(f\"Model: {model_name}\")\n",
"\n",
"    # Only the MLP gets the extra standardization step. It runs on copies so\n",
"    # the rescaled frames are not handed on to the models that follow.\n",
"    X_train_model, X_test_model = X_train, X_test\n",
"    if model_name == \"mlp\":\n",
"        X_train_model = std_temp(X_train.copy())\n",
"        X_test_model = std_temp(X_test.copy())\n",
"\n",
"    fitted_model = model_info[\"model\"].fit(\n",
"        X_train_model.values, y_train.values.ravel()\n",
"    )\n",
"    y_train_pred = fitted_model.predict(X_train_model.values)\n",
"    y_test_pred = fitted_model.predict(X_test_model.values)\n",
"\n",
"    # Keep the fitted estimator, its predictions and its metrics next to the\n",
"    # model definition.\n",
"    model_info[\"fitted\"] = fitted_model\n",
"    model_info[\"train_preds\"] = y_train_pred\n",
"    model_info[\"preds\"] = y_test_pred\n",
"    model_info[\"RMSE_train\"] = math.sqrt(\n",
"        metrics.mean_squared_error(y_train, y_train_pred)\n",
"    )\n",
"    model_info[\"RMSE_test\"] = math.sqrt(\n",
"        metrics.mean_squared_error(y_test, y_test_pred)\n",
"    )\n",
"    # \"RMAE\" here is the square root of the mean absolute error; the key name\n",
"    # is kept because it is a column of the results table.\n",
"    model_info[\"RMAE_test\"] = math.sqrt(\n",
"        metrics.mean_absolute_error(y_test, y_test_pred)\n",
"    )\n",
"    model_info[\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод результатов оценки"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_a93c1_row0_col0, #T_a93c1_row0_col1, #T_a93c1_row1_col0, #T_a93c1_row1_col1, #T_a93c1_row2_col0, #T_a93c1_row2_col1 {\n",
" background-color: #26818e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row0_col2, #T_a93c1_row1_col2, #T_a93c1_row2_col2, #T_a93c1_row6_col3 {\n",
" background-color: #4e02a2;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row0_col3, #T_a93c1_row1_col3, #T_a93c1_row2_col3, #T_a93c1_row3_col3, #T_a93c1_row4_col2, #T_a93c1_row5_col2, #T_a93c1_row6_col2 {\n",
" background-color: #da5a6a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row3_col0 {\n",
" background-color: #20928c;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row3_col1 {\n",
" background-color: #25848e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row3_col2 {\n",
" background-color: #8305a7;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row4_col0 {\n",
" background-color: #228b8d;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row4_col1 {\n",
" background-color: #a2da37;\n",
" color: #000000;\n",
"}\n",
"#T_a93c1_row4_col3 {\n",
" background-color: #5502a4;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row5_col0 {\n",
" background-color: #1e9b8a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row5_col1 {\n",
" background-color: #a5db36;\n",
" color: #000000;\n",
"}\n",
"#T_a93c1_row5_col3 {\n",
" background-color: #5102a3;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_a93c1_row6_col0, #T_a93c1_row6_col1 {\n",
" background-color: #a8db34;\n",
" color: #000000;\n",
"}\n",
"</style>\n",
"<table id=\"T_a93c1\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_a93c1_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
" <th id=\"T_a93c1_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
" <th id=\"T_a93c1_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
" <th id=\"T_a93c1_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row0\" class=\"row_heading level0 row0\" >linear</th>\n",
" <td id=\"T_a93c1_row0_col0\" class=\"data row0 col0\" >0.000000</td>\n",
" <td id=\"T_a93c1_row0_col1\" class=\"data row0 col1\" >0.000000</td>\n",
" <td id=\"T_a93c1_row0_col2\" class=\"data row0 col2\" >0.000005</td>\n",
" <td id=\"T_a93c1_row0_col3\" class=\"data row0 col3\" >1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row1\" class=\"row_heading level0 row1\" >linear_interact</th>\n",
" <td id=\"T_a93c1_row1_col0\" class=\"data row1 col0\" >0.000000</td>\n",
" <td id=\"T_a93c1_row1_col1\" class=\"data row1 col1\" >0.000000</td>\n",
" <td id=\"T_a93c1_row1_col2\" class=\"data row1 col2\" >0.000071</td>\n",
" <td id=\"T_a93c1_row1_col3\" class=\"data row1 col3\" >1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row2\" class=\"row_heading level0 row2\" >linear_poly</th>\n",
" <td id=\"T_a93c1_row2_col0\" class=\"data row2 col0\" >0.000001</td>\n",
" <td id=\"T_a93c1_row2_col1\" class=\"data row2 col1\" >0.000069</td>\n",
" <td id=\"T_a93c1_row2_col2\" class=\"data row2 col2\" >0.001155</td>\n",
" <td id=\"T_a93c1_row2_col3\" class=\"data row2 col3\" >1.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row3\" class=\"row_heading level0 row3\" >ridge</th>\n",
" <td id=\"T_a93c1_row3_col0\" class=\"data row3 col0\" >676.783751</td>\n",
" <td id=\"T_a93c1_row3_col1\" class=\"data row3 col1\" >14251.537610</td>\n",
" <td id=\"T_a93c1_row3_col2\" class=\"data row3 col2\" >25.290145</td>\n",
" <td id=\"T_a93c1_row3_col3\" class=\"data row3 col3\" >0.998871</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row4\" class=\"row_heading level0 row4\" >decision_tree</th>\n",
" <td id=\"T_a93c1_row4_col0\" class=\"data row4 col0\" >418.103239</td>\n",
" <td id=\"T_a93c1_row4_col1\" class=\"data row4 col1\" >410021.797952</td>\n",
" <td id=\"T_a93c1_row4_col2\" class=\"data row4 col2\" >82.474628</td>\n",
" <td id=\"T_a93c1_row4_col3\" class=\"data row4 col3\" >0.065532</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row5\" class=\"row_heading level0 row5\" >random_forest</th>\n",
" <td id=\"T_a93c1_row5_col0\" class=\"data row5 col0\" >1051.981912</td>\n",
" <td id=\"T_a93c1_row5_col1\" class=\"data row5 col1\" >413832.680119</td>\n",
" <td id=\"T_a93c1_row5_col2\" class=\"data row5 col2\" >82.077377</td>\n",
" <td id=\"T_a93c1_row5_col3\" class=\"data row5 col3\" >0.048080</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_a93c1_level0_row6\" class=\"row_heading level0 row6\" >knn</th>\n",
" <td id=\"T_a93c1_row6_col0\" class=\"data row6 col0\" >4091.598286</td>\n",
" <td id=\"T_a93c1_row6_col1\" class=\"data row6 col1\" >417393.077890</td>\n",
" <td id=\"T_a93c1_row6_col2\" class=\"data row6 col2\" >82.148464</td>\n",
" <td id=\"T_a93c1_row6_col3\" class=\"data row6 col3\" >0.031630</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x29c1f09e720>"
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод реального и \"спрогнозированного\" результата для обучающей и тестовой выборок"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение лучшей модели"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'linear'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = str(reg_metrics.sort_values(by=\"RMSE_test\").iloc[0].name)\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для обучающей выборки"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" <th>distance_norm</th>\n",
" <th>Color_Black</th>\n",
" <th>Color_Blue</th>\n",
" <th>Color_Brown</th>\n",
" <th>Color_Carnelian red</th>\n",
" <th>Color_Golden</th>\n",
" <th>Color_Green</th>\n",
" <th>Color_Grey</th>\n",
" <th>Color_Orange</th>\n",
" <th>...</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>Category_Universal</th>\n",
" <th>Price</th>\n",
" <th>Prod_year</th>\n",
" <th>Price</th>\n",
" <th>DensityPred</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45664431</th>\n",
" <td>-1.523737</td>\n",
" <td>0.012512</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1019</td>\n",
" <td>2013</td>\n",
" <td>1019</td>\n",
" <td>1019.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45186186</th>\n",
" <td>1.254005</td>\n",
" <td>-0.768336</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>14113</td>\n",
" <td>2010</td>\n",
" <td>14113</td>\n",
" <td>14113.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45757605</th>\n",
" <td>-0.134866</td>\n",
" <td>0.093264</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>15994</td>\n",
" <td>2010</td>\n",
" <td>15994</td>\n",
" <td>15994.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45761763</th>\n",
" <td>-1.060780</td>\n",
" <td>-0.935158</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>24891</td>\n",
" <td>2011</td>\n",
" <td>24891</td>\n",
" <td>24891.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45762505</th>\n",
" <td>1.254005</td>\n",
" <td>0.496836</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>706</td>\n",
" <td>2015</td>\n",
" <td>706</td>\n",
" <td>706.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 99 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
"ID \n",
"45664431 -1.523737 0.012512 1.0 0.0 0.0 \n",
"45186186 1.254005 -0.768336 1.0 0.0 0.0 \n",
"45757605 -0.134866 0.093264 0.0 1.0 0.0 \n",
"45761763 -1.060780 -0.935158 1.0 0.0 0.0 \n",
"45762505 1.254005 0.496836 0.0 0.0 0.0 \n",
"\n",
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 0.0 \n",
"45186186 0.0 0.0 0.0 0.0 \n",
"45757605 0.0 0.0 0.0 0.0 \n",
"45761763 0.0 0.0 0.0 0.0 \n",
"45762505 0.0 0.0 0.0 0.0 \n",
"\n",
" Color_Orange ... Category_Limousine Category_Microbus \\\n",
"ID ... \n",
"45664431 0.0 ... 0.0 0.0 \n",
"45186186 0.0 ... 0.0 0.0 \n",
"45757605 0.0 ... 0.0 0.0 \n",
"45761763 0.0 ... 0.0 0.0 \n",
"45762505 0.0 ... 0.0 0.0 \n",
"\n",
" Category_Minivan Category_Pickup Category_Sedan \\\n",
"ID \n",
"45664431 0.0 0.0 0.0 \n",
"45186186 0.0 0.0 1.0 \n",
"45757605 0.0 0.0 1.0 \n",
"45761763 0.0 0.0 0.0 \n",
"45762505 0.0 0.0 1.0 \n",
"\n",
" Category_Universal Price Prod_year Price DensityPred \n",
"ID \n",
"45664431 0.0 1019 2013 1019 1019.0 \n",
"45186186 0.0 14113 2010 14113 14113.0 \n",
"45757605 0.0 15994 2010 15994 15994.0 \n",
"45761763 0.0 24891 2011 24891 24891.0 \n",
"45762505 0.0 706 2015 706 706.0 \n",
"\n",
"[5 rows x 99 columns]"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [\n",
" X_train,\n",
" y_train,\n",
" pd.Series(\n",
" models[best_model][\"train_preds\"],\n",
" index=y_train.index,\n",
" name=\"PricePred\",\n",
" ),\n",
" ],\n",
" axis=1,\n",
").head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для тестовой выборки"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" <th>distance_norm</th>\n",
" <th>Color_Black</th>\n",
" <th>Color_Blue</th>\n",
" <th>Color_Brown</th>\n",
" <th>Color_Carnelian red</th>\n",
" <th>Color_Golden</th>\n",
" <th>Color_Green</th>\n",
" <th>Color_Grey</th>\n",
" <th>Color_Orange</th>\n",
" <th>...</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>Category_Universal</th>\n",
" <th>Price</th>\n",
" <th>Prod_year</th>\n",
" <th>Price</th>\n",
" <th>DensityPred</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>41976837</th>\n",
" <td>0.328091</td>\n",
" <td>-1.184551</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>90006</td>\n",
" <td>2015</td>\n",
" <td>90006</td>\n",
" <td>90006.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793567</th>\n",
" <td>-0.597823</td>\n",
" <td>-0.719669</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>7850</td>\n",
" <td>2009</td>\n",
" <td>7850</td>\n",
" <td>7850.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45812786</th>\n",
" <td>-1.060780</td>\n",
" <td>1.832171</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>2352</td>\n",
" <td>1998</td>\n",
" <td>2352</td>\n",
" <td>2352.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45808317</th>\n",
" <td>1.254005</td>\n",
" <td>-0.453739</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>1333</td>\n",
" <td>2015</td>\n",
" <td>1333</td>\n",
" <td>1333.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809653</th>\n",
" <td>-0.597823</td>\n",
" <td>-1.437871</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>62493</td>\n",
" <td>2018</td>\n",
" <td>62493</td>\n",
" <td>62493.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 99 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
"ID \n",
"41976837 0.328091 -1.184551 0.0 0.0 0.0 \n",
"45793567 -0.597823 -0.719669 1.0 0.0 0.0 \n",
"45812786 -1.060780 1.832171 0.0 0.0 0.0 \n",
"45808317 1.254005 -0.453739 0.0 0.0 0.0 \n",
"45809653 -0.597823 -1.437871 0.0 0.0 0.0 \n",
"\n",
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 0.0 \n",
"45793567 0.0 0.0 0.0 0.0 \n",
"45812786 0.0 0.0 0.0 0.0 \n",
"45808317 0.0 0.0 0.0 0.0 \n",
"45809653 0.0 0.0 0.0 0.0 \n",
"\n",
" Color_Orange ... Category_Limousine Category_Microbus \\\n",
"ID ... \n",
"41976837 0.0 ... 0.0 0.0 \n",
"45793567 0.0 ... 0.0 0.0 \n",
"45812786 0.0 ... 0.0 0.0 \n",
"45808317 0.0 ... 0.0 0.0 \n",
"45809653 0.0 ... 0.0 0.0 \n",
"\n",
" Category_Minivan Category_Pickup Category_Sedan \\\n",
"ID \n",
"41976837 0.0 0.0 0.0 \n",
"45793567 0.0 0.0 0.0 \n",
"45812786 0.0 0.0 0.0 \n",
"45808317 0.0 0.0 1.0 \n",
"45809653 0.0 0.0 0.0 \n",
"\n",
" Category_Universal Price Prod_year Price DensityPred \n",
"ID \n",
"41976837 0.0 90006 2015 90006 90006.0 \n",
"45793567 0.0 7850 2009 7850 7850.0 \n",
"45812786 0.0 2352 1998 2352 2352.0 \n",
"45808317 0.0 1333 2015 1333 1333.0 \n",
"45809653 0.0 62493 2018 62493 62493.0 \n",
"\n",
"[5 rows x 99 columns]"
]
},
"execution_count": 110,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [\n",
" X_test,\n",
" y_test,\n",
" pd.Series(\n",
" models[best_model][\"preds\"],\n",
" index=y_test.index,\n",
" name=\"PricePred\",\n",
" ),\n",
" ],\n",
" axis=1,\n",
").head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}