3720 lines
129 KiB
Plaintext
3720 lines
129 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Загрузка данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Levy</th>\n",
|
|||
|
" <th>Manufacturer</th>\n",
|
|||
|
" <th>Model</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" <th>Category</th>\n",
|
|||
|
" <th>Leather_interior</th>\n",
|
|||
|
" <th>Fuel type</th>\n",
|
|||
|
" <th>Engine volume</th>\n",
|
|||
|
" <th>Mileage</th>\n",
|
|||
|
" <th>Cylinders</th>\n",
|
|||
|
" <th>Gear box type</th>\n",
|
|||
|
" <th>Drive wheels</th>\n",
|
|||
|
" <th>Doors</th>\n",
|
|||
|
" <th>Wheel</th>\n",
|
|||
|
" <th>Color</th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_nokm</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45654403</th>\n",
|
|||
|
" <td>13328</td>\n",
|
|||
|
" <td>1399</td>\n",
|
|||
|
" <td>LEXUS</td>\n",
|
|||
|
" <td>RX 450</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Hybrid</td>\n",
|
|||
|
" <td>3.5</td>\n",
|
|||
|
" <td>186005 km</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>4x4</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Silver</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>186005</td>\n",
|
|||
|
" <td>186005</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>44731507</th>\n",
|
|||
|
" <td>16621</td>\n",
|
|||
|
" <td>1018</td>\n",
|
|||
|
" <td>CHEVROLET</td>\n",
|
|||
|
" <td>Equinox</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>192000 km</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>Tiptronic</td>\n",
|
|||
|
" <td>4x4</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>192000</td>\n",
|
|||
|
" <td>192000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45774419</th>\n",
|
|||
|
" <td>8467</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>HONDA</td>\n",
|
|||
|
" <td>FIT</td>\n",
|
|||
|
" <td>2006</td>\n",
|
|||
|
" <td>Hatchback</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" <td>200000 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Variator</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Right-hand drive</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>200000</td>\n",
|
|||
|
" <td>200000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45769185</th>\n",
|
|||
|
" <td>3607</td>\n",
|
|||
|
" <td>862</td>\n",
|
|||
|
" <td>FORD</td>\n",
|
|||
|
" <td>Escape</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Hybrid</td>\n",
|
|||
|
" <td>2.5</td>\n",
|
|||
|
" <td>168966 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>4x4</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>168966</td>\n",
|
|||
|
" <td>168966</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809263</th>\n",
|
|||
|
" <td>11726</td>\n",
|
|||
|
" <td>446</td>\n",
|
|||
|
" <td>HONDA</td>\n",
|
|||
|
" <td>FIT</td>\n",
|
|||
|
" <td>2014</td>\n",
|
|||
|
" <td>Hatchback</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" <td>91901 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Silver</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>91901</td>\n",
|
|||
|
" <td>91901</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45798355</th>\n",
|
|||
|
" <td>8467</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>MERCEDES-BENZ</td>\n",
|
|||
|
" <td>CLK 200</td>\n",
|
|||
|
" <td>1999</td>\n",
|
|||
|
" <td>Coupe</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>CNG</td>\n",
|
|||
|
" <td>2.0 Turbo</td>\n",
|
|||
|
" <td>300000 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Manual</td>\n",
|
|||
|
" <td>Rear</td>\n",
|
|||
|
" <td>02-Mar</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Silver</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>300000</td>\n",
|
|||
|
" <td>300000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45778856</th>\n",
|
|||
|
" <td>15681</td>\n",
|
|||
|
" <td>831</td>\n",
|
|||
|
" <td>HYUNDAI</td>\n",
|
|||
|
" <td>Sonata</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>Sedan</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>2.4</td>\n",
|
|||
|
" <td>161600 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Tiptronic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Red</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>161600</td>\n",
|
|||
|
" <td>161600</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45804997</th>\n",
|
|||
|
" <td>26108</td>\n",
|
|||
|
" <td>836</td>\n",
|
|||
|
" <td>HYUNDAI</td>\n",
|
|||
|
" <td>Tucson</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Diesel</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>116365 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Grey</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>116365</td>\n",
|
|||
|
" <td>116365</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793526</th>\n",
|
|||
|
" <td>5331</td>\n",
|
|||
|
" <td>1288</td>\n",
|
|||
|
" <td>CHEVROLET</td>\n",
|
|||
|
" <td>Captiva</td>\n",
|
|||
|
" <td>2007</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Diesel</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>51258 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>51258</td>\n",
|
|||
|
" <td>51258</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45813273</th>\n",
|
|||
|
" <td>470</td>\n",
|
|||
|
" <td>753</td>\n",
|
|||
|
" <td>HYUNDAI</td>\n",
|
|||
|
" <td>Sonata</td>\n",
|
|||
|
" <td>2012</td>\n",
|
|||
|
" <td>Sedan</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Hybrid</td>\n",
|
|||
|
" <td>2.4</td>\n",
|
|||
|
" <td>186923 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>186923</td>\n",
|
|||
|
" <td>186923</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>19237 rows × 19 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Price Levy Manufacturer Model Prod_year Category \\\n",
|
|||
|
"ID \n",
|
|||
|
"45654403 13328 1399 LEXUS RX 450 2010 Jeep \n",
|
|||
|
"44731507 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
|
|||
|
"45774419 8467 0 HONDA FIT 2006 Hatchback \n",
|
|||
|
"45769185 3607 862 FORD Escape 2011 Jeep \n",
|
|||
|
"45809263 11726 446 HONDA FIT 2014 Hatchback \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"45798355 8467 0 MERCEDES-BENZ CLK 200 1999 Coupe \n",
|
|||
|
"45778856 15681 831 HYUNDAI Sonata 2011 Sedan \n",
|
|||
|
"45804997 26108 836 HYUNDAI Tucson 2010 Jeep \n",
|
|||
|
"45793526 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
|
|||
|
"45813273 470 753 HYUNDAI Sonata 2012 Sedan \n",
|
|||
|
"\n",
|
|||
|
" Leather_interior Fuel type Engine volume Mileage Cylinders \\\n",
|
|||
|
"ID \n",
|
|||
|
"45654403 Yes Hybrid 3.5 186005 km 6.0 \n",
|
|||
|
"44731507 No Petrol 3 192000 km 6.0 \n",
|
|||
|
"45774419 No Petrol 1.3 200000 km 4.0 \n",
|
|||
|
"45769185 Yes Hybrid 2.5 168966 km 4.0 \n",
|
|||
|
"45809263 Yes Petrol 1.3 91901 km 4.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"45798355 Yes CNG 2.0 Turbo 300000 km 4.0 \n",
|
|||
|
"45778856 Yes Petrol 2.4 161600 km 4.0 \n",
|
|||
|
"45804997 Yes Diesel 2 116365 km 4.0 \n",
|
|||
|
"45793526 Yes Diesel 2 51258 km 4.0 \n",
|
|||
|
"45813273 Yes Hybrid 2.4 186923 km 4.0 \n",
|
|||
|
"\n",
|
|||
|
" Gear box type Drive wheels Doors Wheel Color \\\n",
|
|||
|
"ID \n",
|
|||
|
"45654403 Automatic 4x4 04-May Left wheel Silver \n",
|
|||
|
"44731507 Tiptronic 4x4 04-May Left wheel Black \n",
|
|||
|
"45774419 Variator Front 04-May Right-hand drive Black \n",
|
|||
|
"45769185 Automatic 4x4 04-May Left wheel White \n",
|
|||
|
"45809263 Automatic Front 04-May Left wheel Silver \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"45798355 Manual Rear 02-Mar Left wheel Silver \n",
|
|||
|
"45778856 Tiptronic Front 04-May Left wheel Red \n",
|
|||
|
"45804997 Automatic Front 04-May Left wheel Grey \n",
|
|||
|
"45793526 Automatic Front 04-May Left wheel Black \n",
|
|||
|
"45813273 Automatic Front 04-May Left wheel White \n",
|
|||
|
"\n",
|
|||
|
" Airbags distance_nokm distance_norm \n",
|
|||
|
"ID \n",
|
|||
|
"45654403 12 186005 186005 \n",
|
|||
|
"44731507 8 192000 192000 \n",
|
|||
|
"45774419 2 200000 200000 \n",
|
|||
|
"45769185 0 168966 168966 \n",
|
|||
|
"45809263 4 91901 91901 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"45798355 5 300000 300000 \n",
|
|||
|
"45778856 8 161600 161600 \n",
|
|||
|
"45804997 4 116365 116365 \n",
|
|||
|
"45793526 4 51258 51258 \n",
|
|||
|
"45813273 12 186923 186923 \n",
|
|||
|
"\n",
|
|||
|
"[19237 rows x 19 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
"import pandas as pd\n",
"from sklearn import set_config\n",
"\n",
"# Make scikit-learn transformers return pandas DataFrames instead of NumPy arrays.\n",
"set_config(transform_output=\"pandas\")\n",
"\n",
"random_state = 9\n",
"# Upper bound (km) applied to mileage when building `distance_norm`.\n",
"MAX_MILEAGE_KM = 350000\n",
"\n",
"df = pd.read_csv(\"data/car_price_prediction.csv\", index_col=\"ID\")\n",
"\n",
"# \"Mileage\" holds text such as \"186005 km\"; strip the \" km\" suffix and parse the rest.\n",
"df[\"distance_nokm\"] = pd.to_numeric(df[\"Mileage\"].str.replace(\" km\", \"\", regex=False))\n",
"\n",
"# Clip mileage into [0, MAX_MILEAGE_KM] so extreme values do not dominate the scale.\n",
"df[\"distance_norm\"] = df[\"distance_nokm\"].clip(0, MAX_MILEAGE_KM)\n",
"df.boxplot(column=\"distance_norm\")\n",
"\n",
"# Every \"-\" character in \"Levy\" becomes \"0\" before the numeric parse\n",
"# (presumably \"-\" marks a missing levy -- TODO confirm against the data source).\n",
"df[\"Levy\"] = pd.to_numeric(df[\"Levy\"].str.replace(\"-\", \"0\", regex=False))\n",
"\n",
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 95,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.pipeline import Pipeline\n",
"# StandardScaler lives in sklearn.preprocessing; sklearn.discriminant_analysis merely\n",
"# imports it for internal use, so that module is not a dependable import path.\n",
"from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
"\n",
"# NOTE(review): TitanicFeatures is not used by any active code in this cell;\n",
"# drop this import if no later cell needs it.\n",
"from transformers import TitanicFeatures\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
"# Raw columns removed by the final pipeline step. Anything not listed here and not in\n",
"# num_columns / cat_columns (e.g. \"Price\", \"Prod_year\") is passed through unchanged.\n",
"# NOTE(review): \"Price\" is also what the saved outputs show as y_train -- confirm it is\n",
"# removed from the feature matrix before a model is fitted.\n",
"columns_to_drop = [\n",
"    \"Doors\", \"Mileage\", \"Levy\", \"Fuel type\", \"Drive wheels\",\n",
"    \"Engine volume\", \"Wheel\", \"distance_nokm\", \"Model\", \"Cylinders\",\n",
"]\n",
"# Numeric features handled by the numeric preprocessing branch.\n",
"num_columns = [\"Airbags\", \"distance_norm\"]\n",
"# Categorical features handled by the categorical preprocessing branch.\n",
"cat_columns = [\"Color\", \"Gear box type\", \"Leather_interior\", \"Manufacturer\", \"Category\"]\n",
|
|||
|
"\n",
|
|||
"# Numeric branch: fill gaps with the column median, then standardise.\n",
"num_imputer = SimpleImputer(strategy=\"median\")\n",
"num_scaler = StandardScaler()\n",
"preprocessing_num = Pipeline(steps=[(\"imputer\", num_imputer), (\"scaler\", num_scaler)])\n",
"\n",
"# Categorical branch: fill gaps with the literal \"unknown\", then one-hot encode.\n",
"# NOTE(review): with drop=\"first\" and handle_unknown=\"ignore\", a category unseen during\n",
"# fit is encoded as all zeros, the same encoding as the dropped first category.\n",
"cat_imputer = SimpleImputer(fill_value=\"unknown\", strategy=\"constant\")\n",
"cat_encoder = OneHotEncoder(drop=\"first\", handle_unknown=\"ignore\", sparse_output=False)\n",
"preprocessing_cat = Pipeline(steps=[(\"imputer\", cat_imputer), (\"encoder\", cat_encoder)])\n",
"\n",
"# Route each column group to its branch; every other column passes through untouched.\n",
"features_preprocessing = ColumnTransformer(\n",
"    transformers=[\n",
"        (\"prepocessing_num\", preprocessing_num, num_columns),\n",
"        (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
"    ],\n",
"    remainder=\"passthrough\",\n",
"    verbose_feature_names_out=False,\n",
")\n",
"\n",
"# Remove the columns named in columns_to_drop and keep all remaining ones.\n",
"drop_columns = ColumnTransformer(\n",
"    transformers=[(\"drop_columns\", \"drop\", columns_to_drop)],\n",
"    remainder=\"passthrough\",\n",
"    verbose_feature_names_out=False,\n",
")\n",
"\n",
"# Full preprocessing: encode and scale first, then drop the unused raw columns.\n",
"pipeline_end = Pipeline(\n",
"    steps=[\n",
"        (\"features_preprocessing\", features_preprocessing),\n",
"        (\"drop_columns\", drop_columns),\n",
"    ]\n",
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 96,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
"# Fit the preprocessing pipeline on the loaded frame and transform it in the same call.\n",
"# NOTE(review): the pipeline is fitted on the whole frame here, while the saved outputs\n",
"# show a smaller X_train further down -- confirm whether it should be fitted on the\n",
"# training part only.\n",
"preprocessing_result = pipeline_end.fit_transform(df)\n",
"\n",
"# Rebind `df` to the transformed data, labelled with the pipeline's output feature names.\n",
"# NOTE(review): because `df` is overwritten, re-running this cell without re-running the\n",
"# loading cell feeds already-transformed data back into the pipeline.\n",
"output_columns = pipeline_end.get_feature_names_out()\n",
"df = pd.DataFrame(preprocessing_result, columns=output_columns)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Формирование выборок"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 97,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'X_train'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" <th>Color_Black</th>\n",
|
|||
|
" <th>Color_Blue</th>\n",
|
|||
|
" <th>Color_Brown</th>\n",
|
|||
|
" <th>Color_Carnelian red</th>\n",
|
|||
|
" <th>Color_Golden</th>\n",
|
|||
|
" <th>Color_Green</th>\n",
|
|||
|
" <th>Color_Grey</th>\n",
|
|||
|
" <th>Color_Orange</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Category_Hatchback</th>\n",
|
|||
|
" <th>Category_Jeep</th>\n",
|
|||
|
" <th>Category_Limousine</th>\n",
|
|||
|
" <th>Category_Microbus</th>\n",
|
|||
|
" <th>Category_Minivan</th>\n",
|
|||
|
" <th>Category_Pickup</th>\n",
|
|||
|
" <th>Category_Sedan</th>\n",
|
|||
|
" <th>Category_Universal</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45664431</th>\n",
|
|||
|
" <td>-1.523737</td>\n",
|
|||
|
" <td>0.012512</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1019</td>\n",
|
|||
|
" <td>2013</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45186186</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.768336</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>14113</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45757605</th>\n",
|
|||
|
" <td>-0.134866</td>\n",
|
|||
|
" <td>0.093264</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>15994</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761763</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>-0.935158</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>24891</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45762505</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>0.496836</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>706</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45738958</th>\n",
|
|||
|
" <td>-0.134866</td>\n",
|
|||
|
" <td>1.046858</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>7840</td>\n",
|
|||
|
" <td>2005</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761444</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.288173</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>37633</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809946</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-0.567004</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>5209</td>\n",
|
|||
|
" <td>2013</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45789900</th>\n",
|
|||
|
" <td>0.328091</td>\n",
|
|||
|
" <td>-0.568642</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>14426</td>\n",
|
|||
|
" <td>2006</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45788440</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>0.037171</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>12544</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>15389 rows × 97 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 -1.523737 0.012512 1.0 0.0 0.0 \n",
|
|||
|
"45186186 1.254005 -0.768336 1.0 0.0 0.0 \n",
|
|||
|
"45757605 -0.134866 0.093264 0.0 1.0 0.0 \n",
|
|||
|
"45761763 -1.060780 -0.935158 1.0 0.0 0.0 \n",
|
|||
|
"45762505 1.254005 0.496836 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"45738958 -0.134866 1.046858 0.0 0.0 0.0 \n",
|
|||
|
"45761444 1.254005 -0.288173 1.0 0.0 0.0 \n",
|
|||
|
"45809946 -0.597823 -0.567004 1.0 0.0 0.0 \n",
|
|||
|
"45789900 0.328091 -0.568642 1.0 0.0 0.0 \n",
|
|||
|
"45788440 -0.597823 0.037171 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45186186 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45757605 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45761763 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45762505 0.0 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45738958 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45761444 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45809946 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45789900 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45788440 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
|
|||
|
"ID ... \n",
|
|||
|
"45664431 0.0 ... 0.0 1.0 \n",
|
|||
|
"45186186 0.0 ... 0.0 0.0 \n",
|
|||
|
"45757605 0.0 ... 0.0 0.0 \n",
|
|||
|
"45761763 0.0 ... 1.0 0.0 \n",
|
|||
|
"45762505 0.0 ... 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45738958 0.0 ... 0.0 0.0 \n",
|
|||
|
"45761444 0.0 ... 0.0 0.0 \n",
|
|||
|
"45809946 0.0 ... 1.0 0.0 \n",
|
|||
|
"45789900 0.0 ... 0.0 1.0 \n",
|
|||
|
"45788440 0.0 ... 1.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Limousine Category_Microbus Category_Minivan \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 \n",
|
|||
|
"45186186 0.0 0.0 0.0 \n",
|
|||
|
"45757605 0.0 0.0 0.0 \n",
|
|||
|
"45761763 0.0 0.0 0.0 \n",
|
|||
|
"45762505 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"45738958 0.0 0.0 0.0 \n",
|
|||
|
"45761444 0.0 0.0 0.0 \n",
|
|||
|
"45809946 0.0 0.0 0.0 \n",
|
|||
|
"45789900 0.0 0.0 0.0 \n",
|
|||
|
"45788440 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 1019 \n",
|
|||
|
"45186186 0.0 1.0 0.0 14113 \n",
|
|||
|
"45757605 0.0 1.0 0.0 15994 \n",
|
|||
|
"45761763 0.0 0.0 0.0 24891 \n",
|
|||
|
"45762505 0.0 1.0 0.0 706 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45738958 0.0 1.0 0.0 7840 \n",
|
|||
|
"45761444 0.0 1.0 0.0 37633 \n",
|
|||
|
"45809946 0.0 0.0 0.0 5209 \n",
|
|||
|
"45789900 0.0 0.0 0.0 14426 \n",
|
|||
|
"45788440 0.0 0.0 0.0 12544 \n",
|
|||
|
"\n",
|
|||
|
" Prod_year \n",
|
|||
|
"ID \n",
|
|||
|
"45664431 2013 \n",
|
|||
|
"45186186 2010 \n",
|
|||
|
"45757605 2010 \n",
|
|||
|
"45761763 2011 \n",
|
|||
|
"45762505 2015 \n",
|
|||
|
"... ... \n",
|
|||
|
"45738958 2005 \n",
|
|||
|
"45761444 2010 \n",
|
|||
|
"45809946 2013 \n",
|
|||
|
"45789900 2006 \n",
|
|||
|
"45788440 2011 \n",
|
|||
|
"\n",
|
|||
|
"[15389 rows x 97 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'y_train'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45664431</th>\n",
|
|||
|
" <td>1019</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45186186</th>\n",
|
|||
|
" <td>14113</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45757605</th>\n",
|
|||
|
" <td>15994</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761763</th>\n",
|
|||
|
" <td>24891</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45762505</th>\n",
|
|||
|
" <td>706</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45738958</th>\n",
|
|||
|
" <td>7840</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761444</th>\n",
|
|||
|
" <td>37633</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809946</th>\n",
|
|||
|
" <td>5209</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45789900</th>\n",
|
|||
|
" <td>14426</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45788440</th>\n",
|
|||
|
" <td>12544</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>15389 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Price\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 1019\n",
|
|||
|
"45186186 14113\n",
|
|||
|
"45757605 15994\n",
|
|||
|
"45761763 24891\n",
|
|||
|
"45762505 706\n",
|
|||
|
"... ...\n",
|
|||
|
"45738958 7840\n",
|
|||
|
"45761444 37633\n",
|
|||
|
"45809946 5209\n",
|
|||
|
"45789900 14426\n",
|
|||
|
"45788440 12544\n",
|
|||
|
"\n",
|
|||
|
"[15389 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'X_test'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" <th>Color_Black</th>\n",
|
|||
|
" <th>Color_Blue</th>\n",
|
|||
|
" <th>Color_Brown</th>\n",
|
|||
|
" <th>Color_Carnelian red</th>\n",
|
|||
|
" <th>Color_Golden</th>\n",
|
|||
|
" <th>Color_Green</th>\n",
|
|||
|
" <th>Color_Grey</th>\n",
|
|||
|
" <th>Color_Orange</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Category_Hatchback</th>\n",
|
|||
|
" <th>Category_Jeep</th>\n",
|
|||
|
" <th>Category_Limousine</th>\n",
|
|||
|
" <th>Category_Microbus</th>\n",
|
|||
|
" <th>Category_Minivan</th>\n",
|
|||
|
" <th>Category_Pickup</th>\n",
|
|||
|
" <th>Category_Sedan</th>\n",
|
|||
|
" <th>Category_Universal</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>41976837</th>\n",
|
|||
|
" <td>0.328091</td>\n",
|
|||
|
" <td>-1.184551</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>90006</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793567</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-0.719669</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>7850</td>\n",
|
|||
|
" <td>2009</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45812786</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>1.832171</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>2352</td>\n",
|
|||
|
" <td>1998</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45808317</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.453739</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1333</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809653</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-1.437871</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>62493</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793612</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>-0.018923</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>51746</td>\n",
|
|||
|
" <td>2014</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45567237</th>\n",
|
|||
|
" <td>-1.523737</td>\n",
|
|||
|
" <td>0.242249</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>784</td>\n",
|
|||
|
" <td>2014</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45802935</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>0.299701</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>41221</td>\n",
|
|||
|
" <td>2016</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45102093</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>0.279496</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>10976</td>\n",
|
|||
|
" <td>2003</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45733630</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-1.055951</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>55343</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>3848 rows × 97 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.328091 -1.184551 0.0 0.0 0.0 \n",
|
|||
|
"45793567 -0.597823 -0.719669 1.0 0.0 0.0 \n",
|
|||
|
"45812786 -1.060780 1.832171 0.0 0.0 0.0 \n",
|
|||
|
"45808317 1.254005 -0.453739 0.0 0.0 0.0 \n",
|
|||
|
"45809653 -0.597823 -1.437871 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"45793612 -1.060780 -0.018923 0.0 0.0 0.0 \n",
|
|||
|
"45567237 -1.523737 0.242249 0.0 0.0 0.0 \n",
|
|||
|
"45802935 -0.597823 0.299701 0.0 1.0 0.0 \n",
|
|||
|
"45102093 -0.597823 0.279496 1.0 0.0 0.0 \n",
|
|||
|
"45733630 -0.597823 -1.055951 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45793567 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45812786 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45808317 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45809653 0.0 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45793612 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45567237 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45802935 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45102093 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45733630 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
|
|||
|
"ID ... \n",
|
|||
|
"41976837 0.0 ... 0.0 1.0 \n",
|
|||
|
"45793567 0.0 ... 0.0 1.0 \n",
|
|||
|
"45812786 0.0 ... 1.0 0.0 \n",
|
|||
|
"45808317 0.0 ... 0.0 0.0 \n",
|
|||
|
"45809653 0.0 ... 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45793612 0.0 ... 0.0 0.0 \n",
|
|||
|
"45567237 0.0 ... 0.0 1.0 \n",
|
|||
|
"45802935 0.0 ... 0.0 1.0 \n",
|
|||
|
"45102093 0.0 ... 0.0 1.0 \n",
|
|||
|
"45733630 0.0 ... 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Limousine Category_Microbus Category_Minivan \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 \n",
|
|||
|
"45793567 0.0 0.0 0.0 \n",
|
|||
|
"45812786 0.0 0.0 0.0 \n",
|
|||
|
"45808317 0.0 0.0 0.0 \n",
|
|||
|
"45809653 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"45793612 0.0 1.0 0.0 \n",
|
|||
|
"45567237 0.0 0.0 0.0 \n",
|
|||
|
"45802935 0.0 0.0 0.0 \n",
|
|||
|
"45102093 0.0 0.0 0.0 \n",
|
|||
|
"45733630 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 90006 \n",
|
|||
|
"45793567 0.0 0.0 0.0 7850 \n",
|
|||
|
"45812786 0.0 0.0 0.0 2352 \n",
|
|||
|
"45808317 0.0 1.0 0.0 1333 \n",
|
|||
|
"45809653 0.0 0.0 0.0 62493 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45793612 0.0 0.0 0.0 51746 \n",
|
|||
|
"45567237 0.0 0.0 0.0 784 \n",
|
|||
|
"45802935 0.0 0.0 0.0 41221 \n",
|
|||
|
"45102093 0.0 0.0 0.0 10976 \n",
|
|||
|
"45733630 0.0 1.0 0.0 55343 \n",
|
|||
|
"\n",
|
|||
|
" Prod_year \n",
|
|||
|
"ID \n",
|
|||
|
"41976837 2015 \n",
|
|||
|
"45793567 2009 \n",
|
|||
|
"45812786 1998 \n",
|
|||
|
"45808317 2015 \n",
|
|||
|
"45809653 2018 \n",
|
|||
|
"... ... \n",
|
|||
|
"45793612 2014 \n",
|
|||
|
"45567237 2014 \n",
|
|||
|
"45802935 2016 \n",
|
|||
|
"45102093 2003 \n",
|
|||
|
"45733630 2018 \n",
|
|||
|
"\n",
|
|||
|
"[3848 rows x 97 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'y_test'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>41976837</th>\n",
|
|||
|
" <td>90006</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793567</th>\n",
|
|||
|
" <td>7850</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45812786</th>\n",
|
|||
|
" <td>2352</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45808317</th>\n",
|
|||
|
" <td>1333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809653</th>\n",
|
|||
|
" <td>62493</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793612</th>\n",
|
|||
|
" <td>51746</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45567237</th>\n",
|
|||
|
" <td>784</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45802935</th>\n",
|
|||
|
" <td>41221</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45102093</th>\n",
|
|||
|
" <td>10976</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45733630</th>\n",
|
|||
|
" <td>55343</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>3848 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Price\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 90006\n",
|
|||
|
"45793567 7850\n",
|
|||
|
"45812786 2352\n",
|
|||
|
"45808317 1333\n",
|
|||
|
"45809653 62493\n",
|
|||
|
"... ...\n",
|
|||
|
"45793612 51746\n",
|
|||
|
"45567237 784\n",
|
|||
|
"45802935 41221\n",
|
|||
|
"45102093 10976\n",
|
|||
|
"45733630 55343\n",
|
|||
|
"\n",
|
|||
|
"[3848 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from utils import split_stratified_into_train_val_test\n",
|
|||
|
"\n",
|
|||
|
"X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
|
|||
|
" df,\n",
|
|||
|
" target_colname=\"Price\",\n",
|
|||
|
" stratify_colname=\"Airbags\",\n",
|
|||
|
" frac_train=0.80,\n",
|
|||
|
" frac_val=0,\n",
|
|||
|
" frac_test=0.20,\n",
|
|||
|
" random_state=random_state,\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"display(\"X_train\", X_train)\n",
|
|||
|
"display(\"y_train\", y_train)\n",
|
|||
|
"\n",
|
|||
|
"display(\"X_test\", X_test)\n",
|
|||
|
"display(\"y_test\", y_test)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 98,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" <th>Color_Black</th>\n",
|
|||
|
" <th>Color_Blue</th>\n",
|
|||
|
" <th>Color_Brown</th>\n",
|
|||
|
" <th>Color_Carnelian red</th>\n",
|
|||
|
" <th>Color_Golden</th>\n",
|
|||
|
" <th>Color_Green</th>\n",
|
|||
|
" <th>Color_Grey</th>\n",
|
|||
|
" <th>Color_Orange</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Category_Hatchback</th>\n",
|
|||
|
" <th>Category_Jeep</th>\n",
|
|||
|
" <th>Category_Limousine</th>\n",
|
|||
|
" <th>Category_Microbus</th>\n",
|
|||
|
" <th>Category_Minivan</th>\n",
|
|||
|
" <th>Category_Pickup</th>\n",
|
|||
|
" <th>Category_Sedan</th>\n",
|
|||
|
" <th>Category_Universal</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>41976837</th>\n",
|
|||
|
" <td>0.328091</td>\n",
|
|||
|
" <td>-1.184551</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>90006</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793567</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-0.719669</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>7850</td>\n",
|
|||
|
" <td>2009</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45812786</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>1.832171</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>2352</td>\n",
|
|||
|
" <td>1998</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45808317</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.453739</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1333</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809653</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-1.437871</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>62493</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793612</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>-0.018923</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>51746</td>\n",
|
|||
|
" <td>2014</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45567237</th>\n",
|
|||
|
" <td>-1.523737</td>\n",
|
|||
|
" <td>0.242249</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>784</td>\n",
|
|||
|
" <td>2014</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45802935</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>0.299701</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>41221</td>\n",
|
|||
|
" <td>2016</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45102093</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>0.279496</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>10976</td>\n",
|
|||
|
" <td>2003</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45733630</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-1.055951</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>55343</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>3848 rows × 97 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.328091 -1.184551 0.0 0.0 0.0 \n",
|
|||
|
"45793567 -0.597823 -0.719669 1.0 0.0 0.0 \n",
|
|||
|
"45812786 -1.060780 1.832171 0.0 0.0 0.0 \n",
|
|||
|
"45808317 1.254005 -0.453739 0.0 0.0 0.0 \n",
|
|||
|
"45809653 -0.597823 -1.437871 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"45793612 -1.060780 -0.018923 0.0 0.0 0.0 \n",
|
|||
|
"45567237 -1.523737 0.242249 0.0 0.0 0.0 \n",
|
|||
|
"45802935 -0.597823 0.299701 0.0 1.0 0.0 \n",
|
|||
|
"45102093 -0.597823 0.279496 1.0 0.0 0.0 \n",
|
|||
|
"45733630 -0.597823 -1.055951 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45793567 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45812786 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45808317 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45809653 0.0 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45793612 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45567237 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45802935 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45102093 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45733630 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
|
|||
|
"ID ... \n",
|
|||
|
"41976837 0.0 ... 0.0 1.0 \n",
|
|||
|
"45793567 0.0 ... 0.0 1.0 \n",
|
|||
|
"45812786 0.0 ... 1.0 0.0 \n",
|
|||
|
"45808317 0.0 ... 0.0 0.0 \n",
|
|||
|
"45809653 0.0 ... 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45793612 0.0 ... 0.0 0.0 \n",
|
|||
|
"45567237 0.0 ... 0.0 1.0 \n",
|
|||
|
"45802935 0.0 ... 0.0 1.0 \n",
|
|||
|
"45102093 0.0 ... 0.0 1.0 \n",
|
|||
|
"45733630 0.0 ... 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Limousine Category_Microbus Category_Minivan \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 \n",
|
|||
|
"45793567 0.0 0.0 0.0 \n",
|
|||
|
"45812786 0.0 0.0 0.0 \n",
|
|||
|
"45808317 0.0 0.0 0.0 \n",
|
|||
|
"45809653 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"45793612 0.0 1.0 0.0 \n",
|
|||
|
"45567237 0.0 0.0 0.0 \n",
|
|||
|
"45802935 0.0 0.0 0.0 \n",
|
|||
|
"45102093 0.0 0.0 0.0 \n",
|
|||
|
"45733630 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 90006 \n",
|
|||
|
"45793567 0.0 0.0 0.0 7850 \n",
|
|||
|
"45812786 0.0 0.0 0.0 2352 \n",
|
|||
|
"45808317 0.0 1.0 0.0 1333 \n",
|
|||
|
"45809653 0.0 0.0 0.0 62493 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45793612 0.0 0.0 0.0 51746 \n",
|
|||
|
"45567237 0.0 0.0 0.0 784 \n",
|
|||
|
"45802935 0.0 0.0 0.0 41221 \n",
|
|||
|
"45102093 0.0 0.0 0.0 10976 \n",
|
|||
|
"45733630 0.0 1.0 0.0 55343 \n",
|
|||
|
"\n",
|
|||
|
" Prod_year \n",
|
|||
|
"ID \n",
|
|||
|
"41976837 2015 \n",
|
|||
|
"45793567 2009 \n",
|
|||
|
"45812786 1998 \n",
|
|||
|
"45808317 2015 \n",
|
|||
|
"45809653 2018 \n",
|
|||
|
"... ... \n",
|
|||
|
"45793612 2014 \n",
|
|||
|
"45567237 2014 \n",
|
|||
|
"45802935 2016 \n",
|
|||
|
"45102093 2003 \n",
|
|||
|
"45733630 2018 \n",
|
|||
|
"\n",
|
|||
|
"[3848 rows x 97 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 98,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"X_test"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 99,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" <th>Color_Black</th>\n",
|
|||
|
" <th>Color_Blue</th>\n",
|
|||
|
" <th>Color_Brown</th>\n",
|
|||
|
" <th>Color_Carnelian red</th>\n",
|
|||
|
" <th>Color_Golden</th>\n",
|
|||
|
" <th>Color_Green</th>\n",
|
|||
|
" <th>Color_Grey</th>\n",
|
|||
|
" <th>Color_Orange</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Category_Hatchback</th>\n",
|
|||
|
" <th>Category_Jeep</th>\n",
|
|||
|
" <th>Category_Limousine</th>\n",
|
|||
|
" <th>Category_Microbus</th>\n",
|
|||
|
" <th>Category_Minivan</th>\n",
|
|||
|
" <th>Category_Pickup</th>\n",
|
|||
|
" <th>Category_Sedan</th>\n",
|
|||
|
" <th>Category_Universal</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45664431</th>\n",
|
|||
|
" <td>-1.523737</td>\n",
|
|||
|
" <td>0.012512</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1019</td>\n",
|
|||
|
" <td>2013</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45186186</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.768336</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>14113</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45757605</th>\n",
|
|||
|
" <td>-0.134866</td>\n",
|
|||
|
" <td>0.093264</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>15994</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761763</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>-0.935158</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>24891</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45762505</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>0.496836</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>706</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45738958</th>\n",
|
|||
|
" <td>-0.134866</td>\n",
|
|||
|
" <td>1.046858</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>7840</td>\n",
|
|||
|
" <td>2005</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761444</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.288173</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>37633</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809946</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-0.567004</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>5209</td>\n",
|
|||
|
" <td>2013</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45789900</th>\n",
|
|||
|
" <td>0.328091</td>\n",
|
|||
|
" <td>-0.568642</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>14426</td>\n",
|
|||
|
" <td>2006</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45788440</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>0.037171</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>12544</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>15389 rows × 97 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 -1.523737 0.012512 1.0 0.0 0.0 \n",
|
|||
|
"45186186 1.254005 -0.768336 1.0 0.0 0.0 \n",
|
|||
|
"45757605 -0.134866 0.093264 0.0 1.0 0.0 \n",
|
|||
|
"45761763 -1.060780 -0.935158 1.0 0.0 0.0 \n",
|
|||
|
"45762505 1.254005 0.496836 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"45738958 -0.134866 1.046858 0.0 0.0 0.0 \n",
|
|||
|
"45761444 1.254005 -0.288173 1.0 0.0 0.0 \n",
|
|||
|
"45809946 -0.597823 -0.567004 1.0 0.0 0.0 \n",
|
|||
|
"45789900 0.328091 -0.568642 1.0 0.0 0.0 \n",
|
|||
|
"45788440 -0.597823 0.037171 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45186186 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45757605 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45761763 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45762505 0.0 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45738958 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45761444 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45809946 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45789900 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45788440 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Orange ... Category_Hatchback Category_Jeep \\\n",
|
|||
|
"ID ... \n",
|
|||
|
"45664431 0.0 ... 0.0 1.0 \n",
|
|||
|
"45186186 0.0 ... 0.0 0.0 \n",
|
|||
|
"45757605 0.0 ... 0.0 0.0 \n",
|
|||
|
"45761763 0.0 ... 1.0 0.0 \n",
|
|||
|
"45762505 0.0 ... 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45738958 0.0 ... 0.0 0.0 \n",
|
|||
|
"45761444 0.0 ... 0.0 0.0 \n",
|
|||
|
"45809946 0.0 ... 1.0 0.0 \n",
|
|||
|
"45789900 0.0 ... 0.0 1.0 \n",
|
|||
|
"45788440 0.0 ... 1.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Limousine Category_Microbus Category_Minivan \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 \n",
|
|||
|
"45186186 0.0 0.0 0.0 \n",
|
|||
|
"45757605 0.0 0.0 0.0 \n",
|
|||
|
"45761763 0.0 0.0 0.0 \n",
|
|||
|
"45762505 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"45738958 0.0 0.0 0.0 \n",
|
|||
|
"45761444 0.0 0.0 0.0 \n",
|
|||
|
"45809946 0.0 0.0 0.0 \n",
|
|||
|
"45789900 0.0 0.0 0.0 \n",
|
|||
|
"45788440 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Pickup Category_Sedan Category_Universal Price \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 1019 \n",
|
|||
|
"45186186 0.0 1.0 0.0 14113 \n",
|
|||
|
"45757605 0.0 1.0 0.0 15994 \n",
|
|||
|
"45761763 0.0 0.0 0.0 24891 \n",
|
|||
|
"45762505 0.0 1.0 0.0 706 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"45738958 0.0 1.0 0.0 7840 \n",
|
|||
|
"45761444 0.0 1.0 0.0 37633 \n",
|
|||
|
"45809946 0.0 0.0 0.0 5209 \n",
|
|||
|
"45789900 0.0 0.0 0.0 14426 \n",
|
|||
|
"45788440 0.0 0.0 0.0 12544 \n",
|
|||
|
"\n",
|
|||
|
" Prod_year \n",
|
|||
|
"ID \n",
|
|||
|
"45664431 2013 \n",
|
|||
|
"45186186 2010 \n",
|
|||
|
"45757605 2010 \n",
|
|||
|
"45761763 2011 \n",
|
|||
|
"45762505 2015 \n",
|
|||
|
"... ... \n",
|
|||
|
"45738958 2005 \n",
|
|||
|
"45761444 2010 \n",
|
|||
|
"45809946 2013 \n",
|
|||
|
"45789900 2006 \n",
|
|||
|
"45788440 2011 \n",
|
|||
|
"\n",
|
|||
|
"[15389 rows x 97 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 99,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"X_train"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 100,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>41976837</th>\n",
|
|||
|
" <td>90006</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793567</th>\n",
|
|||
|
" <td>7850</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45812786</th>\n",
|
|||
|
" <td>2352</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45808317</th>\n",
|
|||
|
" <td>1333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809653</th>\n",
|
|||
|
" <td>62493</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793612</th>\n",
|
|||
|
" <td>51746</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45567237</th>\n",
|
|||
|
" <td>784</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45802935</th>\n",
|
|||
|
" <td>41221</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45102093</th>\n",
|
|||
|
" <td>10976</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45733630</th>\n",
|
|||
|
" <td>55343</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>3848 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Price\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 90006\n",
|
|||
|
"45793567 7850\n",
|
|||
|
"45812786 2352\n",
|
|||
|
"45808317 1333\n",
|
|||
|
"45809653 62493\n",
|
|||
|
"... ...\n",
|
|||
|
"45793612 51746\n",
|
|||
|
"45567237 784\n",
|
|||
|
"45802935 41221\n",
|
|||
|
"45102093 10976\n",
|
|||
|
"45733630 55343\n",
|
|||
|
"\n",
|
|||
|
"[3848 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 100,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"y_test"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 101,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45664431</th>\n",
|
|||
|
" <td>1019</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45186186</th>\n",
|
|||
|
" <td>14113</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45757605</th>\n",
|
|||
|
" <td>15994</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761763</th>\n",
|
|||
|
" <td>24891</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45762505</th>\n",
|
|||
|
" <td>706</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45738958</th>\n",
|
|||
|
" <td>7840</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761444</th>\n",
|
|||
|
" <td>37633</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809946</th>\n",
|
|||
|
" <td>5209</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45789900</th>\n",
|
|||
|
" <td>14426</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45788440</th>\n",
|
|||
|
" <td>12544</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>15389 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Price\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 1019\n",
|
|||
|
"45186186 14113\n",
|
|||
|
"45757605 15994\n",
|
|||
|
"45761763 24891\n",
|
|||
|
"45762505 706\n",
|
|||
|
"... ...\n",
|
|||
|
"45738958 7840\n",
|
|||
|
"45761444 37633\n",
|
|||
|
"45809946 5209\n",
|
|||
|
"45789900 14426\n",
|
|||
|
"45788440 12544\n",
|
|||
|
"\n",
|
|||
|
"[15389 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 101,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"y_train"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 102,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from sklearn.pipeline import make_pipeline\n",
|
|||
|
"from sklearn.preprocessing import PolynomialFeatures\n",
|
|||
|
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
|
|||
|
"\n",
|
|||
|
"random_state = 9\n",
|
|||
|
"\n",
|
|||
|
"models = {\n",
|
|||
|
" # \"mlp\": {\n",
|
|||
|
" # \"model\": neural_network.MLPRegressor(\n",
|
|||
|
" # activation=\"tanh\",\n",
|
|||
|
" # hidden_layer_sizes=(3,),\n",
|
|||
|
" # max_iter=500,\n",
|
|||
|
" # early_stopping=True,\n",
|
|||
|
" # random_state=random_state,\n",
|
|||
|
" # )\n",
|
|||
|
" # },\n",
|
|||
|
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
|
|||
|
" \"linear_poly\": {\n",
|
|||
|
" \"model\": make_pipeline(\n",
|
|||
|
" PolynomialFeatures(degree=2),\n",
|
|||
|
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
|
|||
|
" )\n",
|
|||
|
" },\n",
|
|||
|
" \"linear_interact\": {\n",
|
|||
|
" \"model\": make_pipeline(\n",
|
|||
|
" PolynomialFeatures(interaction_only=True),\n",
|
|||
|
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
|
|||
|
" )\n",
|
|||
|
" },\n",
|
|||
|
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
|
|||
|
" \"decision_tree\": {\n",
|
|||
|
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
|
|||
|
" },\n",
|
|||
|
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
|
|||
|
" \"random_forest\": {\n",
|
|||
|
" \"model\": ensemble.RandomForestRegressor(\n",
|
|||
|
" max_depth=7, random_state=random_state, n_jobs=-1\n",
|
|||
|
" )\n",
|
|||
|
" },\n",
|
|||
|
"}"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Определение функций для стандартизации значений в столбце \"distance_norm\" для MLP"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 103,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from pandas import DataFrame\n",
|
|||
|
"from sklearn import preprocessing\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"\n",
|
|||
|
"stndart_scaler = preprocessing.StandardScaler()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def std_temp(df: DataFrame) -> DataFrame:\n",
|
|||
|
" df[\"distance_norm\"] = np.array(stndart_scaler.fit_transform(\n",
|
|||
|
" df[\"distance_norm\"].reshape(-1, 1))\n",
|
|||
|
" ).reshape(df[\"distance_norm\"].shape)\n",
|
|||
|
" return df\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def std_temp2(df: DataFrame) -> DataFrame:\n",
|
|||
|
" # Преобразуем столбец в массив NumPy и применяем reshape\n",
|
|||
|
" df[\"distance_norm\"] = stndart_scaler.fit_transform(\n",
|
|||
|
" df[\"distance_norm\"].values.reshape(-1, 1) # Изменяем на reshape(-1, 1)\n",
|
|||
|
" )\n",
|
|||
|
" return df\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"def std_temp3(df: DataFrame) -> DataFrame:\n",
|
|||
|
" # Проверка на NaN и заполнение средним значением\n",
|
|||
|
" df[\"distance_norm\"].fillna(df[\"distance_norm\"].mean(), inplace=True)\n",
|
|||
|
"\n",
|
|||
|
" # Преобразуем столбец в массив NumPy и применяем reshape\n",
|
|||
|
" df[\"distance_norm\"] = stndart_scaler.fit_transform(\n",
|
|||
|
" df[\"distance_norm\"].values.reshape(-1, 1)\n",
|
|||
|
" )\n",
|
|||
|
" return df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 104,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Airbags 0\n",
|
|||
|
"distance_norm 0\n",
|
|||
|
"Color_Black 0\n",
|
|||
|
"Color_Blue 0\n",
|
|||
|
"Color_Brown 0\n",
|
|||
|
" ..\n",
|
|||
|
"Category_Pickup 0\n",
|
|||
|
"Category_Sedan 0\n",
|
|||
|
"Category_Universal 0\n",
|
|||
|
"Price 0\n",
|
|||
|
"Prod_year 0\n",
|
|||
|
"Length: 97, dtype: int64\n",
|
|||
|
"Airbags 0\n",
|
|||
|
"distance_norm 0\n",
|
|||
|
"Color_Black 0\n",
|
|||
|
"Color_Blue 0\n",
|
|||
|
"Color_Brown 0\n",
|
|||
|
" ..\n",
|
|||
|
"Category_Pickup 0\n",
|
|||
|
"Category_Sedan 0\n",
|
|||
|
"Category_Universal 0\n",
|
|||
|
"Price 0\n",
|
|||
|
"Prod_year 0\n",
|
|||
|
"Length: 97, dtype: int64\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(X_train.isnull().sum())\n",
|
|||
|
"print(X_test.isnull().sum())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Обучение и оценка моделей с помощью различных алгоритмов"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 105,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: linear\n",
|
|||
|
"Model: linear_poly\n",
|
|||
|
"Model: linear_interact\n",
|
|||
|
"Model: ridge\n",
|
|||
|
"Model: decision_tree\n",
|
|||
|
"Model: knn\n",
|
|||
|
"Model: random_forest\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import math\n",
|
|||
|
"from pandas import DataFrame\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"for model_name in models.keys():\n",
|
|||
|
" print(f\"Model: {model_name}\")\n",
|
|||
|
" X_train: DataFrame = X_train.copy()\n",
|
|||
|
" X_test: DataFrame = X_test.copy()\n",
|
|||
|
"\n",
|
|||
|
" if model_name == \"mlp\":\n",
|
|||
|
" X_train = std_temp(X_train)\n",
|
|||
|
" X_test = std_temp(X_test)\n",
|
|||
|
"\n",
|
|||
|
" fitted_model = models[model_name][\"model\"].fit(\n",
|
|||
|
" X_train.values, y_train.values.ravel()\n",
|
|||
|
" )\n",
|
|||
|
" y_train_pred = fitted_model.predict(X_train.values)\n",
|
|||
|
" y_test_pred = fitted_model.predict(X_test.values)\n",
|
|||
|
" models[model_name][\"fitted\"] = fitted_model\n",
|
|||
|
" models[model_name][\"train_preds\"] = y_train_pred\n",
|
|||
|
" models[model_name][\"preds\"] = y_test_pred\n",
|
|||
|
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
|
|||
|
" metrics.mean_squared_error(y_train, y_train_pred)\n",
|
|||
|
" )\n",
|
|||
|
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
|
|||
|
" metrics.mean_squared_error(y_test, y_test_pred)\n",
|
|||
|
" )\n",
|
|||
|
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
|
|||
|
" metrics.mean_absolute_error(y_test, y_test_pred)\n",
|
|||
|
" )\n",
|
|||
|
" models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Вывод результатов оценки"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 106,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<style type=\"text/css\">\n",
|
|||
|
"#T_a93c1_row0_col0, #T_a93c1_row0_col1, #T_a93c1_row1_col0, #T_a93c1_row1_col1, #T_a93c1_row2_col0, #T_a93c1_row2_col1 {\n",
|
|||
|
" background-color: #26818e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row0_col2, #T_a93c1_row1_col2, #T_a93c1_row2_col2, #T_a93c1_row6_col3 {\n",
|
|||
|
" background-color: #4e02a2;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row0_col3, #T_a93c1_row1_col3, #T_a93c1_row2_col3, #T_a93c1_row3_col3, #T_a93c1_row4_col2, #T_a93c1_row5_col2, #T_a93c1_row6_col2 {\n",
|
|||
|
" background-color: #da5a6a;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row3_col0 {\n",
|
|||
|
" background-color: #20928c;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row3_col1 {\n",
|
|||
|
" background-color: #25848e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row3_col2 {\n",
|
|||
|
" background-color: #8305a7;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row4_col0 {\n",
|
|||
|
" background-color: #228b8d;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row4_col1 {\n",
|
|||
|
" background-color: #a2da37;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row4_col3 {\n",
|
|||
|
" background-color: #5502a4;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row5_col0 {\n",
|
|||
|
" background-color: #1e9b8a;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row5_col1 {\n",
|
|||
|
" background-color: #a5db36;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row5_col3 {\n",
|
|||
|
" background-color: #5102a3;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_a93c1_row6_col0, #T_a93c1_row6_col1 {\n",
|
|||
|
" background-color: #a8db34;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"</style>\n",
|
|||
|
"<table id=\"T_a93c1\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"blank level0\" > </th>\n",
|
|||
|
" <th id=\"T_a93c1_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
|
|||
|
" <th id=\"T_a93c1_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
|
|||
|
" <th id=\"T_a93c1_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
|
|||
|
" <th id=\"T_a93c1_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row0\" class=\"row_heading level0 row0\" >linear</th>\n",
|
|||
|
" <td id=\"T_a93c1_row0_col0\" class=\"data row0 col0\" >0.000000</td>\n",
|
|||
|
" <td id=\"T_a93c1_row0_col1\" class=\"data row0 col1\" >0.000000</td>\n",
|
|||
|
" <td id=\"T_a93c1_row0_col2\" class=\"data row0 col2\" >0.000005</td>\n",
|
|||
|
" <td id=\"T_a93c1_row0_col3\" class=\"data row0 col3\" >1.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row1\" class=\"row_heading level0 row1\" >linear_interact</th>\n",
|
|||
|
" <td id=\"T_a93c1_row1_col0\" class=\"data row1 col0\" >0.000000</td>\n",
|
|||
|
" <td id=\"T_a93c1_row1_col1\" class=\"data row1 col1\" >0.000000</td>\n",
|
|||
|
" <td id=\"T_a93c1_row1_col2\" class=\"data row1 col2\" >0.000071</td>\n",
|
|||
|
" <td id=\"T_a93c1_row1_col3\" class=\"data row1 col3\" >1.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row2\" class=\"row_heading level0 row2\" >linear_poly</th>\n",
|
|||
|
" <td id=\"T_a93c1_row2_col0\" class=\"data row2 col0\" >0.000001</td>\n",
|
|||
|
" <td id=\"T_a93c1_row2_col1\" class=\"data row2 col1\" >0.000069</td>\n",
|
|||
|
" <td id=\"T_a93c1_row2_col2\" class=\"data row2 col2\" >0.001155</td>\n",
|
|||
|
" <td id=\"T_a93c1_row2_col3\" class=\"data row2 col3\" >1.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row3\" class=\"row_heading level0 row3\" >ridge</th>\n",
|
|||
|
" <td id=\"T_a93c1_row3_col0\" class=\"data row3 col0\" >676.783751</td>\n",
|
|||
|
" <td id=\"T_a93c1_row3_col1\" class=\"data row3 col1\" >14251.537610</td>\n",
|
|||
|
" <td id=\"T_a93c1_row3_col2\" class=\"data row3 col2\" >25.290145</td>\n",
|
|||
|
" <td id=\"T_a93c1_row3_col3\" class=\"data row3 col3\" >0.998871</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row4\" class=\"row_heading level0 row4\" >decision_tree</th>\n",
|
|||
|
" <td id=\"T_a93c1_row4_col0\" class=\"data row4 col0\" >418.103239</td>\n",
|
|||
|
" <td id=\"T_a93c1_row4_col1\" class=\"data row4 col1\" >410021.797952</td>\n",
|
|||
|
" <td id=\"T_a93c1_row4_col2\" class=\"data row4 col2\" >82.474628</td>\n",
|
|||
|
" <td id=\"T_a93c1_row4_col3\" class=\"data row4 col3\" >0.065532</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row5\" class=\"row_heading level0 row5\" >random_forest</th>\n",
|
|||
|
" <td id=\"T_a93c1_row5_col0\" class=\"data row5 col0\" >1051.981912</td>\n",
|
|||
|
" <td id=\"T_a93c1_row5_col1\" class=\"data row5 col1\" >413832.680119</td>\n",
|
|||
|
" <td id=\"T_a93c1_row5_col2\" class=\"data row5 col2\" >82.077377</td>\n",
|
|||
|
" <td id=\"T_a93c1_row5_col3\" class=\"data row5 col3\" >0.048080</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_a93c1_level0_row6\" class=\"row_heading level0 row6\" >knn</th>\n",
|
|||
|
" <td id=\"T_a93c1_row6_col0\" class=\"data row6 col0\" >4091.598286</td>\n",
|
|||
|
" <td id=\"T_a93c1_row6_col1\" class=\"data row6 col1\" >417393.077890</td>\n",
|
|||
|
" <td id=\"T_a93c1_row6_col2\" class=\"data row6 col2\" >82.148464</td>\n",
|
|||
|
" <td id=\"T_a93c1_row6_col3\" class=\"data row6 col3\" >0.031630</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"<pandas.io.formats.style.Styler at 0x29c1f09e720>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 106,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
|
|||
|
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
|
|||
|
"]\n",
|
|||
|
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
|
|||
|
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
|
|||
|
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Вывод реального и \"спрогнозированного\" результата для обучающей и тестовой выборок"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Получение лучшей модели"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 107,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'linear'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"best_model = str(reg_metrics.sort_values(by=\"RMSE_test\").iloc[0].name)\n",
|
|||
|
"\n",
|
|||
|
"display(best_model)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Вывод для обучающей выборки"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" <th>Color_Black</th>\n",
|
|||
|
" <th>Color_Blue</th>\n",
|
|||
|
" <th>Color_Brown</th>\n",
|
|||
|
" <th>Color_Carnelian red</th>\n",
|
|||
|
" <th>Color_Golden</th>\n",
|
|||
|
" <th>Color_Green</th>\n",
|
|||
|
" <th>Color_Grey</th>\n",
|
|||
|
" <th>Color_Orange</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Category_Limousine</th>\n",
|
|||
|
" <th>Category_Microbus</th>\n",
|
|||
|
" <th>Category_Minivan</th>\n",
|
|||
|
" <th>Category_Pickup</th>\n",
|
|||
|
" <th>Category_Sedan</th>\n",
|
|||
|
" <th>Category_Universal</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>DensityPred</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45664431</th>\n",
|
|||
|
" <td>-1.523737</td>\n",
|
|||
|
" <td>0.012512</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1019</td>\n",
|
|||
|
" <td>2013</td>\n",
|
|||
|
" <td>1019</td>\n",
|
|||
|
" <td>1019.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45186186</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.768336</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>14113</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" <td>14113</td>\n",
|
|||
|
" <td>14113.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45757605</th>\n",
|
|||
|
" <td>-0.134866</td>\n",
|
|||
|
" <td>0.093264</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>15994</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" <td>15994</td>\n",
|
|||
|
" <td>15994.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45761763</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>-0.935158</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>24891</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>24891</td>\n",
|
|||
|
" <td>24891.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45762505</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>0.496836</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>706</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" <td>706</td>\n",
|
|||
|
" <td>706.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>5 rows × 99 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 -1.523737 0.012512 1.0 0.0 0.0 \n",
|
|||
|
"45186186 1.254005 -0.768336 1.0 0.0 0.0 \n",
|
|||
|
"45757605 -0.134866 0.093264 0.0 1.0 0.0 \n",
|
|||
|
"45761763 -1.060780 -0.935158 1.0 0.0 0.0 \n",
|
|||
|
"45762505 1.254005 0.496836 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45186186 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45757605 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45761763 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45762505 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Orange ... Category_Limousine Category_Microbus \\\n",
|
|||
|
"ID ... \n",
|
|||
|
"45664431 0.0 ... 0.0 0.0 \n",
|
|||
|
"45186186 0.0 ... 0.0 0.0 \n",
|
|||
|
"45757605 0.0 ... 0.0 0.0 \n",
|
|||
|
"45761763 0.0 ... 0.0 0.0 \n",
|
|||
|
"45762505 0.0 ... 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Minivan Category_Pickup Category_Sedan \\\n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 0.0 0.0 \n",
|
|||
|
"45186186 0.0 0.0 1.0 \n",
|
|||
|
"45757605 0.0 0.0 1.0 \n",
|
|||
|
"45761763 0.0 0.0 0.0 \n",
|
|||
|
"45762505 0.0 0.0 1.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Universal Price Prod_year Price DensityPred \n",
|
|||
|
"ID \n",
|
|||
|
"45664431 0.0 1019 2013 1019 1019.0 \n",
|
|||
|
"45186186 0.0 14113 2010 14113 14113.0 \n",
|
|||
|
"45757605 0.0 15994 2010 15994 15994.0 \n",
|
|||
|
"45761763 0.0 24891 2011 24891 24891.0 \n",
|
|||
|
"45762505 0.0 706 2015 706 706.0 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 99 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 109,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pd.concat(\n",
|
|||
|
" [\n",
|
|||
|
" X_train,\n",
|
|||
|
" y_train,\n",
|
|||
|
" pd.Series(\n",
|
|||
|
" models[best_model][\"train_preds\"],\n",
|
|||
|
" index=y_train.index,\n",
|
|||
|
" name=\"PricePred\",\n",
|
|||
|
" ),\n",
|
|||
|
" ],\n",
|
|||
|
" axis=1,\n",
|
|||
|
").head(5)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Вывод для тестовой выборки"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" <th>distance_norm</th>\n",
|
|||
|
" <th>Color_Black</th>\n",
|
|||
|
" <th>Color_Blue</th>\n",
|
|||
|
" <th>Color_Brown</th>\n",
|
|||
|
" <th>Color_Carnelian red</th>\n",
|
|||
|
" <th>Color_Golden</th>\n",
|
|||
|
" <th>Color_Green</th>\n",
|
|||
|
" <th>Color_Grey</th>\n",
|
|||
|
" <th>Color_Orange</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Category_Limousine</th>\n",
|
|||
|
" <th>Category_Microbus</th>\n",
|
|||
|
" <th>Category_Minivan</th>\n",
|
|||
|
" <th>Category_Pickup</th>\n",
|
|||
|
" <th>Category_Sedan</th>\n",
|
|||
|
" <th>Category_Universal</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Prod_year</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>DensityPred</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>41976837</th>\n",
|
|||
|
" <td>0.328091</td>\n",
|
|||
|
" <td>-1.184551</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>90006</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" <td>90006</td>\n",
|
|||
|
" <td>90006.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45793567</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-0.719669</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>7850</td>\n",
|
|||
|
" <td>2009</td>\n",
|
|||
|
" <td>7850</td>\n",
|
|||
|
" <td>7850.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45812786</th>\n",
|
|||
|
" <td>-1.060780</td>\n",
|
|||
|
" <td>1.832171</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>2352</td>\n",
|
|||
|
" <td>1998</td>\n",
|
|||
|
" <td>2352</td>\n",
|
|||
|
" <td>2352.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45808317</th>\n",
|
|||
|
" <td>1.254005</td>\n",
|
|||
|
" <td>-0.453739</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1333</td>\n",
|
|||
|
" <td>2015</td>\n",
|
|||
|
" <td>1333</td>\n",
|
|||
|
" <td>1333.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>45809653</th>\n",
|
|||
|
" <td>-0.597823</td>\n",
|
|||
|
" <td>-1.437871</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>62493</td>\n",
|
|||
|
" <td>2018</td>\n",
|
|||
|
" <td>62493</td>\n",
|
|||
|
" <td>62493.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>5 rows × 99 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Airbags distance_norm Color_Black Color_Blue Color_Brown \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.328091 -1.184551 0.0 0.0 0.0 \n",
|
|||
|
"45793567 -0.597823 -0.719669 1.0 0.0 0.0 \n",
|
|||
|
"45812786 -1.060780 1.832171 0.0 0.0 0.0 \n",
|
|||
|
"45808317 1.254005 -0.453739 0.0 0.0 0.0 \n",
|
|||
|
"45809653 -0.597823 -1.437871 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Carnelian red Color_Golden Color_Green Color_Grey \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45793567 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45812786 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45808317 0.0 0.0 0.0 0.0 \n",
|
|||
|
"45809653 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Color_Orange ... Category_Limousine Category_Microbus \\\n",
|
|||
|
"ID ... \n",
|
|||
|
"41976837 0.0 ... 0.0 0.0 \n",
|
|||
|
"45793567 0.0 ... 0.0 0.0 \n",
|
|||
|
"45812786 0.0 ... 0.0 0.0 \n",
|
|||
|
"45808317 0.0 ... 0.0 0.0 \n",
|
|||
|
"45809653 0.0 ... 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Minivan Category_Pickup Category_Sedan \\\n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 0.0 0.0 \n",
|
|||
|
"45793567 0.0 0.0 0.0 \n",
|
|||
|
"45812786 0.0 0.0 0.0 \n",
|
|||
|
"45808317 0.0 0.0 1.0 \n",
|
|||
|
"45809653 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Category_Universal Price Prod_year Price DensityPred \n",
|
|||
|
"ID \n",
|
|||
|
"41976837 0.0 90006 2015 90006 90006.0 \n",
|
|||
|
"45793567 0.0 7850 2009 7850 7850.0 \n",
|
|||
|
"45812786 0.0 2352 1998 2352 2352.0 \n",
|
|||
|
"45808317 0.0 1333 2015 1333 1333.0 \n",
|
|||
|
"45809653 0.0 62493 2018 62493 62493.0 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 99 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 110,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pd.concat(\n",
|
|||
|
" [\n",
|
|||
|
" X_test,\n",
|
|||
|
" y_test,\n",
|
|||
|
" pd.Series(\n",
|
|||
|
" models[best_model][\"preds\"],\n",
|
|||
|
" index=y_test.index,\n",
|
|||
|
" name=\"PricePred\",\n",
|
|||
|
" ),\n",
|
|||
|
" ],\n",
|
|||
|
" axis=1,\n",
|
|||
|
").head(5)"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|