2024-12-07 13:00:14 +04:00
{
"cells": [
{
"cell_type": "markdown",
2024-12-12 23:48:52 +04:00
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
2024-12-07 13:00:14 +04:00
"source": [
"#### Загрузка набора данных"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 30,
2024-12-07 13:00:14 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod. year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
2024-12-12 23:48:52 +04:00
" <td>186005 km</td>\n",
" <td>6.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
2024-12-12 23:48:52 +04:00
" <td>3</td>\n",
" <td>192000 km</td>\n",
" <td>6.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8467</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-</td>\n",
2024-12-07 13:00:14 +04:00
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2006</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
2024-12-12 23:48:52 +04:00
" <td>200000 km</td>\n",
" <td>4.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Variator</td>\n",
" <td>Front</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
2024-12-12 23:48:52 +04:00
" <td>168966 km</td>\n",
" <td>4.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2014</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
2024-12-12 23:48:52 +04:00
" <td>91901 km</td>\n",
" <td>4.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Automatic</td>\n",
" <td>Front</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>19232</th>\n",
2024-12-07 13:00:14 +04:00
" <td>8467</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-</td>\n",
2024-12-07 13:00:14 +04:00
" <td>MERCEDES-BENZ</td>\n",
" <td>CLK 200</td>\n",
" <td>1999</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>CNG</td>\n",
2024-12-12 23:48:52 +04:00
" <td>2.0 Turbo</td>\n",
" <td>300000 km</td>\n",
" <td>4.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Manual</td>\n",
" <td>Rear</td>\n",
2024-12-12 23:48:52 +04:00
" <td>02-Mar</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>19233</th>\n",
2024-12-07 13:00:14 +04:00
" <td>15681</td>\n",
" <td>831</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2011</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
2024-12-12 23:48:52 +04:00
" <td>161600 km</td>\n",
" <td>4.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>19234</th>\n",
2024-12-07 13:00:14 +04:00
" <td>26108</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Tucson</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
2024-12-12 23:48:52 +04:00
" <td>2</td>\n",
" <td>116365 km</td>\n",
" <td>4.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Automatic</td>\n",
" <td>Front</td>\n",
2024-12-12 23:48:52 +04:00
" <td>04-May</td>\n",
2024-12-07 13:00:14 +04:00
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
2024-12-12 23:48:52 +04:00
" <tr>\n",
" <th>19235</th>\n",
" <td>5331</td>\n",
" <td>1288</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Captiva</td>\n",
" <td>2007</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>51258 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19236</th>\n",
" <td>470</td>\n",
" <td>753</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2012</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.4</td>\n",
" <td>186923 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" </tr>\n",
2024-12-07 13:00:14 +04:00
" </tbody>\n",
"</table>\n",
2024-12-12 23:48:52 +04:00
"<p>19237 rows × 17 columns</p>\n",
2024-12-07 13:00:14 +04:00
"</div>"
],
"text/plain": [
2024-12-12 23:48:52 +04:00
" Price Levy Manufacturer Model Prod. year Category \\\n",
"0 13328 1399 LEXUS RX 450 2010 Jeep \n",
"1 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
"2 8467 - HONDA FIT 2006 Hatchback \n",
"3 3607 862 FORD Escape 2011 Jeep \n",
"4 11726 446 HONDA FIT 2014 Hatchback \n",
"... ... ... ... ... ... ... \n",
"19232 8467 - MERCEDES-BENZ CLK 200 1999 Coupe \n",
"19233 15681 831 HYUNDAI Sonata 2011 Sedan \n",
"19234 26108 836 HYUNDAI Tucson 2010 Jeep \n",
"19235 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
"19236 470 753 HYUNDAI Sonata 2012 Sedan \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"0 Yes Hybrid 3.5 186005 km 6.0 \n",
"1 No Petrol 3 192000 km 6.0 \n",
"2 No Petrol 1.3 200000 km 4.0 \n",
"3 Yes Hybrid 2.5 168966 km 4.0 \n",
"4 Yes Petrol 1.3 91901 km 4.0 \n",
"... ... ... ... ... ... \n",
"19232 Yes CNG 2.0 Turbo 300000 km 4.0 \n",
"19233 Yes Petrol 2.4 161600 km 4.0 \n",
"19234 Yes Diesel 2 116365 km 4.0 \n",
"19235 Yes Diesel 2 51258 km 4.0 \n",
"19236 Yes Hybrid 2.4 186923 km 4.0 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
" Gear box type Drive wheels Doors Wheel Color Airbags \n",
"0 Automatic 4x4 04-May Left wheel Silver 12 \n",
"1 Tiptronic 4x4 04-May Left wheel Black 8 \n",
"2 Variator Front 04-May Right-hand drive Black 2 \n",
"3 Automatic 4x4 04-May Left wheel White 0 \n",
"4 Automatic Front 04-May Left wheel Silver 4 \n",
"... ... ... ... ... ... ... \n",
"19232 Manual Rear 02-Mar Left wheel Silver 5 \n",
"19233 Tiptronic Front 04-May Left wheel Red 8 \n",
"19234 Automatic Front 04-May Left wheel Grey 4 \n",
"19235 Automatic Front 04-May Left wheel Black 4 \n",
"19236 Automatic Front 04-May Left wheel White 12 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
"[19237 rows x 17 columns]"
2024-12-07 13:00:14 +04:00
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 30,
2024-12-07 13:00:14 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
2024-12-12 23:48:52 +04:00
"import math\n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
"from sklearn.preprocessing import StandardScaler \n",
"from sklearn.preprocessing import MinMaxScaler\n",
2024-12-07 13:00:14 +04:00
"from imblearn.over_sampling import RandomOverSampler\n",
2024-12-12 23:48:52 +04:00
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn.preprocessing import OrdinalEncoder\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import metrics\n",
"from sklearn import linear_model, tree, neighbors, ensemble\n",
2024-12-07 13:00:14 +04:00
"from sklearn.compose import ColumnTransformer\n",
2024-12-12 23:48:52 +04:00
"from sklearn.preprocessing import FunctionTransformer\n",
2024-12-07 13:00:14 +04:00
"from sklearn.impute import SimpleImputer\n",
"from sklearn.pipeline import Pipeline\n",
"\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import set_config\n",
2024-12-12 23:48:52 +04:00
"from sklearn.pipeline import make_pipeline\n",
2024-12-07 13:00:14 +04:00
"\n",
"\n",
2024-12-12 23:48:52 +04:00
"df = pd.read_csv(\"../data/car_price_prediction.csv\")\n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
"df = df.drop(columns=[\"ID\"])\n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
"random_state = 9\n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
"set_config(transform_output=\"pandas\")\n",
2024-12-07 13:00:14 +04:00
"\n",
"df"
]
},
2024-12-12 23:48:52 +04:00
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Анализ датасета и очистка данных"
]
},
2024-12-07 13:00:14 +04:00
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 31,
2024-12-07 13:00:14 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
2024-12-12 23:48:52 +04:00
"Levy object\n",
2024-12-07 13:00:14 +04:00
"Manufacturer object\n",
"Model object\n",
"Prod. year int64\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
2024-12-12 23:48:52 +04:00
"Engine volume object\n",
"Mileage object\n",
"Cylinders float64\n",
2024-12-07 13:00:14 +04:00
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"dtype: object"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 31,
2024-12-07 13:00:14 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
2024-12-12 23:48:52 +04:00
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 32,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3.5, 3. , 1.3, 2.5, 2. , 1.8, 2.4, 4. , 1.6, 3.3, 2.2,\n",
" 4.7, 1.5, 4.4, 1.4, 3.6, 2.3, 5.5, 2.8, 3.2, 3.8, 4.6,\n",
" 1.2, 5. , 1.7, 2.9, 0.5, 1.9, 2.7, 4.8, 5.3, 0.4, 1.1,\n",
" 2.1, 0.7, 5.4, 3.7, 1. , 2.6, 0.8, 0.2, 5.7, 6.7, 6.2,\n",
" 3.4, 6.3, 4.3, 4.2, 0. , 20. , 0.3, 5.9, 5.6, 6. , 0.6,\n",
" 6.8, 4.5, 7.3, 0.1, 3.1, 6.4, 3.9, 0.9, 5.2, 5.8])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 32,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Strip the \"Turbo\" marker and parse what is left as a number.\n",
"# Casting to str first keeps this cell re-runnable: once the column is numeric,\n",
"# calling .str on it directly would raise.\n",
"df[\"Engine volume\"] = pd.to_numeric(\n",
"    df[\"Engine volume\"].astype(str).str.replace(\"Turbo\", \"\", regex=False).str.strip()\n",
")\n",
"df[\"Engine volume\"].unique()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 33,
2024-12-07 13:00:14 +04:00
"metadata": {},
2024-12-12 23:48:52 +04:00
"outputs": [
{
"data": {
"text/plain": [
"array([186005, 192000, 200000, ..., 140607, 307325, 186923])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 33,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-12-07 13:00:14 +04:00
"source": [
2024-12-12 23:48:52 +04:00
"# Drop the \"km\" unit suffix and store the mileage as an integer.\n",
"# Casting to str first keeps this cell re-runnable: once the column is int64,\n",
"# calling .str on it directly would raise.\n",
"df[\"Mileage\"] = (\n",
"    df[\"Mileage\"].astype(str).str.replace(\"km\", \"\", regex=False).str.strip().astype(\"int64\")\n",
")\n",
"df[\"Mileage\"].unique()"
2024-12-07 13:00:14 +04:00
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 34,
2024-12-07 13:00:14 +04:00
"metadata": {},
"outputs": [
{
2024-12-12 23:48:52 +04:00
"data": {
"text/plain": [
"array([ 1399, 1018, 0, 862, 446, 891, 761, 751, 394,\n",
" 1053, 1055, 1079, 810, 2386, 1850, 531, 586, 1249,\n",
" 2455, 583, 1537, 1288, 915, 1750, 707, 1077, 1486,\n",
" 1091, 650, 382, 1436, 1194, 503, 1017, 1104, 639,\n",
" 629, 919, 781, 530, 640, 765, 777, 779, 934,\n",
" 769, 645, 1185, 1324, 830, 1187, 1111, 760, 642,\n",
" 1604, 1095, 966, 473, 1138, 1811, 988, 917, 1156,\n",
" 687, 11714, 836, 1347, 2866, 1646, 259, 609, 697,\n",
" 585, 475, 690, 308, 1823, 1361, 1273, 924, 584,\n",
" 2078, 831, 1172, 893, 1872, 1885, 1266, 447, 2148,\n",
" 1730, 730, 289, 502, 333, 1325, 247, 879, 1342,\n",
" 1327, 1598, 1514, 1058, 738, 1935, 481, 1522, 1282,\n",
" 456, 880, 900, 798, 1277, 442, 1051, 790, 1292,\n",
" 1047, 528, 1211, 1493, 1793, 574, 930, 1998, 271,\n",
" 706, 1481, 1677, 1661, 1286, 1408, 1090, 595, 1451,\n",
" 1267, 993, 1714, 878, 641, 749, 1511, 603, 353,\n",
" 877, 1236, 1141, 397, 784, 1024, 1357, 1301, 770,\n",
" 922, 1438, 753, 607, 1363, 638, 490, 431, 565,\n",
" 517, 833, 489, 1760, 986, 1841, 1620, 1360, 474,\n",
" 1099, 978, 1624, 1946, 1268, 1307, 696, 649, 666,\n",
" 2151, 551, 800, 971, 1323, 2377, 1845, 1083, 694,\n",
" 463, 419, 345, 1515, 1505, 2056, 1203, 729, 460,\n",
" 1356, 876, 911, 1190, 780, 448, 2410, 1848, 1148,\n",
" 834, 1275, 1028, 1197, 724, 890, 1705, 505, 789,\n",
" 2959, 518, 461, 1719, 2858, 3156, 2225, 2177, 1968,\n",
" 1888, 1308, 2736, 1103, 557, 2195, 843, 1664, 723,\n",
" 4508, 562, 501, 2018, 1076, 1202, 3301, 691, 1440,\n",
" 1869, 1178, 418, 1820, 1413, 488, 1304, 363, 2108,\n",
" 521, 1659, 87, 1411, 1528, 3292, 7058, 1578, 627,\n",
" 874, 1996, 1488, 5679, 1234, 5603, 400, 889, 3268,\n",
" 875, 949, 2265, 441, 742, 425, 2476, 2971, 614,\n",
" 1816, 1375, 1405, 2297, 1062, 1113, 420, 2469, 658,\n",
" 1951, 2670, 2578, 1995, 1032, 994, 1011, 2421, 1296,\n",
" 155, 494, 426, 1086, 961, 2236, 1829, 764, 1834,\n",
" 1054, 617, 1529, 2266, 637, 626, 1832, 1016, 2002,\n",
" 1756, 746, 1285, 2690, 1118, 5332, 980, 1807, 970,\n",
" 1228, 1195, 1132, 1768, 1384, 1080, 7063, 1817, 1452,\n",
" 1975, 1368, 702, 1974, 1781, 1036, 944, 663, 364,\n",
" 1539, 1345, 1680, 2209, 741, 1575, 695, 1317, 294,\n",
" 1525, 424, 997, 1473, 1552, 2819, 2188, 1668, 3057,\n",
" 799, 1502, 2606, 552, 1694, 1759, 1110, 399, 1470,\n",
" 1174, 5877, 1474, 1688, 526, 686, 5908, 1107, 2070,\n",
" 1468, 1246, 1685, 556, 1533, 1917, 1346, 732, 692,\n",
" 579, 421, 362, 3505, 1855, 2711, 1586, 3739, 681,\n",
" 1708, 2278, 1701, 722, 1482, 928, 827, 832, 527,\n",
" 604, 173, 1341, 3329, 1553, 859, 167, 916, 828,\n",
" 2082, 1176, 1108, 975, 3008, 1516, 2269, 1699, 2073,\n",
" 1031, 1503, 2364, 1030, 1442, 5666, 2715, 1437, 2067,\n",
" 1426, 2908, 1279, 866, 4283, 279, 2658, 3015, 2004,\n",
" 1391, 4736, 748, 1466, 644, 683, 2705, 1297, 731,\n",
" 1252, 2216, 3141, 3273, 1518, 1723, 1588, 972, 682,\n",
" 1094, 668, 175, 967, 402, 3894, 1960, 1599, 2000,\n",
" 2084, 1621, 714, 1109, 3989, 873, 1572, 1163, 1991,\n",
" 1716, 1673, 2562, 2874, 965, 462, 605, 1948, 1736,\n",
" 3518, 2054, 2467, 1681, 1272, 1205, 750, 2156, 2566,\n",
" 115, 524, 3184, 676, 1678, 612, 328, 955, 1441,\n",
" 1675, 3965, 2909, 623, 822, 867, 3025, 1993, 792,\n",
" 636, 4057, 3743, 2337, 2570, 2418, 2472, 3910, 1662,\n",
" 2123, 2628, 3208, 2080, 3699, 2913, 864, 2505, 870,\n",
" 7536, 1924, 1671, 1064, 1836, 1866, 4741, 841, 1369,\n",
" 5681, 3112, 1366, 2223, 1198, 1039, 3811, 3571, 1387,\n",
" 1171, 1365, 1531, 1590, 11706, 2308, 4860, 1641, 1045,\n",
" 1901])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 34,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
2024-12-07 13:00:14 +04:00
}
],
"source": [
2024-12-12 23:48:52 +04:00
"# \"-\" entries in the raw Levy column become \"0\" before the integer cast.\n",
"levy_text = df[\"Levy\"].replace(\"-\", \"0\")\n",
"df[\"Levy\"] = levy_text.astype(\"int64\")\n",
"df[\"Levy\"].unique()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 35,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 6, 4, 8, 1, 12, 3, 2, 16, 5, 7, 9, 10, 14])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 35,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cylinders is loaded as float64 (6.0, 4.0, ...); store it as an integer column.\n",
"df[\"Cylinders\"] = df[\"Cylinders\"].astype(np.int64)\n",
"df[\"Cylinders\"].unique()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 36,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['04-May', '02-Mar', '>5'], dtype=object)"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 36,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Doors\"].unique()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 37,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Четырехдверный', 'Двухдверный', 'Многодверный'], dtype=object)"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 37,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Series.replace leaves values that are not keys of the mapping untouched, so running\n",
"# this cell again keeps the translated labels (Series.map would turn them into NaN).\n",
"df[\"Doors\"] = df[\"Doors\"].replace(\n",
"    {\"02-Mar\": \"Двухдверный\", \"04-May\": \"Четырехдверный\", \">5\": \"Многодверный\"}\n",
2024-12-07 13:00:14 +04:00
")\n",
2024-12-12 23:48:52 +04:00
"df[\"Doors\"].unique()"
2024-12-07 13:00:14 +04:00
]
},
{
2024-12-12 23:48:52 +04:00
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 38,
2024-12-07 13:00:14 +04:00
"metadata": {},
2024-12-12 23:48:52 +04:00
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 3, 6, ..., 627220, 872946, 26307500])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 38,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-12-07 13:00:14 +04:00
"source": [
2024-12-12 23:48:52 +04:00
"# Distinct prices in ascending order, to inspect both ends of the range.\n",
"sorted_df = df.sort_values(\"Price\")\n",
"pd.unique(sorted_df[\"Price\"])"
2024-12-07 13:00:14 +04:00
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 39,
2024-12-07 13:00:14 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-12-12 23:48:52 +04:00
"Количество строк до удаления некорректных значений: 19237\n",
"Количество строк после удаления некорректных значений: 17574\n"
2024-12-07 13:00:14 +04:00
]
}
],
"source": [
2024-12-12 23:48:52 +04:00
"# Rows priced below this threshold are dropped as invalid.\n",
"MIN_VALID_PRICE = 500\n",
"\n",
"print(f\"Количество строк до удаления некорректных значений: {len(df)}\")\n",
"# .copy() gives an independent frame, so later in-place operations on df\n",
"# do not emit SettingWithCopyWarning.\n",
"df = df[df[\"Price\"] >= MIN_VALID_PRICE].copy()\n",
"print(f\"Количество строк после удаления некорректных значений: {len(df)}\")"
2024-12-07 13:00:14 +04:00
]
},
{
2024-12-12 23:48:52 +04:00
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 40,
2024-12-07 13:00:14 +04:00
"metadata": {},
2024-12-12 23:48:52 +04:00
"outputs": [
{
"data": {
"text/plain": [
"array([ 500, 549, 600, ..., 627220, 872946, 26307500])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 40,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
2024-12-07 13:00:14 +04:00
"source": [
2024-12-12 23:48:52 +04:00
"# Distinct prices in ascending order, to inspect both ends of the range.\n",
"sorted_df = df.sort_values(\"Price\")\n",
"pd.unique(sorted_df[\"Price\"])"
2024-12-07 13:00:14 +04:00
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 41,
2024-12-07 13:00:14 +04:00
"metadata": {},
2024-12-12 23:48:52 +04:00
"outputs": [
{
"data": {
"text/plain": [
"array([1943, 1953, 1957, 1964, 1965, 1968, 1973, 1974, 1977, 1978, 1980,\n",
" 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991,\n",
" 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,\n",
" 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,\n",
" 2014, 2015, 2016, 2017, 2018, 2019, 2020])"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 41,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct production years in ascending order.\n",
"sorted_df = df.sort_values(\"Prod. year\")\n",
"pd.unique(sorted_df[\"Prod. year\"])"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 42,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod. year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>186005</td>\n",
" <td>6</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>3.0</td>\n",
" <td>192000</td>\n",
" <td>6</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8467</td>\n",
" <td>0</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2006</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>200000</td>\n",
" <td>4</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>168966</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2014</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>91901</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19231</th>\n",
" <td>5802</td>\n",
" <td>1055</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>E 350</td>\n",
" <td>2013</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>3.5</td>\n",
" <td>107800</td>\n",
" <td>6</td>\n",
" <td>Automatic</td>\n",
" <td>Rear</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19232</th>\n",
" <td>8467</td>\n",
" <td>0</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>CLK 200</td>\n",
" <td>1999</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>CNG</td>\n",
" <td>2.0</td>\n",
" <td>300000</td>\n",
" <td>4</td>\n",
" <td>Manual</td>\n",
" <td>Rear</td>\n",
" <td>Двухдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19233</th>\n",
" <td>15681</td>\n",
" <td>831</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2011</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>161600</td>\n",
" <td>4</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19234</th>\n",
" <td>26108</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Tucson</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2.0</td>\n",
" <td>116365</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19235</th>\n",
" <td>5331</td>\n",
" <td>1288</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Captiva</td>\n",
" <td>2007</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2.0</td>\n",
" <td>51258</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>17574 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Prod. year Category \\\n",
"0 13328 1399 LEXUS RX 450 2010 Jeep \n",
"1 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
"2 8467 0 HONDA FIT 2006 Hatchback \n",
"3 3607 862 FORD Escape 2011 Jeep \n",
"4 11726 446 HONDA FIT 2014 Hatchback \n",
"... ... ... ... ... ... ... \n",
"19231 5802 1055 MERCEDES-BENZ E 350 2013 Sedan \n",
"19232 8467 0 MERCEDES-BENZ CLK 200 1999 Coupe \n",
"19233 15681 831 HYUNDAI Sonata 2011 Sedan \n",
"19234 26108 836 HYUNDAI Tucson 2010 Jeep \n",
"19235 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
"\n",
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"0 Yes Hybrid 3.5 186005 6 \n",
"1 No Petrol 3.0 192000 6 \n",
"2 No Petrol 1.3 200000 4 \n",
"3 Yes Hybrid 2.5 168966 4 \n",
"4 Yes Petrol 1.3 91901 4 \n",
"... ... ... ... ... ... \n",
"19231 Yes Diesel 3.5 107800 6 \n",
"19232 Yes CNG 2.0 300000 4 \n",
"19233 Yes Petrol 2.4 161600 4 \n",
"19234 Yes Diesel 2.0 116365 4 \n",
"19235 Yes Diesel 2.0 51258 4 \n",
"\n",
" Gear box type Drive wheels Doors Wheel Color \\\n",
"0 Automatic 4x4 Четырехдверный Left wheel Silver \n",
"1 Tiptronic 4x4 Четырехдверный Left wheel Black \n",
"2 Variator Front Четырехдверный Right-hand drive Black \n",
"3 Automatic 4x4 Четырехдверный Left wheel White \n",
"4 Automatic Front Четырехдверный Left wheel Silver \n",
"... ... ... ... ... ... \n",
"19231 Automatic Rear Четырехдверный Left wheel Grey \n",
"19232 Manual Rear Двухдверный Left wheel Silver \n",
"19233 Tiptronic Front Четырехдверный Left wheel Red \n",
"19234 Automatic Front Четырехдверный Left wheel Grey \n",
"19235 Automatic Front Четырехдверный Left wheel Black \n",
"\n",
" Airbags \n",
"0 12 \n",
"1 8 \n",
"2 2 \n",
"3 0 \n",
"4 4 \n",
"... ... \n",
"19231 12 \n",
"19232 5 \n",
"19233 8 \n",
"19234 4 \n",
"19235 4 \n",
"\n",
"[17574 rows x 17 columns]"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 42,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Очистка дубликатов и пропущенных значений"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 43,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.int64(2773)"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 43,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.duplicated().sum()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 44,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-12-14 10:14:47 +04:00
"C:\\Users\\user\\AppData\\Local\\Temp\\ipykernel_18064\\1689817098.py:1: SettingWithCopyWarning: \n",
2024-12-12 23:48:52 +04:00
"A value is trying to be set on a copy of a slice from a DataFrame\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df.drop_duplicates(inplace=True)\n"
]
}
],
"source": [
"# Reassign rather than mutate in place: dropping rows in place on a frame obtained\n",
"# by boolean filtering emits SettingWithCopyWarning.\n",
"df = df.drop_duplicates()\n",
"assert not df.duplicated().any()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 45,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price 0\n",
"Levy 0\n",
"Manufacturer 0\n",
"Model 0\n",
"Prod. year 0\n",
"Category 0\n",
"Leather interior 0\n",
"Fuel type 0\n",
"Engine volume 0\n",
"Mileage 0\n",
"Cylinders 0\n",
"Gear box type 0\n",
"Drive wheels 0\n",
"Doors 0\n",
"Wheel 0\n",
"Color 0\n",
"Airbags 0\n",
"dtype: int64"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 45,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Очистка выбросов"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 46,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy int64\n",
"Manufacturer object\n",
"Model object\n",
"Prod. year int64\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
"Engine volume float64\n",
"Mileage int64\n",
"Cylinders int64\n",
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"dtype: object"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 46,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 47,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWkAAAGECAYAAAD0odESAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAc3ElEQVR4nO3df1BVdeL/8deVrhdQYHMJECVlIy0TEc0QytT5CITWfthWx2mawZpq2x2oXHLa6Ltj0a/7R1nOzrpa4yhTu1ZbKTZExo0ScMRKk12tzbQlaRMI95NcgbzeLvf7R+ttCdB7Ebhvuc/HzJ3xnvt+c9935vD0zLnnXixer9crAICRRgV7AQCA/hFpADAYkQYAgxFpADAYkQYAgxFpADAYkQYAgxFpADAYkQYAgxFpADDYBRXp2tpa3XTTTUpMTJTFYlF5eXlA8x955BFZLJZetzFjxgzNggHgPF1Qke7s7FRaWprWrVs3oPmrVq1Sc3Nzj9u0adO0bNmyQV4pAAyOCyrSeXl5evzxx/WLX/yiz8ddLpdWrVqlCRMmaMyYMcrIyNDOnTt9j48dO1YJCQm+W2trqz755BPdcccdw/QKACAwF1Skz6WoqEj19fV6+eWX9fe//13Lli3TDTfcoMOHD/c5fuPGjZoyZYrmzZs3zCsFAP+MmEg3NTVp8+bNevXVVzVv3jxddtllWrVqla677jpt3ry51/hTp07pL3/5C0fRAIx2UbAXMFgOHDggj8ejKVOm9Njucrn005/+tNf4bdu26eTJk1qxYsVwLREAAjZiIt3R0aGwsDDt27dPYWFhPR4bO3Zsr/EbN27UjTfeqPj4+OFaIgAEbMREOj09XR6PR19//fU5zzE3Njbqvffe0xtvvDFMqwOAgbmgIt3R0aEjR4747jc2NqqhoUHjxo3TlClTdOutt6qgoEBr1qxRenq62traVF1drRkzZmjJkiW+eZs2bdL48eOVl5cXjJcBAH6zXEh/43Dnzp1auHBhr+0rVqxQWVmZ3G63Hn/8cb3wwgv66quvFBsbq7lz56q0tFSpqamSpO7ubk2aNEkFBQV64oknhvslAEBALqhIA0CoGTGX4AHASHRBnJPu7u7WsWPHFBUVJYvFEuzlAMB583q9OnnypBITEzVqVP/HyxdEpI8dO6akpKRgLwMABt2XX36piRMn9vv4BRHpqKgoSd+/mOjo6CCvBiOZ2+1WVVWVcnJyZLVag70cjGBOp1NJSUm+vvXngoj0mVMc0dHRRBpDyu12KzIyUtHR0UQaw+Jcp3B54xAADEakAcBgRBoADEakAcBgRBoADEakAcBgRBoADEakgf/weDyqqalRbW2tampq5PF4gr0kgEgDkrR161alpKQoOztbzzzzjLKzs5WSkqKtW7cGe2kIcUQaIW/r1q1aunSpUlNTVVdXp5deekl1dXVKTU3V0qVLCTWC6oL4Pmmn06mYmBi1t7fzsXAMKo/Ho5SUFKWmpqq8vFwej0eVlZVavHixwsLClJ+fr4MHD+rw4cO9/nYmcD787RpH0ghpdXV1+uKLL/TQQw/1+rrIUaNGqaSkRI2NjaqrqwvSChHqiDRCWnNzsyRp+vTpfT5+ZvuZccBwI9IIaePHj5ckHTx4sM/Hz2w/Mw4YbkQaIW3evHmaPHmynnzySXV3d/d4rLu7W3a7XcnJyZo3b16QVohQR6QR0sLCwrRmzRpVVFQoPz9fe/bs0bfffqs9e/YoPz9fFRUVevrpp3nTEEFzQXzpPzCUbr75Zr322mu6//77df311/u2Jycn67XXXtPNN98cxNUh1HEJHvAfHo9H7733nt566y3l5eVp4cKFHEFjyPjbNY6kgf8ICwvT/Pnz1dnZqfnz5xNoGIFz0gBgMCINAAYj0gBgMCINAAYj0gBgsIAibbfbNWfOHEVFRSkuLk75+fk6dOjQWeeUlZXJYrH0uIWHh5/XogEgVAQU6ZqaGhUWFmrPnj
1yOBxyu93KyclRZ2fnWedFR0erubnZdzt69Oh5LRoAQkVA10nv2LGjx/2ysjLFxcVp3759PT6p9WMWi0UJCQl+P4/L5ZLL5fLddzqdkiS32y232x3IkoGAnNm/2M8w1Pzdx87rwyzt7e2SpHHjxp11XEdHhyZNmqTu7m7NmjVLTz75pK666qp+x9vtdpWWlvbaXlVVpcjIyPNZMuAXh8MR7CVghOvq6vJr3IA/Ft7d3a2f//znOnHihHbt2tXvuPr6eh0+fFgzZsxQe3u7nn76adXW1urjjz/WxIkT+5zT15F0UlKSjh8/zsfCMaTcbrccDoeys7NltVqDvRyMYE6nU7GxsUP3sfDCwkIdPHjwrIGWpMzMTGVmZvruZ2Vl6corr9Rzzz2nxx57rM85NptNNput13ar1covDoYF+xqGmr/714AiXVRUpIqKCtXW1vZ7NHy2haWnp+vIkSMDeWoACCkBXd3h9XpVVFSkbdu26d1331VycnLAT+jxeHTgwAH+0gUA+CGgI+nCwkJt2bJF27dvV1RUlFpaWiRJMTExioiIkCQVFBRowoQJstvtkqRHH31Uc+fOVUpKik6cOKGnnnpKR48e1Z133jnILwUARp6AIr1+/XpJ0oIFC3ps37x5s2677TZJUlNTU4+/uvzNN9/orrvuUktLiy6++GLNnj1bu3fv1rRp085v5QAQAvjSf+C/uN1uVVZWavHixbxxiCHlb9f47g4AMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDBRRpu92uOXPmKCoqSnFxccrPz9ehQ4fOOe/VV1/VFVdcofDwcKWmpqqysnLACwaAUBJQpGtqalRYWKg9e/bI4XDI7XYrJydHnZ2d/c7ZvXu3brnlFt1xxx3av3+/8vPzlZ+fr4MHD5734gFgpLN4vV7vQCe3tbUpLi5ONTU1uv766/scs3z5cnV2dqqiosK3be7cuZo5c6Y2bNjQ5xyXyyWXy+W773Q6lZSUpOPHjys6OnqgywXOye12y+FwKDs7W1arNdjLwQjmdDoVGxur9vb2s3btovN5kvb2dknSuHHj+h1TX1+v4uLiHttyc3NVXl7e7xy73a7S0tJe26uqqhQZGTmwxQIBcDgcwV4CRriuri6/xg040t3d3Vq5cqWuvfZaTZ8+vd9xLS0tio+P77EtPj5eLS0t/c4pKSnpEfYzR9I5OTkcSWNIcSSN4eJ0Ov0aN+BIFxYW6uDBg9q1a9dAf0S/bDabbDZbr+1Wq5VfHAwL9jUMNX/3rwFFuqioSBUVFaqtrdXEiRPPOjYhIUGtra09trW2tiohIWEgTw0AISWgqzu8Xq+Kioq0bds2vfvuu0pOTj7nnMzMTFVXV/fY5nA4lJmZGdhKASAEBXQkXVhYqC1btmj79u2KiorynVeOiYlRRESEJKmgoEATJkyQ3W6XJN13332aP3++1qxZoyVLlujll1/W3r179fzzzw/ySwGAkSegI+n169ervb1dCxYs0Pjx4323V155xTemqalJzc3NvvtZWVnasmWLnn/+eaWlpem1115TeXn5Wd9sBAB8L6AjaX8uqd65c2evbcuWLdOyZcsCeSoAgPjuDgAwGpEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQ
AwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJE
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAF1CAYAAAAA3+oBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyAklEQVR4nO3de1xUZf4H8A+XmeE6XLzMqCFQmq7KJmU/ZFPzgmCiC6KVLJkprqurrS6aprtl/Pqt99y2tVB/7dq+Nsv1lv4WLznhBcwRFa+omRVqaQMmMoOAwzA8vz/cOS9GMMecwyjn8369eMl5nu+ceY6v4cPhzDPP8RJCCBARkaJ4e3oARETU/Bj+REQKxPAnIlIghj8RkQIx/ImIFIjhT0SkQAx/IiIF8vX0ADypvr4ely9fRnBwMLy8vDw9HCKieyaEQGVlJdq3bw9v79uf3ys6/C9fvoyIiAhPD4OIyO2+/fZbPPTQQ7ftV3T4BwcHA7j5n6TVaj08GmrJbDYbdu7cicTERKhUKk8Ph1owi8WCiIgIKd9uR9Hh77jUo9VqGf4kK5vNhoCAAGi1WoY/NYs7XcrmG75ERAp01+Gfn5+P4cOHo3379vDy8sLmzZulPpvNhtmzZyMmJgaBgYFo3749XnzxRVy+fNlpH+Xl5cjIyIBWq0VoaCgyMzNx/fp1p5oTJ06gb9++8PPzQ0REBBYvXtxoLOvXr0fXrl3h5+eHmJgYbNu27W4Ph4hIke46/KuqqvDYY4/h3XffbdRXXV2NI0eO4LXXXsORI0ewadMmnD17Fr/85S+d6jIyMnDq1CkYDAbk5uYiPz8fEydOlPotFgsSExMRGRmJoqIiLFmyBG+88QZWrVol1ezfvx/p6enIzMzE0aNHkZqaitTUVBQXF9/tIRERKY+4BwDEJ5988qM1Bw8eFADEhQsXhBBCnD59WgAQhw4dkmq2b98uvLy8xKVLl4QQQrz33nsiLCxMWK1WqWb27NmiS5cu0vZzzz0nkpOTnZ4rLi5O/OY3v3F5/GazWQAQZrPZ5ccQ/RS1tbVi8+bNora21tNDoRbO1VyT/Q1fs9kMLy8vhIaGAgCMRiNCQ0PRq1cvqSYhIQHe3t4oLCzEiBEjYDQa0a9fP6jVaqkmKSkJixYtwrVr1xAWFgaj0YisrCyn50pKSnK6DHUrq9UKq9UqbVssFgA3L1fZbDY3HC1R0xyvL77OSG6uvsZkDf8bN25g9uzZSE9Pl2bTmEwmtG3b1nkQvr4IDw+HyWSSaqKjo51qdDqd1BcWFgaTySS1Naxx7KMpCxYsQHZ2dqP2nTt3IiAg4O4PkOguGQwGTw+BWrjq6mqX6mQLf5vNhueeew5CCOTk5Mj1NHdlzpw5Tn8tOObDJiYmcqonycpms8FgMGDw4MGc6kmyclzRuBNZwt8R/BcuXMCuXbucglWv16OsrMypvq6uDuXl5dDr9VJNaWmpU41j+041jv6maDQaaDSaRu0qlYo/kCSbmpoazJgxAwcOHMCOHTuwbNky+Pv7e3pY1EK5mmVun+fvCP5z587hs88+Q6tWrZz64+PjUVFRgaKiIqlt165dqK+vR1xcnFSTn5/vdO3KYDCgS5cuCAsLk2ry8vKc9m0wGBAfH+/uQyL6yVJTUxEQEIAVK1bg2LFjWLFiBQICApCamurpoZHS3e07yZWVleLo0aPi6NGjAoBYtmyZOHr0qLhw4YKora0Vv/zlL8VDDz0kjh07Jr7//nvpq+HMnSFDhojY2FhRWFgo9u3bJzp37izS09Ol/oqKCqHT6cSYMWNEcXGxWLt2rQgICBArV66Uaj7//HPh6+srli5dKs6cOSPmzZsnVCqVOHnypMvHwtk+JKeUlBQBQKjVajFr1iyRk5MjZs2aJdRqtQAgUlJSPD1EaoFczbW7Dv/du3cLAI2+xo4dK0pKSprsAyB2794t7ePq1asiPT1dBAUFCa1WK8aNGycqKyudnuf48e
OiT58+QqPRiA4dOoiFCxc2Gsu6devEo48+KtRqtejevbvYunXrXR0Lw5/kUl1dLQW/1Wp1mupptVqlXwDV1dWeHiq1MLKFf0vC8Ce5TJkyRQAQr776qhCi8Tz/WbNmCQBiypQpnhwmtUCu5hrX9iGSwblz5wAAEyZMaLI/MzPTqY6ouTH8iWTQuXNnAMD777/fZP/f/vY3pzqi5uYlhBCeHoSnWCwWhISEwGw2c54/uVVNTQ0CAgKgVqtRWVkJLy8vbNu2DUOHDoUQAsHBwaitrUV1dTWnfZJbuZprPPMnkoG/vz9SUlJQW1uL4OBgzJ07F5cuXcLcuXOl4E9JSWHwk8fwzJ9n/iSj1NRUbNmypVF7SkrKj65DRfRT8cyf6D6wefNmVFdXY9KkSejZsycmTZqE6upqBj95nKJv40jUHPz9/fHOO+9I1/y5lAjdD3jmT0SkQAx/IiIFYvgTESkQw5+ISIEY/kRECsTwJyJSIIY/EZECMfyJiBSI4U9EpEAMfyIiBWL4ExEpEMOfiEiBGP5ERArE8CciUiCGPxGRAjH8iYgUiOFPRKRADH8iIgVi+BMRKRDDn4hIgRj+REQKxPAnIlIghj8RkQIx/ImIFIjhT0SkQAx/IiIFYvgTESkQw5+ISIEY/kRECsTwJyJSIIY/EZECMfyJiBSI4U9EpEB3Hf75+fkYPnw42rdvDy8vL2zevNmpXwiB119/He3atYO/vz8SEhJw7tw5p5ry8nJkZGRAq9UiNDQUmZmZuH79ulPNiRMn0LdvX/j5+SEiIgKLFy9uNJb169eja9eu8PPzQ0xMDLZt23a3h0NEpEh3Hf5VVVV47LHH8O677zbZv3jxYrzzzjtYsWIFCgsLERgYiKSkJNy4cUOqycjIwKlTp2AwGJCbm4v8/HxMnDhR6rdYLEhMTERkZCSKioqwZMkSvPHGG1i1apVUs3//fqSnpyMzMxNHjx5FamoqUlNTUVxcfLeHRESkPOIeABCffPKJtF1fXy/0er1YsmSJ1FZRUSE0Go34+OOPhRBCnD59WgAQhw4dkmq2b98uvLy8xKVLl4QQQrz33nsiLCxMWK1WqWb27NmiS5cu0vZzzz0nkpOTncYTFxcnfvOb37g8frPZLAAIs9ns8mOIfora2lqxefNmUVtb6+mhUAvnaq75uvMXSUlJCUwmExISEqS2kJAQxMXFwWg0YvTo0TAajQgNDUWvXr2kmoSEBHh7e6OwsBAjRoyA0WhEv379oFarpZqkpCQsWrQI165dQ1hYGIxGI7KyspyePykpqdFlqIasViusVqu0bbFYAAA2mw02m+1eD5/othyvL77OSG6uvsbcGv4mkwkAoNPpnNp1Op3UZzKZ0LZtW+dB+PoiPDzcqSY6OrrRPhx9YWFhMJlMP/o8TVmwYAGys7Mbte/cuRMBAQGuHCLRPTEYDJ4eArVw1dXVLtW5Nfzvd3PmzHH6a8FisSAiIgKJiYnQarUeHBm1dDabDQaDAYMHD4ZKpfL0cKgFc1zRuBO3hr9erwcAlJaWol27dlJ7aWkpevbsKdWUlZU5Pa6urg7l5eXS4/V6PUpLS51qHNt3qnH0N0Wj0UCj0TRqV6lU/IGkZsHXGsnN1deXW+f5R0dHQ6/XIy8vT2qzWCwoLCxEfHw8ACA+Ph4VFRUoKiqSanbt2oX6+nrExcVJNfn5+U7XrgwGA7p06YKwsDCppuHzOGocz0NERD/ibt9JrqysFEePHhVHjx4VAMSyZcvE0aNHxYULF4QQQixcuFCEhoaKLVu2iBMnToiUlBQRHR0tampqpH0MGTJExMbGisLCQrFv3z7RuXNnkZ6eLvVXVFQInU4nxowZI4qLi8XatWtFQECAWLlypVTz+eefC19fX7F06VJx5swZMW/ePKFSqcTJkyddPhbO9qHmwtk+1FxczbW7Dv/du3cLAI2+xo4dK4S4Od3ztddeEzqdTmg0GjFo0CBx9uxZp31cvXpVpKeni6CgIK
HVasW4ceNEZWWlU83x48dFnz59hEajER06dBALFy5sNJZ169aJRx99VKjVatG9e3exdevWuzoWhj81F4Y/NRdXc81
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWkAAAGECAYAAAD0odESAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAiy0lEQVR4nO3df1BU973/8deyblESwBgV0WDguiapkfgriWIuoleRAPVm9YuT/pjRJE3yzUy8kwx2eou5Y2L7rdzeik2n8d7UySSZdKpJpSudQUjdoOL2Bm6uRtPQNBa9EtMIqFVZQcTNcr5/KHtDBNk1wH5gn48ZZjyf8/nsee/M2ZefOefsZ22WZVkCABgpJtIFAAB6R0gDgMEIaQAwGCENAAYjpAHAYIQ0ABiMkAYAgxHSAGAwQhoADEZIA4DBhlRI79+/X8uWLdPEiRNls9lUVlYW9mv85je/0cyZMxUXF6fbb79dP/3pT/u/UADoJ0MqpNva2jRjxgxt2bLlhsZXVlbqO9/5jp566inV1dXp3//93/Wzn/1ML730Uj9XCgD9wzZUF1iy2WzauXOnXC5XsK2jo0PPPfectm/frvPnz2v69On6yU9+ooULF0qSvv3tb8vv92vHjh3BMb/4xS/0b//2bzpx4oRsNtsgvwsAuL4hNZPuy5o1a1RTU6M333xTf/zjH7Vy5Uo9+OCDqq+vl3QlxEeOHNltzKhRo/TXv/5Vn3zySSRKBoDrGjYhfeLECb322mvasWOHMjMzNWXKFH3ve9/T3//93+u1116TJOXk5MjtdquqqkqdnZ36y1/+opKSEklSY2NjJMsHgB6NiHQB/eXDDz9UIBDQHXfc0a29o6NDt956qyTpiSee0LFjx/SNb3xDfr9fCQkJeuaZZ/TCCy8oJmbY/H8FYBgZNiHd2toqu92ugwcPym63d9t38803S7pyHfsnP/mJNm7cqKamJo0bN05VVVWSpL/7u78b9JoBoC/DJqRnzZqlQCCgU6dOKTMz87p97Xa7Jk2aJEnavn27MjIyNG7cuMEoEwDCMqRCurW1VUePHg1uHz9+XIcPH9aYMWN0xx136Dvf+Y5WrVqlkpISzZo1S6dPn1ZVVZXuuece5efn68yZMyotLdXChQt16dKl4DXs6urqCL4rALgOawjZu3evJemav9WrV1uWZVmXL1+21q9fb6WmploOh8NKTk62li9fbv3xj3+0LMuyTp8+bc2bN8+66aabrLi4OGvx4sVWbW1tBN8RAFzfkH1OGgCiAY80AIDBhsQ16c7OTp08eVLx8fF8KxDAsGBZli5cuKCJEyde9xHgIRHSJ0+eVEpKSqTLAIB+9+mnn+q2227rdf+QCOn4+HhJV95MQkJChKvBcOb3+7V7924tXbpUDocj0uVgGPP5fEpJSQnmW2+GREh3XeJISEggpDGg/H6/4uLilJCQQEhjUPR1CZcbhwBgMEIaAAxGSAOAwQhpADAYIQ0ABiOkAcBghDQAGGxIPCcNDIazZ88qMzNTn376qVJSUuT1ejVmzJhIl4UoR0gDkiZMmKDm5ubg9kcffaRbb71VSUlJampqimBliHZc7kDU+2JAz507Vxs2bNDcuXMlSc3NzZowYUIky0OUI6QR1c6ePRsM6AsXLsjr9WrGjBnyer26cOGCpCtBffbs2UiWiShGSCOqZWVlSZLmzZsX/MHiLjfffLPuv//+bv2AwUZII6qdPHlSkvTjH/+4x/0//OEPu/UDBhshjag2ceJESdJzzz3X4/7169d36wcMNkIaUa3rl+Jra2vV2trabV9ra6vee++9bv2AwUZII6qNGTNGSUlJkq78uMQDDzyg999/Xw888EBwMfakpCSel0bEDIlfC/f5fEpMTFRLSwuL/mNAfPk56S48J42BEmquMZMGJDU1Nelvf/ubpk2bpvj4eE2bNk1/+9vfCGhEHN84BK4aM2aMDh8+rIqKCuXl5fHzWTACM2kAMB
ghDQAGI6QBwGCENAAYjJAGAIMR0gBgMEIaAAxGSAOAwQhpADAYIQ0ABgsrpIuLi3XfffcpPj5e48ePl8vl0pEjR/oct2PHDt11110aOXKk0tPTVVFRccMFA0A0CSukq6ur9fTTT6u2tlYej0d+v19Lly5VW1tbr2Peffddfetb39J3v/tdHTp0SC6XSy6XS3V1dV+5eAAY7r7SUqWnT5/W+PHjVV1drQULFvTY5+GHH1ZbW5vKy8uDbfPmzdPMmTP18ssvh3QclirFYPH7/SywhEERaq59pVXwWlpaJOm6C6LX1NSosLCwW1tOTo7Kysp6HdPR0aGOjo7gts/nk3TlA+T3+79CxcD1dZ1fnGcYaKGeYzcc0p2dnXr22Wf1wAMPaPr06b32a2pqCv7yRZe+FlIvLi7Whg0brmnfvXu34uLibrRkIGQejyfSJWCYu3jxYkj9bjikn376adXV1ekPf/jDjb5Er4qKirrNvn0+n1JSUrR06VIud2BA+f1+eTweZWdnc7kDA6rrCkFfbiik16xZo/Lycu3fv1+33Xbbdfv29LNEzc3NmjBhQq9jYmNjFRsbe027w+Hgg4NBwbmGgRbq+RXW0x2WZWnNmjXauXOn9uzZo7S0tD7HZGRkqKqqqlubx+NRRkZGOIcGgKgU1kz66aef1rZt2/S73/1O8fHxwevKiYmJGjVqlCRp1apVmjRpkoqLiyVJzzzzjLKyslRSUqL8/Hy9+eabOnDggLZu3drPbwUAhp+wZtL/8R//oZaWFi1cuFDJycnBv7feeivY58SJE2psbAxuz58/X9u2bdPWrVs1Y8YMlZaWqqys7Lo3GwEAV4Q1kw7lkep9+/Zd07Zy5UqtXLkynEMBAMTaHQBgNEIaAAxGSAOAwQhpADAYIQ0ABiOkAcBghDQAGIyQBgCDEdIAYDBCGgAMRkgDgMEIaQAwGCENAAYjpAHAYIQ0ABiMkAYAgxHSAGAwQhoADEZIA4DBCGngqkAgoOrqau3fv1/V1dUKBAKRLgkgpAFJcrvdcjqdys7O1ubNm5WdnS2n0ym32x3p0hDlCGlEPbfbrYKCAqWnp8vr9Wr79u3yer1KT09XQUEBQY2IslmWZUW6iL74fD4lJiaqpaVFCQkJkS4Hw0ggEJDT6VR6errKysoUCARUUVGhvLw82e12uVwu1dXVqb6+Xna7PdLlYhgJNdeYSSOqeb1eNTQ0aN26dYqJ6f5xiImJUVFRkY4fPy6v1xuhChHtCGlEtcbGRknS9OnTe9zf1d7VDxhshDSiWnJysiSprq6ux/1d7V39gMFGSCOqZWZmKjU1VRs3blRnZ2e3fZ2dnSouLlZaWpoyMzMjVCGiHSGNqGa321VSUqLy8nK5XC7V1taqvb1dtbW1crlcKi8v16ZNm7hpiIgZEekCgEhbsWKFSktLtXbtWi1YsCDYnpaWptLSUq1YsSKC1SHa8QgecFUgENDevXtVWVmp3NxcLVq0iBk0BkyoucZMGrjKbrcrKytLbW1tysrKIqBhBK5JA4DBCGkAMBghDQAGI6QBwGCENAAYjJAGAIMR0gBgMEIaAAxGSAOAwQhpADAYIQ0ABiOkAcBghDQAGIyQBgCDEdIAYDBCGgAMRkgDgMEIaQAwGCENAAYjpAHAYIQ0ABiMkAYAgxHSAGAwQhoADEZIA4DBCGkAMBghDQAGI6QBwGBhh/T+/fu1bNkyTZw4UTabTWVlZdftv2/fPtlstmv+mpqabrRmAIgaYYd0W1ubZsyYoS1btoQ17siRI2psbAz+jR8/PtxDA0DUGRHugNzcXOXm5oZ9oPHjx2v06NFhjwOAaBZ2SN+omTNnqqOjQ9OnT9cLL7ygBx54oNe+HR0d6ujoCG77fD5Jkt/vl9/vH/BaEb26zi/OMwy0UM+xAQ/p5ORkvfzyy7r33nvV0dGhV155RQsXLtR//dd/afbs2T2OKS4u1oYNG65p3717t+Li4ga6ZEAejyfSJWCYu3jxYk
j9bJZlWTd6EJvNpp07d8rlcoU1LisrS5MnT9avfvWrHvf3NJNOSUnRmTNnlJCQcKPlAn3y+/3yeDzKzs6Ww+GIdDk
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXcAAAF1CAYAAAATCKr1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA9pElEQVR4nO3dfVxUdaI/8M8Aw8ggD6EDSDBI4s+HJB9bo1LTAEWvSejWmtfVxHVTLAUfWrz24O4tds2Hq5u59qTubesWhtiiJmwgDwmWLNzEWkLAyGTGyssgD80Mw/f3h5dzncCWAzNOHT7v12tecr7nO9/zPb4OH4/f+c73qIQQAkREpChuru4AERE5HsOdiEiBGO5ERArEcCciUiCGOxGRAjHciYgUiOFORKRAHq7ugLN0dHTg0qVL8PHxgUqlcnV3iIj6TAiBq1evIiQkBG5uP3xvrthwv3TpEsLCwlzdDSIih/vyyy8RGhr6g3UUG+4+Pj4Arv0l+Pr6urg3pGRWqxU5OTmIi4uDWq12dXdIwZqamhAWFibl2w9RbLh3DsX4+voy3MmprFYrtFotfH19Ge50U/RkqJkfqBIRKRDDnYhIgRjuREQKxHAnIlIghjsRkQIx3ImIFIjhTkSkQAx3IiIFkhXu6enpuPPOO+Hj44PAwEAkJCSgqqrKrs53332H5ORkDBo0CAMHDsT8+fNhNBql/f/93/+NhQsXIiwsDF5eXhg1ahR27drV5VgnT57EhAkToNFoEBkZiQMHDvTuDImI+iFZ4V5QUIDk5GSUlpYiNzcXVqsVcXFxaGlpkeqkpKTgr3/9KzIyMlBQUIBLly4hMTFR2l9WVobAwEC88cYbOHfuHP7t3/4NaWlpePHFF6U6dXV1mDNnDqZPn46KigqsXbsWy5cvx4kTJxxwykRE/YDog8uXLwsAoqCgQAghRGNjo1Cr1SIjI0Oq89lnnwkAoqSk5IbtrFq1SkyfPl3a3rhxo7j99tvt6jz88MNi5syZPe6byWQSAITJZOrxe4h6w2KxiKysLGGxWFzdFVI4ObnWp7VlTCYTACAgIADAtbtyq9WKmJgYqc7IkSOh1+tRUlKCu+6664btdLYBACUlJXZtAMDMmTOxdu3aG/bFbDbDbDZL201NTQCurfthtVrlnRj1a62trV2GG39Ic5sZp87WwMe/FAO9ND1+34gRI6DVanvTReqn5GRZr8O9o6MDa9euxT333IMxY8YAAAwGAzw9PeHv729XNygoCAaDodt2Tp06hbfffhtHjx6VygwGA4KCgrq00dTUhLa2Nnh5eXVpJz09HVu2bOlSnpOTw18gkqWmpgbr1q2T/b6tMutv374dw4YNk30c6r9aW1t7XLfX4Z6cnIzKykoUFxf3tglUVlZi3rx5eOaZZxAXF9frdgAgLS0Nqamp0nbn0phxcXFcFZJkaW1txb333tvj+p83mLDh8Kd44cHR+H9D/Hr8Pt65k1ydIxI90atwX716NbKzs1FYWGi3YHxwcDAsFgsaGxvt7t6NRiOCg4Pt2vj0009x//33Y8WKFdi8ebPdvuDgYLsZNp1t+Pr6dnvXDgAajQYaTdf/EqvVai7DSrL4+fnhZz/7WY/re37xLTQlFowZNwHjwgc5sWfU38nJMlmzZYQQWL16NQ4fPoy8vDxERETY7Z84cSLUajU++OADqayqqgr19fWIjo6Wys6dO4fp06djyZIleO6557ocJzo62q4NAMjNzbVrg4iIbkzWnXtycjLefPNNHDlyBD4+PtI4up+fH7y8vODn54ekpCSkpqYiICAAvr6+ePzxxxEdHS19mFpZWYkZM2Zg5syZSE1Nldpwd3eHTqcDADz22GN48cUXsXHjRixbtgx5eXl455137MbliYjoB8iZhgOg29f+/fulOm1tbWLVqlXilltuEVqtVjz44IOioaFB2v/MM89020Z4eLjdsfLz88W4ceOEp6enuO222+yO0ROcCk
k3S/mFb0T4k9mi/MI3ru4KKZycXFMJIYRL/lVxsqamJvj5+cFkMvEDVXKqii++RcLeUmStvItj7uRUcnKNa8sQESkQw52ISIEY7kRECsRwJyJSIIY7EZECMdyJiBSI4U5EpEAMdyIiBWK4ExEpEMOdiEiBGO5ERArEcCciUiCGOxGRAjHciYgUiOFORKRADHciIgXq1QOyiX6K6r5pQYu53eHt1nzdIv3p4eH4XylvjQciBns7vF1SNoY79Qt137Rg+raTTj3GukNnndZ2/vr7GPAkC8Od+oXOO/b/eHgcIgMHOrbtNjOyT5bgX+6LhreXxqFtn7/cjLVvVzjlfxykbAx36lciAwdizK1+Dm3TarXCoAMmhN8CtVrt0LaJeosfqBIRKRDDnYhIgRjuREQKxDF36jdUHk2oa6qC2wDHfqDa3t6OS+2X8NmVzxw+FbKuqRkqjyaHtkn9A8Od+g21/2ls+uh5p7X/0vsvOaVdtf/9AGY7pW1SLoY79RvWxsnYPucRDHPwVMj29nZ8WPwh7rn3HoffuddcbsYTf6lxaJvUPzDcqd8Q7b6I8B2B0YMcPxWyzqMOowJGOXwqZMd3Joj2rx3aJvUP/ECViEiBZIV7eno67rzzTvj4+CAwMBAJCQmoqqqyq/Pdd98hOTkZgwYNwsCBAzF//nwYjUa7Ok888QQmTpwIjUaDcePGdXusTz75BFOmTMGAAQMQFhaGrVu3yjszIqJ+TFa4FxQUIDk5GaWlpcjNzYXVakVcXBxaWlqkOikpKfjrX/+KjIwMFBQU4NKlS0hMTOzS1rJly/Dwww93e5ympibExcUhPDwcZWVleOGFF/Dss8/i5Zdflnl6RET9k6wx9/fff99u+8CBAwgMDERZWRmmTp0Kk8mE1157DW+++SZmzJgBANi/fz9GjRqF0tJS3HXXXQCA3bt3AwC+/vprfPLJJ12O85e//AUWiwWvv/46PD09cfvtt6OiogI7duzAihUrenWiRET9SZ8+UDWZTACAgIAAAEBZWRmsVitiYmKkOiNHjoRer0dJSYkU7v9MSUkJpk6dCk9PT6ls5syZ+MMf/oD/+Z//wS233NLlPWazGWazWdpuaro2N9hqtcJqtco/OVKU9vZ26U9HXw+d7TnjOnNmv+mnR8410Otw7+jowNq1a3HPPfdgzJgxAACDwQBPT0/4+/vb1Q0KCoLBYOhx2waDAREREV3a6NzXXbinp6djy5YtXcpzcnKg1Wp7fGxSpi+bAcADxcXF+MKxMyElubm5Dm/zZvSbfjpaW1t7XLfX4Z6cnIzKykoUFxf3tgmHSktLQ2pqqrTd1NSEsLAwxMXFwdfX14U9ox+Dc5easO1sKe69917cHuLY68FqtSI3NxexsbEOnwrpzH7TT0/niERP9CrcV69ejezsbBQWFiI0NFQqDw4OhsViQWNjo93du9FoRHBwcI/bDw4O7jLDpnP7Ru1oNBpoNF3X0lar1VyGlaQvF3l4eDjtenDGtXYz+k0/HXKuAVmzZYQQWL16NQ4fPoy8vLwuQycTJ06EWq3GBx98IJVVVVWhvr4e0dHRPT5OdHQ0CgsL7caXcnNzMWLEiG6HZIiIyJ6scE9OTsYbb7yBN998Ez4+PjAYDDAYDGhrawMA+Pn5ISkpCampqcjPz0dZWRkeffRRREdH232Yev78eVRUVEjvraioQEVFBSwWCwDgkUcegaenJ5KSknDu3Dm8/fbb2LVrl92wCxER3ZisYZm9e/cCAO677z678v3792Pp0qUAgJ07d8LNzQ3z58+H2WzGzJkz8dJL9gsqLV++HAUFBdL2+PHjAQB1dXUYOnQo/Pz8kJOTg+TkZEycOBGDBw/G008/zWmQREQ9JCvchRD/tM6AAQOwZ88e7Nmz54Z1Tp48+U/bueOOO1BUVCSne0RE9L+4cBj1C21WGwCg8iuTw9tuaTPjzNdA8Bf/45QHZBP1BsOd+oWa/w3J32SeddIRPPCf5z92Ut
uAt4a/qiQPrxjqF+JuvzaFdljgQHip3R3adlWDCesOncX2BVEYMcSxywkD14I9YrC3w9slZWO4U78Q4O2JX/xM75S
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Numeric columns that are inspected with box plots here and later passed\n",
"# through the IQR-based outlier filter.\n",
"numeric_features_with_outliers = [\n",
"    \"Price\",\n",
"    \"Levy\",\n",
"    \"Mileage\",\n",
"    \"Prod. year\",\n",
"]\n",
"\n",
"# One compact figure per feature, so that each column's own value range sets\n",
"# the y-axis. Creating the axes explicitly avoids a fixed-size subplot grid,\n",
"# which would raise as soon as the list held more entries than grid slots.\n",
"for col in numeric_features_with_outliers:\n",
"    fig, ax = plt.subplots(figsize=(4, 5))\n",
"    df.boxplot(column=col, ax=ax)\n",
"    plt.show()"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 48,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество строк до удаления выбросов: 14801\n",
"Количество строк после удаления выбросов: 12597\n"
]
}
],
"source": [
"def remove_outliers(df, column):\n",
"    \"\"\"Return the rows of `df` whose `column` value lies inside Tukey's fences.\n",
"\n",
"    The fences are Q1 - 1.5 * IQR and Q3 + 1.5 * IQR, both inclusive, where the\n",
"    quartiles are taken from `df[column]` itself. The input frame is not\n",
"    modified; a filtered selection is returned.\n",
"    \"\"\"\n",
"    first_quartile = df[column].quantile(0.25)\n",
"    third_quartile = df[column].quantile(0.75)\n",
"    spread = third_quartile - first_quartile\n",
"    within_fences = df[column].between(\n",
"        first_quartile - 1.5 * spread,\n",
"        third_quartile + 1.5 * spread,\n",
"    )\n",
"    return df[within_fences]\n",
"\n",
"print(f\"Количество строк до удаления выбросов: {len(df)}\")\n",
"\n",
"# The filters are applied one after another, so each feature's quartiles are\n",
"# computed on the rows that survived the previous features.\n",
"for column in numeric_features_with_outliers:\n",
"    df = remove_outliers(df, column)\n",
"\n",
"print(f\"Количество строк после удаления выбросов: {len(df)}\")"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 49,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAF1CAYAAAAA3+oBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAhoUlEQVR4nO3df3RU9YH38U9+zEwSwgQQSMySIIoSEQQJJMyp1rLGRJv2LErPYuvZQ1utShMkxkXMc7pQetwnrW5P8UfA7eNZ4zl9WljcR7siItkg8bAMEoKp4Wd/BcHiBBCSIfyYTJLv80c3t47hRxIySZPv+3UOB+fe73znOznDm8vN9SbGGGMEALBK7GAvAAAw8Ig/AFiI+AOAhYg/AFiI+AOAhYg/AFiI+AOAheIHewGDqbOzU8eOHdPIkSMVExMz2MsBgKtmjNGZM2eUnp6u2NhLH99bHf9jx44pIyNjsJcBAP3u6NGjmjBhwiX3Wx3/kSNHSvrzF8nr9Q7yajCchcNhbdmyRfn5+XK5XIO9HAxjwWBQGRkZTt8uxer4d53q8Xq9xB9RFQ6HlZSUJK/XS/wxIK50Kptv+AKAhYg/AFiI+AOAhYg/AFiI+AOAhYg/AFiI+AOAhYg/EGWtra1asGCBli5dqgULFqi1tXWwlwTY/T95AdGWk5Oj2tpa5/HHH3+skSNHas6cOdq1a9cgrgy248gfiJIvhv/zamtrlZOTM8ArAv6C+ANR0Nraesnwd6mtreUUEAYN8QeiYOHChf06DuhvxB+Igvfee69fxwH9jfgDUXD+/Pl+HQf0N+IPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgofjBXgAwFJ07d04HDx7sl7n27Nlz0e1ZWVlKSkrql9cAvoj4A31w8OBBZWdn98tcl5qnrq5Os2bN6pfXAL6I+AN9kJWVpbq6ukvuP3XqlO6+++4rzlNVVaUxY8Zc8jWAaCH+QB8kJSVd8ag8NTVVTU1Nl92fl5fX30sDeoRv+AJREggElJqaetF9qampCgQCA7wi4C+IPxBFgUBAn332mW64KUsxCSN1w01Z+uyzzwg/Bh3xB6JszJgxen3LdmUu/ZVe37L9kuf4gYFE/AHAQsQfACxE/AHAQsQfACxE/AHAQsQfACxE/AHAQsQfACx0VfH/8Y9/rJiYGJWUlDjbLly4oKKiIl1zzTVKTk7WggULut3f5MiRIyosLFRSUpLGjx+vZcuWqb29PWLMtm3bNGvWLHk8Hk2ePFmVlZXdXr+iokLXXXedEhISlJubq127dl3N2wEAa/Q5/rW1tfrXf/1X3XrrrRHbn3jiCb311lvasGGDampqdOzYMd1///3O/o6ODhUWFqqtrU07duzQa6+9psrKSq1YscIZ09jYqMLCQs2bN0/19fUqKSnRww8/rHfffdcZs379epWWlmrlypXas2ePZsyYoYKCAh0/fryvbwkArBFjjDG9fVJra6tmzZqlNWvW6JlnntHMmTO1evVqtbS0aNy4cfrlL3+pb3zjG5L+fN/zm2++WX6/X3PnztU777yjr33tazp27Jhz06uXX35Zy5cv14kTJ+R2u7V8+XK9/fbb2rt3r/OaDzzwgJqbm7V582ZJUm5urubMmaOXXnpJktTZ2amMjAwtWbJETz/99EXXHQqFFAqFnMfBYFAZGRk6efKkvF5vb78MQI/95sgpfeP/7Nbr35utGZnc3gHREwwGNXbsWLW0tFy2a326pXNRUZEKCwuVl5enZ555xtleV1encDgccZvarKwsZWZmOvH3+/2aPn16xN0OCwoKtHjxYu3bt0+33Xab/H5/t1vdFhQUOKeX2traVFdXp7KyMm
d/bGys8vLy5Pf7L7nu8vJyrVq1qtv2LVu28BOTEFVHWyUpXjt37tSf9l5pNNB3586d69G4Xsd/3bp12rNnj2pra7vtCwQCcrvdGjVqVMT2z9++9mK3ue16fKUxwWBQ58+f1+nTp9XR0XHRMZf70XplZWUqLS11Hncd+efn53Pkj6j6zZFTUsNuzZ07lyN/RFUwGOzRuF7F/+jRo1q6dKmqqqqUkJDQp4UNJo/HI4/H0227y+WSy+UahBXBFvHx8c7vfNYQTT39fPXqG751dXU6fvy4Zs2apfj4eMXHx6umpkYvvPCC4uPjlZqaqra2NjU3N0c8r6mpSWlpaZKktLS0blf/dD2+0hiv16vExESNHTtWcXFxFx3TNQcA4NJ6Ff+77rpLDQ0Nqq+vd37Nnj1bDz74oPPfLpdL1dXVznMOHTqkI0eOyOfzSZJ8Pp8aGhoirsqpqqqS1+vV1KlTnTGfn6NrTNccbrdb2dnZEWM6OztVXV3tjAEAXIa5SnfeeadZunSp8/ixxx4zmZmZZuvWrWb37t3G5/MZn8/n7G9vbzfTpk0z+fn5pr6+3mzevNmMGzfOlJWVOWP++Mc/mqSkJLNs2TJz4MABU1FRYeLi4szmzZudMevWrTMej8dUVlaa/fv3m0ceecSMGjXKBAKBHq+9paXFSDItLS1X90UAruDDwyfNxOUbzYeHTw72UjDM9bRr/f4D3H/2s58pNjZWCxYsUCgUUkFBgdasWePsj4uL08aNG7V48WL5fD6NGDFCixYt0o9+9CNnzKRJk/T222/riSee0PPPP68JEybolVdeUUFBgTNm4cKFOnHihFasWKFAIKCZM2dq8+bNl/yZqQCAv+jTdf7DRTAYVEpKyhWvhwWuVv3Hn2n+2p16c/FczZx4zWAvB8NYT7vGvX0AwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAs1Kv4r127Vrfeequ8Xq+8Xq98Pp/eeecdZ/+FCxdUVFSka665RsnJyVqwYIGampoi5jhy5IgKCwuVlJSk8ePHa9myZWpvb48Ys23bNs2aNUsej0eTJ09WZWVlt7VUVFTouuuuU0JCgnJzc7Vr167evBUAsFqv4j9hwgT9+Mc/Vl1dnXbv3q2//du/1d/93d9p3759kqQnnnhCb731ljZs2KCamhodO3ZM999/v/P8jo4OFRYWqq2tTTt27NBrr72myspKrVixwhnT2NiowsJCzZs3T/X19SopKdHDDz+sd9991xmzfv16lZaWauXKldqzZ49mzJihgoICHT9+/Gq/HgBgB3OVRo8ebV555RXT3NxsXC6X2bBhg7PvwIEDRpLx+/3GGGM2bdpkYmNjTSAQcMasXbvWeL1eEwqFjDHGPPXUU+aWW26JeI2FCxeagoIC53FOTo4pKipyHnd0dJj09HRTXl7eq7W3tLQYSaalpaVXzwN668PDJ83E5RvNh4dPDvZSMMz1tGvxff1Lo6OjQxs2bNDZs2fl8/lUV1encDisvLw8Z0xWVpYyMzPl9/s1d+5c+f1+TZ8+Xampqc6YgoICLV68WPv27dNtt90mv98fMUfXmJKSEklSW1ub6urqVFZW5uyPjY1VXl6e/H7/ZdccCoUUCoWcx8FgUJIUDocVDof7+qUArqjr1GZ7ezufNURVTz9fvY5/Q0ODfD6fLly4oOTkZL3xxhuaOnWq6uvr5Xa7NWrUqIjxqampCgQCkqRAIB
AR/q79XfsuNyYYDOr8+fM6ffq0Ojo6Ljrm4MGDl117eXm5Vq1a1W37li1blJSUdOU3D/TR0VZJitfOnTv1p72DvRo
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAF1CAYAAAD8ysHLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAc3klEQVR4nO3de3BU9eH38U8umyUhbEKAZImGgPqUACI3LcmIjkBMiJQRcaxcqggRpzTxN5hWKf1ZheoYRR8vraBjW6F/gCJ1tC1YmsjFVNiARIMhKhUGDAqbCDFZQmDZJef5wydb14BsYJfAl/drJhP2nO+e/R7n8M7x7GETZVmWJQCAMaK7egIAgPAi7ABgGMIOAIYh7ABgGMIOAIYh7ABgGMIOAIaJ7eoJREpbW5sOHDigHj16KCoqqqunAwDnzLIsHTlyROnp6YqOPv15ubFhP3DggDIyMrp6GgAQdvv379fll19+2vXGhr1Hjx6Svv0P4HA4ung2MJnP51NZWZny8vJks9m6ejowmMfjUUZGRqBvp2Ns2NsvvzgcDsKOiPL5fEpISJDD4SDsOC/OdHmZN08BwDCEHQAMQ9gBwDCEHQAMQ9gBwDCEHQAMQ9gBwDCEHQAMQ9gBwDCEHQAMQ9gBwDDGflYMcLZaW1v12WefhTy+5ZhXW2r2qGfv7UqMt4f0nKysLCUkJJztFIEfRNiB7/nss880atSoTj9vcSfGVlVVaeTIkZ1+DSAUhB34nqysLFVVVYU8ftfBJpWsrtGzdwzVwL7JIb8GECmEHfiehISETp1NR39xWPZ/H9Ogq4dpeGavCM4MCA1vngKAYQg7ABiGsAOAYQg7ABiGsAOAYToV9tLSUl133XXq0aOHUlNTNXnyZO3atStozPHjx1VUVKRevXopMTFRt99+u+rr64PG1NXVaeLEiUpISFBqaqoefPBB+f3+oDGbNm3SyJEjZbfbddVVV2n58uVnt4cAcInpVNjfe+89FRUVqbKyUuXl5fL5fMrLy9PRo0cDYx544AH94x//0OrVq/Xee+/pwIEDmjJlSmD9yZMnNXHiRJ04cUJbtmzRX/7yFy1fvlyPPPJIYMzevXs1ceJEjR07VtXV1Zo3b57uvfde/etf/wrDLgOA4axz0NDQYEmy3nvvPcuyLKupqcmy2WzW6tWrA2M+/fRTS5Llcrksy7Ksd955x4qOjrbcbndgzEsvvWQ5HA7L6/ValmVZDz30kDVkyJCg17rzzjut/Pz8kOfW3NxsSbKam5vPev+AUHy075CVOX+N9dG+Q109FRgu1K6d0z9Qam5uliSlpKRI+vafSft8PuXm5gbGZGVlqV+/fnK5XMrOzpbL5dLQoUOVlpYWGJOfn6+5c+eqtrZWI0aMkMvlCtpG+5h58+addi5er1derzfw2OPxSJJ8Pp98Pt+57Cbwg9ovI/r9fo41RFSox9dZh72trU3z5s3T9ddfr6uvvlqS5Ha7FRcXp+Tk5KCxaWlpcrvdgTHfjXr7+vZ1PzTG4/Ho2LFjio+P7zCf0tJSLVq0qMPysrIyPmwJEbW/RZJiVVlZqa92dvVsYLLW1taQxp112IuKirRz5069//77Z7uJsFqwYIFKSkoCjz0ejzIyMpSXlyeHw9GFM4PpdtQ1SjXblZ2drWH9Urp6OjBY+5WIMzmrsBcXF2vNmjWqqKjQ5ZdfHljudDp14sQJNTU1BZ2119fXy+l0BsZs27YtaHvtd818d8z376Spr6+Xw+E45dm6JNntdtntHT8y1WazyWazdX4ngRDFxsYGvnOsIZJCPb46dVeMZVkqLi7WW2+9pQ0bNmjAgAFB60eNGiWbzab169cHlu3atUt1dXXKycmRJOXk5KimpkYNDQ2BMeXl5XI4HBo8eHBgzHe30T6mfRsAgNPr1Bl7UVGRVq5cqb/97W/q0aNH4Jp4UlKS4uPjlZSUpMLCQpWUlCglJUUOh0P333+/cnJylJ2dLUnKy8
vT4MGDddddd2nx4sVyu916+OGHVVRUFDjj/vnPf64XX3xRDz30kGbPnq0NGzbojTfe0Nq1a8O8+wBgoM7caiPplF/Lli0LjDl27Jj1i1/8wurZs6eVkJBg3XbbbdbBgweDtrNv3z6roKDAio+Pt3r37m398pe/tHw+X9CYjRs3WsOHD7fi4uKsK664Iug1QsHtjjhfuN0R50uoXYuyLMvquh8rkePxeJSUlKTm5mbePEVEVX9xWJNfqtTbc7P5PHZEVKhd47NiAMAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwnQ57RUWFJk2apPT0dEVFRentt98OWn/PPfcoKioq6GvChAlBYxobGzVjxgw5HA4lJyersLBQLS0tQWM+/vhj3XDDDerWrZsyMjK0ePHizu8dAFyCOh32o0ePatiwYVqyZMlpx0yYMEEHDx4MfL322mtB62fMmKHa2lqVl5drzZo1qqio0H333RdY7/F4lJeXp8zMTFVVVenpp5/WwoUL9corr3R2ugBwyYnt7BMKCgpUUFDwg2PsdrucTucp13366adat26dPvjgA1177bWSpD/84Q+65ZZb9Mwzzyg9PV0rVqzQiRMn9OqrryouLk5DhgxRdXW1nn322aAfAACAjjod9lBs2rRJqamp6tmzp8aNG6fHH39cvXr1kiS5XC4lJycHoi5Jubm5io6O1tatW3XbbbfJ5XLpxhtvVFxcXGBMfn6+nnrqKX3zzTfq2bNnh9f0er3yer2Bxx6PR5Lk8/nk8/kisZuAJMnv9we+c6whkkI9vsIe9gkTJmjKlCkaMGCA9uzZo9/85jcqKCiQy+VSTEyM3G63UlNTgycRG6uUlBS53W5Jktvt1oABA4LGpKWlBdadKuylpaVatGhRh+VlZWVKSEgI1+4BHexvkaRYVVZW6qudXT0bmKy1tTWkcWEP+9SpUwN/Hjp0qK655hpdeeWV2rRpk8aPHx/ulwtYsGCBSkpKAo89Ho8yMjKUl5cnh8MRsdcFdtQ1SjXblZ2drWH9Urp6OjBY+5WIM4nIpZjvuuKKK9S7d2/t3r1b48ePl9PpVENDQ9AYv9+vxsbGwHV5p9Op+vr6oDHtj0937d5ut8tut3dYbrPZZLPZwrErwCnFxsYGvnOsIZJCPb4ifh/7l19+qcOHD6tv376SpJycHDU1NamqqiowZsOGDWpra9Po0aMDYyoqKoKuJ5WXl2vgwIGnvAwDAPivToe9paVF1dXVqq6uliTt3btX1dXVqqurU0tLix588EFVVlZq3759Wr9+vW699VZdddVVys/PlyQNGjRIEyZM0Jw5c7Rt2zZt3rxZxcXFmjp1qtLT0yVJ06dPV1xcnAoLC1VbW6tVq1bphRdeCLrUAgA4DauTNm7caEnq8DVz5kyrtbXVysvLs/r06WPZbDYrMzPTmjNnjuV2u4O2cfjwYWvatGlWYmKi5XA4rFmzZllHjhwJGrNjxw5rzJgxlt1uty677DLrySef7NQ8m5ubLUlWc3NzZ3cR6JSP9h2yMuevsT7ad6irpwLDhdq1KMuyrC78uRIxHo9HSUlJam5u5s1TRFT1F4c1+aVKvT03W8Mze3X1dGCwULvGZ8UAgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOw
AYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrA
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAF1CAYAAADyT33hAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzhUlEQVR4nO3de1hTd7oH+i+XJJBiAAG5tIAXdrVe6oVWzIy1tVKiZXy8tHu07XPGtiqjxdmlWKrMPvUyu+fQsdO9p3UYOz372eI+01ZLq52pVQqDgtsatKBRRGRXhdJWArVKIgIhwHv+6GE9RpaaKBGl38/z5MG1fm9+602M62uyFlk+IiIgIiK6gm9/N0BERLcnBgQREaliQBARkSoGBBERqWJAEBGRKgYEERGpYkAQEZEq//5u4HbW3d2Ns2fPYtCgQfDx8envdoiIbpqI4OLFi4iJiYGv77XfIzAgruHs2bOIjY3t7zaIiPrcN998g3vuueeaNQyIaxg0aBCAH59Ig8HQz93QQOZ0OlFYWIiUlBRoNJr+bocGMLvdjtjYWGX/di0MiGvo+VjJYDAwIMirnE4n9Ho9DAYDA4JuCXc+NudBaiIiUsWAICIiVQwIIiJSxYAgIiJVDAgiIlLFgCAiIlUeBcSmTZtw//33K6d9Go1G7N69Wxl/5JFH4OPj43JbtmyZyxz19fVITU2FXq/HkCFDkJWVhc7OTpeakpISTJo0CTqdDgkJCcjLy+vVS25uLoYOHYqAgAAkJSXh0KFDLuPt7e1IT09HWFgYgoKC8MQTT6CxsdGTh0tE9JPmUUDcc889eP3111FRUYHy8nI8+uijmDNnDqqqqpSapUuXoqGhQblt2LBBGevq6kJqaio6Ojpw4MABbNmyBXl5eVizZo1SU1tbi9TUVEyfPh0WiwUZGRlYsmQJPv/8c6Vm27ZtyMzMxNq1a3H48GGMHz8eJpMJTU1NSs1LL72ETz/9FPn5+SgtLcXZs2cxf/78G3qSiLypq6sLpaWl2LdvH0pLS9HV1dXfLRH9SG5SaGio/Od//qeIiDz88MPy4osvXrV2165d4uvrK1arVVm3adMmMRgM4nA4RETklVdekTFjxrjcb8GCBWIymZTlyZMnS3p6urLc1dUlMTExkpOTIyIizc3NotFoJD8/X6mprq4WAGI2m91+bDabTQCIzWZz+z5Envj4449l6NChAkC5DR06VD7++OP+bo0GKE/2azf8m9RdXV3Iz8/HpUuXYDQalfXvvfce/vrXvyIqKgqzZ8/Gq6++Cr1eDwAwm80YN24cIiMjlXqTyYTly5ejqqoKEydOhNlsRnJyssu2TCYTMjIyAAAdHR2oqKhAdna2Mu7r64vk5GSYzWYAQEVFBZxOp8s8o0aNQlxcHMxmM6ZMmaL6mBwOBxwOh7Jst9sB/Phbrk6n80aeJqKr2rFjBxYuXAg/Pz+X9d9++y2efPJJbN26FfPmzeun7mig8mRf5nFAVFZWwmg0or29HUFBQdixYwdGjx4NAHj66acRHx+PmJgYHDt2DKtWrUJNTQ22b98OALBarS7hAEBZtlqt16yx2+1oa2vDhQsX0NXVpVpz8uRJZQ6tVouQkJBeNT3bUZOTk4P169f3Wl9YWKiEHFFf6OrqQlpaGkSk1zG4nuW0tDT4+/v3ChCim9Ha2up2rccBMXLkSFgsFthsNnz00UdYtGgRSktLMXr0aKSlpSl148aNQ3R0NGbMmIHTp09jxIgRnm7qlsvOzkZmZqay3POlVikpKfwuJupTe/fuhc1mu2aNzWZDUFAQpk+ffou6op+Cnk9G3OFxQGi1WiQkJAAAEhMT8eWXX+Ktt97CX/7yl161SUlJAIBTp05hxIgRiIqK6nW2Uc+ZRVFRUcrPK882amxshMFgQGBgIPz8/ODn56dac/kcHR0daG5udnkXcXmNGp1OB51O12u9RqPhF6hRn/rss8/crktJSfFyN/RT4sm+7K
Z/D6K7u9vlc/vLWSwWAEB0dDQAwGg0orKy0uVso6KiIhgMBuVjKqPRiOLiYpd5ioqKlOMcWq0WiYmJLjXd3d0oLi5WahITE6HRaFxqampqUF9f73K8hKi/bN68uU/riLzCk6Pfq1evltLSUqmtrZVjx47J6tWrxcfHRwoLC+XUqVPyu9/9TsrLy6W2tlb+9re/yfDhw2XatGnK/Ts7O2Xs2LGSkpIiFotFCgoKJCIiQrKzs5WaM2fOiF6vl6ysLKmurpbc3Fzx8/OTgoICpWbr1q2i0+kkLy9PTpw4IWlpaRISEuJydtSyZcskLi5O9uzZI+Xl5WI0GsVoNHrycHkWE3mNr6+vctZSWFiYDB8+XEJDQ2X48OESFhamjPn6+vZ3qzTAeLJf8yggnn/+eYmPjxetVisREREyY8YMKSwsFBGR+vp6mTZtmgwePFh0Op0kJCRIVlZWrybq6upk1qxZEhgYKOHh4bJy5UpxOp0uNXv37pUJEyaIVquV4cOHy+bNm3v1snHjRomLixOtViuTJ0+WsrIyl/G2tjZ54YUXJDQ0VPR6vcybN08aGho8ebgMCPIaPz8/l1Nbr3bz8/Pr71ZpgPFkv+YjInLL37bcIex2O4KDg2Gz2XiQmvpUWFgYzp8/f926wYMH44cffrgFHdFPhSf7NX4XE1E/iIiI6NM6Im9gQBD1A3fPRffknHWivsaAIOoHDAi6EzAgiPrB1U4Nv9E6Im9gQBD1gyu/XuNm64i8gQFB1A/4DoLuBAwIon7g7tnlPAud+hMDgoiIVDEgiIhIFQOCiIhUMSCIiEgVA4KoH7h7lTheTY76EwOCqB+oXZjqZuqIvMHjK8oRkXtaW1uV66TfjMOHD191bNSoUbxeOnkNA4LIS06ePInExMSbmqO1tfWac1RUVGDSpEk3tQ2iq2FAEHnJqFGjUFFRoTq2ZcsWvP3228qyv0aDzi6Bv58POp1OZf2//Mu/YNGiRdfcBpG38IJB18ALBpG3dHR0ICAg4Jq/Ke3j44P29nZotdpb2BkNdLxgENFtTqvV4uWXX75mzcsvv8xwoH7Fj5iI+smGDRsAAG+++Sa6u7uV9b6+vli5cqUyTtRf+BHTNfAjJroVOjo68K+vbcBfPjuIX6cm4f/6P1/hOwfyGk/2a3wHQdTPtFotnlm8HPkdE/HM4ikMB7pt8BgEERGpYkAQEZEqBgQREaliQBARkSoGBBERqWJAEBGRKgYEERGpYkAQEZEqBgQREaliQBARkSoGBBERqWJAEBGRKo8CYtOmTbj//vthMBhgMBhgNBqxe/duZby9vR3p6ekICwtDUFAQnnjiCTQ2NrrMUV9fj9TUVOj1egwZMgRZWVno7Ox0qSkpKcGkSZOg0+mQkJCAvLy8Xr3k5uZi6NChCAgIQFJSEg4dOuQy7k4vRER0dR4FxD333IPXX38dFRUVKC8vx6OPPoo5c+agqqoKAPDSSy/h008/RX5+PkpLS3H27FnMnz9fuX9XVxdSU1PR0dGBAwcOYMuWLcjLy8OaNWuUmtraWqSmpmL69OmwWCzIyMjAkiVL8Pnnnys127ZtQ2ZmJtauXYvDhw9j/PjxMJlMaGpqUmqu1wsREV3bTV8PYvDgwXjjjTfw5JNPIiIiAu+//z6efPJJAD9etP2+++6D2WzGlClTsHv3bvziF7/A2bNnERkZCQB45513sGrVKnz//ffQarVYtWoVPvvsMxw/flzZxsKFC9Hc3IyCggIAQFJSEh588EH86U9/AgB0d3cjNjYWv/nNb7B69WrYbLbr9qLG4XDA4XAoy3a7HbGxsTh37hyvB0FedbT+PJ78f8rx0dIHMD5ucH+3QwOY3W5HeHi4d68H0dXVhfz8fFy6dAlGoxEVFRVwOp1ITk5WakaNGoW4uDhlp2w2mzFu3DglHADAZDJh+fLlqKqqwsSJE2E2m13m6KnJyMgA8OPFVSoqKpCdna2M+/r6Ijk5GWazGQDc6kVNTk4O1q
9f32t9YWEh9Hq9508SkZu+aQEAf5SVleG749erJrpxra2tbtd6HBCVlZUwGo1ob29HUFAQduzYgdGjR8NisUCr1SI
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAF1CAYAAAD8ysHLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAi40lEQVR4nO3df3RU5YH/8U9iZsaEZCYmkgQk0LS0/LQCEWWKpYCY7ErXRtKzfHW3CwWOW5hoQ1C2dLsq7ek3rS61dlV6PGrCqeWsS7fgglZJ+REWSfyRNZVQTFeKxtNkgrpmRgjMTMzz/aPfzDqEyEx+EHjyfp3Dwbn3mWeeyxneXO8MlyRjjBEAwBrJw70AAMDgIuwAYBnCDgCWIewAYBnCDgCWIewAYBnCDgCWSRnuBQyV7u5utba2KiMjQ0lJScO9HAAYMGOMPvroI40dO1bJyX2fl1sb9tbWVuXn5w/3MgBg0L377rsaN25cn/utDXtGRoakP/8CuN3uYV4NbBaJRLR7924VFRXJ4XAM93JgsWAwqPz8/Gjf+mJt2Hsuv7jdbsKOIRWJRJSWlia3203YcUGc7/IyH54CgGUIOwBYhrADgGUIOwBYhrADgGUIOwBYhrADgGUIOwBYJqGwV1ZWavbs2crIyFBOTo5KSkrU3NwcM+bMmTPy+XzKzs5Wenq6SktL1d7eHt3/u9/9Trfddpvy8/OVmpqqKVOm6OGHH+71Wvv379esWbPkcrk0ceJEVVdX9+8IAWCESSjstbW18vl8qq+vV01NjSKRiIqKinTq1KnomLVr12rnzp3atm2bamtr1draqiVLlkT3NzQ0KCcnR08//bSOHDmif/zHf9SGDRv0yCOPRMccP35cixcv1oIFC9TY2Kjy8nKtWrVKL7744iAcMgBYzgzAiRMnjCRTW1trjDGmo6PDOBwOs23btuiYo0ePGkmmrq6uz3nWrFljFixYEH28fv16M23atJgxS5cuNcXFxXGvLRAIGEkmEAjE/RygP8LhsNmxY4cJh8PDvRRYLt6uDeheMYFAQJKUlZUl6c9n45FIRIsWLYqOmTx5ssaPH6+6ujrNmTOnz3l65pCkurq6mDkkqbi4WOXl5X2uJRQKKRQKRR8Hg0FJf76PRyQSSezAMKJ1dnb2usT4aU6eDunQ4WPKyKxXeqorrudMmjRJaWlp/V0iRqh4W9bvsHd3d6u8vFxz587V9OnTJUl+v19Op1OZmZkxY3Nzc+X3+885z6FDh/TMM8/oueeei27z+/3Kzc3tNUcwGNTp06eVmpraa57Kykpt3Lix1/bdu3fzGwgJOXbsmNatW5fw8x5IYOymTZv0uc99LuHXwMjW2dkZ17h+h93n86mpqUkHDx7s7xRqamrS1772Nd13330qKirq9zyStGHDBlVUVEQf99zesqioiLs7IiGdnZ264YYb4h7/h7aA7tn+ez1461R9YYwnrudwxo7+6LkScT79CntZWZl27dqlAwcOxNzsPS8vT+FwWB0dHTFn7e3t7crLy4uZ4/e//71uvPFG3XHHHfre974Xsy8vLy/mmzQ9c7jd7nOerUuSy+WSy9X7f4MdDge3UkVCPB6PrrvuurjHO9/5QK66sKbPmKUZE7KHcGUY6eJtWULfijHGqKysTNu3b9fevXtVUFAQs7+wsFAOh0N79uyJbmtublZLS4u8Xm9025EjR7RgwQItW7ZMP/zhD3u9jtfrjZlDkmpqamLmAACcW0Jn7D6fT1u3btWzzz6rjIyM6HVzj8ej1NRUeTwerVy5UhUVFcrKypLb7dadd94pr9cb/eC0qalJCxcuVHFxsSoqKqJzXHbZZRo9erQk6Vvf+pYeeeQRrV+/XitWrNDevXv1b//2bzHX4QEAfUjkqzaSzvmjqqoqOub06dNmzZo15oorrjBpaWnm1ltvNW1tbdH999133znnmDBhQsxr7du3z8yYMcM4nU7z2c9+NuY14sHXHXGhvP
72+2bCP+wyr7/9/nAvBZaLt2tJxhgzLH+iDLFgMCiPx6NAIMCHpxhSje98oJLN9dqxeg7X2DGk4u0a94oBAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMskFPbKykrNnj1bGRkZysnJUUlJiZqbm2PGnDlzRj6fT9nZ2UpPT1dpaana29tjxtx1110qLCyUy+XSjBkzer3O22+/raSkpF4/6uvrEz9CABhhEgp7bW2tfD6f6uvrVVNTo0gkoqKiIp06dSo6Zu3atdq5c6e2bdum2tpatba2asmSJb3mWrFihZYuXfqpr/fb3/5WbW1t0R+FhYWJLBcARqSURAa/8MILMY+rq6uVk5OjhoYGzZs3T4FAQE8++aS2bt2qhQsXSpKqqqo0ZcoU1dfXa86cOZKkn/3sZ5Kk9957T2+88Uafr5edna28vLyEDggARrqEwn62QCAgScrKypIkNTQ0KBKJaNGiRdExkydP1vjx41VXVxcNe7xuueUWnTlzRl/4whe0fv163XLLLX2ODYVCCoVC0cfBYFCSFIlEFIlEEnpdIBFdXV3Rn3mvYSjF+/7qd9i7u7tVXl6uuXPnavr06ZIkv98vp9OpzMzMmLG5ubny+/1xz52enq5NmzZp7ty5Sk5O1r//+7+rpKREO3bs6DPulZWV2rhxY6/tu3fvVlpaWvwHBiTo3ZOSlKL6+nr9qWm4VwObdXZ2xjWu32H3+XxqamrSwYMH+ztFn6688kpVVFREH8+ePVutra168MEH+wz7hg0bYp4TDAaVn5+voqIiud3uQV8j0ON3Lf8jHX5Nc+bM0TXjs4Z7ObBYz5WI8+lX2MvKyrRr1y4dOHBA48aNi27Py8tTOBxWR0dHzFl7e3v7gK+VX3/99aqpqelzv8vlksvl6rXd4XDI4XAM6LWBT5OSkhL9mfcahlK876+EvhVjjFFZWZm2b9+uvXv3qqCgIGZ/YWGhHA6H9uzZE93W3NyslpYWeb3eRF6ql8bGRo0ZM2ZAcwDASJDQGbvP59PWrVv17LPPKiMjI3rd3OPxKDU1VR6PRytXrlRFRYWysrLkdrt15513yuv1xnxw+tZbb+nkyZPy+/06ffq0GhsbJUlTp06V0+nUli1b5HQ6NXPmTEnSr3/9az311FN64oknBumwAcBeCYV98+bNkqT58+fHbK+qqtLy5cslSQ899JCSk5NVWlqqUCik4uJiPfbYYzHjV61apdra2ujjnoAfP35cn/nMZyRJP/jBD/TOO+8oJSVFkydP1jPPPKOvf/3riSwXAEakJGOMGe5FDIVgMCiPx6NAIMCHpxhSje98oJLN9dqxeo5mTMge7uXAYvF2jXvFAIBlCDsAWIawA4BlCDsAWIawA4BlCDsAWGZAd3cELiXH3z+lU6GuQZ/32Hunoj/33F5gMI1ypajgylGDPi/sRdgxIhx//5QW/PP+IX2Ndb86PGRz77t7PnFH3Ag7RoSeM/WfLp2hiTnpgzv36ZB27a/TV+d7NSq1943oBuKtEydV/kzjkPyfBuxF2DGiTMxJ1/SrPIM6ZyQSkX+0NGvCFdzdERcFPjwFAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAM
sQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMsQdgCwDGEHAMukDPcCgAslKSWo48FmJV+ePqjzdnV1qbWrVUf/56h
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"i = 1\n",
"for col in numeric_features_with_outliers:\n",
" plt.figure(figsize=(4, 30))\n",
" plt.subplot(6, 1, i)\n",
" df.boxplot(column=col)\n",
" i += 1"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Формирование конвейера для классификации данных"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 50,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy int64\n",
"Manufacturer object\n",
"Model object\n",
"Prod. year int64\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
"Engine volume float64\n",
"Mileage int64\n",
"Cylinders int64\n",
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"dtype: object"
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 50,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 51,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [],
"source": [
"columns_to_drop = [\n",
" \"Model\",\n",
2024-12-14 10:14:47 +04:00
" # \"Manufacturer\",\n",
2024-12-12 23:48:52 +04:00
" \"Color\",\n",
" \"Doors\",\n",
" \"Cylinders\",\n",
" \"Mileage\",\n",
"]\n",
"\n",
"num_columns = [\n",
" column\n",
" for column in df.columns\n",
" if column not in columns_to_drop and df[column].dtype != \"object\"\n",
"]\n",
"\n",
"cat_columns = [\n",
" column\n",
" for column in df.columns\n",
2024-12-07 13:00:14 +04:00
" if column not in columns_to_drop and df[column].dtype == \"object\"\n",
"]\n",
"\n",
2024-12-12 23:48:52 +04:00
"cat_cols_for_one_hot_enc = [\n",
" \"Leather interior\",\n",
" \"Category\",\n",
" \"Fuel type\",\n",
" \"Gear box type\",\n",
" \"Drive wheels\",\n",
" # \"Doors\",\n",
" \"Wheel\",\n",
2024-12-14 10:14:47 +04:00
" \"Manufacturer\",\n",
2024-12-12 23:48:52 +04:00
"]\n",
"\n",
"cat_cols_for_num_enc = [\n",
" # \"Model\",\n",
" # \"Manufacturer\",\n",
" # \"Color\",\n",
"]\n",
"\n",
2024-12-07 13:00:14 +04:00
"num_imputer = SimpleImputer(strategy=\"median\")\n",
"num_scaler = StandardScaler()\n",
"preprocessing_num = Pipeline(\n",
" [\n",
" (\"imputer\", num_imputer),\n",
" (\"scaler\", num_scaler),\n",
" ]\n",
")\n",
"\n",
"cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
2024-12-12 23:48:52 +04:00
"cat_one_hot_encoder = OneHotEncoder(\n",
" handle_unknown=\"ignore\", sparse_output=False, drop=\"first\"\n",
")\n",
"preprocessing_one_hot = Pipeline(\n",
2024-12-07 13:00:14 +04:00
" [\n",
" (\"imputer\", cat_imputer),\n",
2024-12-12 23:48:52 +04:00
" (\"encoder\", cat_one_hot_encoder),\n",
2024-12-07 13:00:14 +04:00
" ]\n",
")\n",
"\n",
2024-12-12 23:48:52 +04:00
"preprocessing_label_enc = Pipeline(\n",
" [\n",
" (\"imputer\", cat_imputer),\n",
" (\"encoder\", OrdinalEncoder()),\n",
" ]\n",
2024-12-07 13:00:14 +04:00
")\n",
"\n",
2024-12-12 23:48:52 +04:00
"features_preprocessing = ColumnTransformer(\n",
2024-12-07 13:00:14 +04:00
" verbose_feature_names_out=False,\n",
" transformers=[\n",
2024-12-12 23:48:52 +04:00
" (\"prepocessing_one_hot\", preprocessing_one_hot, cat_cols_for_one_hot_enc),\n",
" (\"prepocessing_label_enc\", preprocessing_label_enc, cat_cols_for_num_enc),\n",
" (\"prepocessing_num\", preprocessing_num, num_columns),\n",
2024-12-07 13:00:14 +04:00
" ],\n",
" remainder=\"passthrough\",\n",
")\n",
"\n",
"drop_columns = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"drop_columns\", \"drop\", columns_to_drop),\n",
" ],\n",
" remainder=\"passthrough\",\n",
")\n",
"\n",
2024-12-12 23:48:52 +04:00
"pipeline_end = Pipeline(\n",
" [\n",
" (\"features_preprocessing\", features_preprocessing),\n",
" (\"drop_columns\", drop_columns),\n",
" ]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Демонстрация работы конвейера для предобработки данных"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 52,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Leather interior_Yes</th>\n",
" <th>Category_Coupe</th>\n",
" <th>Category_Goods wagon</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>...</th>\n",
2024-12-14 10:14:47 +04:00
" <th>Manufacturer_UAZ</th>\n",
" <th>Manufacturer_VAZ</th>\n",
" <th>Manufacturer_VOLKSWAGEN</th>\n",
" <th>Manufacturer_VOLVO</th>\n",
" <th>Manufacturer_სხვა</th>\n",
2024-12-12 23:48:52 +04:00
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Prod. year</th>\n",
" <th>Engine volume</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.332733</td>\n",
" <td>1.851925</td>\n",
" <td>-0.224219</td>\n",
" <td>1.849443</td>\n",
" <td>1.353782</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.029023</td>\n",
" <td>1.024150</td>\n",
" <td>-0.007434</td>\n",
" <td>1.162057</td>\n",
" <td>0.340316</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-0.781060</td>\n",
" <td>-1.187596</td>\n",
" <td>-1.091356</td>\n",
" <td>-1.175055</td>\n",
" <td>-1.179883</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-1.229294</td>\n",
" <td>0.685218</td>\n",
" <td>-0.007434</td>\n",
" <td>0.474671</td>\n",
" <td>-1.686616</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.480485</td>\n",
" <td>-0.218599</td>\n",
" <td>0.642919</td>\n",
" <td>-1.175055</td>\n",
" <td>-0.673150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19225</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.752100</td>\n",
" <td>1.217515</td>\n",
" <td>-0.874572</td>\n",
" <td>-0.625146</td>\n",
" <td>-0.673150</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19226</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-0.838888</td>\n",
" <td>-1.187596</td>\n",
" <td>-1.741709</td>\n",
" <td>1.162057</td>\n",
" <td>-1.686616</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19232</th>\n",
" <td>1.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>-0.781060</td>\n",
" <td>-1.187596</td>\n",
" <td>-2.608847</td>\n",
" <td>-0.212715</td>\n",
" <td>-0.419784</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19233</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.115718</td>\n",
" <td>0.617867</td>\n",
" <td>-0.007434</td>\n",
" <td>0.337194</td>\n",
" <td>0.340316</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19234</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.845956</td>\n",
" <td>0.628730</td>\n",
" <td>-0.224219</td>\n",
" <td>-0.212715</td>\n",
" <td>-0.673150</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-12-14 10:14:47 +04:00
"<p>12597 rows × 83 columns</p>\n",
2024-12-12 23:48:52 +04:00
"</div>"
],
"text/plain": [
" Leather interior_Yes Category_Coupe Category_Goods wagon \\\n",
"0 1.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 \n",
"3 1.0 0.0 0.0 \n",
"4 1.0 0.0 0.0 \n",
"... ... ... ... \n",
"19225 0.0 0.0 1.0 \n",
"19226 1.0 0.0 0.0 \n",
"19232 1.0 1.0 0.0 \n",
"19233 1.0 0.0 0.0 \n",
"19234 1.0 0.0 0.0 \n",
"\n",
" Category_Hatchback Category_Jeep Category_Limousine \\\n",
"0 0.0 1.0 0.0 \n",
"1 0.0 1.0 0.0 \n",
"2 1.0 0.0 0.0 \n",
"3 0.0 1.0 0.0 \n",
"4 1.0 0.0 0.0 \n",
"... ... ... ... \n",
"19225 0.0 0.0 0.0 \n",
"19226 0.0 0.0 0.0 \n",
"19232 0.0 0.0 0.0 \n",
"19233 0.0 0.0 0.0 \n",
"19234 0.0 1.0 0.0 \n",
"\n",
" Category_Microbus Category_Minivan Category_Pickup Category_Sedan \\\n",
"0 0.0 0.0 0.0 0.0 \n",
"1 0.0 0.0 0.0 0.0 \n",
"2 0.0 0.0 0.0 0.0 \n",
"3 0.0 0.0 0.0 0.0 \n",
"4 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"19225 0.0 0.0 0.0 0.0 \n",
"19226 0.0 0.0 0.0 1.0 \n",
"19232 0.0 0.0 0.0 0.0 \n",
"19233 0.0 0.0 0.0 1.0 \n",
"19234 0.0 0.0 0.0 0.0 \n",
"\n",
2024-12-14 10:14:47 +04:00
" ... Manufacturer_UAZ Manufacturer_VAZ Manufacturer_VOLKSWAGEN \\\n",
"0 ... 0.0 0.0 0.0 \n",
"1 ... 0.0 0.0 0.0 \n",
"2 ... 0.0 0.0 0.0 \n",
"3 ... 0.0 0.0 0.0 \n",
"4 ... 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"19225 ... 0.0 0.0 0.0 \n",
"19226 ... 0.0 0.0 0.0 \n",
"19232 ... 0.0 0.0 0.0 \n",
"19233 ... 0.0 0.0 0.0 \n",
"19234 ... 0.0 0.0 0.0 \n",
2024-12-12 23:48:52 +04:00
"\n",
2024-12-14 10:14:47 +04:00
" Manufacturer_VOLVO Manufacturer_სხვა Price Levy Prod. year \\\n",
"0 0.0 0.0 -0.332733 1.851925 -0.224219 \n",
"1 0.0 0.0 -0.029023 1.024150 -0.007434 \n",
"2 0.0 0.0 -0.781060 -1.187596 -1.091356 \n",
"3 0.0 0.0 -1.229294 0.685218 -0.007434 \n",
"4 0.0 0.0 -0.480485 -0.218599 0.642919 \n",
"... ... ... ... ... ... \n",
"19225 0.0 0.0 -0.752100 1.217515 -0.874572 \n",
"19226 0.0 0.0 -0.838888 -1.187596 -1.741709 \n",
"19232 0.0 0.0 -0.781060 -1.187596 -2.608847 \n",
"19233 0.0 0.0 -0.115718 0.617867 -0.007434 \n",
"19234 0.0 0.0 0.845956 0.628730 -0.224219 \n",
2024-12-12 23:48:52 +04:00
"\n",
2024-12-14 10:14:47 +04:00
" Engine volume Airbags \n",
"0 1.849443 1.353782 \n",
"1 1.162057 0.340316 \n",
"2 -1.175055 -1.179883 \n",
"3 0.474671 -1.686616 \n",
"4 -1.175055 -0.673150 \n",
"... ... ... \n",
"19225 -0.625146 -0.673150 \n",
"19226 1.162057 -1.686616 \n",
"19232 -0.212715 -0.419784 \n",
"19233 0.337194 0.340316 \n",
"19234 -0.212715 -0.673150 \n",
2024-12-12 23:48:52 +04:00
"\n",
2024-12-14 10:14:47 +04:00
"[12597 rows x 83 columns]"
2024-12-12 23:48:52 +04:00
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 52,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preprocessing_result = pipeline_end.fit_transform(df)\n",
"df = pd.DataFrame(\n",
" preprocessing_result,\n",
" columns=pipeline_end.get_feature_names_out(),\n",
2024-12-07 13:00:14 +04:00
")\n",
"\n",
2024-12-12 23:48:52 +04:00
"df"
2024-12-07 13:00:14 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-12-12 23:48:52 +04:00
"#### Разбиение на выборки\n"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 53,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размеры выборок:\n",
"Обучающая выборка: 8817 записей\n",
"Тестовая выборка: 3780 записей\n"
]
}
],
"source": [
"train_df, test_df = train_test_split(\n",
" df, test_size=0.3, random_state=42\n",
")\n",
"\n",
"print(\"Размеры выборок:\")\n",
"print(f\"Обучающая выборка: {train_df.shape[0]} записей\")\n",
"print(f\"Тестовая выборка: {test_df.shape[0]} записей\")"
2024-12-07 13:00:14 +04:00
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 54,
2024-12-07 13:00:14 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
2024-12-12 23:48:52 +04:00
" <th>Leather interior_Yes</th>\n",
" <th>Category_Coupe</th>\n",
" <th>Category_Goods wagon</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>...</th>\n",
2024-12-14 10:14:47 +04:00
" <th>Manufacturer_TOYOTA</th>\n",
" <th>Manufacturer_UAZ</th>\n",
" <th>Manufacturer_VAZ</th>\n",
" <th>Manufacturer_VOLKSWAGEN</th>\n",
" <th>Manufacturer_VOLVO</th>\n",
" <th>Manufacturer_სხვა</th>\n",
2024-12-07 13:00:14 +04:00
" <th>Levy</th>\n",
2024-12-12 23:48:52 +04:00
" <th>Prod. year</th>\n",
2024-12-07 13:00:14 +04:00
" <th>Engine volume</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>15146</th>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
" <td>...</td>\n",
2024-12-14 10:14:47 +04:00
" <td>1.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>-1.187596</td>\n",
" <td>1.943625</td>\n",
" <td>-0.487669</td>\n",
" <td>-0.673150</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>14145</th>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>1.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-1.187596</td>\n",
" <td>-0.874572</td>\n",
" <td>-0.900100</td>\n",
" <td>-1.179883</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>8943</th>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
" <td>...</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.081225</td>\n",
" <td>0.642919</td>\n",
" <td>-0.487669</td>\n",
" <td>0.847049</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>17889</th>\n",
" <td>1.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-1.187596</td>\n",
" <td>-1.524925</td>\n",
" <td>0.474671</td>\n",
" <td>-0.419784</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>9515</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>...</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.695495</td>\n",
" <td>1.510056</td>\n",
" <td>0.474671</td>\n",
" <td>-0.673150</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>18201</th>\n",
2024-12-07 13:00:14 +04:00
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
" <td>...</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-1.187596</td>\n",
" <td>-0.874572</td>\n",
" <td>0.337194</td>\n",
" <td>-0.673150</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>7436</th>\n",
" <td>1.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
" <td>...</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.474472</td>\n",
" <td>0.859703</td>\n",
" <td>-0.212715</td>\n",
" <td>1.353782</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>7728</th>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>...</td>\n",
2024-12-14 10:14:47 +04:00
" <td>1.0</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.643938</td>\n",
" <td>-0.657787</td>\n",
" <td>-0.900100</td>\n",
" <td>0.340316</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>1136</th>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>...</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>-1.187596</td>\n",
" <td>1.076487</td>\n",
" <td>-0.762623</td>\n",
" <td>0.340316</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" <tr>\n",
2024-12-12 23:48:52 +04:00
" <th>10640</th>\n",
" <td>1.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>1.0</td>\n",
" <td>...</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-14 10:14:47 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-07 13:00:14 +04:00
" <td>0.0</td>\n",
" <td>0.0</td>\n",
2024-12-12 23:48:52 +04:00
" <td>0.444055</td>\n",
" <td>0.426134</td>\n",
" <td>0.337194</td>\n",
" <td>0.340316</td>\n",
2024-12-07 13:00:14 +04:00
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-12-14 10:14:47 +04:00
"<p>8817 rows × 82 columns</p>\n",
2024-12-07 13:00:14 +04:00
"</div>"
],
"text/plain": [
2024-12-12 23:48:52 +04:00
" Leather interior_Yes Category_Coupe Category_Goods wagon \\\n",
"15146 0.0 0.0 0.0 \n",
"14145 0.0 0.0 0.0 \n",
"8943 0.0 0.0 0.0 \n",
"17889 1.0 0.0 0.0 \n",
"9515 1.0 0.0 0.0 \n",
"... ... ... ... \n",
"18201 1.0 0.0 0.0 \n",
"7436 1.0 0.0 0.0 \n",
"7728 0.0 0.0 0.0 \n",
"1136 0.0 0.0 0.0 \n",
"10640 1.0 0.0 0.0 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
" Category_Hatchback Category_Jeep Category_Limousine \\\n",
"15146 0.0 0.0 0.0 \n",
"14145 0.0 0.0 0.0 \n",
"8943 0.0 0.0 0.0 \n",
"17889 0.0 1.0 0.0 \n",
"9515 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"18201 0.0 0.0 0.0 \n",
"7436 0.0 0.0 0.0 \n",
"7728 1.0 0.0 0.0 \n",
"1136 0.0 0.0 0.0 \n",
"10640 0.0 0.0 0.0 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
" Category_Microbus Category_Minivan Category_Pickup Category_Sedan \\\n",
"15146 0.0 0.0 0.0 1.0 \n",
"14145 0.0 0.0 0.0 1.0 \n",
"8943 0.0 0.0 0.0 1.0 \n",
"17889 0.0 0.0 0.0 0.0 \n",
"9515 0.0 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"18201 0.0 0.0 0.0 1.0 \n",
"7436 0.0 0.0 0.0 1.0 \n",
"7728 0.0 0.0 0.0 0.0 \n",
"1136 0.0 0.0 0.0 1.0 \n",
"10640 0.0 0.0 0.0 1.0 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-14 10:14:47 +04:00
" ... Manufacturer_TOYOTA Manufacturer_UAZ Manufacturer_VAZ \\\n",
"15146 ... 1.0 0.0 0.0 \n",
"14145 ... 1.0 0.0 0.0 \n",
"8943 ... 0.0 0.0 0.0 \n",
"17889 ... 0.0 0.0 0.0 \n",
"9515 ... 0.0 0.0 0.0 \n",
"... ... ... ... ... \n",
"18201 ... 0.0 0.0 0.0 \n",
"7436 ... 0.0 0.0 0.0 \n",
"7728 ... 1.0 0.0 0.0 \n",
"1136 ... 0.0 0.0 0.0 \n",
"10640 ... 0.0 0.0 0.0 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-14 10:14:47 +04:00
" Manufacturer_VOLKSWAGEN Manufacturer_VOLVO Manufacturer_სხვა \\\n",
"15146 0.0 0.0 0.0 \n",
"14145 0.0 0.0 0.0 \n",
"8943 0.0 0.0 0.0 \n",
"17889 0.0 0.0 0.0 \n",
"9515 0.0 0.0 0.0 \n",
"... ... ... ... \n",
"18201 0.0 0.0 0.0 \n",
"7436 0.0 0.0 0.0 \n",
"7728 0.0 0.0 0.0 \n",
"1136 0.0 0.0 0.0 \n",
"10640 0.0 0.0 0.0 \n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-14 10:14:47 +04:00
" Levy Prod. year Engine volume Airbags \n",
"15146 -1.187596 1.943625 -0.487669 -0.673150 \n",
"14145 -1.187596 -0.874572 -0.900100 -1.179883 \n",
"8943 0.081225 0.642919 -0.487669 0.847049 \n",
"17889 -1.187596 -1.524925 0.474671 -0.419784 \n",
"9515 1.695495 1.510056 0.474671 -0.673150 \n",
"... ... ... ... ... \n",
"18201 -1.187596 -0.874572 0.337194 -0.673150 \n",
"7436 0.474472 0.859703 -0.212715 1.353782 \n",
"7728 0.643938 -0.657787 -0.900100 0.340316 \n",
"1136 -1.187596 1.076487 -0.762623 0.340316 \n",
"10640 0.444055 0.426134 0.337194 0.340316 \n",
2024-12-12 23:48:52 +04:00
"\n",
2024-12-14 10:14:47 +04:00
"[8817 rows x 82 columns]"
2024-12-07 13:00:14 +04:00
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 54,
2024-12-07 13:00:14 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-12-12 23:48:52 +04:00
"price_y_train = train_df[\"Price\"]\n",
"price_y_test = test_df[\"Price\"]\n",
"train_df.drop(columns=[\"Price\"], inplace=True)\n",
"test_df.drop(columns=[\"Price\"], inplace=True)\n",
2024-12-07 13:00:14 +04:00
"\n",
2024-12-12 23:48:52 +04:00
"train_df"
2024-12-07 13:00:14 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Формирование набора моделей"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 55,
2024-12-07 13:00:14 +04:00
"metadata": {},
"outputs": [],
2024-12-12 23:48:52 +04:00
"source": [
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" # \"linear_poly\": {\n",
" # \"model\": make_pipeline(\n",
" # PolynomialFeatures(degree=2),\n",
" # linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" # )\n",
" # },\n",
" # \"linear_interact\": {\n",
" # \"model\": make_pipeline(\n",
" # PolynomialFeatures(interaction_only=True),\n",
" # linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" # )\n",
" # },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение и оценка моделей с помощью различных алгоритмов"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 56,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-12-14 10:14:47 +04:00
"Model: linear\n"
2024-12-12 23:48:52 +04:00
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but LinearRegression was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but LinearRegression was fitted without feature names\n",
2024-12-14 10:14:47 +04:00
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: ridge\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-12-12 23:48:52 +04:00
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but RidgeCV was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but RidgeCV was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but KNeighborsRegressor was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but KNeighborsRegressor was fitted without feature names\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-12-14 10:14:47 +04:00
"Model: decision_tree\n",
"Model: knn\n",
2024-12-12 23:48:52 +04:00
"Model: random_forest\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but RandomForestRegressor was fitted without feature names\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but RandomForestRegressor was fitted without feature names\n",
" warnings.warn(\n"
]
}
],
"source": [
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
"\n",
" model = models[model_name][\"model\"]\n",
"\n",
" fitted_model = model.fit(train_df.values, price_y_train.values.ravel())\n",
"\n",
" y_train_pred = fitted_model.predict(train_df)\n",
" y_test_pred = fitted_model.predict(test_df)\n",
"\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"train_preds\"] = y_train_pred\n",
" models[model_name][\"preds\"] = y_test_pred\n",
"\n",
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
" metrics.mean_squared_error(price_y_train, y_train_pred)\n",
" )\n",
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(price_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(price_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(price_y_test, y_test_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод результатов оценки"
]
},
{
"cell_type": "code",
2024-12-14 10:14:47 +04:00
"execution_count": 57,
2024-12-12 23:48:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row0_col0, #T_d4e42_row0_col1 {\n",
2024-12-12 23:48:52 +04:00
" background-color: #26818e;\n",
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row0_col2, #T_d4e42_row3_col3, #T_d4e42_row4_col3 {\n",
2024-12-12 23:48:52 +04:00
" background-color: #4e02a2;\n",
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row0_col3, #T_d4e42_row3_col2, #T_d4e42_row4_col2 {\n",
2024-12-12 23:48:52 +04:00
" background-color: #da5a6a;\n",
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row1_col0 {\n",
" background-color: #21a585;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row1_col1 {\n",
" background-color: #1f988b;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row1_col2 {\n",
" background-color: #8305a7;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row1_col3 {\n",
" background-color: #c5407e;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row2_col0 {\n",
" background-color: #34b679;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row2_col1 {\n",
" background-color: #2eb37c;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row2_col2 {\n",
" background-color: #a01a9c;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row2_col3 {\n",
" background-color: #a72197;\n",
2024-12-12 23:48:52 +04:00
" color: #f1f1f1;\n",
"}\n",
2024-12-14 10:14:47 +04:00
"#T_d4e42_row3_col0, #T_d4e42_row3_col1, #T_d4e42_row4_col0, #T_d4e42_row4_col1 {\n",
2024-12-12 23:48:52 +04:00
" background-color: #a8db34;\n",
" color: #000000;\n",
"}\n",
"</style>\n",
2024-12-14 10:14:47 +04:00
"<table id=\"T_d4e42\">\n",
2024-12-12 23:48:52 +04:00
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" > </th>\n",
2024-12-14 10:14:47 +04:00
" <th id=\"T_d4e42_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
" <th id=\"T_d4e42_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
" <th id=\"T_d4e42_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
" <th id=\"T_d4e42_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
2024-12-12 23:48:52 +04:00
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
2024-12-14 10:14:47 +04:00
" <th id=\"T_d4e42_level0_row0\" class=\"row_heading level0 row0\" >knn</th>\n",
" <td id=\"T_d4e42_row0_col0\" class=\"data row0 col0\" >0.529359</td>\n",
" <td id=\"T_d4e42_row0_col1\" class=\"data row0 col1\" >0.594354</td>\n",
" <td id=\"T_d4e42_row0_col2\" class=\"data row0 col2\" >0.635191</td>\n",
" <td id=\"T_d4e42_row0_col3\" class=\"data row0 col3\" >0.650397</td>\n",
2024-12-12 23:48:52 +04:00
" </tr>\n",
" <tr>\n",
2024-12-14 10:14:47 +04:00
" <th id=\"T_d4e42_level0_row1\" class=\"row_heading level0 row1\" >random_forest</th>\n",
" <td id=\"T_d4e42_row1_col0\" class=\"data row1 col0\" >0.609210</td>\n",
" <td id=\"T_d4e42_row1_col1\" class=\"data row1 col1\" >0.631618</td>\n",
" <td id=\"T_d4e42_row1_col2\" class=\"data row1 col2\" >0.673173</td>\n",
" <td id=\"T_d4e42_row1_col3\" class=\"data row1 col3\" >0.605184</td>\n",
2024-12-12 23:48:52 +04:00
" </tr>\n",
" <tr>\n",
2024-12-14 10:14:47 +04:00
" <th id=\"T_d4e42_level0_row2\" class=\"row_heading level0 row2\" >decision_tree</th>\n",
" <td id=\"T_d4e42_row2_col0\" class=\"data row2 col0\" >0.647025</td>\n",
" <td id=\"T_d4e42_row2_col1\" class=\"data row2 col1\" >0.674358</td>\n",
" <td id=\"T_d4e42_row2_col2\" class=\"data row2 col2\" >0.696539</td>\n",
" <td id=\"T_d4e42_row2_col3\" class=\"data row2 col3\" >0.549944</td>\n",
2024-12-12 23:48:52 +04:00
" </tr>\n",
" <tr>\n",
2024-12-14 10:14:47 +04:00
" <th id=\"T_d4e42_level0_row3\" class=\"row_heading level0 row3\" >ridge</th>\n",
" <td id=\"T_d4e42_row3_col0\" class=\"data row3 col0\" >0.753637</td>\n",
" <td id=\"T_d4e42_row3_col1\" class=\"data row3 col1\" >0.759154</td>\n",
" <td id=\"T_d4e42_row3_col2\" class=\"data row3 col2\" >0.758869</td>\n",
" <td id=\"T_d4e42_row3_col3\" class=\"data row3 col3\" >0.429644</td>\n",
2024-12-12 23:48:52 +04:00
" </tr>\n",
" <tr>\n",
2024-12-14 10:14:47 +04:00
" <th id=\"T_d4e42_level0_row4\" class=\"row_heading level0 row4\" >linear</th>\n",
" <td id=\"T_d4e42_row4_col0\" class=\"data row4 col0\" >0.752569</td>\n",
" <td id=\"T_d4e42_row4_col1\" class=\"data row4 col1\" >0.759341</td>\n",
" <td id=\"T_d4e42_row4_col2\" class=\"data row4 col2\" >0.758784</td>\n",
" <td id=\"T_d4e42_row4_col3\" class=\"data row4 col3\" >0.429364</td>\n",
2024-12-12 23:48:52 +04:00
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
2024-12-14 10:14:47 +04:00
"<pandas.io.formats.style.Styler at 0x12df22077d0>"
2024-12-12 23:48:52 +04:00
]
},
2024-12-14 10:14:47 +04:00
"execution_count": 57,
2024-12-12 23:48:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Collect the per-model metrics into one table (one row per model name).\n",
"metric_columns = [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\")[metric_columns]\n",
"\n",
"# Show the table sorted by test RMSE, with one colour map per group of columns.\n",
"metrics_styler = reg_metrics.sort_values(by=\"RMSE_test\").style\n",
"metrics_styler = metrics_styler.background_gradient(\n",
"    cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
")\n",
"metrics_styler = metrics_styler.background_gradient(\n",
"    cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"]\n",
")\n",
"metrics_styler"
]
2024-12-14 10:14:47 +04:00
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'knn'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# The best model is the first row label after sorting by ascending test RMSE.\n",
"ranked_by_test_rmse = reg_metrics.sort_values(by=\"RMSE_test\")\n",
"best_model = str(ranked_by_test_rmse.index[0])\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод предсказаний для выборок"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Leather interior_Yes</th>\n",
" <th>Category_Coupe</th>\n",
" <th>Category_Goods wagon</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>...</th>\n",
" <th>Manufacturer_VAZ</th>\n",
" <th>Manufacturer_VOLKSWAGEN</th>\n",
" <th>Manufacturer_VOLVO</th>\n",
" <th>Manufacturer_სხვა</th>\n",
" <th>Levy</th>\n",
" <th>Prod. year</th>\n",
" <th>Engine volume</th>\n",
" <th>Airbags</th>\n",
" <th>Price</th>\n",
" <th>PricePred</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>15146</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-1.187596</td>\n",
" <td>1.943625</td>\n",
" <td>-0.487669</td>\n",
" <td>-0.673150</td>\n",
" <td>0.144553</td>\n",
" <td>0.557766</td>\n",
" </tr>\n",
" <tr>\n",
" <th>14145</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-1.187596</td>\n",
" <td>-0.874572</td>\n",
" <td>-0.900100</td>\n",
" <td>-1.179883</td>\n",
" <td>-0.665312</td>\n",
" <td>-0.603334</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8943</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.081225</td>\n",
" <td>0.642919</td>\n",
" <td>-0.487669</td>\n",
" <td>0.847049</td>\n",
" <td>-0.144678</td>\n",
" <td>-0.355422</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17889</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-1.187596</td>\n",
" <td>-1.524925</td>\n",
" <td>0.474671</td>\n",
" <td>-0.419784</td>\n",
" <td>-0.462869</td>\n",
" <td>-0.746737</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9515</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.695495</td>\n",
" <td>1.510056</td>\n",
" <td>0.474671</td>\n",
" <td>-0.673150</td>\n",
" <td>2.765339</td>\n",
" <td>2.375842</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 84 columns</p>\n",
"</div>"
],
"text/plain": [
" Leather interior_Yes Category_Coupe Category_Goods wagon \\\n",
"15146 0.0 0.0 0.0 \n",
"14145 0.0 0.0 0.0 \n",
"8943 0.0 0.0 0.0 \n",
"17889 1.0 0.0 0.0 \n",
"9515 1.0 0.0 0.0 \n",
"\n",
" Category_Hatchback Category_Jeep Category_Limousine \\\n",
"15146 0.0 0.0 0.0 \n",
"14145 0.0 0.0 0.0 \n",
"8943 0.0 0.0 0.0 \n",
"17889 0.0 1.0 0.0 \n",
"9515 0.0 0.0 0.0 \n",
"\n",
" Category_Microbus Category_Minivan Category_Pickup Category_Sedan \\\n",
"15146 0.0 0.0 0.0 1.0 \n",
"14145 0.0 0.0 0.0 1.0 \n",
"8943 0.0 0.0 0.0 1.0 \n",
"17889 0.0 0.0 0.0 0.0 \n",
"9515 0.0 0.0 0.0 0.0 \n",
"\n",
" ... Manufacturer_VAZ Manufacturer_VOLKSWAGEN Manufacturer_VOLVO \\\n",
"15146 ... 0.0 0.0 0.0 \n",
"14145 ... 0.0 0.0 0.0 \n",
"8943 ... 0.0 0.0 0.0 \n",
"17889 ... 0.0 0.0 0.0 \n",
"9515 ... 0.0 0.0 0.0 \n",
"\n",
" Manufacturer_სხვა Levy Prod. year Engine volume Airbags \\\n",
"15146 0.0 -1.187596 1.943625 -0.487669 -0.673150 \n",
"14145 0.0 -1.187596 -0.874572 -0.900100 -1.179883 \n",
"8943 0.0 0.081225 0.642919 -0.487669 0.847049 \n",
"17889 0.0 -1.187596 -1.524925 0.474671 -0.419784 \n",
"9515 0.0 1.695495 1.510056 0.474671 -0.673150 \n",
"\n",
" Price PricePred \n",
"15146 0.144553 0.557766 \n",
"14145 -0.665312 -0.603334 \n",
"8943 -0.144678 -0.355422 \n",
"17889 -0.462869 -0.746737 \n",
"9515 2.765339 2.375842 \n",
"\n",
"[5 rows x 84 columns]"
]
},
"execution_count": 59,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put the best model's training predictions next to the features and the true price.\n",
"train_price_pred = pd.Series(\n",
"    models[best_model][\"train_preds\"],\n",
"    index=price_y_train.index,\n",
"    name=\"PricePred\",\n",
")\n",
"train_preview = pd.concat([train_df, price_y_train, train_price_pred], axis=1)\n",
"train_preview.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Leather interior_Yes</th>\n",
" <th>Category_Coupe</th>\n",
" <th>Category_Goods wagon</th>\n",
" <th>Category_Hatchback</th>\n",
" <th>Category_Jeep</th>\n",
" <th>Category_Limousine</th>\n",
" <th>Category_Microbus</th>\n",
" <th>Category_Minivan</th>\n",
" <th>Category_Pickup</th>\n",
" <th>Category_Sedan</th>\n",
" <th>...</th>\n",
" <th>Manufacturer_VAZ</th>\n",
" <th>Manufacturer_VOLKSWAGEN</th>\n",
" <th>Manufacturer_VOLVO</th>\n",
" <th>Manufacturer_სხვა</th>\n",
" <th>Levy</th>\n",
" <th>Prod. year</th>\n",
" <th>Engine volume</th>\n",
" <th>Airbags</th>\n",
" <th>Price</th>\n",
" <th>PricePred</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>10968</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.055651</td>\n",
" <td>-0.007434</td>\n",
" <td>-1.037578</td>\n",
" <td>1.353782</td>\n",
" <td>-1.316082</td>\n",
" <td>-0.960788</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1121</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>-0.331577</td>\n",
" <td>0.642919</td>\n",
" <td>0.474671</td>\n",
" <td>1.353782</td>\n",
" <td>-1.489657</td>\n",
" <td>-0.235577</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4355</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.398430</td>\n",
" <td>1.076487</td>\n",
" <td>-0.762623</td>\n",
" <td>-0.673150</td>\n",
" <td>0.771343</td>\n",
" <td>0.887499</td>\n",
" </tr>\n",
" <tr>\n",
" <th>17702</th>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.528788</td>\n",
" <td>-0.441003</td>\n",
" <td>-0.762623</td>\n",
" <td>-0.673150</td>\n",
" <td>-0.679792</td>\n",
" <td>-0.579671</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6167</th>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.0</td>\n",
" <td>...</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>0.719981</td>\n",
" <td>-0.441003</td>\n",
" <td>-0.487669</td>\n",
" <td>0.340316</td>\n",
" <td>-0.173638</td>\n",
" <td>-0.471104</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 84 columns</p>\n",
"</div>"
],
"text/plain": [
" Leather interior_Yes Category_Coupe Category_Goods wagon \\\n",
"10968 1.0 0.0 0.0 \n",
"1121 1.0 0.0 0.0 \n",
"4355 1.0 0.0 0.0 \n",
"17702 1.0 0.0 0.0 \n",
"6167 0.0 0.0 0.0 \n",
"\n",
" Category_Hatchback Category_Jeep Category_Limousine \\\n",
"10968 1.0 0.0 0.0 \n",
"1121 0.0 0.0 0.0 \n",
"4355 0.0 0.0 0.0 \n",
"17702 1.0 0.0 0.0 \n",
"6167 0.0 0.0 0.0 \n",
"\n",
" Category_Microbus Category_Minivan Category_Pickup Category_Sedan \\\n",
"10968 0.0 0.0 0.0 0.0 \n",
"1121 0.0 0.0 0.0 1.0 \n",
"4355 0.0 0.0 0.0 1.0 \n",
"17702 0.0 0.0 0.0 0.0 \n",
"6167 0.0 0.0 0.0 1.0 \n",
"\n",
" ... Manufacturer_VAZ Manufacturer_VOLKSWAGEN Manufacturer_VOLVO \\\n",
"10968 ... 0.0 0.0 0.0 \n",
"1121 ... 0.0 0.0 0.0 \n",
"4355 ... 0.0 0.0 0.0 \n",
"17702 ... 0.0 0.0 0.0 \n",
"6167 ... 0.0 0.0 0.0 \n",
"\n",
" Manufacturer_სხვა Levy Prod. year Engine volume Airbags \\\n",
"10968 0.0 -0.055651 -0.007434 -1.037578 1.353782 \n",
"1121 0.0 -0.331577 0.642919 0.474671 1.353782 \n",
"4355 0.0 0.398430 1.076487 -0.762623 -0.673150 \n",
"17702 0.0 0.528788 -0.441003 -0.762623 -0.673150 \n",
"6167 0.0 0.719981 -0.441003 -0.487669 0.340316 \n",
"\n",
" Price PricePred \n",
"10968 -1.316082 -0.960788 \n",
"1121 -1.489657 -0.235577 \n",
"4355 0.771343 0.887499 \n",
"17702 -0.679792 -0.579671 \n",
"6167 -0.173638 -0.471104 \n",
"\n",
"[5 rows x 84 columns]"
]
},
"execution_count": 60,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Put the best model's test predictions next to the features and the true price.\n",
"test_price_pred = pd.Series(\n",
"    models[best_model][\"preds\"],\n",
"    index=price_y_test.index,\n",
"    name=\"PricePred\",\n",
")\n",
"test_preview = pd.concat([test_df, price_y_test, test_price_pred], axis=1)\n",
"test_preview.head(5)"
]
2024-12-07 13:00:14 +04:00
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}