2799 lines
198 KiB
Plaintext
Raw Normal View History

2024-12-07 00:08:27 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"vscode": {
"languageId": "plaintext"
}
},
"source": [
"#### Загрузка набора данных"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1058,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod. year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>186005 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>3</td>\n",
" <td>192000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8467</td>\n",
" <td>-</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2006</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>200000 km</td>\n",
" <td>4.0</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>168966 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2014</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>91901 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19232</th>\n",
" <td>8467</td>\n",
" <td>-</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>CLK 200</td>\n",
" <td>1999</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>CNG</td>\n",
" <td>2.0 Turbo</td>\n",
" <td>300000 km</td>\n",
" <td>4.0</td>\n",
" <td>Manual</td>\n",
" <td>Rear</td>\n",
" <td>02-Mar</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19233</th>\n",
" <td>15681</td>\n",
" <td>831</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2011</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>161600 km</td>\n",
" <td>4.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19234</th>\n",
" <td>26108</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Tucson</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>116365 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19235</th>\n",
" <td>5331</td>\n",
" <td>1288</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Captiva</td>\n",
" <td>2007</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>51258 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19236</th>\n",
" <td>470</td>\n",
" <td>753</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2012</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.4</td>\n",
" <td>186923 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19237 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Prod. year Category \\\n",
"0 13328 1399 LEXUS RX 450 2010 Jeep \n",
"1 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
"2 8467 - HONDA FIT 2006 Hatchback \n",
"3 3607 862 FORD Escape 2011 Jeep \n",
"4 11726 446 HONDA FIT 2014 Hatchback \n",
"... ... ... ... ... ... ... \n",
"19232 8467 - MERCEDES-BENZ CLK 200 1999 Coupe \n",
"19233 15681 831 HYUNDAI Sonata 2011 Sedan \n",
"19234 26108 836 HYUNDAI Tucson 2010 Jeep \n",
"19235 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
"19236 470 753 HYUNDAI Sonata 2012 Sedan \n",
"\n",
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"0 Yes Hybrid 3.5 186005 km 6.0 \n",
"1 No Petrol 3 192000 km 6.0 \n",
"2 No Petrol 1.3 200000 km 4.0 \n",
"3 Yes Hybrid 2.5 168966 km 4.0 \n",
"4 Yes Petrol 1.3 91901 km 4.0 \n",
"... ... ... ... ... ... \n",
"19232 Yes CNG 2.0 Turbo 300000 km 4.0 \n",
"19233 Yes Petrol 2.4 161600 km 4.0 \n",
"19234 Yes Diesel 2 116365 km 4.0 \n",
"19235 Yes Diesel 2 51258 km 4.0 \n",
"19236 Yes Hybrid 2.4 186923 km 4.0 \n",
"\n",
" Gear box type Drive wheels Doors Wheel Color Airbags \n",
"0 Automatic 4x4 04-May Left wheel Silver 12 \n",
"1 Tiptronic 4x4 04-May Left wheel Black 8 \n",
"2 Variator Front 04-May Right-hand drive Black 2 \n",
"3 Automatic 4x4 04-May Left wheel White 0 \n",
"4 Automatic Front 04-May Left wheel Silver 4 \n",
"... ... ... ... ... ... ... \n",
"19232 Manual Rear 02-Mar Left wheel Silver 5 \n",
"19233 Tiptronic Front 04-May Left wheel Red 8 \n",
"19234 Automatic Front 04-May Left wheel Grey 4 \n",
"19235 Automatic Front 04-May Left wheel Black 4 \n",
"19236 Automatic Front 04-May Left wheel White 12 \n",
"\n",
"[19237 rows x 17 columns]"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1058,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import featuretools as ft\n",
"import re\n",
"from sklearn.preprocessing import StandardScaler\n",
"from imblearn.over_sampling import RandomOverSampler\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"# Read the raw CSV and discard the ID column in a single chained step.\n",
"df = pd.read_csv(\"../data/car_price_prediction.csv\").drop(columns=[\"ID\"])\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Анализ датасета и очистка данных"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1059,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy object\n",
"Manufacturer object\n",
"Model object\n",
"Prod. year int64\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
"Engine volume object\n",
"Mileage object\n",
"Cylinders float64\n",
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"dtype: object"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1059,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1060,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 3.5, 3. , 1.3, 2.5, 2. , 1.8, 2.4, 4. , 1.6, 3.3, 2.2,\n",
" 4.7, 1.5, 4.4, 1.4, 3.6, 2.3, 5.5, 2.8, 3.2, 3.8, 4.6,\n",
" 1.2, 5. , 1.7, 2.9, 0.5, 1.9, 2.7, 4.8, 5.3, 0.4, 1.1,\n",
" 2.1, 0.7, 5.4, 3.7, 1. , 2.6, 0.8, 0.2, 5.7, 6.7, 6.2,\n",
" 3.4, 6.3, 4.3, 4.2, 0. , 20. , 0.3, 5.9, 5.6, 6. , 0.6,\n",
" 6.8, 4.5, 7.3, 0.1, 3.1, 6.4, 3.9, 0.9, 5.2, 5.8])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1060,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Remove the \"Turbo\" marker and parse the engine volume as a float.\n",
"# Going through astype(str) keeps the cell re-runnable: once the column is numeric,\n",
"# the bare .str accessor would raise AttributeError on a second run.\n",
"df[\"Engine volume\"] = pd.to_numeric(\n",
"    df[\"Engine volume\"].astype(str).str.replace(\"Turbo\", \"\", regex=False).str.strip()\n",
")\n",
"df[\"Engine volume\"].unique()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1061,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([186005, 192000, 200000, ..., 140607, 307325, 186923])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1061,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Drop the \"km\" unit suffix and store the mileage as int64.\n",
"# astype(str) makes the cell safe to re-run after the column is already numeric\n",
"# (the bare .str accessor raises AttributeError on non-string data).\n",
"df[\"Mileage\"] = (\n",
"    df[\"Mileage\"].astype(str).str.replace(\"km\", \"\", regex=False).str.strip().astype(\"int64\")\n",
")\n",
"df[\"Mileage\"].unique()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1062,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1399, 1018, 0, 862, 446, 891, 761, 751, 394,\n",
" 1053, 1055, 1079, 810, 2386, 1850, 531, 586, 1249,\n",
" 2455, 583, 1537, 1288, 915, 1750, 707, 1077, 1486,\n",
" 1091, 650, 382, 1436, 1194, 503, 1017, 1104, 639,\n",
" 629, 919, 781, 530, 640, 765, 777, 779, 934,\n",
" 769, 645, 1185, 1324, 830, 1187, 1111, 760, 642,\n",
" 1604, 1095, 966, 473, 1138, 1811, 988, 917, 1156,\n",
" 687, 11714, 836, 1347, 2866, 1646, 259, 609, 697,\n",
" 585, 475, 690, 308, 1823, 1361, 1273, 924, 584,\n",
" 2078, 831, 1172, 893, 1872, 1885, 1266, 447, 2148,\n",
" 1730, 730, 289, 502, 333, 1325, 247, 879, 1342,\n",
" 1327, 1598, 1514, 1058, 738, 1935, 481, 1522, 1282,\n",
" 456, 880, 900, 798, 1277, 442, 1051, 790, 1292,\n",
" 1047, 528, 1211, 1493, 1793, 574, 930, 1998, 271,\n",
" 706, 1481, 1677, 1661, 1286, 1408, 1090, 595, 1451,\n",
" 1267, 993, 1714, 878, 641, 749, 1511, 603, 353,\n",
" 877, 1236, 1141, 397, 784, 1024, 1357, 1301, 770,\n",
" 922, 1438, 753, 607, 1363, 638, 490, 431, 565,\n",
" 517, 833, 489, 1760, 986, 1841, 1620, 1360, 474,\n",
" 1099, 978, 1624, 1946, 1268, 1307, 696, 649, 666,\n",
" 2151, 551, 800, 971, 1323, 2377, 1845, 1083, 694,\n",
" 463, 419, 345, 1515, 1505, 2056, 1203, 729, 460,\n",
" 1356, 876, 911, 1190, 780, 448, 2410, 1848, 1148,\n",
" 834, 1275, 1028, 1197, 724, 890, 1705, 505, 789,\n",
" 2959, 518, 461, 1719, 2858, 3156, 2225, 2177, 1968,\n",
" 1888, 1308, 2736, 1103, 557, 2195, 843, 1664, 723,\n",
" 4508, 562, 501, 2018, 1076, 1202, 3301, 691, 1440,\n",
" 1869, 1178, 418, 1820, 1413, 488, 1304, 363, 2108,\n",
" 521, 1659, 87, 1411, 1528, 3292, 7058, 1578, 627,\n",
" 874, 1996, 1488, 5679, 1234, 5603, 400, 889, 3268,\n",
" 875, 949, 2265, 441, 742, 425, 2476, 2971, 614,\n",
" 1816, 1375, 1405, 2297, 1062, 1113, 420, 2469, 658,\n",
" 1951, 2670, 2578, 1995, 1032, 994, 1011, 2421, 1296,\n",
" 155, 494, 426, 1086, 961, 2236, 1829, 764, 1834,\n",
" 1054, 617, 1529, 2266, 637, 626, 1832, 1016, 2002,\n",
" 1756, 746, 1285, 2690, 1118, 5332, 980, 1807, 970,\n",
" 1228, 1195, 1132, 1768, 1384, 1080, 7063, 1817, 1452,\n",
" 1975, 1368, 702, 1974, 1781, 1036, 944, 663, 364,\n",
" 1539, 1345, 1680, 2209, 741, 1575, 695, 1317, 294,\n",
" 1525, 424, 997, 1473, 1552, 2819, 2188, 1668, 3057,\n",
" 799, 1502, 2606, 552, 1694, 1759, 1110, 399, 1470,\n",
" 1174, 5877, 1474, 1688, 526, 686, 5908, 1107, 2070,\n",
" 1468, 1246, 1685, 556, 1533, 1917, 1346, 732, 692,\n",
" 579, 421, 362, 3505, 1855, 2711, 1586, 3739, 681,\n",
" 1708, 2278, 1701, 722, 1482, 928, 827, 832, 527,\n",
" 604, 173, 1341, 3329, 1553, 859, 167, 916, 828,\n",
" 2082, 1176, 1108, 975, 3008, 1516, 2269, 1699, 2073,\n",
" 1031, 1503, 2364, 1030, 1442, 5666, 2715, 1437, 2067,\n",
" 1426, 2908, 1279, 866, 4283, 279, 2658, 3015, 2004,\n",
" 1391, 4736, 748, 1466, 644, 683, 2705, 1297, 731,\n",
" 1252, 2216, 3141, 3273, 1518, 1723, 1588, 972, 682,\n",
" 1094, 668, 175, 967, 402, 3894, 1960, 1599, 2000,\n",
" 2084, 1621, 714, 1109, 3989, 873, 1572, 1163, 1991,\n",
" 1716, 1673, 2562, 2874, 965, 462, 605, 1948, 1736,\n",
" 3518, 2054, 2467, 1681, 1272, 1205, 750, 2156, 2566,\n",
" 115, 524, 3184, 676, 1678, 612, 328, 955, 1441,\n",
" 1675, 3965, 2909, 623, 822, 867, 3025, 1993, 792,\n",
" 636, 4057, 3743, 2337, 2570, 2418, 2472, 3910, 1662,\n",
" 2123, 2628, 3208, 2080, 3699, 2913, 864, 2505, 870,\n",
" 7536, 1924, 1671, 1064, 1836, 1866, 4741, 841, 1369,\n",
" 5681, 3112, 1366, 2223, 1198, 1039, 3811, 3571, 1387,\n",
" 1171, 1365, 1531, 1590, 11706, 2308, 4860, 1641, 1045,\n",
" 1901])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1062,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# The raw column uses \"-\" as a placeholder; swap it for \"0\" so the column casts to int64.\n",
"df[\"Levy\"] = df[\"Levy\"].replace({\"-\": \"0\"}).astype(\"int64\")\n",
"pd.unique(df[\"Levy\"])"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1063,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 6, 4, 8, 1, 12, 3, 2, 16, 5, 7, 9, 10, 14])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1063,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Cylinder counts are loaded as float64; store them as 64-bit integers.\n",
"df[\"Cylinders\"] = df[\"Cylinders\"].astype(np.int64)\n",
"pd.unique(df[\"Cylinders\"])"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1064,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['04-May', '02-Mar', '>5'], dtype=object)"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1064,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[\"Doors\"].unique()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1065,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['Четырехдверный', 'Двухдверный', 'Многодверный'], dtype=object)"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1065,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Translate the raw door labels (which look like date-mangled door counts) into categories.\n",
"# Series.replace leaves values that are absent from the mapping untouched, whereas\n",
"# Series.map turns them into NaN - so re-running this cell no longer wipes the column.\n",
"DOOR_LABELS = {\"02-Mar\": \"Двухдверный\", \"04-May\": \"Четырехдверный\", \">5\": \"Многодверный\"}\n",
"df[\"Doors\"] = df[\"Doors\"].replace(DOOR_LABELS)\n",
"df[\"Doors\"].unique()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1066,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 1, 3, 6, ..., 627220, 872946, 26307500])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1066,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct prices in ascending order (smallest and largest values sit at the array ends).\n",
"sorted_df = df.sort_values(\"Price\")\n",
"pd.unique(sorted_df[\"Price\"])"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1067,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество строк до удаления некорректных значений: 19237\n",
"Количество строк после удаления некорректных значений: 17574\n"
]
}
],
"source": [
"# Listings priced below 500 are treated as incorrect and removed.\n",
"# .copy() detaches the filtered result from the original frame, so the column\n",
"# assignments and in-place operations in later cells act on an independent\n",
"# DataFrame and cannot trigger SettingWithCopyWarning.\n",
"print(f\"Количество строк до удаления некорректных значений: {len(df)}\")\n",
"df = df[df[\"Price\"] >= 500].copy()\n",
"print(f\"Количество строк после удаления некорректных значений: {len(df)}\")"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1068,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 500, 549, 600, ..., 627220, 872946, 26307500])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1068,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Re-inspect the distinct prices in ascending order.\n",
"sorted_df = df.sort_values(\"Price\")\n",
"pd.unique(sorted_df[\"Price\"])"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1069,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([1943, 1953, 1957, 1964, 1965, 1968, 1973, 1974, 1977, 1978, 1980,\n",
" 1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991,\n",
" 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,\n",
" 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,\n",
" 2014, 2015, 2016, 2017, 2018, 2019, 2020])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1069,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Distinct production years in ascending order.\n",
"sorted_df = df.sort_values(\"Prod. year\")\n",
"pd.unique(sorted_df[\"Prod. year\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Ручной синтез признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1070,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,\n",
" 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,\n",
" 34, 35, 36, 37, 38, 39, 40, 42, 43, 46, 47, 52, 55, 56, 63, 67, 77])"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1070,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Replace the production year with the car's age relative to 2020, the latest\n",
"# production year present in the data.\n",
"REFERENCE_YEAR = 2020\n",
"# The guard keeps the cell re-runnable: \"Prod. year\" no longer exists after the first run.\n",
"# assign() builds a new frame instead of writing a column into a filtered one.\n",
"if \"Prod. year\" in df.columns:\n",
"    df = df.assign(Age=REFERENCE_YEAR - df[\"Prod. year\"]).drop(columns=\"Prod. year\")\n",
"sorted_df = df.sort_values(by=\"Age\")\n",
"sorted_df[\"Age\"].unique()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1071,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" <th>Age</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>186005</td>\n",
" <td>6</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>3.0</td>\n",
" <td>192000</td>\n",
" <td>6</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8467</td>\n",
" <td>0</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>200000</td>\n",
" <td>4</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" <td>14</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>168966</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>91901</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" <td>6</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19231</th>\n",
" <td>5802</td>\n",
" <td>1055</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>E 350</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>3.5</td>\n",
" <td>107800</td>\n",
" <td>6</td>\n",
" <td>Automatic</td>\n",
" <td>Rear</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>12</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19232</th>\n",
" <td>8467</td>\n",
" <td>0</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>CLK 200</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>CNG</td>\n",
" <td>2.0</td>\n",
" <td>300000</td>\n",
" <td>4</td>\n",
" <td>Manual</td>\n",
" <td>Rear</td>\n",
" <td>Двухдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>5</td>\n",
" <td>21</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19233</th>\n",
" <td>15681</td>\n",
" <td>831</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>161600</td>\n",
" <td>4</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>8</td>\n",
" <td>9</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19234</th>\n",
" <td>26108</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Tucson</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2.0</td>\n",
" <td>116365</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>19235</th>\n",
" <td>5331</td>\n",
" <td>1288</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Captiva</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2.0</td>\n",
" <td>51258</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>17574 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Category Leather interior \\\n",
"0 13328 1399 LEXUS RX 450 Jeep Yes \n",
"1 16621 1018 CHEVROLET Equinox Jeep No \n",
"2 8467 0 HONDA FIT Hatchback No \n",
"3 3607 862 FORD Escape Jeep Yes \n",
"4 11726 446 HONDA FIT Hatchback Yes \n",
"... ... ... ... ... ... ... \n",
"19231 5802 1055 MERCEDES-BENZ E 350 Sedan Yes \n",
"19232 8467 0 MERCEDES-BENZ CLK 200 Coupe Yes \n",
"19233 15681 831 HYUNDAI Sonata Sedan Yes \n",
"19234 26108 836 HYUNDAI Tucson Jeep Yes \n",
"19235 5331 1288 CHEVROLET Captiva Jeep Yes \n",
"\n",
" Fuel type Engine volume Mileage Cylinders Gear box type Drive wheels \\\n",
"0 Hybrid 3.5 186005 6 Automatic 4x4 \n",
"1 Petrol 3.0 192000 6 Tiptronic 4x4 \n",
"2 Petrol 1.3 200000 4 Variator Front \n",
"3 Hybrid 2.5 168966 4 Automatic 4x4 \n",
"4 Petrol 1.3 91901 4 Automatic Front \n",
"... ... ... ... ... ... ... \n",
"19231 Diesel 3.5 107800 6 Automatic Rear \n",
"19232 CNG 2.0 300000 4 Manual Rear \n",
"19233 Petrol 2.4 161600 4 Tiptronic Front \n",
"19234 Diesel 2.0 116365 4 Automatic Front \n",
"19235 Diesel 2.0 51258 4 Automatic Front \n",
"\n",
" Doors Wheel Color Airbags Age \n",
"0 Четырехдверный Left wheel Silver 12 10 \n",
"1 Четырехдверный Left wheel Black 8 9 \n",
"2 Четырехдверный Right-hand drive Black 2 14 \n",
"3 Четырехдверный Left wheel White 0 9 \n",
"4 Четырехдверный Left wheel Silver 4 6 \n",
"... ... ... ... ... ... \n",
"19231 Четырехдверный Left wheel Grey 12 7 \n",
"19232 Двухдверный Left wheel Silver 5 21 \n",
"19233 Четырехдверный Left wheel Red 8 9 \n",
"19234 Четырехдверный Left wheel Grey 4 10 \n",
"19235 Четырехдверный Left wheel Black 4 13 \n",
"\n",
"[17574 rows x 17 columns]"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1071,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Очистка дубликатов и пропущенных значений"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1072,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"np.int64(2773)"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1072,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.duplicated().sum()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1073,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [],
"source": [
"# Rebind instead of using inplace=True: the result is the same set of rows, but the\n",
"# frame is not mutated behind earlier references and the call stays chainable.\n",
"df = df.drop_duplicates()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1074,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price 0\n",
"Levy 0\n",
"Manufacturer 0\n",
"Model 0\n",
"Category 0\n",
"Leather interior 0\n",
"Fuel type 0\n",
"Engine volume 0\n",
"Mileage 0\n",
"Cylinders 0\n",
"Gear box type 0\n",
"Drive wheels 0\n",
"Doors 0\n",
"Wheel 0\n",
"Color 0\n",
"Airbags 0\n",
"Age 0\n",
"dtype: int64"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1074,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.isna().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Очистка выбросов"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1075,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy int64\n",
"Manufacturer object\n",
"Model object\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
"Engine volume float64\n",
"Mileage int64\n",
"Cylinders int64\n",
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"Age int64\n",
"dtype: object"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1075,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.dtypes"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1076,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWkAAAGECAYAAAD0odESAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAc3ElEQVR4nO3df1BVdeL/8deVrhdQYHMJECVlIy0TEc0QytT5CITWfthWx2mawZpq2x2oXHLa6Ltj0a/7R1nOzrpa4yhTu1ZbKTZExo0ScMRKk12tzbQlaRMI95NcgbzeLvf7R+ttCdB7Ebhvuc/HzJ3xnvt+c9935vD0zLnnXixer9crAICRRgV7AQCA/hFpADAYkQYAgxFpADAYkQYAgxFpADAYkQYAgxFpADAYkQYAgxFpADDYBRXp2tpa3XTTTUpMTJTFYlF5eXlA8x955BFZLJZetzFjxgzNggHgPF1Qke7s7FRaWprWrVs3oPmrVq1Sc3Nzj9u0adO0bNmyQV4pAAyOCyrSeXl5evzxx/WLX/yiz8ddLpdWrVqlCRMmaMyYMcrIyNDOnTt9j48dO1YJCQm+W2trqz755BPdcccdw/QKACAwF1Skz6WoqEj19fV6+eWX9fe//13Lli3TDTfcoMOHD/c5fuPGjZoyZYrmzZs3zCsFAP+MmEg3NTVp8+bNevXVVzVv3jxddtllWrVqla677jpt3ry51/hTp07pL3/5C0fRAIx2UbAXMFgOHDggj8ejKVOm9Njucrn005/+tNf4bdu26eTJk1qxYsVwLREAAjZiIt3R0aGwsDDt27dPYWFhPR4bO3Zsr/EbN27UjTfeqPj4+OFaIgAEbMREOj09XR6PR19//fU5zzE3Njbqvffe0xtvvDFMqwOAgbmgIt3R0aEjR4747jc2NqqhoUHjxo3TlClTdOutt6qgoEBr1qxRenq62traVF1drRkzZmjJkiW+eZs2bdL48eOVl5cXjJcBAH6zXEh/43Dnzp1auHBhr+0rVqxQWVmZ3G63Hn/8cb3wwgv66quvFBsbq7lz56q0tFSpqamSpO7ubk2aNEkFBQV64oknhvslAEBALqhIA0CoGTGX4AHASHRBnJPu7u7WsWPHFBUVJYvFEuzlAMB583q9OnnypBITEzVqVP/HyxdEpI8dO6akpKRgLwMABt2XX36piRMn9vv4BRHpqKgoSd+/mOjo6CCvBiOZ2+1WVVWVcnJyZLVag70cjGBOp1NJSUm+vvXngoj0mVMc0dHRRBpDyu12KzIyUtHR0UQaw+Jcp3B54xAADEakAcBgRBoADEakAcBgRBoADEakAcBgRBoADEakgf/weDyqqalRbW2tampq5PF4gr0kgEgDkrR161alpKQoOztbzzzzjLKzs5WSkqKtW7cGe2kIcUQaIW/r1q1aunSpUlNTVVdXp5deekl1dXVKTU3V0qVLCTWC6oL4Pmmn06mYmBi1t7fzsXAMKo/Ho5SUFKWmpqq8vFwej0eVlZVavHixwsLClJ+fr4MHD+rw4cO9/nYmcD787RpH0ghpdXV1+uKLL/TQQw/1+rrIUaNGqaSkRI2NjaqrqwvSChHqiDRCWnNzsyRp+vTpfT5+ZvuZccBwI9IIaePHj5ckHTx4sM/Hz2w/Mw4YbkQaIW3evHmaPHmynnzySXV3d/d4rLu7W3a7XcnJyZo3b16QVohQR6QR0sLCwrRmzRpVVFQoPz9fe/bs0bfffqs9e/YoPz9fFRUVevrpp3nTEEFzQXzpPzCUbr75Zr322mu6//77df311/u2Jycn67XXXtPNN98cxNUh1HEJHvAfHo9H7733nt566y3l5eVp4cKFHEFjyPjbNY6kgf8ICwvT/Pnz1dnZqfnz5xNoGIFz0gBgMCINAAYj0gBgMCINAAYj0gBgsIAibbfbNWfOHEVFRSkuLk75+fk6dOjQWeeUlZXJYrH0uIWHh5/XogEgVAQU6ZqaGhUWFmrPnj
1yOBxyu93KyclRZ2fnWedFR0erubnZdzt69Oh5LRoAQkVA10nv2LGjx/2ysjLFxcVp3759PT6p9WMWi0UJCQl+P4/L5ZLL5fLddzqdkiS32y232x3IkoGAnNm/2M8w1Pzdx87rwyzt7e2SpHHjxp11XEdHhyZNmqTu7m7NmjVLTz75pK666qp+x9vtdpWWlvbaXlVVpcjIyPNZMuAXh8MR7CVghOvq6vJr3IA/Ft7d3a2f//znOnHihHbt2tXvuPr6eh0+fFgzZsxQe3u7nn76adXW1urjjz/WxIkT+5zT15F0UlKSjh8/zsfCMaTcbrccDoeys7NltVqDvRyMYE6nU7GxsUP3sfDCwkIdPHjwrIGWpMzMTGVmZvruZ2Vl6corr9Rzzz2nxx57rM85NptNNput13ar1covDoYF+xqGmr/714AiXVRUpIqKCtXW1vZ7NHy2haWnp+vIkSMDeWoACCkBXd3h9XpVVFSkbdu26d1331VycnLAT+jxeHTgwAH+0gUA+CGgI+nCwkJt2bJF27dvV1RUlFpaWiRJMTExioiIkCQVFBRowoQJstvtkqRHH31Uc+fOVUpKik6cOKGnnnpKR48e1Z133jnILwUARp6AIr1+/XpJ0oIFC3ps37x5s2677TZJUlNTU4+/uvzNN9/orrvuUktLiy6++GLNnj1bu3fv1rRp085v5QAQAvjSf+C/uN1uVVZWavHixbxxiCHlb9f47g4AMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDEWkAMBiRBgCDBRRpu92uOXPmKCoqSnFxccrPz9ehQ4fOOe/VV1/VFVdcofDwcKWmpqqysnLACwaAUBJQpGtqalRYWKg9e/bI4XDI7XYrJydHnZ2d/c7ZvXu3brnlFt1xxx3av3+/8vPzlZ+fr4MHD5734gFgpLN4vV7vQCe3tbUpLi5ONTU1uv766/scs3z5cnV2dqqiosK3be7cuZo5c6Y2bNjQ5xyXyyWXy+W773Q6lZSUpOPHjys6OnqgywXOye12y+FwKDs7W1arNdjLwQjmdDoVGxur9vb2s3btovN5kvb2dknSuHHj+h1TX1+v4uLiHttyc3NVXl7e7xy73a7S0tJe26uqqhQZGTmwxQIBcDgcwV4CRriuri6/xg040t3d3Vq5cqWuvfZaTZ8+vd9xLS0tio+P77EtPj5eLS0t/c4pKSnpEfYzR9I5OTkcSWNIcSSN4eJ0Ov0aN+BIFxYW6uDBg9q1a9dAf0S/bDabbDZbr+1Wq5VfHAwL9jUMNX/3rwFFuqioSBUVFaqtrdXEiRPPOjYhIUGtra09trW2tiohIWEgTw0AISWgqzu8Xq+Kioq0bds2vfvuu0pOTj7nnMzMTFVXV/fY5nA4lJmZGdhKASAEBXQkXVhYqC1btmj79u2KiorynVeOiYlRRESEJKmgoEATJkyQ3W6XJN13332aP3++1qxZoyVLlujll1/W3r179fzzzw/ySwGAkSegI+n169ervb1dCxYs0Pjx4323V155xTemqalJzc3NvvtZWVnasmWLnn/+eaWlpem1115TeXn5Wd9sBAB8L6AjaX8uqd65c2evbcuWLdOyZcsCeSoAgPjuDgAwGpEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQ
AwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJEGAIMRaQAwGJE
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAF1CAYAAAAA3+oBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAyAklEQVR4nO3de1xUZf4H8A+XmeE6XLzMqCFQmq7KJmU/ZFPzgmCiC6KVLJkprqurrS6aprtl/Pqt99y2tVB/7dq+Nsv1lv4WLznhBcwRFa+omRVqaQMmMoOAwzA8vz/cOS9GMMecwyjn8369eMl5nu+ceY6v4cPhzDPP8RJCCBARkaJ4e3oARETU/Bj+REQKxPAnIlIghj8RkQIx/ImIFIjhT0SkQAx/IiIF8vX0ADypvr4ely9fRnBwMLy8vDw9HCKieyaEQGVlJdq3bw9v79uf3ys6/C9fvoyIiAhPD4OIyO2+/fZbPPTQQ7ftV3T4BwcHA7j5n6TVaj08GmrJbDYbdu7cicTERKhUKk8Ph1owi8WCiIgIKd9uR9Hh77jUo9VqGf4kK5vNhoCAAGi1WoY/NYs7XcrmG75ERAp01+Gfn5+P4cOHo3379vDy8sLmzZulPpvNhtmzZyMmJgaBgYFo3749XnzxRVy+fNlpH+Xl5cjIyIBWq0VoaCgyMzNx/fp1p5oTJ06gb9++8PPzQ0REBBYvXtxoLOvXr0fXrl3h5+eHmJgYbNu27W4Ph4hIke46/KuqqvDYY4/h3XffbdRXXV2NI0eO4LXXXsORI0ewadMmnD17Fr/85S+d6jIyMnDq1CkYDAbk5uYiPz8fEydOlPotFgsSExMRGRmJoqIiLFmyBG+88QZWrVol1ezfvx/p6enIzMzE0aNHkZqaitTUVBQXF9/tIRERKY+4BwDEJ5988qM1Bw8eFADEhQsXhBBCnD59WgAQhw4dkmq2b98uvLy8xKVLl4QQQrz33nsiLCxMWK1WqWb27NmiS5cu0vZzzz0nkpOTnZ4rLi5O/OY3v3F5/GazWQAQZrPZ5ccQ/RS1tbVi8+bNora21tNDoRbO1VyT/Q1fs9kMLy8vhIaGAgCMRiNCQ0PRq1cvqSYhIQHe3t4oLCzEiBEjYDQa0a9fP6jVaqkmKSkJixYtwrVr1xAWFgaj0YisrCyn50pKSnK6DHUrq9UKq9UqbVssFgA3L1fZbDY3HC1R0xyvL77OSG6uvsZkDf8bN25g9uzZSE9Pl2bTmEwmtG3b1nkQvr4IDw+HyWSSaqKjo51qdDqd1BcWFgaTySS1Naxx7KMpCxYsQHZ2dqP2nTt3IiAg4O4PkOguGQwGTw+BWrjq6mqX6mQLf5vNhueeew5CCOTk5Mj1NHdlzpw5Tn8tOObDJiYmcqonycpms8FgMGDw4MGc6kmyclzRuBNZwt8R/BcuXMCuXbucglWv16OsrMypvq6uDuXl5dDr9VJNaWmpU41j+041jv6maDQaaDSaRu0qlYo/kCSbmpoazJgxAwcOHMCOHTuwbNky+Pv7e3pY1EK5mmVun+fvCP5z587hs88+Q6tWrZz64+PjUVFRgaKiIqlt165dqK+vR1xcnFSTn5/vdO3KYDCgS5cuCAsLk2ry8vKc9m0wGBAfH+/uQyL6yVJTUxEQEIAVK1bg2LFjWLFiBQICApCamurpoZHS3e07yZWVleLo0aPi6NGjAoBYtmyZOHr0qLhw4YKora0Vv/zlL8VDDz0kjh07Jr7//nvpq+HMnSFDhojY2FhRWFgo9u3bJzp37izS09Ol/oqKCqHT6cSYMWNEcXGxWLt2rQgICBArV66Uaj7//HPh6+srli5dKs6cOSPmzZsnVCqVOHnypMvHwtk+JKeUlBQBQKjVajFr1iyRk5MjZs2aJdRqtQAgUlJSPD1EaoFczbW7Dv/du3cLAI2+xo4dK0pKSprsAyB2794t7ePq1asiPT1dBAUFCa1WK8aNGycqKyudnuf48e
OiT58+QqPRiA4dOoiFCxc2Gsu6devEo48+KtRqtejevbvYunXrXR0Lw5/kUl1dLQW/1Wp1mupptVqlXwDV1dWeHiq1MLKFf0vC8Ce5TJkyRQAQr776qhCi8Tz/WbNmCQBiypQpnhwmtUCu5hrX9iGSwblz5wAAEyZMaLI/MzPTqY6ouTH8iWTQuXNnAMD777/fZP/f/vY3pzqi5uYlhBCeHoSnWCwWhISEwGw2c54/uVVNTQ0CAgKgVqtRWVkJLy8vbNu2DUOHDoUQAsHBwaitrUV1dTWnfZJbuZprPPMnkoG/vz9SUlJQW1uL4OBgzJ07F5cuXcLcuXOl4E9JSWHwk8fwzJ9n/iSj1NRUbNmypVF7SkrKj65DRfRT8cyf6D6wefNmVFdXY9KkSejZsycmTZqE6upqBj95nKJv40jUHPz9/fHOO+9I1/y5lAjdD3jmT0SkQAx/IiIFYvgTESkQw5+ISIEY/kRECsTwJyJSIIY/EZECMfyJiBSI4U9EpEAMfyIiBWL4ExEpEMOfiEiBGP5ERArE8CciUiCGPxGRAjH8iYgUiOFPRKRADH8iIgVi+BMRKRDDn4hIgRj+REQKxPAnIlIghj8RkQIx/ImIFIjhT0SkQAx/IiIFYvgTESkQw5+ISIEY/kRECsTwJyJSIIY/EZECMfyJiBSI4U9EpEB3Hf75+fkYPnw42rdvDy8vL2zevNmpXwiB119/He3atYO/vz8SEhJw7tw5p5ry8nJkZGRAq9UiNDQUmZmZuH79ulPNiRMn0LdvX/j5+SEiIgKLFy9uNJb169eja9eu8PPzQ0xMDLZt23a3h0NEpEh3Hf5VVVV47LHH8O677zbZv3jxYrzzzjtYsWIFCgsLERgYiKSkJNy4cUOqycjIwKlTp2AwGJCbm4v8/HxMnDhR6rdYLEhMTERkZCSKioqwZMkSvPHGG1i1apVUs3//fqSnpyMzMxNHjx5FamoqUlNTUVxcfLeHRESkPOIeABCffPKJtF1fXy/0er1YsmSJ1FZRUSE0Go34+OOPhRBCnD59WgAQhw4dkmq2b98uvLy8xKVLl4QQQrz33nsiLCxMWK1WqWb27NmiS5cu0vZzzz0nkpOTncYTFxcnfvOb37g8frPZLAAIs9ns8mOIfora2lqxefNmUVtb6+mhUAvnaq75uvMXSUlJCUwmExISEqS2kJAQxMXFwWg0YvTo0TAajQgNDUWvXr2kmoSEBHh7e6OwsBAjRoyA0WhEv379oFarpZqkpCQsWrQI165dQ1hYGIxGI7KyspyePykpqdFlqIasViusVqu0bbFYAAA2mw02m+1eD5/othyvL77OSG6uvsbcGv4mkwkAoNPpnNp1Op3UZzKZ0LZtW+dB+PoiPDzcqSY6OrrRPhx9YWFhMJlMP/o8TVmwYAGys7Mbte/cuRMBAQGuHCLRPTEYDJ4eArVw1dXVLtW5Nfzvd3PmzHH6a8FisSAiIgKJiYnQarUeHBm1dDabDQaDAYMHD4ZKpfL0cKgFc1zRuBO3hr9erwcAlJaWol27dlJ7aWkpevbsKdWUlZU5Pa6urg7l5eXS4/V6PUpLS51qHNt3qnH0N0Wj0UCj0TRqV6lU/IGkZsHXGsnN1deXW+f5R0dHQ6/XIy8vT2qzWCwoLCxEfHw8ACA+Ph4VFRUoKiqSanbt2oX6+nrExcVJNfn5+U7XrgwGA7p06YKwsDCppuHzOGocz0NERD/ibt9JrqysFEePHhVHjx4VAMSyZcvE0aNHxYULF4QQQixcuFCEhoaKLVu2iBMnToiUlBQRHR0tampqpH0MGTJExMbGisLCQrFv3z7RuXNnkZ6eLvVXVFQInU4nxowZI4qLi8XatWtFQECAWLlypVTz+eefC19fX7F06VJx5swZMW/ePKFSqcTJkyddPhbO9qHmwtk+1FxczbW7Dv/du3cLAI2+xo4dK4S4Od3ztddeEzqdTmg0GjFo0CBx9uxZp31cvXpVpKeni6CgIK
HVasW4ceNEZWWlU83x48dFnz59hEajER06dBALFy5sNJZ169aJRx99VKjVatG9e3exdevWuzoWhj81F4Y/NRdXc81
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWkAAAGECAYAAAD0odESAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAiy0lEQVR4nO3df1BU973/8deyblESwBgV0WDguiapkfgriWIuoleRAPVm9YuT/pjRJE3yzUy8kwx2eou5Y2L7rdzeik2n8d7UySSZdKpJpSudQUjdoOL2Bm6uRtPQNBa9EtMIqFVZQcTNcr5/KHtDBNk1wH5gn48ZZjyf8/nsee/M2ZefOefsZ22WZVkCABgpJtIFAAB6R0gDgMEIaQAwGCENAAYjpAHAYIQ0ABiMkAYAgxHSAGAwQhoADEZIA4DBhlRI79+/X8uWLdPEiRNls9lUVlYW9mv85je/0cyZMxUXF6fbb79dP/3pT/u/UADoJ0MqpNva2jRjxgxt2bLlhsZXVlbqO9/5jp566inV1dXp3//93/Wzn/1ML730Uj9XCgD9wzZUF1iy2WzauXOnXC5XsK2jo0PPPfectm/frvPnz2v69On6yU9+ooULF0qSvv3tb8vv92vHjh3BMb/4xS/0b//2bzpx4oRsNtsgvwsAuL4hNZPuy5o1a1RTU6M333xTf/zjH7Vy5Uo9+OCDqq+vl3QlxEeOHNltzKhRo/TXv/5Vn3zySSRKBoDrGjYhfeLECb322mvasWOHMjMzNWXKFH3ve9/T3//93+u1116TJOXk5MjtdquqqkqdnZ36y1/+opKSEklSY2NjJMsHgB6NiHQB/eXDDz9UIBDQHXfc0a29o6NDt956qyTpiSee0LFjx/SNb3xDfr9fCQkJeuaZZ/TCCy8oJmbY/H8FYBgZNiHd2toqu92ugwcPym63d9t38803S7pyHfsnP/mJNm7cqKamJo0bN05VVVWSpL/7u78b9JoBoC/DJqRnzZqlQCCgU6dOKTMz87p97Xa7Jk2aJEnavn27MjIyNG7cuMEoEwDCMqRCurW1VUePHg1uHz9+XIcPH9aYMWN0xx136Dvf+Y5WrVqlkpISzZo1S6dPn1ZVVZXuuece5efn68yZMyotLdXChQt16dKl4DXs6urqCL4rALgOawjZu3evJemav9WrV1uWZVmXL1+21q9fb6WmploOh8NKTk62li9fbv3xj3+0LMuyTp8+bc2bN8+66aabrLi4OGvx4sVWbW1tBN8RAFzfkH1OGgCiAY80AIDBhsQ16c7OTp08eVLx8fF8KxDAsGBZli5cuKCJEyde9xHgIRHSJ0+eVEpKSqTLAIB+9+mnn+q2227rdf+QCOn4+HhJV95MQkJChKvBcOb3+7V7924tXbpUDocj0uVgGPP5fEpJSQnmW2+GREh3XeJISEggpDGg/H6/4uLilJCQQEhjUPR1CZcbhwBgMEIaAAxGSAOAwQhpADAYIQ0ABiOkAcBghDQAGGxIPCcNDIazZ88qMzNTn376qVJSUuT1ejVmzJhIl4UoR0gDkiZMmKDm5ubg9kcffaRbb71VSUlJampqimBliHZc7kDU+2JAz507Vxs2bNDcuXMlSc3NzZowYUIky0OUI6QR1c6ePRsM6AsXLsjr9WrGjBnyer26cOGCpCtBffbs2UiWiShGSCOqZWVlSZLmzZsX/MHiLjfffLPuv//+bv2AwUZII6qdPHlSkvTjH/+4x/0//OEPu/UDBhshjag2ceJESdJzzz3X4/7169d36wcMNkIaUa3rl+Jra2vV2trabV9ra6vee++9bv2AwUZII6qNGTNGSUlJkq78uMQDDzyg999/Xw888EBwMfakpCSel0bEDIlfC/f5fEpMTFRLSwuL/mNAfPk56S48J42BEmquMZMGJDU1Nelvf/ubpk2bpvj4eE2bNk1/+9vfCGhEHN84BK4aM2aMDh8+rIqKCuXl5fHzWTACM2kAMB
ghDQAGI6QBwGCENAAYjJAGAIMR0gBgMEIaAAxGSAOAwQhpADAYIQ0ABgsrpIuLi3XfffcpPj5e48ePl8vl0pEjR/oct2PHDt11110aOXKk0tPTVVFRccMFA0A0CSukq6ur9fTTT6u2tlYej0d+v19Lly5VW1tbr2Peffddfetb39J3v/tdHTp0SC6XSy6XS3V1dV+5eAAY7r7SUqWnT5/W+PHjVV1drQULFvTY5+GHH1ZbW5vKy8uDbfPmzdPMmTP18ssvh3QclirFYPH7/SywhEERaq59pVXwWlpaJOm6C6LX1NSosLCwW1tOTo7Kysp6HdPR0aGOjo7gts/nk3TlA+T3+79CxcD1dZ1fnGcYaKGeYzcc0p2dnXr22Wf1wAMPaPr06b32a2pqCv7yRZe+FlIvLi7Whg0brmnfvXu34uLibrRkIGQejyfSJWCYu3jxYkj9bjikn376adXV1ekPf/jDjb5Er4qKirrNvn0+n1JSUrR06VIud2BA+f1+eTweZWdnc7kDA6rrCkFfbiik16xZo/Lycu3fv1+33Xbbdfv29LNEzc3NmjBhQq9jYmNjFRsbe027w+Hgg4NBwbmGgRbq+RXW0x2WZWnNmjXauXOn9uzZo7S0tD7HZGRkqKqqqlubx+NRRkZGOIcGgKgU1kz66aef1rZt2/S73/1O8fHxwevKiYmJGjVqlCRp1apVmjRpkoqLiyVJzzzzjLKyslRSUqL8/Hy9+eabOnDggLZu3drPbwUAhp+wZtL/8R//oZaWFi1cuFDJycnBv7feeivY58SJE2psbAxuz58/X9u2bdPWrVs1Y8YMlZaWqqys7Lo3GwEAV4Q1kw7lkep9+/Zd07Zy5UqtXLkynEMBAMTaHQBgNEIaAAxGSAOAwQhpADAYIQ0ABiOkAcBghDQAGIyQBgCDEdIAYDBCGgAMRkgDgMEIaQAwGCENAAYjpAHAYIQ0ABiMkAYAgxHSAGAwQhoADEZIA4DBCGngqkAgoOrqau3fv1/V1dUKBAKRLgkgpAFJcrvdcjqdys7O1ubNm5WdnS2n0ym32x3p0hDlCGlEPbfbrYKCAqWnp8vr9Wr79u3yer1KT09XQUEBQY2IslmWZUW6iL74fD4lJiaqpaVFCQkJkS4Hw0ggEJDT6VR6errKysoUCARUUVGhvLw82e12uVwu1dXVqb6+Xna7PdLlYhgJNdeYSSOqeb1eNTQ0aN26dYqJ6f5xiImJUVFRkY4fPy6v1xuhChHtCGlEtcbGRknS9OnTe9zf1d7VDxhshDSiWnJysiSprq6ux/1d7V39gMFGSCOqZWZmKjU1VRs3blRnZ2e3fZ2dnSouLlZaWpoyMzMjVCGiHSGNqGa321VSUqLy8nK5XC7V1taqvb1dtbW1crlcKi8v16ZNm7hpiIgZEekCgEhbsWKFSktLtXbtWi1YsCDYnpaWptLSUq1YsSKC1SHa8QgecFUgENDevXtVWVmp3NxcLVq0iBk0BkyoucZMGrjKbrcrKytLbW1tysrKIqBhBK5JA4DBCGkAMBghDQAGI6QBwGCENAAYjJAGAIMR0gBgMEIaAAxGSAOAwQhpADAYIQ0ABiOkAcBghDQAGIyQBgCDEdIAYDBCGgAMRkgDgMEIaQAwGCENAAYjpAHAYIQ0ABiMkAYAgxHSAGAwQhoADEZIA4DBCGkAMBghDQAGI6QBwGBhh/T+/fu1bNkyTZw4UTabTWVlZdftv2/fPtlstmv+mpqabrRmAIgaYYd0W1ubZsyYoS1btoQ17siRI2psbAz+jR8/PtxDA0DUGRHugNzcXOXm5oZ9oPHjx2v06NFhjwOAaBZ2SN+omTNnqqOjQ9OnT9cLL7ygBx54oNe+HR0d6ujoCG77fD5Jkt/vl9/vH/BaEb26zi/OMwy0UM+xAQ/p5ORkvfzyy7r33nvV0dGhV155RQsXLtR//dd/afbs2T2OKS4u1oYNG65p3717t+Li4ga6ZEAejyfSJWCYu3jxYk
j9bJZlWTd6EJvNpp07d8rlcoU1LisrS5MnT9avfvWrHvf3NJNOSUnRmTNnlJCQcKPlAn3y+/3yeDzKzs6Ww+GIdDk
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWUAAAF3CAYAAAB9m1orAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAuIklEQVR4nO3de1RU570+8GcYZoY7VjSDVBAqRoxijdboeI2GS9WmUCCNadJjE9OkXo+gIWKNHlxGkoiXo1FyjB5NPNUmUiSJooWgIRjBKNZUjkdFQqIGGa8wCDoMzPz+8De7ThgTBmfY29nPZy1WmHe/s+fr6uZZb797z94Ki8ViARERSYKH2AUQEdG/MJSJiCSEoUxEJCEMZSIiCWEoExFJCEOZiEhCGMpERBLCUCYikhCGMhGRhHiKXcD3mc1m1NbWwt/fHwqFQuxyiIjum8ViQWNjI0JCQuDh8SNrYYsDWltbLYsXL7aEh4dbvLy8LD/72c8sy5Yts5jNZmGO2Wy2vPbaa5bg4GCLl5eX5YknnrCcPXu2w59x4cIFCwD+8Ic//HG7nwsXLvxoBjq0Un7zzTeRk5OD9957DwMHDsSxY8fw/PPPIzAwEHPnzgUAvPXWW1i3bh3ee+89RERE4LXXXkN8fDxOnToFLy+vH/0Mf39/AMCFCxcQEBDgSHlEDjGZTCgsLERcXBxUKpXY5ZAbMxgMCA0NFfLthzgUyocPH0ZCQgKmTJkCAAgPD8fOnTvx5ZdfAgAsFgvWrl2LxYsXIyEhAQDw/vvvQ6vVIj8/H1OnTv3Rz7C2LAICAhjK5FImkwk+Pj4ICAhgKFOX6EhL1qFQHjVqFDZt2oSzZ8/i4YcfxldffYVDhw5h9erVAICamhrU1dUhJiZGeE9gYCBGjBiBsrIyu6FsNBphNBqF1waDAcCdPxiTyeRIeUQOsR5fPM7I1Rw5xhwK5YULF8JgMCAqKgpKpRJtbW14/fXX8eyzzwIA6urqAABardbmfVqtVtj2fVlZWcjMzGw3XlhYCB8fH0fKI+qUoqIisUsgN9fc3NzhuQ6F8ocffoi//OUv2LFjBwYOHIgTJ05g3rx5CAkJwbRp0xwuFAAyMjKQlpYmvLb2XuLi4ti+IJcymUwoKipCbGws2xfkUtYOQEc4FMqvvPIKFi5cKLQhoqOj8e233yIrKwvTpk1DcHAwAECv16NXr17C+/R6PYYMGWJ3nxqNBhqNpt24SqXiHwp1CR5r5GqOHF8OfXmkubm53TV2SqUSZrMZABAREYHg4GAUFxcL2w0GA44cOQKdTufIRxERyZJDK+Unn3wSr7/+OsLCwjBw4ED84x//wOrVq/HCCy8AuHNmcd68eVi+fDn69esnXBIXEhKCxMREV9RPRORWHArl9evX47XXXsPMmTNx+fJlhISE4OWXX8aSJUuEOenp6WhqasJLL72E+vp6jBkzBvv37+/QNcpERHKnkNqDUw0GAwIDA9HQ0MATfeQybW1tOHjwIPbt24dJkyZhwoQJUCqVYpdFbsqRXOMNiUh28vLyEBkZidjYWKxevRqxsbGIjIxEXl6e2KURMZRJXvLy8pCSkoLo6GiUlpZi586dKC0tRXR0NFJSUhjMJDq2L0g22traEBkZiejoaOTn56OtrQ0FBQWYPHkylEolEhMTUVlZiaqqKrYyyKnYviCyo7S0FN988w0WLVrU7tJODw8PZGRkoKamBqWlpSJVSMRQJhm5dOkSAGDQoEF2t1vHrfOIxMBQJtmwfsu0srLS7nbr+N3fRiXqagxlko2xY8ciPDwcK1asEL6FamU2m5GVlYWIiAiMHTtWpAqJGMokI0qlEqtWrcKePXuQmJiI8vJy3Lp1C+Xl5UhMTMSePXuQnZ3Nk3wkKsk9o4/IlZKSkpCbm4v58+dj3LhxwnhERARyc3ORlJQkYnVEvCSOZIrf6KOu5EiucaVMsqRUKjF+/Hg0NTVh/PjxDGSSDPaUiYgkhK
FMRCQhDGUiIglhKBMRSQhDmYhIQhjKREQSwlAmIpIQhjIRkYQwlImIJIShTEQkIQxlIiIJYSgTEUkIQ5mISEIYykREEsJQJiKSEIYyEZGEMJSJiCTEoVAODw+HQqFo9zNr1iwAwO3btzFr1iwEBQXBz88PycnJ0Ov1LimciMgdORTKR48exaVLl4SfoqIiAMBTTz0FAEhNTcUnn3yCXbt2oaSkBLW1tXwQJRGRAxx6Rl/Pnj1tXr/xxhvo27cvxo8fj4aGBmzZsgU7duzAxIkTAQBbt27FgAEDUF5ejpEjRzqvaiIiN9XpB6e2tLTgf/7nf5CWlgaFQoGKigqYTCbExMQIc6KiohAWFoaysrJ7hrLRaITRaBReGwwGAIDJZILJZOpseUQ/ynp88TgjV3PkGOt0KOfn56O+vh5/+MMfAAB1dXVQq9Xo1q2bzTytVou6urp77icrKwuZmZntxgsLC+Hj49PZ8og6zNqGI3KV5ubmDs/tdChv2bIFkyZNQkhISGd3AQDIyMhAWlqa8NpgMCA0NBRxcXEICAi4r30T/RCTyYSioiLExsZCpVKJXQ65MWsHoCM6FcrffvstPv30U+Tl5QljwcHBaGlpQX19vc1qWa/XIzg4+J770mg00Gg07cZVKhX/UKhL8FgjV3Pk+OrUdcpbt27FQw89hClTpghjw4YNg0qlQnFxsTB25swZnD9/HjqdrjMfQ0QkOw6vlM1mM7Zu3Ypp06bB0/Nfbw8MDMT06dORlpaG7t27IyAgAHPmzIFOp+OVF0REHeRwKH/66ac4f/48XnjhhXbb1qxZAw8PDyQnJ8NoNCI+Ph4bN250SqFERHKgsFgsFrGLuJvBYEBgYCAaGhp4oo9c5tatW0hLSxOuoV+9ejW8vb3FLovclCO5xlAm2UlMTMRHH33UbjwhIQH5+fldXxC5PUdyjTckIlmxBrJarUZ6ejpycnKQnp4OtVqNjz76CImJiWKXSDLHlTLJxq1bt+Dj4wO1Wo3GxkYoFAoUFBRg8uTJsFgs8Pf3R0tLC5qbm9nKIKfiSpnIjldeeQUAkJaWBrVabbNNrVZj3rx5NvOIxMBQJtmoqqoCALz44ot2t0+fPt1mHpEYGMokG/369QMAbN682e72LVu22MwjEgN7yiQb7CmTWNhTJrLD29sbCQkJaGlpgb+/PxYtWoTvvvsOixYtEgI5ISGBgUyi4kqZZIfXKVNX40qZ6Afk5+ejubkZf/rTnzBkyBD86U9/QnNzMwOZJKHT91MmepB5e3tj3bp1Qk+Zt+4kqeBKmYhIQhjKREQSwlAmIpIQhjIRkYQwlEmW2traUFJSgs8//xwlJSVoa2sTuyQiAAxlkqG8vDxERkYiNjYWq1evRmxsLCIjI20eBEwkFoYyyUpeXh5SUlIQHR2N0tJS7Ny5E6WlpYiOjkZKSgqDmUTHb/SRbLS1tSEyMhLR0dHIz89HW1ubcJ2yUqlEYmIiKisrUVVVBaVSKXa55Eb4jT4iO0pLS/HNN99g0aJF8PCwPfQ9PDyQkZGBmpoalJaWilQhEUOZZOTSpUsAgEGDBtndbh23ziMSA0OZZKNXr14AgMrKSrvbrePWeURiYCiTbIwdOxbh4eFYsWIFzGazzTaz2YysrCxERERg7NixIlVIxFAmGVEqlVi1ahX27NmDxMRElJeX49atWygvL0diYiL27NmD7OxsnuQjUfEucSQrSUlJyM3Nxfz58zFu3DhhPCIiArm5uUhKShKxOiJeEkcy1dbWhoMHD2Lfvn2YNGkSJkyYwBUyuYwjucaVMsmSUqnE+PHj0dTUhPHjxzOQSTLYUyYikhCHQ/m7777Dc889h6CgIHh7eyM6OhrHjh0TtlssFixZsgS9evWCt7c3YmJiUFVV5dSiiYjclUOhfOPGDYwePRoqlQr79u3DqVOnsGrVKvzkJz8R5rz11ltYt24d3nnnHRw5cgS+vr6Ij4/H7du3nV48EZG7cain/OabbyI0NB
Rbt24VxiIiIoTfLRYL1q5di8WLFyMhIQEA8P7770Or1SI/Px9Tp051UtlERO7JoVD++OOPER8fj6eeegolJSX46U9
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"numeric_features_with_outliers = [\n",
"    \"Price\",\n",
"    \"Levy\",\n",
"    \"Mileage\",\n",
"    \"Age\",\n",
"]\n",
"\n",
"# Draw all box plots in one figure, one row per feature. Opening a separate\n",
"# 4x30-inch figure per feature and filling a single slot of a 6-row grid\n",
"# produced very tall, mostly empty images.\n",
"n_features = len(numeric_features_with_outliers)\n",
"fig, axes = plt.subplots(\n",
"    nrows=n_features,\n",
"    ncols=1,\n",
"    figsize=(4, 5 * n_features),  # about 5 in per row, the height one grid slot had\n",
"    squeeze=False,  # always a 2-D array, even with a single feature\n",
")\n",
"for ax, col in zip(axes.ravel(), numeric_features_with_outliers):\n",
"    df.boxplot(column=col, ax=ax)\n",
"fig.tight_layout()"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1077,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество строк до удаления выбросов: 14801\n",
"Количество строк после удаления выбросов: 12597\n"
]
}
],
"source": [
"def remove_outliers(df, column, iqr_factor=1.5):\n",
"    \"\"\"Return the rows of ``df`` whose ``column`` value lies within the IQR fences.\n",
"\n",
"    The fences are ``Q1 - iqr_factor * IQR`` and ``Q3 + iqr_factor * IQR``, where\n",
"    ``Q1``/``Q3`` are the 25th/75th percentiles of ``df[column]`` and\n",
"    ``IQR = Q3 - Q1``. Values equal to a fence are kept; rows where ``column``\n",
"    is NaN are dropped because they never test as inside the interval.\n",
"\n",
"    Args:\n",
"        df: Source DataFrame; it is not modified.\n",
"        column: Name of the numeric column to filter on.\n",
"        iqr_factor: Fence width in multiples of the IQR (1.5 is Tukey's rule).\n",
"\n",
"    Returns:\n",
"        The filtered DataFrame, keeping the original index labels.\n",
"    \"\"\"\n",
"    q1, q3 = df[column].quantile([0.25, 0.75])\n",
"    iqr = q3 - q1\n",
"    lower_bound = q1 - iqr_factor * iqr\n",
"    upper_bound = q3 + iqr_factor * iqr\n",
"    # between() is inclusive on both ends, i.e. lower_bound <= value <= upper_bound.\n",
"    return df[df[column].between(lower_bound, upper_bound)]\n",
"\n",
"print(f\"Количество строк до удаления выбросов: {len(df)}\")\n",
"\n",
"# Trim one feature at a time: each pass works on the rows that survived the\n",
"# previous passes, so later quartiles are computed on already-filtered data.\n",
"for column in numeric_features_with_outliers:\n",
"    df = remove_outliers(df=df, column=column)\n",
"\n",
"print(f\"Количество строк после удаления выбросов: {len(df)}\")"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1078,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAX8AAAF1CAYAAAAA3+oBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAhoUlEQVR4nO3df3RU9YH38U9+zEwSwgQQSMySIIoSEQQJJMyp1rLGRJv2LErPYuvZQ1utShMkxkXMc7pQetwnrW5P8UfA7eNZ4zl9WljcR7siItkg8bAMEoKp4Wd/BcHiBBCSIfyYTJLv80c3t47hRxIySZPv+3UOB+fe73znOznDm8vN9SbGGGMEALBK7GAvAAAw8Ig/AFiI+AOAhYg/AFiI+AOAhYg/AFiI+AOAheIHewGDqbOzU8eOHdPIkSMVExMz2MsBgKtmjNGZM2eUnp6u2NhLH99bHf9jx44pIyNjsJcBAP3u6NGjmjBhwiX3Wx3/kSNHSvrzF8nr9Q7yajCchcNhbdmyRfn5+XK5XIO9HAxjwWBQGRkZTt8uxer4d53q8Xq9xB9RFQ6HlZSUJK/XS/wxIK50Kptv+AKAhYg/AFiI+AOAhYg/AFiI+AOAhYg/AFiI+AOAhYg/EGWtra1asGCBli5dqgULFqi1tXWwlwTY/T95AdGWk5Oj2tpa5/HHH3+skSNHas6cOdq1a9cgrgy248gfiJIvhv/zamtrlZOTM8ArAv6C+ANR0Nraesnwd6mtreUUEAYN8QeiYOHChf06DuhvxB+Igvfee69fxwH9jfgDUXD+/Pl+HQf0N+IPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgIeIPABYi/gBgofjBXgAwFJ07d04HDx7sl7n27Nlz0e1ZWVlKSkrql9cAvoj4A31w8OBBZWdn98tcl5qnrq5Os2bN6pfXAL6I+AN9kJWVpbq6ukvuP3XqlO6+++4rzlNVVaUxY8Zc8jWAaCH+QB8kJSVd8ag8NTVVTU1Nl92fl5fX30sDeoRv+AJREggElJqaetF9qampCgQCA7wi4C+IPxBFgUBAn332mW64KUsxCSN1w01Z+uyzzwg/Bh3xB6JszJgxen3LdmUu/ZVe37L9kuf4gYFE/AHAQsQfACxE/AHAQsQfACxE/AHAQsQfACxE/AHAQsQfACx0VfH/8Y9/rJiYGJWUlDjbLly4oKKiIl1zzTVKTk7WggULut3f5MiRIyosLFRSUpLGjx+vZcuWqb29PWLMtm3bNGvWLHk8Hk2ePFmVlZXdXr+iokLXXXedEhISlJubq127dl3N2wEAa/Q5/rW1tfrXf/1X3XrrrRHbn3jiCb311lvasGGDampqdOzYMd1///3O/o6ODhUWFqqtrU07duzQa6+9psrKSq1YscIZ09jYqMLCQs2bN0/19fUqKSnRww8/rHfffdcZs379epWWlmrlypXas2ePZsyYoYKCAh0/fryvbwkArBFjjDG9fVJra6tmzZqlNWvW6JlnntHMmTO1evVqtbS0aNy4cfrlL3+pb3zjG5L+fN/zm2++WX6/X3PnztU777yjr33tazp27Jhz06uXX35Zy5cv14kTJ+R2u7V8+XK9/fbb2rt3r/OaDzzwgJqbm7V582ZJUm5urubMmaOXXnpJktTZ2amMjAwtWbJETz/99EXXHQqFFAqFnMfBYFAZGRk6efKkvF5vb78MQI/95sgpfeP/7Nbr35utGZnc3gHREwwGNXbsWLW0tFy2a326pXNRUZEKCwuVl5enZ555xtleV1encDgccZvarKwsZWZmOvH3+/2aPn16xN0OCwoKtHjxYu3bt0+33Xab/H5/t1vdFhQUOKeX2traVFdXp7KyMm
d/bGys8vLy5Pf7L7nu8vJyrVq1qtv2LVu28BOTEFVHWyUpXjt37tSf9l5pNNB3586d69G4Xsd/3bp12rNnj2pra7vtCwQCcrvdGjVqVMT2z9++9mK3ue16fKUxwWBQ58+f1+nTp9XR0XHRMZf70XplZWUqLS11Hncd+efn53Pkj6j6zZFTUsNuzZ07lyN/RFUwGOzRuF7F/+jRo1q6dKmqqqqUkJDQp4UNJo/HI4/H0227y+WSy+UahBXBFvHx8c7vfNYQTT39fPXqG751dXU6fvy4Zs2apfj4eMXHx6umpkYvvPCC4uPjlZqaqra2NjU3N0c8r6mpSWlpaZKktLS0blf/dD2+0hiv16vExESNHTtWcXFxFx3TNQcA4NJ6Ff+77rpLDQ0Nqq+vd37Nnj1bDz74oPPfLpdL1dXVznMOHTqkI0eOyOfzSZJ8Pp8aGhoirsqpqqqS1+vV1KlTnTGfn6NrTNccbrdb2dnZEWM6OztVXV3tjAEAXIa5SnfeeadZunSp8/ixxx4zmZmZZuvWrWb37t3G5/MZn8/n7G9vbzfTpk0z+fn5pr6+3mzevNmMGzfOlJWVOWP++Mc/mqSkJLNs2TJz4MABU1FRYeLi4szmzZudMevWrTMej8dUVlaa/fv3m0ceecSMGjXKBAKBHq+9paXFSDItLS1X90UAruDDwyfNxOUbzYeHTw72UjDM9bRr/f4D3H/2s58pNjZWCxYsUCgUUkFBgdasWePsj4uL08aNG7V48WL5fD6NGDFCixYt0o9+9CNnzKRJk/T222/riSee0PPPP68JEybolVdeUUFBgTNm4cKFOnHihFasWKFAIKCZM2dq8+bNl/yZqQCAv+jTdf7DRTAYVEpKyhWvhwWuVv3Hn2n+2p16c/FczZx4zWAvB8NYT7vGvX0AwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAsRPwBwELEHwAs1Kv4r127Vrfeequ8Xq+8Xq98Pp/eeecdZ/+FCxdUVFSka665RsnJyVqwYIGampoi5jhy5IgKCwuVlJSk8ePHa9myZWpvb48Ys23bNs2aNUsej0eTJ09WZWVlt7VUVFTouuuuU0JCgnJzc7Vr167evBUAsFqv4j9hwgT9+Mc/Vl1dnXbv3q2//du/1d/93d9p3759kqQnnnhCb731ljZs2KCamhodO3ZM999/v/P8jo4OFRYWqq2tTTt27NBrr72myspKrVixwhnT2NiowsJCzZs3T/X19SopKdHDDz+sd9991xmzfv16lZaWauXKldqzZ49mzJihgoICHT9+/Gq/HgBgB3OVRo8ebV555RXT3NxsXC6X2bBhg7PvwIEDRpLx+/3GGGM2bdpkYmNjTSAQcMasXbvWeL1eEwqFjDHGPPXUU+aWW26JeI2FCxeagoIC53FOTo4pKipyHnd0dJj09HRTXl7eq7W3tLQYSaalpaVXzwN668PDJ83E5RvNh4dPDvZSMMz1tGvxff1Lo6OjQxs2bNDZs2fl8/lUV1encDisvLw8Z0xWVpYyMzPl9/s1d+5c+f1+TZ8+Xampqc6YgoICLV68WPv27dNtt90mv98fMUfXmJKSEklSW1ub6urqVFZW5uyPjY1VXl6e/H7/ZdccCoUUCoWcx8FgUJIUDocVDof7+qUArqjr1GZ7ezufNURVTz9fvY5/Q0ODfD6fLly4oOTkZL3xxhuaOnWq6uvr5Xa7NWrUqIjxqampCgQCkqRAIB
AR/q79XfsuNyYYDOr8+fM6ffq0Ojo6Ljrm4MGDl117eXm5Vq1a1W37li1blJSUdOU3D/TR0VZJitfOnTv1p72DvRo
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXYAAAF1CAYAAAD8ysHLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAc3klEQVR4nO3de3BU9eH38U8umyUhbEKAZImGgPqUACI3LcmIjkBMiJQRcaxcqggRpzTxN5hWKf1ZheoYRR8vraBjW6F/gCJ1tC1YmsjFVNiARIMhKhUGDAqbCDFZQmDZJef5wydb14BsYJfAl/drJhP2nO+e/R7n8M7x7GETZVmWJQCAMaK7egIAgPAi7ABgGMIOAIYh7ABgGMIOAIYh7ABgGMIOAIaJ7eoJREpbW5sOHDigHj16KCoqqqunAwDnzLIsHTlyROnp6YqOPv15ubFhP3DggDIyMrp6GgAQdvv379fll19+2vXGhr1Hjx6Svv0P4HA4ung2MJnP51NZWZny8vJks9m6ejowmMfjUUZGRqBvp2Ns2NsvvzgcDsKOiPL5fEpISJDD4SDsOC/OdHmZN08BwDCEHQAMQ9gBwDCEHQAMQ9gBwDCEHQAMQ9gBwDCEHQAMQ9gBwDCEHQAMQ9gBwDDGflYMcLZaW1v12WefhTy+5ZhXW2r2qGfv7UqMt4f0nKysLCUkJJztFIEfRNiB7/nss880atSoTj9vcSfGVlVVaeTIkZ1+DSAUhB34nqysLFVVVYU8ftfBJpWsrtGzdwzVwL7JIb8GECmEHfiehISETp1NR39xWPZ/H9Ogq4dpeGavCM4MCA1vngKAYQg7ABiGsAOAYQg7ABiGsAOAYToV9tLSUl133XXq0aOHUlNTNXnyZO3atStozPHjx1VUVKRevXopMTFRt99+u+rr64PG1NXVaeLEiUpISFBqaqoefPBB+f3+oDGbNm3SyJEjZbfbddVVV2n58uVnt4cAcInpVNjfe+89FRUVqbKyUuXl5fL5fMrLy9PRo0cDYx544AH94x//0OrVq/Xee+/pwIEDmjJlSmD9yZMnNXHiRJ04cUJbtmzRX/7yFy1fvlyPPPJIYMzevXs1ceJEjR07VtXV1Zo3b57uvfde/etf/wrDLgOA4axz0NDQYEmy3nvvPcuyLKupqcmy2WzW6tWrA2M+/fRTS5Llcrksy7Ksd955x4qOjrbcbndgzEsvvWQ5HA7L6/ValmVZDz30kDVkyJCg17rzzjut/Pz8kOfW3NxsSbKam5vPev+AUHy075CVOX+N9dG+Q109FRgu1K6d0z9Qam5uliSlpKRI+vafSft8PuXm5gbGZGVlqV+/fnK5XMrOzpbL5dLQoUOVlpYWGJOfn6+5c+eqtrZWI0aMkMvlCtpG+5h58+addi5er1derzfw2OPxSJJ8Pp98Pt+57Cbwg9ovI/r9fo41RFSox9dZh72trU3z5s3T9ddfr6uvvlqS5Ha7FRcXp+Tk5KCxaWlpcrvdgTHfjXr7+vZ1PzTG4/Ho2LFjio+P7zCf0tJSLVq0qMPysrIyPmwJEbW/RZJiVVlZqa92dvVsYLLW1taQxp112IuKirRz5069//77Z7uJsFqwYIFKSkoCjz0ejzIyMpSXlyeHw9GFM4PpdtQ1SjXblZ2drWH9Urp6OjBY+5WIMzmrsBcXF2vNmjWqqKjQ5ZdfHljudDp14sQJNTU1BZ2119fXy+l0BsZs27YtaHvtd818d8z376Spr6+Xw+E45dm6JNntdtntHT8y1WazyWazdX4ngRDFxsYGvnOsIZJCPb46dVeMZVkqLi7WW2+9pQ0bNmjAgAFB60eNGiWbzab169cHlu3atUt1dXXKycmRJOXk5KimpkYNDQ2BMeXl5XI4HBo8eHBgzHe30T6mfRsAgNPr1Bl7UVGRVq5cqb/97W/q0aNH4Jp4UlKS4uPjlZSUpMLCQpWUlCglJUUOh0P333+/cnJylJ2dLUnKy8
vT4MGDddddd2nx4sVyu916+OGHVVRUFDjj/vnPf64XX3xRDz30kGbPnq0NGzbojTfe0Nq1a8O8+wBgoM7caiPplF/Lli0LjDl27Jj1i1/8wurZs6eVkJBg3XbbbdbBgweDtrNv3z6roKDAio+Pt3r37m398pe/tHw+X9CYjRs3WsOHD7fi4uKsK664Iug1QsHtjjhfuN0R50uoXYuyLMvquh8rkePxeJSUlKTm5mbePEVEVX9xWJNfqtTbc7P5PHZEVKhd47NiAMAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwhB0ADEPYAcAwnQ57RUWFJk2apPT0dEVFRentt98OWn/PPfcoKioq6GvChAlBYxobGzVjxgw5HA4lJyersLBQLS0tQWM+/vhj3XDDDerWrZsyMjK0ePHizu8dAFyCOh32o0ePatiwYVqyZMlpx0yYMEEHDx4MfL322mtB62fMmKHa2lqVl5drzZo1qqio0H333RdY7/F4lJeXp8zMTFVVVenpp5/WwoUL9corr3R2ugBwyYnt7BMKCgpUUFDwg2PsdrucTucp13366adat26dPvjgA1177bWSpD/84Q+65ZZb9Mwzzyg9PV0rVqzQiRMn9OqrryouLk5DhgxRdXW1nn322aAfAACAjjod9lBs2rRJqamp6tmzp8aNG6fHH39cvXr1kiS5XC4lJycHoi5Jubm5io6O1tatW3XbbbfJ5XLpxhtvVFxcXGBMfn6+nnrqKX3zzTfq2bNnh9f0er3yer2Bxx6PR5Lk8/nk8/kisZuAJMnv9we+c6whkkI9vsIe9gkTJmjKlCkaMGCA9uzZo9/85jcqKCiQy+VSTEyM3G63UlNTgycRG6uUlBS53W5Jktvt1oABA4LGpKWlBdadKuylpaVatGhRh+VlZWVKSEgI1+4BHexvkaRYVVZW6qudXT0bmKy1tTWkcWEP+9SpUwN/Hjp0qK655hpdeeWV2rRpk8aPHx/ulwtYsGCBSkpKAo89Ho8yMjKUl5cnh8MRsdcFdtQ1SjXblZ2drWH9Urp6OjBY+5WIM4nIpZjvuuKKK9S7d2/t3r1b48ePl9PpVENDQ9AYv9+vxsbGwHV5p9Op+vr6oDHtj0937d5ut8tut3dYbrPZZLPZwrErwCnFxsYGvnOsIZJCPb4ifh/7l19+qcOHD6tv376SpJycHDU1NamqqiowZsOGDWpra9Po0aMDYyoqKoKuJ5WXl2vgwIGnvAwDAPivToe9paVF1dXVqq6uliTt3btX1dXVqqurU0tLix588EFVVlZq3759Wr9+vW699VZdddVVys/PlyQNGjRIEyZM0Jw5c7Rt2zZt3rxZxcXFmjp1qtLT0yVJ06dPV1xcnAoLC1VbW6tVq1bphRdeCLrUAgA4DauTNm7caEnq8DVz5kyrtbXVysvLs/r06WPZbDYrMzPTmjNnjuV2u4O2cfjwYWvatGlWYmKi5XA4rFmzZllHjhwJGrNjxw5rzJgxlt1uty677DLrySef7NQ8m5ubLUlWc3NzZ3cR6JSP9h2yMuevsT7ad6irpwLDhdq1KMuyrC78uRIxHo9HSUlJam5u5s1TRFT1F4c1+aVKvT03W8Mze3X1dGCwULvGZ8UAgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOw
AYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrADgGEIOwAYhrA
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAF1CAYAAADyT33hAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAzhUlEQVR4nO3de1hTd7oH+i+XJJBiAAG5tIAXdrVe6oVWzIy1tVKiZXy8tHu07XPGtiqjxdmlWKrMPvUyu+fQsdO9p3UYOz372eI+01ZLq52pVQqDgtsatKBRRGRXhdJWArVKIgIhwHv+6GE9RpaaKBGl38/z5MG1fm9+602M62uyFlk+IiIgIiK6gm9/N0BERLcnBgQREaliQBARkSoGBBERqWJAEBGRKgYEERGpYkAQEZEq//5u4HbW3d2Ns2fPYtCgQfDx8envdoiIbpqI4OLFi4iJiYGv77XfIzAgruHs2bOIjY3t7zaIiPrcN998g3vuueeaNQyIaxg0aBCAH59Ig8HQz93QQOZ0OlFYWIiUlBRoNJr+bocGMLvdjtjYWGX/di0MiGvo+VjJYDAwIMirnE4n9Ho9DAYDA4JuCXc+NudBaiIiUsWAICIiVQwIIiJSxYAgIiJVDAgiIlLFgCAiIlUeBcSmTZtw//33K6d9Go1G7N69Wxl/5JFH4OPj43JbtmyZyxz19fVITU2FXq/HkCFDkJWVhc7OTpeakpISTJo0CTqdDgkJCcjLy+vVS25uLoYOHYqAgAAkJSXh0KFDLuPt7e1IT09HWFgYgoKC8MQTT6CxsdGTh0tE9JPmUUDcc889eP3111FRUYHy8nI8+uijmDNnDqqqqpSapUuXoqGhQblt2LBBGevq6kJqaio6Ojpw4MABbNmyBXl5eVizZo1SU1tbi9TUVEyfPh0WiwUZGRlYsmQJPv/8c6Vm27ZtyMzMxNq1a3H48GGMHz8eJpMJTU1NSs1LL72ETz/9FPn5+SgtLcXZs2cxf/78G3qSiLypq6sLpaWl2LdvH0pLS9HV1dXfLRH9SG5SaGio/Od//qeIiDz88MPy4osvXrV2165d4uvrK1arVVm3adMmMRgM4nA4RETklVdekTFjxrjcb8GCBWIymZTlyZMnS3p6urLc1dUlMTExkpOTIyIizc3NotFoJD8/X6mprq4WAGI2m91+bDabTQCIzWZz+z5Envj4449l6NChAkC5DR06VD7++OP+bo0GKE/2azf8m9RdXV3Iz8/HpUuXYDQalfXvvfce/vrXvyIqKgqzZ8/Gq6++Cr1eDwAwm80YN24cIiMjlXqTyYTly5ejqqoKEydOhNlsRnJyssu2TCYTMjIyAAAdHR2oqKhAdna2Mu7r64vk5GSYzWYAQEVFBZxOp8s8o0aNQlxcHMxmM6ZMmaL6mBwOBxwOh7Jst9sB/Phbrk6n80aeJqKr2rFjBxYuXAg/Pz+X9d9++y2efPJJbN26FfPmzeun7mig8mRf5nFAVFZWwmg0or29HUFBQdixYwdGjx4NAHj66acRHx+PmJgYHDt2DKtWrUJNTQ22b98OALBarS7hAEBZtlqt16yx2+1oa2vDhQsX0NXVpVpz8uRJZQ6tVouQkJBeNT3bUZOTk4P169f3Wl9YWKiEHFFf6OrqQlpaGkSk1zG4nuW0tDT4+/v3ChCim9Ha2up2rccBMXLkSFgsFthsNnz00UdYtGgRSktLMXr0aKSlpSl148aNQ3R0NGbMmIHTp09jxIgRnm7qlsvOzkZmZqay3POlVikpKfwuJupTe/fuhc1mu2aNzWZDUFAQpk+ffou6op+Cnk9G3OFxQGi1WiQkJAAAEhMT8eWXX+Ktt97CX/7yl161SUlJAIBTp05hxIgRiIqK6nW2Uc+ZRVFRUcrPK882amxshMFgQGBgIPz8/ODn56dac/kcHR0daG5udnkXcXmNGp1OB51O12u9RqPhF6hRn/rss8/crktJSfFyN/RT4sm+7K
Z/D6K7u9vlc/vLWSwWAEB0dDQAwGg0orKy0uVso6KiIhgMBuVjKqPRiOLiYpd5ioqKlOMcWq0WiYmJLjXd3d0oLi5WahITE6HRaFxqampqUF9f73K8hKi/bN68uU/riLzCk6Pfq1evltLSUqmtrZVjx47J6tWrxcfHRwoLC+XUqVPyu9/9TsrLy6W2tlb+9re/yfDhw2XatGnK/Ts7O2Xs2LGSkpIiFotFCgoKJCIiQrKzs5WaM2fOiF6vl6ysLKmurpbc3Fzx8/OTgoICpWbr1q2i0+kkLy9PTpw4IWlpaRISEuJydtSyZcskLi5O9uzZI+Xl5WI0GsVoNHrycHkWE3mNr6+vctZSWFiYDB8+XEJDQ2X48OESFhamjPn6+vZ3qzTAeLJf8yggnn/+eYmPjxetVisREREyY8YMKSwsFBGR+vp6mTZtmgwePFh0Op0kJCRIVlZWrybq6upk1qxZEhgYKOHh4bJy5UpxOp0uNXv37pUJEyaIVquV4cOHy+bNm3v1snHjRomLixOtViuTJ0+WsrIyl/G2tjZ54YUXJDQ0VPR6vcybN08aGho8ebgMCPIaPz8/l1Nbr3bz8/Pr71ZpgPFkv+YjInLL37bcIex2O4KDg2Gz2XiQmvpUWFgYzp8/f926wYMH44cffrgFHdFPhSf7NX4XE1E/iIiI6NM6Im9gQBD1A3fPRffknHWivsaAIOoHDAi6EzAgiPrB1U4Nv9E6Im9gQBD1gyu/XuNm64i8gQFB1A/4DoLuBAwIon7g7tnlPAud+hMDgoiIVDEgiIhIFQOCiIhUMSCIiEgVA4KoH7h7lTheTY76EwOCqB+oXZjqZuqIvMHjK8oRkXtaW1uV66TfjMOHD191bNSoUbxeOnkNA4LIS06ePInExMSbmqO1tfWac1RUVGDSpEk3tQ2iq2FAEHnJqFGjUFFRoTq2ZcsWvP3228qyv0aDzi6Bv58POp1OZf2//Mu/YNGiRdfcBpG38IJB18ALBpG3dHR0ICAg4Jq/Ke3j44P29nZotdpb2BkNdLxgENFtTqvV4uWXX75mzcsvv8xwoH7Fj5iI+smGDRsAAG+++Sa6u7uV9b6+vli5cqUyTtRf+BHTNfAjJroVOjo68K+vbcBfPjuIX6cm4f/6P1/hOwfyGk/2a3wHQdTPtFotnlm8HPkdE/HM4ikMB7pt8BgEERGpYkAQEZEqBgQREaliQBARkSoGBBERqWJAEBGRKgYEERGpYkAQEZEqBgQREaliQBARkSoGBBERqWJAEBGRKo8CYtOmTbj//vthMBhgMBhgNBqxe/duZby9vR3p6ekICwtDUFAQnnjiCTQ2NrrMUV9fj9TUVOj1egwZMgRZWVno7Ox0qSkpKcGkSZOg0+mQkJCAvLy8Xr3k5uZi6NChCAgIQFJSEg4dOuQy7k4vRER0dR4FxD333IPXX38dFRUVKC8vx6OPPoo5c+agqqoKAPDSSy/h008/RX5+PkpLS3H27FnMnz9fuX9XVxdSU1PR0dGBAwcOYMuWLcjLy8OaNWuUmtraWqSmpmL69OmwWCzIyMjAkiVL8Pnnnys127ZtQ2ZmJtauXYvDhw9j/PjxMJlMaGpqUmqu1wsREV3bTV8PYvDgwXjjjTfw5JNPIiIiAu+//z6efPJJAD9etP2+++6D2WzGlClTsHv3bvziF7/A2bNnERkZCQB45513sGrVKnz//ffQarVYtWoVPvvsMxw/flzZxsKFC9Hc3IyCggIAQFJSEh588EH86U9/AgB0d3cjNjYWv/nNb7B69WrYbLbr9qLG4XDA4XAoy3a7HbGxsTh37hyvB0FedbT+PJ78f8rx0dIHMD5ucH+3QwOY3W5HeHi4d68H0dXVhfz8fFy6dAlGoxEVFRVwOp1ITk5WakaNGoW4uDhlp2w2mzFu3DglHADAZDJh+fLlqKqqwsSJE2E2m13m6KnJyMgA8OPFVSoqKpCdna2M+/r6Ijk5GWazGQDc6kVNTk4O1q
9f32t9YWEh9Hq9508SkZu+aQEAf5SVleG749erJrpxra2tbtd6HBCVlZUwGo1ob29HUFAQduzYgdGjR8NisUCr1SI
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAWUAAAF1CAYAAAAwU/sgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAW5ElEQVR4nO3df2xV9f3H8deFe7lQaIul0NKvrWPgisrEiYBVWGDWFvRrxoRvkG/2DTr8kQU0Wox+0Q1Xx75N3DBkGUiy4NBvvjLUKNsM0VYmIFpUWAD5OirtugmDFlDb2x94e2rP9w+/vVtpgd5yTvvm3ucjafDec/o5H8LpM8dzzj0NuK7rCgBgwqCBngAA4B+IMgAYQpQBwBCiDACGEGUAMIQoA4AhRBkADAkO9ATO1NHRoWPHjik1NVWBQGCgpwMAF8x1XTU1NSknJ0eDBp37WNhclI8dO6bc3NyBngYAeO7IkSO69NJLz7mOuSinpqZK+mryaWlpAzwbJDLHcVReXq6ioiKFQqGBng4SWCQSUW5ubqxv52Iuyp2nLNLS0ogyfOU4jlJSUpSWlkaU0S96c0qWC30AYAhRBgBDiDIAGEKUAcAQogwAhhBlADCEKAOAIebuUwb6Q0/3i/Kb0WABR8pIOme7gZ9nrcACooykcr7wEmYMNKKMpPHPwR0yZIja2tq0ZcsWtbW1aciQIT2uB/Q3ooykFI1Gz/kaGChEGQAMIcoAYAhRRlIKh8PnfA0MFO5TRtJwXTd2Ee/Mi3tnrgcMFI6UkVTOF1yCjIFGlJF0zhZeggwLiDKSkuu6Xe5TJsiwgigDgCFEGQAMIcoAYAi3xCEp8ehOWBXXkXJZWZmmTp2q1NRUjRkzRvPmzVNVVVWXdb744gstXbpUo0aN0ogRIzR//nzV19d7OmngQvDoTlgWV5R37NihpUuXavfu3aqoqJDjOCoqKlJLS0tsnYceekh/+MMf9NJLL2nHjh06duyYbr/9ds8nDvQFj+6EdQH3Av6f7eTJkxozZox27Nihb3/722psbNTo0aP1wgsvaMGCBZKkQ4cO6YorrlBlZaWuv/76844ZiUSUnp6uxsZGpaWl9XVqQDdnBretrU1bt27VLbfc0u3TfZzKgJfi6doFnVNubGyUJGVkZEiS9u7dK8dxVFhYGFtn4sSJysvLO2uUo9Fol8cmRiIRSZLjOHIc50KmB5xVW1tbbP9yHKfbx67Z9+ClePanPke5o6NDDz74oG688UZNmjRJklRXV6chQ4Zo5MiRXdbNyspSXV1dj+OUlZWptLS02/vl5eVKSUnp6/SAc9q6dWvsvysqKs65HLhQra2tvV63z1FeunSpDh48qF27dvV1CEnSihUrVFJSEnsdiUSUm5uroqIiTl/AN7fccoscx1FFRYVuvvlmhUKhbssBr3SeAeiNPkV52bJleu2117Rz505deumlsfezs7PV1tamhoaGLkfL9fX1ys7O7nGscDjc42MTQ6FQtx8UwCudvw5K+mpfO/OcMvsevBTP/hRXlF3X1f33369XX31V27dv17hx47osnzJlikKhkLZt26b58+dLkqqqqvTJJ5+ooKAgnk0BnvvnR3dK4tGdMCmuKC9dulQvvPCCfve73yk1NTV2njg9PV3Dhg1Tenq6lixZopKSEmVkZCgtLU3333+/CgoKenXnBeC3M8Pc03JgIMV1S9zZdubf/OY3uvPOOyV99eGR5cuXa9OmTYpGoyouLta6devOevriTNwSh/7AJ/rQn+Lp2gXdp+wHooz+4jhO7D5lziHDT/F0jQcSAYAhRBkADOEpcUhKnFOGVRwpI+nwlDhYRpSRVHhKHKwjykgaPT0lrvMXp55rPaA/EWUkpTPPH3M+GVYQZQAwhCgDgCFEGUnpzPPGnEeGFdynjKTBU+JwMeBIGUnlfMElyBhoRBlJ52zhJciwgCgjKbmu2+U+ZYIMK4gyABhClAHAEKIMAIYQZQAwhC
gDgCFEGQAM4RN9SCitra06dOhQr9ZtPh3Vux/W6JLMPRoxLNzrbUycOFEpKSl9nSJwTkQZCeXQoUOaMmVKXN/zVJzb2Lt3r6699to4vwvoHaKMhDJx4kTt3bu3V+tWHW9QyUsf6ul/+6byx46MaxuAX4gyEkpKSkqvj2IH/e1Thd8+rSsmTdY1l43yeWZA73ChDwAMIcoAYAhRBgBDiDIAGEKUAcAQogwAhhBlADCEKAOAIUQZAAwhygBgCFEGAEOIMgAYQpQBwBCiDACGEGUAMIQoA4AhRBkADCHKAGAIUQYAQ4gyABhClAHAEKIMAIYQZQAwhCgDgCFEGQAMIcoAYAhRBgBDiDIAGEKUAcAQogwAhhBlADCEKAOAIUQZAAwhygBgCFEGAEOIMgAYEneUd+7cqdtuu005OTkKBALasmVLl+V33nmnAoFAl685c+Z4NV8ASGhxR7mlpUWTJ0/W2rVrz7rOnDlzdPz48djXpk2bLmiSAJAsgvF+w9y5czV37txzrhMOh5Wdnd3nSQFAsoo7yr2xfft2jRkzRpdccom+853vaNWqVRo1alSP60ajUUWj0djrSCQiSXIcR47j+DE9QJLU3t4e+5N9DX6KZ//yPMpz5szR7bffrnHjxqmmpkaPPfaY5s6dq8rKSg0ePLjb+mVlZSotLe32fnl5uVJSUryeHhBzpFmSgtq9e7f+fnCgZ4NE1tra2ut1A67run3dUCAQ0Kuvvqp58+addZ2//OUvGj9+vN58803ddNNN3Zb3dKScm5urU6dOKS0tra9TA85r/yefacGv9+jle67T5LyMgZ4OElgkElFmZqYaGxvP2zVfTl/8s69//evKzMxUdXV1j1EOh8MKh8Pd3g+FQgqFQn5PD0ksGAzG/mRfg5/i2b98v0/56NGj+vTTTzV27Fi/NwUAF724j5Sbm5tVXV0de11bW6t9+/YpIyNDGRkZKi0t1fz585Wdna2amho98sgjmjBhgoqLiz2dOAAkorijvGfPHs2ePTv2uqSkRJK0ePFiPfPMMzpw4ICee+45NTQ0KCcnR0VFRfrpT3/a4ykKAEBXcUd51qxZOte1wTfeeOOCJgQAyYxnXwCAIUQZAAwhygBgCFEGAEOIMgAYQpQBwBCiDACGEGUAMIQoA4AhRBkADCHKAGAIUQYAQ4gyABhClAHAEKIMAIYQZQAwhCgDgCFEGQAMIcoAYAhRBgBDiDIAGEKUAcAQogwAhhBlADCEKAOAIUQZAAwhygBgCFEGAEOIMgAYQpQBwBCiDACGEGUAMIQoA4AhRBkADCHKAGAIUQYAQ4gyABhClAHAEKIMAIYQZQAwhCgDgCFEGQAMIcoAYAhRBgBDiDIAGEKUAcAQogwAhhBlADCEKAOAIcGBngBwPrWnWtQSbfd83JqTLbE/g0F/fhSGh4Malzncl7GRmIgyTKs91aLZv9ju6zaWv/yhr+O/9fAswoxeI8owrfMIec3CazRhzAhvxz4d1WvbK/Wvswo0fFjY07ElqfpEsx7cvM+Xo3wkLqKMi8KEMSM06V/SPR3TcRzVjZauvewShUIhT8cG+ooLfQBgCFEGAEOIMgAYQpQBwBCiDACGEGUAMIQoA4AhRBkADIk7yjt37tRtt92mnJwcBQIBbdmypcty13W1cuVKjR07VsOGDVNhYaEOHz7s1XwBIKHFHeWWlhZNnjxZa9eu7XH5U089pV/+8pdav3693nvvPQ0fPlzFxcX64osvLniyAJDo4v6Y9dy5czV37twel7muqzVr1uhHP/qRvvvd70qSnn/+eWVlZWnLli264447un1PNBpVNBqNvY5EIpK++gis4zjxTg8Jpr29Pfan1/tD53h+7Wd+zh0Xl3j+/T199kVtba3q6upUWFgYey89PV3Tp09XZWVlj1EuKytTaWlpt/fLy8uVkpLi5fRwETrSLElB7dq1S3/z9nlEMRUVFb6M2x9zx8WhtbW11+t6GuW6ujpJUlZWVpf3s7
KyYsvOtGLFCpWUlMReRyIR5ebmqqioSGlpaV5ODxeh/z0W0S8+3K0ZM2boqhxv9wfHcVRRUaGbb77ZlwcS+Tl3XFw
"text/plain": [
"<Figure size 400x3000 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Re-draw the box plots after outlier removal in one figure, one row per\n",
"# feature, instead of one very tall, mostly empty figure per feature.\n",
"n_features = len(numeric_features_with_outliers)\n",
"fig, axes = plt.subplots(\n",
"    nrows=n_features,\n",
"    ncols=1,\n",
"    figsize=(4, 5 * n_features),  # about 5 in per row\n",
"    squeeze=False,  # always a 2-D array, even with a single feature\n",
")\n",
"for ax, col in zip(axes.ravel(), numeric_features_with_outliers):\n",
"    df.boxplot(column=col, ax=ax)\n",
"fig.tight_layout()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Разбиение на выборки"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1079,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размеры выборок:\n",
"Обучающая выборка: 10077 записей\n",
"Тестовая выборка: 2520 записей\n"
]
}
],
"source": [
"# Hold out 20% of the rows for testing. NOTE(review): assuming this is\n",
"# scikit-learn's train_test_split, random_state pins the shuffle so reruns\n",
"# give the same partition.\n",
"train_df, test_df = train_test_split(\n",
"    df,\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")\n",
"\n",
"print(\n",
"    \"Размеры выборок:\",\n",
"    f\"Обучающая выборка: {train_df.shape[0]} записей\",\n",
"    f\"Тестовая выборка: {test_df.shape[0]} записей\",\n",
"    sep=\"\\n\",\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Дискретизация числовых признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1080,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy int64\n",
"Manufacturer object\n",
"Model object\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
"Engine volume float64\n",
"Mileage int64\n",
"Cylinders int64\n",
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"Age int64\n",
"dtype: object"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1080,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.dtypes"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1081,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" <th>Age</th>\n",
" <th>Age_bin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>14829</th>\n",
" <td>6743</td>\n",
" <td>966</td>\n",
" <td>DAEWOO</td>\n",
" <td>Lacetti</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2.0</td>\n",
" <td>62227</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>4</td>\n",
" <td>11</td>\n",
" <td>Старый</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3632</th>\n",
" <td>20005</td>\n",
" <td>583</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Elantra</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.6</td>\n",
" <td>94479</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>Средний</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4982</th>\n",
" <td>13172</td>\n",
" <td>836</td>\n",
" <td>DODGE</td>\n",
" <td>Caliber</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>2.0</td>\n",
" <td>114000</td>\n",
" <td>4</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>10</td>\n",
" <td>Средний</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16758</th>\n",
" <td>8781</td>\n",
" <td>584</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Elantra</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.8</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>10</td>\n",
" <td>6</td>\n",
" <td>Средний</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6875</th>\n",
" <td>25086</td>\n",
" <td>0</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Hybrid</td>\n",
" <td>1.8</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>Новый</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18201</th>\n",
" <td>10349</td>\n",
" <td>0</td>\n",
" <td>AUDI</td>\n",
" <td>A4</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>150000</td>\n",
" <td>6</td>\n",
" <td>Manual</td>\n",
" <td>4x4</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>Старый</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7436</th>\n",
" <td>2038</td>\n",
" <td>765</td>\n",
" <td>KIA</td>\n",
" <td>Avella</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.0</td>\n",
" <td>125621</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>Новый</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7728</th>\n",
" <td>13485</td>\n",
" <td>843</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Hybrid</td>\n",
" <td>1.5</td>\n",
" <td>212000</td>\n",
" <td>4</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>12</td>\n",
" <td>Старый</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1136</th>\n",
" <td>15677</td>\n",
" <td>0</td>\n",
" <td>FORD</td>\n",
" <td>Fiesta</td>\n",
" <td>Sedan</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.6</td>\n",
" <td>74800</td>\n",
" <td>4</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>Новый</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10640</th>\n",
" <td>16308</td>\n",
" <td>751</td>\n",
" <td>KIA</td>\n",
" <td>Optima EX</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>92000</td>\n",
" <td>12</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>Четырехдверный</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>Средний</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10077 rows × 18 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Category Leather interior \\\n",
"14829 6743 966 DAEWOO Lacetti Sedan Yes \n",
"3632 20005 583 HYUNDAI Elantra Sedan Yes \n",
"4982 13172 836 DODGE Caliber Hatchback No \n",
"16758 8781 584 HYUNDAI Elantra Sedan Yes \n",
"6875 25086 0 TOYOTA Prius Hatchback No \n",
"... ... ... ... ... ... ... \n",
"18201 10349 0 AUDI A4 Sedan Yes \n",
"7436 2038 765 KIA Avella Sedan Yes \n",
"7728 13485 843 TOYOTA Prius Hatchback No \n",
"1136 15677 0 FORD Fiesta Sedan No \n",
"10640 16308 751 KIA Optima EX Sedan Yes \n",
"\n",
" Fuel type Engine volume Mileage Cylinders Gear box type Drive wheels \\\n",
"14829 Diesel 2.0 62227 4 Automatic Front \n",
"3632 Petrol 1.6 94479 4 Automatic Front \n",
"4982 Petrol 2.0 114000 4 Variator Front \n",
"16758 Petrol 1.8 60000 4 Tiptronic Front \n",
"6875 Hybrid 1.8 0 4 Automatic Front \n",
"... ... ... ... ... ... ... \n",
"18201 Petrol 2.4 150000 6 Manual 4x4 \n",
"7436 Petrol 2.0 125621 4 Automatic Front \n",
"7728 Hybrid 1.5 212000 4 Variator Front \n",
"1136 Petrol 1.6 74800 4 Automatic Front \n",
"10640 Petrol 2.4 92000 12 Tiptronic Front \n",
"\n",
" Doors Wheel Color Airbags Age Age_bin \n",
"14829 Четырехдверный Left wheel White 4 11 Старый \n",
"3632 Четырехдверный Left wheel Red 4 9 Средний \n",
"4982 Четырехдверный Left wheel Silver 8 10 Средний \n",
"16758 Четырехдверный Left wheel Grey 10 6 Средний \n",
"6875 Четырехдверный Left wheel Silver 12 5 Новый \n",
"... ... ... ... ... ... ... \n",
"18201 Четырехдверный Left wheel Grey 4 13 Старый \n",
"7436 Четырехдверный Left wheel Silver 12 5 Новый \n",
"7728 Четырехдверный Left wheel Silver 8 12 Старый \n",
"1136 Четырехдверный Left wheel Silver 8 4 Новый \n",
"10640 Четырехдверный Left wheel Silver 8 7 Средний \n",
"\n",
"[10077 rows x 18 columns]"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1081,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"numeric_features_for_discritization = [\"Age\"]\n",
"\n",
"def discretize_features(df, features, bins=4, labels=[\"Новый\", \"Средний\", \"Старый\", \"Очень старый\"]):\n",
" for feature in features:\n",
" try:\n",
" df[f\"{feature}_bin\"] = pd.cut(df[feature], bins=bins, labels=labels) # type: ignore\n",
" except Exception as e:\n",
" print(f\"Ошибка при дискретизации признака {feature}: {e}\")\n",
" return df\n",
"\n",
"\n",
"train_df = discretize_features(train_df, numeric_features_for_discritization)\n",
"test_df = discretize_features(test_df, numeric_features_for_discritization)\n",
"\n",
"train_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Унитарное кодирование категориальных признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1082,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy int64\n",
"Manufacturer object\n",
"Model object\n",
"Category object\n",
"Leather interior object\n",
"Fuel type object\n",
"Engine volume float64\n",
"Mileage int64\n",
"Cylinders int64\n",
"Gear box type object\n",
"Drive wheels object\n",
"Doors object\n",
"Wheel object\n",
"Color object\n",
"Airbags int64\n",
"Age int64\n",
"Age_bin category\n",
"dtype: object"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1082,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.dtypes"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1083,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" <th>Age</th>\n",
" <th>...</th>\n",
" <th>Drive wheels_Rear</th>\n",
" <th>Doors_Двухдверный</th>\n",
" <th>Doors_Многодверный</th>\n",
" <th>Doors_Четырехдверный</th>\n",
" <th>Wheel_Left wheel</th>\n",
" <th>Wheel_Right-hand drive</th>\n",
" <th>Age_bin_Новый</th>\n",
" <th>Age_bin_Средний</th>\n",
" <th>Age_bin_Старый</th>\n",
" <th>Age_bin_Очень старый</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>14829</th>\n",
" <td>6743</td>\n",
" <td>966</td>\n",
" <td>DAEWOO</td>\n",
" <td>Lacetti</td>\n",
" <td>2.0</td>\n",
" <td>62227</td>\n",
" <td>4</td>\n",
" <td>White</td>\n",
" <td>4</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3632</th>\n",
" <td>20005</td>\n",
" <td>583</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Elantra</td>\n",
" <td>1.6</td>\n",
" <td>94479</td>\n",
" <td>4</td>\n",
" <td>Red</td>\n",
" <td>4</td>\n",
" <td>9</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4982</th>\n",
" <td>13172</td>\n",
" <td>836</td>\n",
" <td>DODGE</td>\n",
" <td>Caliber</td>\n",
" <td>2.0</td>\n",
" <td>114000</td>\n",
" <td>4</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>10</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16758</th>\n",
" <td>8781</td>\n",
" <td>584</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Elantra</td>\n",
" <td>1.8</td>\n",
" <td>60000</td>\n",
" <td>4</td>\n",
" <td>Grey</td>\n",
" <td>10</td>\n",
" <td>6</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6875</th>\n",
" <td>25086</td>\n",
" <td>0</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>1.8</td>\n",
" <td>0</td>\n",
" <td>4</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18201</th>\n",
" <td>10349</td>\n",
" <td>0</td>\n",
" <td>AUDI</td>\n",
" <td>A4</td>\n",
" <td>2.4</td>\n",
" <td>150000</td>\n",
" <td>6</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7436</th>\n",
" <td>2038</td>\n",
" <td>765</td>\n",
" <td>KIA</td>\n",
" <td>Avella</td>\n",
" <td>2.0</td>\n",
" <td>125621</td>\n",
" <td>4</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" <td>5</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7728</th>\n",
" <td>13485</td>\n",
" <td>843</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>1.5</td>\n",
" <td>212000</td>\n",
" <td>4</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>12</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1136</th>\n",
" <td>15677</td>\n",
" <td>0</td>\n",
" <td>FORD</td>\n",
" <td>Fiesta</td>\n",
" <td>1.6</td>\n",
" <td>74800</td>\n",
" <td>4</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>4</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10640</th>\n",
" <td>16308</td>\n",
" <td>751</td>\n",
" <td>KIA</td>\n",
" <td>Optima EX</td>\n",
" <td>2.4</td>\n",
" <td>92000</td>\n",
" <td>12</td>\n",
" <td>Silver</td>\n",
" <td>8</td>\n",
" <td>7</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10077 rows × 46 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Engine volume Mileage Cylinders \\\n",
"14829 6743 966 DAEWOO Lacetti 2.0 62227 4 \n",
"3632 20005 583 HYUNDAI Elantra 1.6 94479 4 \n",
"4982 13172 836 DODGE Caliber 2.0 114000 4 \n",
"16758 8781 584 HYUNDAI Elantra 1.8 60000 4 \n",
"6875 25086 0 TOYOTA Prius 1.8 0 4 \n",
"... ... ... ... ... ... ... ... \n",
"18201 10349 0 AUDI A4 2.4 150000 6 \n",
"7436 2038 765 KIA Avella 2.0 125621 4 \n",
"7728 13485 843 TOYOTA Prius 1.5 212000 4 \n",
"1136 15677 0 FORD Fiesta 1.6 74800 4 \n",
"10640 16308 751 KIA Optima EX 2.4 92000 12 \n",
"\n",
" Color Airbags Age ... Drive wheels_Rear Doors_Двухдверный \\\n",
"14829 White 4 11 ... False False \n",
"3632 Red 4 9 ... False False \n",
"4982 Silver 8 10 ... False False \n",
"16758 Grey 10 6 ... False False \n",
"6875 Silver 12 5 ... False False \n",
"... ... ... ... ... ... ... \n",
"18201 Grey 4 13 ... False False \n",
"7436 Silver 12 5 ... False False \n",
"7728 Silver 8 12 ... False False \n",
"1136 Silver 8 4 ... False False \n",
"10640 Silver 8 7 ... False False \n",
"\n",
" Doors_Многодверный Doors_Четырехдверный Wheel_Left wheel \\\n",
"14829 False True True \n",
"3632 False True True \n",
"4982 False True True \n",
"16758 False True True \n",
"6875 False True True \n",
"... ... ... ... \n",
"18201 False True True \n",
"7436 False True True \n",
"7728 False True True \n",
"1136 False True True \n",
"10640 False True True \n",
"\n",
" Wheel_Right-hand drive Age_bin_Новый Age_bin_Средний Age_bin_Старый \\\n",
"14829 False False False True \n",
"3632 False False True False \n",
"4982 False False True False \n",
"16758 False False True False \n",
"6875 False True False False \n",
"... ... ... ... ... \n",
"18201 False False False True \n",
"7436 False True False False \n",
"7728 False False False True \n",
"1136 False True False False \n",
"10640 False False True False \n",
"\n",
" Age_bin_Очень старый \n",
"14829 False \n",
"3632 False \n",
"4982 False \n",
"16758 False \n",
"6875 False \n",
"... ... \n",
"18201 False \n",
"7436 False \n",
"7728 False \n",
"1136 False \n",
"10640 False \n",
"\n",
"[10077 rows x 46 columns]"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1083,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"categorical_features_for_encoding = [\n",
" \"Leather interior\",\n",
" \"Category\",\n",
" \"Fuel type\",\n",
" \"Gear box type\",\n",
" \"Drive wheels\",\n",
" \"Doors\",\n",
" \"Wheel\",\n",
" \"Age_bin\",\n",
"]\n",
"\n",
"train_df = pd.get_dummies(train_df, columns=categorical_features_for_encoding)\n",
"test_df = pd.get_dummies(test_df, columns=categorical_features_for_encoding)\n",
"\n",
"train_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Масштабирование признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1084,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Price int64\n",
"Levy int64\n",
"Manufacturer object\n",
"Model object\n",
"Engine volume float64\n",
"Mileage int64\n",
"Cylinders int64\n",
"Color object\n",
"Airbags int64\n",
"Age int64\n",
"Leather interior_No bool\n",
"Leather interior_Yes bool\n",
"Category_Cabriolet bool\n",
"Category_Coupe bool\n",
"Category_Goods wagon bool\n",
"Category_Hatchback bool\n",
"Category_Jeep bool\n",
"Category_Limousine bool\n",
"Category_Microbus bool\n",
"Category_Minivan bool\n",
"Category_Pickup bool\n",
"Category_Sedan bool\n",
"Category_Universal bool\n",
"Fuel type_CNG bool\n",
"Fuel type_Diesel bool\n",
"Fuel type_Hybrid bool\n",
"Fuel type_Hydrogen bool\n",
"Fuel type_LPG bool\n",
"Fuel type_Petrol bool\n",
"Fuel type_Plug-in Hybrid bool\n",
"Gear box type_Automatic bool\n",
"Gear box type_Manual bool\n",
"Gear box type_Tiptronic bool\n",
"Gear box type_Variator bool\n",
"Drive wheels_4x4 bool\n",
"Drive wheels_Front bool\n",
"Drive wheels_Rear bool\n",
"Doors_Двухдверный bool\n",
"Doors_Многодверный bool\n",
"Doors_Четырехдверный bool\n",
"Wheel_Left wheel bool\n",
"Wheel_Right-hand drive bool\n",
"Age_bin_Новый bool\n",
"Age_bin_Средний bool\n",
"Age_bin_Старый bool\n",
"Age_bin_Очень старый bool\n",
"dtype: object"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1084,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df.dtypes"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1085,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" <th>Age</th>\n",
" <th>...</th>\n",
" <th>Drive wheels_Rear</th>\n",
" <th>Doors_Двухдверный</th>\n",
" <th>Doors_Многодверный</th>\n",
" <th>Doors_Четырехдверный</th>\n",
" <th>Wheel_Left wheel</th>\n",
" <th>Wheel_Right-hand drive</th>\n",
" <th>Age_bin_Новый</th>\n",
" <th>Age_bin_Средний</th>\n",
" <th>Age_bin_Старый</th>\n",
" <th>Age_bin_Очень старый</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>14829</th>\n",
" <td>-0.936428</td>\n",
" <td>0.909873</td>\n",
" <td>DAEWOO</td>\n",
" <td>Lacetti</td>\n",
" <td>-0.212078</td>\n",
" <td>-0.855905</td>\n",
" <td>-0.399820</td>\n",
" <td>White</td>\n",
" <td>-0.681491</td>\n",
" <td>0.446831</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3632</th>\n",
" <td>0.288147</td>\n",
" <td>0.076376</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Elantra</td>\n",
" <td>-0.757467</td>\n",
" <td>-0.422001</td>\n",
" <td>-0.399820</td>\n",
" <td>Red</td>\n",
" <td>-0.681491</td>\n",
" <td>0.013523</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4982</th>\n",
" <td>-0.342793</td>\n",
" <td>0.626963</td>\n",
" <td>DODGE</td>\n",
" <td>Caliber</td>\n",
" <td>-0.212078</td>\n",
" <td>-0.159374</td>\n",
" <td>-0.399820</td>\n",
" <td>Silver</td>\n",
" <td>0.330763</td>\n",
" <td>0.230177</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>16758</th>\n",
" <td>-0.748245</td>\n",
" <td>0.078552</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Elantra</td>\n",
" <td>-0.484772</td>\n",
" <td>-0.885866</td>\n",
" <td>-0.399820</td>\n",
" <td>Grey</td>\n",
" <td>0.836890</td>\n",
" <td>-0.636438</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6875</th>\n",
" <td>0.757313</td>\n",
" <td>-1.192368</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>-0.484772</td>\n",
" <td>-1.693079</td>\n",
" <td>-0.399820</td>\n",
" <td>Silver</td>\n",
" <td>1.343017</td>\n",
" <td>-0.853091</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>18201</th>\n",
" <td>-0.603461</td>\n",
" <td>-1.192368</td>\n",
" <td>AUDI</td>\n",
" <td>A4</td>\n",
" <td>0.333312</td>\n",
" <td>0.324954</td>\n",
" <td>1.520116</td>\n",
" <td>Grey</td>\n",
" <td>-0.681491</td>\n",
" <td>0.880138</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7436</th>\n",
" <td>-1.370875</td>\n",
" <td>0.472450</td>\n",
" <td>KIA</td>\n",
" <td>Avella</td>\n",
" <td>-0.212078</td>\n",
" <td>-0.003030</td>\n",
" <td>-0.399820</td>\n",
" <td>Silver</td>\n",
" <td>1.343017</td>\n",
" <td>-0.853091</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7728</th>\n",
" <td>-0.313891</td>\n",
" <td>0.642196</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>-0.893814</td>\n",
" <td>1.159074</td>\n",
" <td>-0.399820</td>\n",
" <td>Silver</td>\n",
" <td>0.330763</td>\n",
" <td>0.663484</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1136</th>\n",
" <td>-0.111488</td>\n",
" <td>-1.192368</td>\n",
" <td>FORD</td>\n",
" <td>Fiesta</td>\n",
" <td>-0.757467</td>\n",
" <td>-0.686753</td>\n",
" <td>-0.399820</td>\n",
" <td>Silver</td>\n",
" <td>0.330763</td>\n",
" <td>-1.069745</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>10640</th>\n",
" <td>-0.053223</td>\n",
" <td>0.441983</td>\n",
" <td>KIA</td>\n",
" <td>Optima EX</td>\n",
" <td>0.333312</td>\n",
" <td>-0.455352</td>\n",
" <td>7.279922</td>\n",
" <td>Silver</td>\n",
" <td>0.330763</td>\n",
" <td>-0.419784</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>10077 rows × 46 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Engine volume Mileage \\\n",
"14829 -0.936428 0.909873 DAEWOO Lacetti -0.212078 -0.855905 \n",
"3632 0.288147 0.076376 HYUNDAI Elantra -0.757467 -0.422001 \n",
"4982 -0.342793 0.626963 DODGE Caliber -0.212078 -0.159374 \n",
"16758 -0.748245 0.078552 HYUNDAI Elantra -0.484772 -0.885866 \n",
"6875 0.757313 -1.192368 TOYOTA Prius -0.484772 -1.693079 \n",
"... ... ... ... ... ... ... \n",
"18201 -0.603461 -1.192368 AUDI A4 0.333312 0.324954 \n",
"7436 -1.370875 0.472450 KIA Avella -0.212078 -0.003030 \n",
"7728 -0.313891 0.642196 TOYOTA Prius -0.893814 1.159074 \n",
"1136 -0.111488 -1.192368 FORD Fiesta -0.757467 -0.686753 \n",
"10640 -0.053223 0.441983 KIA Optima EX 0.333312 -0.455352 \n",
"\n",
" Cylinders Color Airbags Age ... Drive wheels_Rear \\\n",
"14829 -0.399820 White -0.681491 0.446831 ... False \n",
"3632 -0.399820 Red -0.681491 0.013523 ... False \n",
"4982 -0.399820 Silver 0.330763 0.230177 ... False \n",
"16758 -0.399820 Grey 0.836890 -0.636438 ... False \n",
"6875 -0.399820 Silver 1.343017 -0.853091 ... False \n",
"... ... ... ... ... ... ... \n",
"18201 1.520116 Grey -0.681491 0.880138 ... False \n",
"7436 -0.399820 Silver 1.343017 -0.853091 ... False \n",
"7728 -0.399820 Silver 0.330763 0.663484 ... False \n",
"1136 -0.399820 Silver 0.330763 -1.069745 ... False \n",
"10640 7.279922 Silver 0.330763 -0.419784 ... False \n",
"\n",
" Doors_Двухдверный Doors_Многодверный Doors_Четырехдверный \\\n",
"14829 False False True \n",
"3632 False False True \n",
"4982 False False True \n",
"16758 False False True \n",
"6875 False False True \n",
"... ... ... ... \n",
"18201 False False True \n",
"7436 False False True \n",
"7728 False False True \n",
"1136 False False True \n",
"10640 False False True \n",
"\n",
" Wheel_Left wheel Wheel_Right-hand drive Age_bin_Новый \\\n",
"14829 True False False \n",
"3632 True False False \n",
"4982 True False False \n",
"16758 True False False \n",
"6875 True False True \n",
"... ... ... ... \n",
"18201 True False False \n",
"7436 True False True \n",
"7728 True False False \n",
"1136 True False True \n",
"10640 True False False \n",
"\n",
" Age_bin_Средний Age_bin_Старый Age_bin_Очень старый \n",
"14829 False True False \n",
"3632 True False False \n",
"4982 True False False \n",
"16758 True False False \n",
"6875 False False False \n",
"... ... ... ... \n",
"18201 False True False \n",
"7436 False False False \n",
"7728 False True False \n",
"1136 False False False \n",
"10640 True False False \n",
"\n",
"[10077 rows x 46 columns]"
]
},
2024-12-07 00:18:42 +04:00
"execution_count": 1085,
2024-12-07 00:08:27 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scaler = StandardScaler()\n",
"\n",
"numeric_features_for_stardartization = [\n",
" \"Price\",\n",
" \"Levy\",\n",
" \"Engine volume\",\n",
" \"Mileage\",\n",
" \"Cylinders\",\n",
" \"Airbags\",\n",
" \"Age\",\n",
"]\n",
"\n",
"train_df[numeric_features_for_stardartization] = scaler.fit_transform(\n",
" train_df[numeric_features_for_stardartization]\n",
")\n",
"test_df[numeric_features_for_stardartization] = scaler.transform(\n",
" test_df[numeric_features_for_stardartization]\n",
")\n",
"\n",
"train_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Конструирование признаков с помощью Featuretools"
]
},
{
"cell_type": "code",
2024-12-07 00:18:42 +04:00
"execution_count": 1086,
2024-12-07 00:08:27 +04:00
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
2024-12-07 00:18:42 +04:00
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index id not found in dataframe, creating new integer column\n",
2024-12-07 00:08:27 +04:00
" warnings.warn(\n",
2024-12-07 00:18:42 +04:00
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n"
2024-12-07 00:08:27 +04:00
]
}
],
"source": [
"es = ft.EntitySet(id=\"car_data\")\n",
2024-12-07 00:18:42 +04:00
"es = es.add_dataframe(dataframe_name=\"train\", dataframe=train_df, index=\"id\")\n",
2024-12-07 00:08:27 +04:00
"feature_matrix, feature_defs = ft.dfs(\n",
" entityset=es,\n",
" target_dataframe_name=\"train\",\n",
" max_depth=1,\n",
2024-12-07 00:18:42 +04:00
")"
]
},
{
"cell_type": "code",
"execution_count": 1087,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[<Feature: Price>,\n",
" <Feature: Levy>,\n",
" <Feature: Manufacturer>,\n",
" <Feature: Model>,\n",
" <Feature: Engine volume>,\n",
" <Feature: Mileage>,\n",
" <Feature: Cylinders>,\n",
" <Feature: Color>,\n",
" <Feature: Airbags>,\n",
" <Feature: Age>,\n",
" <Feature: Leather interior_No>,\n",
" <Feature: Leather interior_Yes>,\n",
" <Feature: Category_Cabriolet>,\n",
" <Feature: Category_Coupe>,\n",
" <Feature: Category_Goods wagon>,\n",
" <Feature: Category_Hatchback>,\n",
" <Feature: Category_Jeep>,\n",
" <Feature: Category_Limousine>,\n",
" <Feature: Category_Microbus>,\n",
" <Feature: Category_Minivan>,\n",
" <Feature: Category_Pickup>,\n",
" <Feature: Category_Sedan>,\n",
" <Feature: Category_Universal>,\n",
" <Feature: Fuel type_CNG>,\n",
" <Feature: Fuel type_Diesel>,\n",
" <Feature: Fuel type_Hybrid>,\n",
" <Feature: Fuel type_Hydrogen>,\n",
" <Feature: Fuel type_LPG>,\n",
" <Feature: Fuel type_Petrol>,\n",
" <Feature: Fuel type_Plug-in Hybrid>,\n",
" <Feature: Gear box type_Automatic>,\n",
" <Feature: Gear box type_Manual>,\n",
" <Feature: Gear box type_Tiptronic>,\n",
" <Feature: Gear box type_Variator>,\n",
" <Feature: Drive wheels_4x4>,\n",
" <Feature: Drive wheels_Front>,\n",
" <Feature: Drive wheels_Rear>,\n",
" <Feature: Doors_Двухдверный>,\n",
" <Feature: Doors_Многодверный>,\n",
" <Feature: Doors_Четырехдверный>,\n",
" <Feature: Wheel_Left wheel>,\n",
" <Feature: Wheel_Right-hand drive>,\n",
" <Feature: Age_bin_Новый>,\n",
" <Feature: Age_bin_Средний>,\n",
" <Feature: Age_bin_Старый>,\n",
" <Feature: Age_bin_Очень старый>]"
]
},
"execution_count": 1087,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
2024-12-07 00:08:27 +04:00
"feature_defs"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}