pred_analytics/Lab4.ipynb

1311 lines
44 KiB
Plaintext
Raw Normal View History

2025-01-13 14:42:39 +04:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod. year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45654403</th>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>186005 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>44731507</th>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>3</td>\n",
" <td>192000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45774419</th>\n",
" <td>8467</td>\n",
" <td>-</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2006</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>200000 km</td>\n",
" <td>4.0</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45769185</th>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>168966 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809263</th>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2014</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>91901 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45798355</th>\n",
" <td>8467</td>\n",
" <td>-</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>CLK 200</td>\n",
" <td>1999</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>CNG</td>\n",
" <td>2.0 Turbo</td>\n",
" <td>300000 km</td>\n",
" <td>4.0</td>\n",
" <td>Manual</td>\n",
" <td>Rear</td>\n",
" <td>02-Mar</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>5</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45778856</th>\n",
" <td>15681</td>\n",
" <td>831</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2011</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.4</td>\n",
" <td>161600 km</td>\n",
" <td>4.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45804997</th>\n",
" <td>26108</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Tucson</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>116365 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45793526</th>\n",
" <td>5331</td>\n",
" <td>1288</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Captiva</td>\n",
" <td>2007</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>2</td>\n",
" <td>51258 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45813273</th>\n",
" <td>470</td>\n",
" <td>753</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Sonata</td>\n",
" <td>2012</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.4</td>\n",
" <td>186923 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>19237 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Prod. year Category \\\n",
"ID \n",
"45654403 13328 1399 LEXUS RX 450 2010 Jeep \n",
"44731507 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
"45774419 8467 - HONDA FIT 2006 Hatchback \n",
"45769185 3607 862 FORD Escape 2011 Jeep \n",
"45809263 11726 446 HONDA FIT 2014 Hatchback \n",
"... ... ... ... ... ... ... \n",
"45798355 8467 - MERCEDES-BENZ CLK 200 1999 Coupe \n",
"45778856 15681 831 HYUNDAI Sonata 2011 Sedan \n",
"45804997 26108 836 HYUNDAI Tucson 2010 Jeep \n",
"45793526 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
"45813273 470 753 HYUNDAI Sonata 2012 Sedan \n",
"\n",
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"ID \n",
"45654403 Yes Hybrid 3.5 186005 km 6.0 \n",
"44731507 No Petrol 3 192000 km 6.0 \n",
"45774419 No Petrol 1.3 200000 km 4.0 \n",
"45769185 Yes Hybrid 2.5 168966 km 4.0 \n",
"45809263 Yes Petrol 1.3 91901 km 4.0 \n",
"... ... ... ... ... ... \n",
"45798355 Yes CNG 2.0 Turbo 300000 km 4.0 \n",
"45778856 Yes Petrol 2.4 161600 km 4.0 \n",
"45804997 Yes Diesel 2 116365 km 4.0 \n",
"45793526 Yes Diesel 2 51258 km 4.0 \n",
"45813273 Yes Hybrid 2.4 186923 km 4.0 \n",
"\n",
" Gear box type Drive wheels Doors Wheel Color Airbags \n",
"ID \n",
"45654403 Automatic 4x4 04-May Left wheel Silver 12 \n",
"44731507 Tiptronic 4x4 04-May Left wheel Black 8 \n",
"45774419 Variator Front 04-May Right-hand drive Black 2 \n",
"45769185 Automatic 4x4 04-May Left wheel White 0 \n",
"45809263 Automatic Front 04-May Left wheel Silver 4 \n",
"... ... ... ... ... ... ... \n",
"45798355 Manual Rear 02-Mar Left wheel Silver 5 \n",
"45778856 Tiptronic Front 04-May Left wheel Red 8 \n",
"45804997 Automatic Front 04-May Left wheel Grey 4 \n",
"45793526 Automatic Front 04-May Left wheel Black 4 \n",
"45813273 Automatic Front 04-May Left wheel White 12 \n",
"\n",
"[19237 rows x 17 columns]"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"from sklearn import set_config\n",
"\n",
"set_config(transform_output=\"pandas\")\n",
"\n",
"random_state=9\n",
"\n",
"data_car = pd.read_csv('car_price_prediction.csv', index_col=\"ID\")\n",
"\n",
"data_car"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'X_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod. year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45788006</th>\n",
" <td>17249</td>\n",
" <td>-</td>\n",
" <td>TOYOTA</td>\n",
" <td>Prius</td>\n",
" <td>2014</td>\n",
" <td>Sedan</td>\n",
" <td>No</td>\n",
" <td>Hybrid</td>\n",
" <td>1.8</td>\n",
" <td>80000 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809459</th>\n",
" <td>15988</td>\n",
" <td>502</td>\n",
" <td>TOYOTA</td>\n",
" <td>Aqua</td>\n",
" <td>2013</td>\n",
" <td>Universal</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>1.5</td>\n",
" <td>44545 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40784381</th>\n",
" <td>25089</td>\n",
" <td>880</td>\n",
" <td>NISSAN</td>\n",
" <td>Kicks</td>\n",
" <td>2018</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.6</td>\n",
" <td>4363 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45621750</th>\n",
" <td>941</td>\n",
" <td>1053</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>E 350</td>\n",
" <td>2014</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>3.5</td>\n",
" <td>184467 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>Rear</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45794605</th>\n",
" <td>7213</td>\n",
" <td>-</td>\n",
" <td>TOYOTA</td>\n",
" <td>Vitz</td>\n",
" <td>2005</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1</td>\n",
" <td>0 km</td>\n",
" <td>3.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45209376</th>\n",
" <td>19601</td>\n",
" <td>690</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2016</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.5 Turbo</td>\n",
" <td>108295 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45810135</th>\n",
" <td>11848</td>\n",
" <td>790</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Lacetti</td>\n",
" <td>2009</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.6</td>\n",
" <td>87250 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45801699</th>\n",
" <td>1500</td>\n",
" <td>-</td>\n",
" <td>DODGE</td>\n",
" <td>Durango</td>\n",
" <td>2001</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>4.7</td>\n",
" <td>0 km</td>\n",
" <td>8.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788660</th>\n",
" <td>35</td>\n",
" <td>-</td>\n",
" <td>TOYOTA</td>\n",
" <td>Mark X Zio</td>\n",
" <td>2005</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.5</td>\n",
" <td>50000 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45743014</th>\n",
" <td>13485</td>\n",
" <td>1018</td>\n",
" <td>BMW</td>\n",
" <td>328 Xdrive</td>\n",
" <td>2011</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>3</td>\n",
" <td>120000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15389 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Prod. year Category \\\n",
"ID \n",
"45788006 17249 - TOYOTA Prius 2014 Sedan \n",
"45809459 15988 502 TOYOTA Aqua 2013 Universal \n",
"40784381 25089 880 NISSAN Kicks 2018 Jeep \n",
"45621750 941 1053 MERCEDES-BENZ E 350 2014 Sedan \n",
"45794605 7213 - TOYOTA Vitz 2005 Hatchback \n",
"... ... ... ... ... ... ... \n",
"45209376 19601 690 FORD Escape 2016 Jeep \n",
"45810135 11848 790 CHEVROLET Lacetti 2009 Sedan \n",
"45801699 1500 - DODGE Durango 2001 Jeep \n",
"45788660 35 - TOYOTA Mark X Zio 2005 Sedan \n",
"45743014 13485 1018 BMW 328 Xdrive 2011 Sedan \n",
"\n",
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"ID \n",
"45788006 No Hybrid 1.8 80000 km 4.0 \n",
"45809459 Yes Hybrid 1.5 44545 km 4.0 \n",
"40784381 No Petrol 1.6 4363 km 4.0 \n",
"45621750 Yes Diesel 3.5 184467 km 6.0 \n",
"45794605 No Petrol 1 0 km 3.0 \n",
"... ... ... ... ... ... \n",
"45209376 Yes Petrol 1.5 Turbo 108295 km 4.0 \n",
"45810135 Yes Petrol 1.6 87250 km 4.0 \n",
"45801699 No Petrol 4.7 0 km 8.0 \n",
"45788660 Yes Petrol 1.5 50000 km 4.0 \n",
"45743014 Yes Petrol 3 120000 km 6.0 \n",
"\n",
" Gear box type Drive wheels Doors Wheel Color Airbags \n",
"ID \n",
"45788006 Automatic Front 04-May Left wheel Black 8 \n",
"45809459 Automatic Front 04-May Left wheel Grey 4 \n",
"40784381 Automatic Front 04-May Left wheel Grey 0 \n",
"45621750 Automatic Rear 04-May Left wheel White 12 \n",
"45794605 Automatic Front 04-May Right-hand drive Silver 4 \n",
"... ... ... ... ... ... ... \n",
"45209376 Automatic Front 04-May Left wheel Black 10 \n",
"45810135 Automatic Front 04-May Left wheel Grey 4 \n",
"45801699 Automatic 4x4 04-May Left wheel Black 7 \n",
"45788660 Automatic Front 04-May Left wheel White 4 \n",
"45743014 Tiptronic 4x4 04-May Left wheel Black 12 \n",
"\n",
"[15389 rows x 17 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45788006</th>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45809459</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>40784381</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45621750</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45794605</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45209376</th>\n",
" <td>10</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45810135</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45801699</th>\n",
" <td>7</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788660</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45743014</th>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>15389 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags\n",
"ID \n",
"45788006 8\n",
"45809459 4\n",
"40784381 0\n",
"45621750 12\n",
"45794605 4\n",
"... ...\n",
"45209376 10\n",
"45810135 4\n",
"45801699 7\n",
"45788660 4\n",
"45743014 12\n",
"\n",
"[15389 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'X_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod. year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear box type</th>\n",
" <th>Drive wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45809681</th>\n",
" <td>59834</td>\n",
" <td>690</td>\n",
" <td>HONDA</td>\n",
" <td>Civic</td>\n",
" <td>2016</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.5</td>\n",
" <td>3785 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43797018</th>\n",
" <td>167781</td>\n",
" <td>1292</td>\n",
" <td>LAND ROVER</td>\n",
" <td>Range Rover</td>\n",
" <td>2016</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Diesel</td>\n",
" <td>3.0 Turbo</td>\n",
" <td>81000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45651960</th>\n",
" <td>7997</td>\n",
" <td>1850</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 400</td>\n",
" <td>2008</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>244731 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45797083</th>\n",
" <td>15289</td>\n",
" <td>836</td>\n",
" <td>HYUNDAI</td>\n",
" <td>Genesis</td>\n",
" <td>2010</td>\n",
" <td>Coupe</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2</td>\n",
" <td>115363 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Red</td>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45765586</th>\n",
" <td>627</td>\n",
" <td>836</td>\n",
" <td>TOYOTA</td>\n",
" <td>Tacoma</td>\n",
" <td>2012</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>2.7</td>\n",
" <td>394069 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Rear</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45644398</th>\n",
" <td>392</td>\n",
" <td>781</td>\n",
" <td>TOYOTA</td>\n",
" <td>Camry</td>\n",
" <td>2012</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>314373 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Grey</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45816463</th>\n",
" <td>17249</td>\n",
" <td>-</td>\n",
" <td>FORD</td>\n",
" <td>Transit</td>\n",
" <td>1996</td>\n",
" <td>Goods wagon</td>\n",
" <td>No</td>\n",
" <td>Diesel</td>\n",
" <td>2.5 Turbo</td>\n",
" <td>230000 km</td>\n",
" <td>4.0</td>\n",
" <td>Manual</td>\n",
" <td>Rear</td>\n",
" <td>02-Mar</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45619877</th>\n",
" <td>24500</td>\n",
" <td>919</td>\n",
" <td>BMW</td>\n",
" <td>X5</td>\n",
" <td>2012</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>3.0 Turbo</td>\n",
" <td>1000 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45641164</th>\n",
" <td>7213</td>\n",
" <td>530</td>\n",
" <td>NISSAN</td>\n",
" <td>Juke</td>\n",
" <td>2013</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.6</td>\n",
" <td>312118 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788400</th>\n",
" <td>24462</td>\n",
" <td>-</td>\n",
" <td>MERCEDES-BENZ</td>\n",
" <td>E 350</td>\n",
" <td>2008</td>\n",
" <td>Sedan</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>3.5</td>\n",
" <td>142000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3848 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Price Levy Manufacturer Model Prod. year Category \\\n",
"ID \n",
"45809681 59834 690 HONDA Civic 2016 Sedan \n",
"43797018 167781 1292 LAND ROVER Range Rover 2016 Jeep \n",
"45651960 7997 1850 LEXUS RX 400 2008 Jeep \n",
"45797083 15289 836 HYUNDAI Genesis 2010 Coupe \n",
"45765586 627 836 TOYOTA Tacoma 2012 Sedan \n",
"... ... ... ... ... ... ... \n",
"45644398 392 781 TOYOTA Camry 2012 Sedan \n",
"45816463 17249 - FORD Transit 1996 Goods wagon \n",
"45619877 24500 919 BMW X5 2012 Jeep \n",
"45641164 7213 530 NISSAN Juke 2013 Jeep \n",
"45788400 24462 - MERCEDES-BENZ E 350 2008 Sedan \n",
"\n",
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"ID \n",
"45809681 Yes Petrol 1.5 3785 km 4.0 \n",
"43797018 Yes Diesel 3.0 Turbo 81000 km 6.0 \n",
"45651960 Yes Hybrid 3.5 244731 km 6.0 \n",
"45797083 Yes Petrol 2 115363 km 4.0 \n",
"45765586 Yes Petrol 2.7 394069 km 4.0 \n",
"... ... ... ... ... ... \n",
"45644398 Yes Hybrid 2.5 314373 km 4.0 \n",
"45816463 No Diesel 2.5 Turbo 230000 km 4.0 \n",
"45619877 Yes Petrol 3.0 Turbo 1000 km 6.0 \n",
"45641164 Yes Petrol 1.6 312118 km 4.0 \n",
"45788400 Yes Petrol 3.5 142000 km 6.0 \n",
"\n",
" Gear box type Drive wheels Doors Wheel Color Airbags \n",
"ID \n",
"45809681 Automatic Front 04-May Left wheel Grey 4 \n",
"43797018 Tiptronic 4x4 04-May Left wheel Grey 12 \n",
"45651960 Automatic Front 04-May Left wheel Black 12 \n",
"45797083 Automatic Front 04-May Left wheel Red 4 \n",
"45765586 Automatic Rear 04-May Left wheel Black 12 \n",
"... ... ... ... ... ... ... \n",
"45644398 Automatic Front 04-May Left wheel Grey 12 \n",
"45816463 Manual Rear 02-Mar Left wheel White 2 \n",
"45619877 Automatic 4x4 04-May Left wheel White 12 \n",
"45641164 Automatic Front 04-May Left wheel Black 0 \n",
"45788400 Tiptronic 4x4 04-May Left wheel White 12 \n",
"\n",
"[3848 rows x 17 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" <tr>\n",
" <th>ID</th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>45809681</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>43797018</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45651960</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45797083</th>\n",
" <td>4</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45765586</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45644398</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45816463</th>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45619877</th>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45641164</th>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>45788400</th>\n",
" <td>12</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>3848 rows × 1 columns</p>\n",
"</div>"
],
"text/plain": [
" Airbags\n",
"ID \n",
"45809681 4\n",
"43797018 12\n",
"45651960 12\n",
"45797083 4\n",
"45765586 12\n",
"... ...\n",
"45644398 12\n",
"45816463 2\n",
"45619877 12\n",
"45641164 0\n",
"45788400 12\n",
"\n",
"[3848 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from src.utils import split_stratified_into_train_val_test\n",
"\n",
"X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
" data_car,\n",
" stratify_colname=\"Airbags\",\n",
" frac_train=0.80,\n",
" frac_val=0,\n",
" frac_test=0.20,\n",
" random_state=random_state,\n",
")\n",
"\n",
"display(\"X_train\", X_train)\n",
"display(\"y_train\", y_train)\n",
"\n",
"display(\"X_test\", X_test)\n",
"display(\"y_test\", y_test)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}