mai_pi-33_zakharov/lab3_2.ipynb at f77a5e53355c937dc16452e945f2deff1cc53ee2

Zakharov_Rostislav f77a5e5335 feat(lab3): make lab3

2024-12-07 00:08:27 +04:00

205 KiB

Raw Blame History

Загрузка набора данных¶

In [971]:

import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
import featuretools as ft
import re
from sklearn.preprocessing import StandardScaler
from imblearn.over_sampling import RandomOverSampler
from sklearn.model_selection import train_test_split


# Load the car price dataset and discard the "ID" column in a single step.
df = pd.read_csv("../data/car_price_prediction.csv").drop(columns=["ID"])

# Display the resulting frame as the cell output.
df

Out[971]:

	Price	Levy	Manufacturer	Model	Prod. year	Category	Leather interior	Fuel type	Engine volume	Mileage	Cylinders	Gear box type	Drive wheels	Doors	Wheel	Color	Airbags
0	13328	1399	LEXUS	RX 450	2010	Jeep	Yes	Hybrid	3.5	186005 km	6.0	Automatic	4x4	04-May	Left wheel	Silver	12
1	16621	1018	CHEVROLET	Equinox	2011	Jeep	No	Petrol	3	192000 km	6.0	Tiptronic	4x4	04-May	Left wheel	Black	8
2	8467	-	HONDA	FIT	2006	Hatchback	No	Petrol	1.3	200000 km	4.0	Variator	Front	04-May	Right-hand drive	Black	2
3	3607	862	FORD	Escape	2011	Jeep	Yes	Hybrid	2.5	168966 km	4.0	Automatic	4x4	04-May	Left wheel	White	0
4	11726	446	HONDA	FIT	2014	Hatchback	Yes	Petrol	1.3	91901 km	4.0	Automatic	Front	04-May	Left wheel	Silver	4
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
19232	8467	-	MERCEDES-BENZ	CLK 200	1999	Coupe	Yes	CNG	2.0 Turbo	300000 km	4.0	Manual	Rear	02-Mar	Left wheel	Silver	5
19233	15681	831	HYUNDAI	Sonata	2011	Sedan	Yes	Petrol	2.4	161600 km	4.0	Tiptronic	Front	04-May	Left wheel	Red	8
19234	26108	836	HYUNDAI	Tucson	2010	Jeep	Yes	Diesel	2	116365 km	4.0	Automatic	Front	04-May	Left wheel	Grey	4
19235	5331	1288	CHEVROLET	Captiva	2007	Jeep	Yes	Diesel	2	51258 km	4.0	Automatic	Front	04-May	Left wheel	Black	4
19236	470	753	HYUNDAI	Sonata	2012	Sedan	Yes	Hybrid	2.4	186923 km	4.0	Automatic	Front	04-May	Left wheel	White	12

19237 rows × 17 columns

Анализ датасета и очистка данных¶

In [972]:

df.dtypes

Out[972]:

Price                 int64
Levy                 object
Manufacturer         object
Model                object
Prod. year            int64
Category             object
Leather interior     object
Fuel type            object
Engine volume        object
Mileage              object
Cylinders           float64
Gear box type        object
Drive wheels         object
Doors                object
Wheel                object
Color                object
Airbags               int64
dtype: object

In [973]:

# Drop the "Turbo" marker from the engine volume strings, then parse what is left as numbers.
df["Engine volume"] = pd.to_numeric(df["Engine volume"].str.replace("Turbo", ""))
df["Engine volume"].unique()

Out[973]:

array([ 3.5,  3. ,  1.3,  2.5,  2. ,  1.8,  2.4,  4. ,  1.6,  3.3,  2.2,
        4.7,  1.5,  4.4,  1.4,  3.6,  2.3,  5.5,  2.8,  3.2,  3.8,  4.6,
        1.2,  5. ,  1.7,  2.9,  0.5,  1.9,  2.7,  4.8,  5.3,  0.4,  1.1,
        2.1,  0.7,  5.4,  3.7,  1. ,  2.6,  0.8,  0.2,  5.7,  6.7,  6.2,
        3.4,  6.3,  4.3,  4.2,  0. , 20. ,  0.3,  5.9,  5.6,  6. ,  0.6,
        6.8,  4.5,  7.3,  0.1,  3.1,  6.4,  3.9,  0.9,  5.2,  5.8])

In [974]:

# Remove the "km" unit from the mileage strings and store the values as 64-bit integers.
df["Mileage"] = df["Mileage"].str.replace("km", "").astype("int64")
df["Mileage"].unique()

Out[974]:

array([186005, 192000, 200000, ..., 140607, 307325, 186923])

In [975]:

# A "-" levy entry is replaced with "0" so the whole column can be cast to int64.
df["Levy"] = df["Levy"].replace("-", "0").astype("int64")
df["Levy"].unique()

Out[975]:

array([ 1399,  1018,     0,   862,   446,   891,   761,   751,   394,
        1053,  1055,  1079,   810,  2386,  1850,   531,   586,  1249,
        2455,   583,  1537,  1288,   915,  1750,   707,  1077,  1486,
        1091,   650,   382,  1436,  1194,   503,  1017,  1104,   639,
         629,   919,   781,   530,   640,   765,   777,   779,   934,
         769,   645,  1185,  1324,   830,  1187,  1111,   760,   642,
        1604,  1095,   966,   473,  1138,  1811,   988,   917,  1156,
         687, 11714,   836,  1347,  2866,  1646,   259,   609,   697,
         585,   475,   690,   308,  1823,  1361,  1273,   924,   584,
        2078,   831,  1172,   893,  1872,  1885,  1266,   447,  2148,
        1730,   730,   289,   502,   333,  1325,   247,   879,  1342,
        1327,  1598,  1514,  1058,   738,  1935,   481,  1522,  1282,
         456,   880,   900,   798,  1277,   442,  1051,   790,  1292,
        1047,   528,  1211,  1493,  1793,   574,   930,  1998,   271,
         706,  1481,  1677,  1661,  1286,  1408,  1090,   595,  1451,
        1267,   993,  1714,   878,   641,   749,  1511,   603,   353,
         877,  1236,  1141,   397,   784,  1024,  1357,  1301,   770,
         922,  1438,   753,   607,  1363,   638,   490,   431,   565,
         517,   833,   489,  1760,   986,  1841,  1620,  1360,   474,
        1099,   978,  1624,  1946,  1268,  1307,   696,   649,   666,
        2151,   551,   800,   971,  1323,  2377,  1845,  1083,   694,
         463,   419,   345,  1515,  1505,  2056,  1203,   729,   460,
        1356,   876,   911,  1190,   780,   448,  2410,  1848,  1148,
         834,  1275,  1028,  1197,   724,   890,  1705,   505,   789,
        2959,   518,   461,  1719,  2858,  3156,  2225,  2177,  1968,
        1888,  1308,  2736,  1103,   557,  2195,   843,  1664,   723,
        4508,   562,   501,  2018,  1076,  1202,  3301,   691,  1440,
        1869,  1178,   418,  1820,  1413,   488,  1304,   363,  2108,
         521,  1659,    87,  1411,  1528,  3292,  7058,  1578,   627,
         874,  1996,  1488,  5679,  1234,  5603,   400,   889,  3268,
         875,   949,  2265,   441,   742,   425,  2476,  2971,   614,
        1816,  1375,  1405,  2297,  1062,  1113,   420,  2469,   658,
        1951,  2670,  2578,  1995,  1032,   994,  1011,  2421,  1296,
         155,   494,   426,  1086,   961,  2236,  1829,   764,  1834,
        1054,   617,  1529,  2266,   637,   626,  1832,  1016,  2002,
        1756,   746,  1285,  2690,  1118,  5332,   980,  1807,   970,
        1228,  1195,  1132,  1768,  1384,  1080,  7063,  1817,  1452,
        1975,  1368,   702,  1974,  1781,  1036,   944,   663,   364,
        1539,  1345,  1680,  2209,   741,  1575,   695,  1317,   294,
        1525,   424,   997,  1473,  1552,  2819,  2188,  1668,  3057,
         799,  1502,  2606,   552,  1694,  1759,  1110,   399,  1470,
        1174,  5877,  1474,  1688,   526,   686,  5908,  1107,  2070,
        1468,  1246,  1685,   556,  1533,  1917,  1346,   732,   692,
         579,   421,   362,  3505,  1855,  2711,  1586,  3739,   681,
        1708,  2278,  1701,   722,  1482,   928,   827,   832,   527,
         604,   173,  1341,  3329,  1553,   859,   167,   916,   828,
        2082,  1176,  1108,   975,  3008,  1516,  2269,  1699,  2073,
        1031,  1503,  2364,  1030,  1442,  5666,  2715,  1437,  2067,
        1426,  2908,  1279,   866,  4283,   279,  2658,  3015,  2004,
        1391,  4736,   748,  1466,   644,   683,  2705,  1297,   731,
        1252,  2216,  3141,  3273,  1518,  1723,  1588,   972,   682,
        1094,   668,   175,   967,   402,  3894,  1960,  1599,  2000,
        2084,  1621,   714,  1109,  3989,   873,  1572,  1163,  1991,
        1716,  1673,  2562,  2874,   965,   462,   605,  1948,  1736,
        3518,  2054,  2467,  1681,  1272,  1205,   750,  2156,  2566,
         115,   524,  3184,   676,  1678,   612,   328,   955,  1441,
        1675,  3965,  2909,   623,   822,   867,  3025,  1993,   792,
         636,  4057,  3743,  2337,  2570,  2418,  2472,  3910,  1662,
        2123,  2628,  3208,  2080,  3699,  2913,   864,  2505,   870,
        7536,  1924,  1671,  1064,  1836,  1866,  4741,   841,  1369,
        5681,  3112,  1366,  2223,  1198,  1039,  3811,  3571,  1387,
        1171,  1365,  1531,  1590, 11706,  2308,  4860,  1641,  1045,
        1901])

In [976]:

# Cast the cylinder counts to int64 (the column is float64 in the dtypes listing after loading).
df["Cylinders"] = df["Cylinders"].astype("int64")
# Show the distinct cylinder counts.
df["Cylinders"].unique()

Out[976]:

array([ 6,  4,  8,  1, 12,  3,  2, 16,  5,  7,  9, 10, 14])

In [977]:

df["Doors"].unique()

Out[977]:

array(['04-May', '02-Mar', '>5'], dtype=object)

In [978]:

# Replace the raw door codes with readable Russian category labels
# (two-door, four-door, multi-door). Series.map turns any value that is
# missing from the dict into NaN.
df["Doors"] = df["Doors"].map(
    {"02-Mar": "Двухдверный", "04-May": "Четырехдверный", ">5": "Многодверный"}
)
# Show the distinct labels after mapping.
df["Doors"].unique()

Out[978]:

array(['Четырехдверный', 'Двухдверный', 'Многодверный'], dtype=object)

In [979]:

sorted_df = df.sort_values(by="Price")
sorted_df["Price"].unique()

Out[979]:

array([       1,        3,        6, ...,   627220,   872946, 26307500])

In [980]:

print(f"Количество строк до удаления некорректных значений: {len(df)}")
# Keep only listings priced at 500 or more. The explicit copy makes `df` an
# independent frame, so column assignments made on it afterwards do not act on
# a slice of the unfiltered data (which triggers pandas' SettingWithCopyWarning).
df = df[df["Price"] >= 500].copy()
print(f"Количество строк после удаления некорректных значений: {len(df)}")

Количество строк до удаления некорректных значений: 19237
Количество строк после удаления некорректных значений: 17574

In [981]:

sorted_df = df.sort_values(by="Price")
sorted_df["Price"].unique()

Out[981]:

array([     500,      549,      600, ...,   627220,   872946, 26307500])

In [982]:

sorted_df = df.sort_values(by="Prod. year")
sorted_df["Prod. year"].unique()

Out[982]:

array([1943, 1953, 1957, 1964, 1965, 1968, 1973, 1974, 1977, 1978, 1980,
       1981, 1982, 1983, 1984, 1985, 1986, 1987, 1988, 1989, 1990, 1991,
       1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
       2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013,
       2014, 2015, 2016, 2017, 2018, 2019, 2020])

Ручной синтез признаков¶

In [983]:

# Derive the car's age relative to 2020 and drop the production year it replaces.
df = df.assign(Age=2020 - df["Prod. year"]).drop(columns="Prod. year")
# List the distinct ages in ascending order.
sorted_df = df.sort_values(by="Age")
sorted_df["Age"].unique()

Out[983]:

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
       34, 35, 36, 37, 38, 39, 40, 42, 43, 46, 47, 52, 55, 56, 63, 67, 77])

In [984]:

df

Out[984]:

	Price	Levy	Manufacturer	Model	Category	Leather interior	Fuel type	Engine volume	Mileage	Cylinders	Gear box type	Drive wheels	Doors	Wheel	Color	Airbags	Age
0	13328	1399	LEXUS	RX 450	Jeep	Yes	Hybrid	3.5	186005	6	Automatic	4x4	Четырехдверный	Left wheel	Silver	12	10
1	16621	1018	CHEVROLET	Equinox	Jeep	No	Petrol	3.0	192000	6	Tiptronic	4x4	Четырехдверный	Left wheel	Black	8	9
2	8467	0	HONDA	FIT	Hatchback	No	Petrol	1.3	200000	4	Variator	Front	Четырехдверный	Right-hand drive	Black	2	14
3	3607	862	FORD	Escape	Jeep	Yes	Hybrid	2.5	168966	4	Automatic	4x4	Четырехдверный	Left wheel	White	0	9
4	11726	446	HONDA	FIT	Hatchback	Yes	Petrol	1.3	91901	4	Automatic	Front	Четырехдверный	Left wheel	Silver	4	6
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
19231	5802	1055	MERCEDES-BENZ	E 350	Sedan	Yes	Diesel	3.5	107800	6	Automatic	Rear	Четырехдверный	Left wheel	Grey	12	7
19232	8467	0	MERCEDES-BENZ	CLK 200	Coupe	Yes	CNG	2.0	300000	4	Manual	Rear	Двухдверный	Left wheel	Silver	5	21
19233	15681	831	HYUNDAI	Sonata	Sedan	Yes	Petrol	2.4	161600	4	Tiptronic	Front	Четырехдверный	Left wheel	Red	8	9
19234	26108	836	HYUNDAI	Tucson	Jeep	Yes	Diesel	2.0	116365	4	Automatic	Front	Четырехдверный	Left wheel	Grey	4	10
19235	5331	1288	CHEVROLET	Captiva	Jeep	Yes	Diesel	2.0	51258	4	Automatic	Front	Четырехдверный	Left wheel	Black	4	13

17574 rows × 17 columns

Очистка дубликатов и пропущенных значений¶

In [985]:

df.duplicated().sum()

Out[985]:

np.int64(2773)

In [986]:

# Discard fully duplicated rows, keeping the first occurrence of each.
df = df.drop_duplicates()

In [987]:

df.isna().sum()

Out[987]:

Price               0
Levy                0
Manufacturer        0
Model               0
Category            0
Leather interior    0
Fuel type           0
Engine volume       0
Mileage             0
Cylinders           0
Gear box type       0
Drive wheels        0
Doors               0
Wheel               0
Color               0
Airbags             0
Age                 0
dtype: int64

Очистка выбросов¶

In [988]:

df.dtypes

Out[988]:

Price                 int64
Levy                  int64
Manufacturer         object
Model                object
Category             object
Leather interior     object
Fuel type            object
Engine volume       float64
Mileage               int64
Cylinders             int64
Gear box type        object
Drive wheels         object
Doors                object
Wheel                object
Color                object
Airbags               int64
Age                   int64
dtype: object

In [989]:

# Numeric columns that are inspected for outliers and later filtered.
numeric_features_with_outliers = [
    "Price",
    "Levy",
    "Mileage",
    "Age",
]

# Draw every boxplot in ONE figure, one axes per column. Opening a new figure
# on each iteration and selecting slot `i` of a 6x1 grid leaves each figure
# with a single small plot and the rest of the canvas empty.
fig, axes = plt.subplots(
    nrows=len(numeric_features_with_outliers),
    ncols=1,
    figsize=(4, 5 * len(numeric_features_with_outliers)),
    squeeze=False,  # always a 2-D array of axes, even for a single column
)
for ax, col in zip(axes.ravel(), numeric_features_with_outliers):
    df.boxplot(column=col, ax=ax)
fig.tight_layout()

No description has been provided for this image

In [990]:

def remove_outliers(df, column, iqr_factor=1.5):
    """Return the rows of *df* whose *column* value lies within the IQR fences.

    The fences are ``Q1 - iqr_factor * IQR`` and ``Q3 + iqr_factor * IQR``,
    where Q1/Q3 are the 25th/75th percentiles of ``df[column]``. Values equal
    to a fence are kept. The input frame is not modified.

    Args:
        df: Source DataFrame.
        column: Name of the numeric column to filter on.
        iqr_factor: Whisker multiplier; the default 1.5 is the usual
            boxplot convention.

    Returns:
        The filtered DataFrame (original index labels are preserved).
    """
    q1, q3 = df[column].quantile(0.25), df[column].quantile(0.75)
    spread = q3 - q1
    lower_bound = q1 - iqr_factor * spread
    upper_bound = q3 + iqr_factor * spread
    # Series.between is inclusive on both ends by default.
    return df[df[column].between(lower_bound, upper_bound)]

# Report the row count before filtering.
print(f"Количество строк до удаления выбросов: {len(df)}")

# Filter one column at a time; `df` is reassigned on every pass, so each
# column's IQR bounds are computed on the rows that survived the previous columns.
for column in numeric_features_with_outliers:
    df = remove_outliers(df, column)

# Report the row count after filtering.
print(f"Количество строк после удаления выбросов: {len(df)}")

Количество строк до удаления выбросов: 14801
Количество строк после удаления выбросов: 12597

In [991]:

# Re-draw the boxplots after outlier removal in ONE figure, one axes per
# column. Opening a new figure on each iteration and selecting slot `i` of a
# 6x1 grid leaves each figure with a single small plot and an empty canvas.
fig, axes = plt.subplots(
    nrows=len(numeric_features_with_outliers),
    ncols=1,
    figsize=(4, 5 * len(numeric_features_with_outliers)),
    squeeze=False,  # always a 2-D array of axes, even for a single column
)
for ax, col in zip(axes.ravel(), numeric_features_with_outliers):
    df.boxplot(column=col, ax=ax)
fig.tight_layout()

Разбиение на выборки¶

In [992]:

# Hold out 20% of the rows as a test split; random_state pins the shuffle so
# the split is reproducible.
train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)

# Report how many rows ended up in each split.
print("Размеры выборок:")
print(f"Обучающая выборка: {train_df.shape[0]} записей")
print(f"Тестовая выборка: {test_df.shape[0]} записей")

Размеры выборок:
Обучающая выборка: 10077 записей
Тестовая выборка: 2520 записей

Дискретизация числовых признаков¶

In [993]:

train_df.dtypes

Out[993]:

Price                 int64
Levy                  int64
Manufacturer         object
Model                object
Category             object
Leather interior     object
Fuel type            object
Engine volume       float64
Mileage               int64
Cylinders             int64
Gear box type        object
Drive wheels         object
Doors                object
Wheel                object
Color                object
Airbags               int64
Age                   int64
dtype: object

In [994]:

# Numeric columns that get an additional binned (categorical) version.
numeric_features_for_discritization = ["Age"]

def discretize_features(df, features, bins=4, labels=("Новый", "Средний", "Старый", "Очень старый")):
    """Return a copy of *df* with a ``<feature>_bin`` column for each feature.

    Each listed column is binned with ``pd.cut``. The input frame is left
    untouched, so the function is safe to call on a slice of another frame.

    Args:
        df: Source DataFrame.
        features: Names of the numeric columns to discretise.
        bins: Forwarded to ``pd.cut``: a number of equal-width bins derived
            from the column's own range, or an explicit sequence of edges.
        labels: Forwarded to ``pd.cut``; must match the number of bins
            (or be ``None``/``False`` for pandas' own labelling).

    Returns:
        A new DataFrame with the added ``*_bin`` columns. A feature that
        cannot be binned (missing column, non-numeric data, label/bin
        mismatch) is reported on stdout and skipped.
    """
    # Hand pd.cut a list so the immutable tuple default behaves like a list.
    cut_labels = list(labels) if isinstance(labels, tuple) else labels
    result = df.copy()
    for feature in features:
        try:
            result[f"{feature}_bin"] = pd.cut(result[feature], bins=bins, labels=cut_labels)  # type: ignore
        except (KeyError, TypeError, ValueError) as e:
            print(f"Ошибка при дискретизации признака {feature}: {e}")
    return result


# With an integer `bins`, pd.cut derives the edges from the min/max of the data
# it is given, so binning the two splits separately would attach the same label
# to different value ranges. Take the edges from the training split only and
# reuse them for the test split.
for feature in numeric_features_for_discritization:
    _, feature_edges = pd.cut(train_df[feature], bins=4, retbins=True)
    # Open the outer edges so test values outside the training range still get a bin.
    feature_edges[0], feature_edges[-1] = -np.inf, np.inf
    train_df = discretize_features(train_df, [feature], bins=feature_edges)
    test_df = discretize_features(test_df, [feature], bins=feature_edges)

train_df

Out[994]:

	Price	Levy	Manufacturer	Model	Category	Leather interior	Fuel type	Engine volume	Mileage	Cylinders	Gear box type	Drive wheels	Doors	Wheel	Color	Airbags	Age	Age_bin
14829	6743	966	DAEWOO	Lacetti	Sedan	Yes	Diesel	2.0	62227	4	Automatic	Front	Четырехдверный	Left wheel	White	4	11	Старый
3632	20005	583	HYUNDAI	Elantra	Sedan	Yes	Petrol	1.6	94479	4	Automatic	Front	Четырехдверный	Left wheel	Red	4	9	Средний
4982	13172	836	DODGE	Caliber	Hatchback	No	Petrol	2.0	114000	4	Variator	Front	Четырехдверный	Left wheel	Silver	8	10	Средний
16758	8781	584	HYUNDAI	Elantra	Sedan	Yes	Petrol	1.8	60000	4	Tiptronic	Front	Четырехдверный	Left wheel	Grey	10	6	Средний
6875	25086	0	TOYOTA	Prius	Hatchback	No	Hybrid	1.8	0	4	Automatic	Front	Четырехдверный	Left wheel	Silver	12	5	Новый
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
18201	10349	0	AUDI	A4	Sedan	Yes	Petrol	2.4	150000	6	Manual	4x4	Четырехдверный	Left wheel	Grey	4	13	Старый
7436	2038	765	KIA	Avella	Sedan	Yes	Petrol	2.0	125621	4	Automatic	Front	Четырехдверный	Left wheel	Silver	12	5	Новый
7728	13485	843	TOYOTA	Prius	Hatchback	No	Hybrid	1.5	212000	4	Variator	Front	Четырехдверный	Left wheel	Silver	8	12	Старый
1136	15677	0	FORD	Fiesta	Sedan	No	Petrol	1.6	74800	4	Automatic	Front	Четырехдверный	Left wheel	Silver	8	4	Новый
10640	16308	751	KIA	Optima EX	Sedan	Yes	Petrol	2.4	92000	12	Tiptronic	Front	Четырехдверный	Left wheel	Silver	8	7	Средний

10077 rows × 18 columns

Унитарное кодирование категориальных признаков¶

In [995]:

train_df.dtypes

Out[995]:

Price                  int64
Levy                   int64
Manufacturer          object
Model                 object
Category              object
Leather interior      object
Fuel type             object
Engine volume        float64
Mileage                int64
Cylinders              int64
Gear box type         object
Drive wheels          object
Doors                 object
Wheel                 object
Color                 object
Airbags                int64
Age                    int64
Age_bin             category
dtype: object

In [996]:

# Categorical columns that are one-hot encoded.
categorical_features_for_encoding = [
    "Leather interior",
    "Category",
    "Fuel type",
    "Gear box type",
    "Drive wheels",
    "Doors",
    "Wheel",
    "Age_bin",
]

train_df = pd.get_dummies(train_df, columns=categorical_features_for_encoding)
test_df = pd.get_dummies(test_df, columns=categorical_features_for_encoding)
# The two splits are encoded independently, so a category that occurs in only
# one of them would give the frames different dummy columns. Align the test
# frame to the training layout: dummies unseen in training are dropped and
# dummies absent from the test split are added as all-False columns.
test_df = test_df.reindex(columns=train_df.columns, fill_value=False)

train_df

Out[996]:

	Price	Levy	Manufacturer	Model	Engine volume	Mileage	Cylinders	Color	Airbags	Age	...	Drive wheels_Rear	Doors_Двухдверный	Doors_Многодверный	Doors_Четырехдверный	Wheel_Left wheel	Wheel_Right-hand drive	Age_bin_Новый	Age_bin_Средний	Age_bin_Старый	Age_bin_Очень старый
14829	6743	966	DAEWOO	Lacetti	2.0	62227	4	White	4	11	...	False	False	False	True	True	False	False	False	True	False
3632	20005	583	HYUNDAI	Elantra	1.6	94479	4	Red	4	9	...	False	False	False	True	True	False	False	True	False	False
4982	13172	836	DODGE	Caliber	2.0	114000	4	Silver	8	10	...	False	False	False	True	True	False	False	True	False	False
16758	8781	584	HYUNDAI	Elantra	1.8	60000	4	Grey	10	6	...	False	False	False	True	True	False	False	True	False	False
6875	25086	0	TOYOTA	Prius	1.8	0	4	Silver	12	5	...	False	False	False	True	True	False	True	False	False	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
18201	10349	0	AUDI	A4	2.4	150000	6	Grey	4	13	...	False	False	False	True	True	False	False	False	True	False
7436	2038	765	KIA	Avella	2.0	125621	4	Silver	12	5	...	False	False	False	True	True	False	True	False	False	False
7728	13485	843	TOYOTA	Prius	1.5	212000	4	Silver	8	12	...	False	False	False	True	True	False	False	False	True	False
1136	15677	0	FORD	Fiesta	1.6	74800	4	Silver	8	4	...	False	False	False	True	True	False	True	False	False	False
10640	16308	751	KIA	Optima EX	2.4	92000	12	Silver	8	7	...	False	False	False	True	True	False	False	True	False	False

10077 rows × 46 columns

Масштабирование признаков¶

In [997]:

train_df.dtypes

Out[997]:

Price                         int64
Levy                          int64
Manufacturer                 object
Model                        object
Engine volume               float64
Mileage                       int64
Cylinders                     int64
Color                        object
Airbags                       int64
Age                           int64
Leather interior_No            bool
Leather interior_Yes           bool
Category_Cabriolet             bool
Category_Coupe                 bool
Category_Goods wagon           bool
Category_Hatchback             bool
Category_Jeep                  bool
Category_Limousine             bool
Category_Microbus              bool
Category_Minivan               bool
Category_Pickup                bool
Category_Sedan                 bool
Category_Universal             bool
Fuel type_CNG                  bool
Fuel type_Diesel               bool
Fuel type_Hybrid               bool
Fuel type_Hydrogen             bool
Fuel type_LPG                  bool
Fuel type_Petrol               bool
Fuel type_Plug-in Hybrid       bool
Gear box type_Automatic        bool
Gear box type_Manual           bool
Gear box type_Tiptronic        bool
Gear box type_Variator         bool
Drive wheels_4x4               bool
Drive wheels_Front             bool
Drive wheels_Rear              bool
Doors_Двухдверный              bool
Doors_Многодверный             bool
Doors_Четырехдверный           bool
Wheel_Left wheel               bool
Wheel_Right-hand drive         bool
Age_bin_Новый                  bool
Age_bin_Средний                bool
Age_bin_Старый                 bool
Age_bin_Очень старый           bool
dtype: object

In [998]:

# Standardise the numeric columns: the scaler is fitted on the training split
# only and the same fitted parameters are then applied to the test split.
scaler = StandardScaler()

# NOTE(review): "Price" is standardised together with the other columns; if it
# is the prediction target, confirm that scaling it here is intended.
numeric_features_for_stardartization = [
    "Price",
    "Levy",
    "Engine volume",
    "Mileage",
    "Cylinders",
    "Airbags",
    "Age",
]

# fit_transform learns the scaling parameters from the training data and applies them.
train_df[numeric_features_for_stardartization] = scaler.fit_transform(
    train_df[numeric_features_for_stardartization]
)
# transform reuses the parameters learned above; nothing is re-fitted on the test data.
test_df[numeric_features_for_stardartization] = scaler.transform(
    test_df[numeric_features_for_stardartization]
)

# Display the scaled training frame as the cell output.
train_df

Out[998]:

	Price	Levy	Manufacturer	Model	Engine volume	Mileage	Cylinders	Color	Airbags	Age	...	Drive wheels_Rear	Doors_Двухдверный	Doors_Многодверный	Doors_Четырехдверный	Wheel_Left wheel	Wheel_Right-hand drive	Age_bin_Новый	Age_bin_Средний	Age_bin_Старый	Age_bin_Очень старый
14829	-0.936428	0.909873	DAEWOO	Lacetti	-0.212078	-0.855905	-0.399820	White	-0.681491	0.446831	...	False	False	False	True	True	False	False	False	True	False
3632	0.288147	0.076376	HYUNDAI	Elantra	-0.757467	-0.422001	-0.399820	Red	-0.681491	0.013523	...	False	False	False	True	True	False	False	True	False	False
4982	-0.342793	0.626963	DODGE	Caliber	-0.212078	-0.159374	-0.399820	Silver	0.330763	0.230177	...	False	False	False	True	True	False	False	True	False	False
16758	-0.748245	0.078552	HYUNDAI	Elantra	-0.484772	-0.885866	-0.399820	Grey	0.836890	-0.636438	...	False	False	False	True	True	False	False	True	False	False
6875	0.757313	-1.192368	TOYOTA	Prius	-0.484772	-1.693079	-0.399820	Silver	1.343017	-0.853091	...	False	False	False	True	True	False	True	False	False	False
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
18201	-0.603461	-1.192368	AUDI	A4	0.333312	0.324954	1.520116	Grey	-0.681491	0.880138	...	False	False	False	True	True	False	False	False	True	False
7436	-1.370875	0.472450	KIA	Avella	-0.212078	-0.003030	-0.399820	Silver	1.343017	-0.853091	...	False	False	False	True	True	False	True	False	False	False
7728	-0.313891	0.642196	TOYOTA	Prius	-0.893814	1.159074	-0.399820	Silver	0.330763	0.663484	...	False	False	False	True	True	False	False	False	True	False
1136	-0.111488	-1.192368	FORD	Fiesta	-0.757467	-0.686753	-0.399820	Silver	0.330763	-1.069745	...	False	False	False	True	True	False	True	False	False	False
10640	-0.053223	0.441983	KIA	Optima EX	0.333312	-0.455352	7.279922	Silver	0.330763	-0.419784	...	False	False	False	True	True	False	False	True	False	False