MII/lec4_reg.ipynb at 9eba71f6ee0cbe6d91393ef613c2cab9aaf0b34e

Загрузка данных¶

In [94]:

import pandas as pd

from sklearn import set_config

set_config(transform_output="pandas")

random_state = 9

df = pd.read_csv("data/car_price_prediction.csv", index_col="ID")
df["distance_nokm"] = pd.to_numeric(df["Mileage"].str.replace(" km", "", regex=False))


df["distance_norm"] = df["distance_nokm"].clip(0, 350000)

df.boxplot(column="distance_norm")
df["Levy"] = pd.to_numeric(df["Levy"].str.replace("-", "0", regex=False))
# df["Cylinders"] = pd.to_numeric(df["Cylinders"].str.replace(".", "", regex=False))

# average_mileage = df["distance_norm"].mean()
# print(f"Среднее значение поля 'пробег': {average_mileage}")
# average_mileage = df["distance_norm"].mean()
# df["above_average_mileage"] = (df["distance_norm"] > average_mileage).astype(int)

df

Out[94]:

	Price	Levy	Manufacturer	Model	Prod_year	Category	Leather_interior	Fuel type	Engine volume	Mileage	Cylinders	Gear box type	Drive wheels	Doors	Wheel	Color	Airbags	distance_nokm	distance_norm
ID
45654403	13328	1399	LEXUS	RX 450	2010	Jeep	Yes	Hybrid	3.5	186005 km	6.0	Automatic	4x4	04-May	Left wheel	Silver	12	186005	186005
44731507	16621	1018	CHEVROLET	Equinox	2011	Jeep	No	Petrol	3	192000 km	6.0	Tiptronic	4x4	04-May	Left wheel	Black	8	192000	192000
45774419	8467	0	HONDA	FIT	2006	Hatchback	No	Petrol	1.3	200000 km	4.0	Variator	Front	04-May	Right-hand drive	Black	2	200000	200000
45769185	3607	862	FORD	Escape	2011	Jeep	Yes	Hybrid	2.5	168966 km	4.0	Automatic	4x4	04-May	Left wheel	White	0	168966	168966
45809263	11726	446	HONDA	FIT	2014	Hatchback	Yes	Petrol	1.3	91901 km	4.0	Automatic	Front	04-May	Left wheel	Silver	4	91901	91901
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
45798355	8467	0	MERCEDES-BENZ	CLK 200	1999	Coupe	Yes	CNG	2.0 Turbo	300000 km	4.0	Manual	Rear	02-Mar	Left wheel	Silver	5	300000	300000
45778856	15681	831	HYUNDAI	Sonata	2011	Sedan	Yes	Petrol	2.4	161600 km	4.0	Tiptronic	Front	04-May	Left wheel	Red	8	161600	161600
45804997	26108	836	HYUNDAI	Tucson	2010	Jeep	Yes	Diesel	2	116365 km	4.0	Automatic	Front	04-May	Left wheel	Grey	4	116365	116365
45793526	5331	1288	CHEVROLET	Captiva	2007	Jeep	Yes	Diesel	2	51258 km	4.0	Automatic	Front	04-May	Left wheel	Black	4	51258	51258
45813273	470	753	HYUNDAI	Sonata	2012	Sedan	Yes	Hybrid	2.4	186923 km	4.0	Automatic	Front	04-May	Left wheel	White	12	186923	186923

19237 rows × 19 columns

In [95]:

from sklearn.compose import ColumnTransformer
from sklearn.discriminant_analysis import StandardScaler
from sklearn.impute import SimpleImputer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder

from transformers import TitanicFeatures


# columns_to_drop = ["Survived", "Name", "Cabin", "Ticket", "Embarked", "Parch", "Fare"]
columns_to_drop = [
    # "Price",
    "Doors",
    # "Color",
    # "Gear box type",
    # "Prod_year",
    "Mileage",
    # "Airbags",
    "Levy",
    # "Leather_interior",
    "Fuel type",
    "Drive wheels",
    "Engine volume",
    "Wheel",
    "distance_nokm",
    "Model",
     "Cylinders"
]
num_columns = [
    "Airbags",
    "distance_norm",
    # "Cylinders"
]
cat_columns = [
    "Color",
    "Gear box type",
    "Leather_interior",
    "Manufacturer",
    "Category"
]

num_imputer = SimpleImputer(strategy="median")
num_scaler = StandardScaler()
preprocessing_num = Pipeline(
    [
        ("imputer", num_imputer),
        ("scaler", num_scaler),
    ]
)

cat_imputer = SimpleImputer(strategy="constant", fill_value="unknown")
cat_encoder = OneHotEncoder(handle_unknown="ignore", sparse_output=False, drop="first")
preprocessing_cat = Pipeline(
    [
        ("imputer", cat_imputer),
        ("encoder", cat_encoder),
    ]
)

features_preprocessing = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("prepocessing_num", preprocessing_num, num_columns),
        ("prepocessing_cat", preprocessing_cat, cat_columns),
        #("prepocessing_features", cat_imputer, ["Name", "Cabin"]),
    ],
    remainder="passthrough"
)

# features_engineering = ColumnTransformer(
#     verbose_feature_names_out=False,
#     transformers=[
#         ("add_features", TitanicFeatures(), ["Name", "Cabin"]),
#     ],
#     remainder="passthrough",
# )

drop_columns = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("drop_columns", "drop", columns_to_drop),
    ],
    remainder="passthrough",
)

# features_postprocessing = ColumnTransformer(
#     verbose_feature_names_out=False,
#     transformers=[
#         ("prepocessing_cat", preprocessing_cat, ["Cabin_type"]),
#     ],
#     remainder="passthrough",
# )

pipeline_end = Pipeline(
    [
        ("features_preprocessing", features_preprocessing),
       # ("features_engineering", features_engineering),
        ("drop_columns", drop_columns),
       # ("features_postprocessing", features_postprocessing),
    ]
)

In [96]:

preprocessing_result = pipeline_end.fit_transform(df)
df = pd.DataFrame(
    preprocessing_result,
    columns=pipeline_end.get_feature_names_out(),
)

Формирование выборок¶

In [97]:

from utils import split_stratified_into_train_val_test

X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(
    df,
    target_colname="Price",
    stratify_colname="Airbags",
    frac_train=0.80,
    frac_val=0,
    frac_test=0.20,
    random_state=random_state,
)

display("X_train", X_train)
display("y_train", y_train)

display("X_test", X_test)
display("y_test", y_test)

'X_train'

	Airbags	distance_norm	Color_Black	Color_Blue	Color_Brown	Color_Carnelian red	Color_Golden	Color_Green	Color_Grey	Color_Orange	...	Category_Hatchback	Category_Jeep	Category_Limousine	Category_Microbus	Category_Minivan	Category_Pickup	Category_Sedan	Category_Universal	Price	Prod_year
ID
45664431	-1.523737	0.012512	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1019	2013
45186186	1.254005	-0.768336	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	14113	2010
45757605	-0.134866	0.093264	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	15994	2010
45761763	-1.060780	-0.935158	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	24891	2011
45762505	1.254005	0.496836	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	706	2015
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
45738958	-0.134866	1.046858	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	7840	2005
45761444	1.254005	-0.288173	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	37633	2010
45809946	-0.597823	-0.567004	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	5209	2013
45789900	0.328091	-0.568642	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	14426	2006
45788440	-0.597823	0.037171	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	12544	2011

15389 rows × 97 columns

'y_train'

	Price
ID
45664431	1019
45186186	14113
45757605	15994
45761763	24891
45762505	706
...	...
45738958	7840
45761444	37633
45809946	5209
45789900	14426
45788440	12544

15389 rows × 1 columns

'X_test'

	Airbags	distance_norm	Color_Black	Color_Blue	Color_Brown	Color_Carnelian red	Color_Golden	Color_Green	Color_Grey	Color_Orange	...	Category_Hatchback	Category_Jeep	Category_Limousine	Category_Microbus	Category_Minivan	Category_Pickup	Category_Sedan	Category_Universal	Price	Prod_year
ID
41976837	0.328091	-1.184551	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	90006	2015
45793567	-0.597823	-0.719669	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	7850	2009
45812786	-1.060780	1.832171	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	2352	1998
45808317	1.254005	-0.453739	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	1333	2015
45809653	-0.597823	-1.437871	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	62493	2018
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
45793612	-1.060780	-0.018923	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	51746	2014
45567237	-1.523737	0.242249	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	784	2014
45802935	-0.597823	0.299701	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	41221	2016
45102093	-0.597823	0.279496	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	10976	2003
45733630	-0.597823	-1.055951	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	55343	2018

3848 rows × 97 columns

'y_test'

	Price
ID
41976837	90006
45793567	7850
45812786	2352
45808317	1333
45809653	62493
...	...
45793612	51746
45567237	784
45802935	41221
45102093	10976
45733630	55343

3848 rows × 1 columns

In [98]:

X_test

Out[98]:

	Airbags	distance_norm	Color_Black	Color_Blue	Color_Brown	Color_Carnelian red	Color_Golden	Color_Green	Color_Grey	Color_Orange	...	Category_Hatchback	Category_Jeep	Category_Limousine	Category_Microbus	Category_Minivan	Category_Pickup	Category_Sedan	Category_Universal	Price	Prod_year
ID
41976837	0.328091	-1.184551	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	90006	2015
45793567	-0.597823	-0.719669	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	7850	2009
45812786	-1.060780	1.832171	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	2352	1998
45808317	1.254005	-0.453739	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	1333	2015
45809653	-0.597823	-1.437871	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	62493	2018
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
45793612	-1.060780	-0.018923	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	1.0	0.0	0.0	0.0	0.0	51746	2014
45567237	-1.523737	0.242249	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	784	2014
45802935	-0.597823	0.299701	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	41221	2016
45102093	-0.597823	0.279496	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	10976	2003
45733630	-0.597823	-1.055951	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	55343	2018

3848 rows × 97 columns

In [99]:

X_train

Out[99]:

	Airbags	distance_norm	Color_Black	Color_Blue	Color_Brown	Color_Carnelian red	Color_Golden	Color_Green	Color_Grey	Color_Orange	...	Category_Hatchback	Category_Jeep	Category_Limousine	Category_Microbus	Category_Minivan	Category_Pickup	Category_Sedan	Category_Universal	Price	Prod_year
ID
45664431	-1.523737	0.012512	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	1019	2013
45186186	1.254005	-0.768336	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	14113	2010
45757605	-0.134866	0.093264	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	15994	2010
45761763	-1.060780	-0.935158	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	24891	2011
45762505	1.254005	0.496836	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	706	2015
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
45738958	-0.134866	1.046858	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	7840	2005
45761444	1.254005	-0.288173	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1.0	0.0	37633	2010
45809946	-0.597823	-0.567004	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	5209	2013
45789900	0.328091	-0.568642	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	14426	2006
45788440	-0.597823	0.037171	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	12544	2011

15389 rows × 97 columns

In [100]:

y_test

Out[100]:

	Price
ID
41976837	90006
45793567	7850
45812786	2352
45808317	1333
45809653	62493
...	...
45793612	51746
45567237	784
45802935	41221
45102093	10976
45733630	55343

3848 rows × 1 columns

In [101]:

y_train

Out[101]:

	Price
ID
45664431	1019
45186186	14113
45757605	15994
45761763	24891
45762505	706
...	...
45738958	7840
45761444	37633
45809946	5209
45789900	14426
45788440	12544

15389 rows × 1 columns

Определение перечня алгоритмов решения задачи аппроксимации (регрессии)¶

In [102]:

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model, tree, neighbors, ensemble, neural_network

random_state = 9

models = {
    # "mlp": {
    #     "model": neural_network.MLPRegressor(
    #         activation="tanh",
    #         hidden_layer_sizes=(3,),
    #         max_iter=500,
    #         early_stopping=True,
    #         random_state=random_state,
    #     )
    # },
    "linear": {"model": linear_model.LinearRegression(n_jobs=-1)},
    "linear_poly": {
        "model": make_pipeline(
            PolynomialFeatures(degree=2),
            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
        )
    },
    "linear_interact": {
        "model": make_pipeline(
            PolynomialFeatures(interaction_only=True),
            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),
        )
    },
    "ridge": {"model": linear_model.RidgeCV()},
    "decision_tree": {
        "model": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)
    },
    "knn": {"model": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},
    "random_forest": {
        "model": ensemble.RandomForestRegressor(
            max_depth=7, random_state=random_state, n_jobs=-1
        )
    },
}

Определение функции для стандартизации значений в столбце "Температура" для MLP¶

In [103]:

from pandas import DataFrame
from sklearn import preprocessing
import numpy as np

stndart_scaler = preprocessing.StandardScaler()


def std_temp(df: DataFrame) -> DataFrame:
    df["distance_norm"] = np.array(stndart_scaler.fit_transform(
        df["distance_norm"].reshape(-1, 1))
    ).reshape(df["distance_norm"].shape)
    return df


def std_temp2(df: DataFrame) -> DataFrame:
    # Преобразуем столбец в массив NumPy и применяем reshape
    df["distance_norm"] = stndart_scaler.fit_transform(
        df["distance_norm"].values.reshape(-1, 1)  # Изменяем на reshape(-1, 1)
    )
    return df


def std_temp3(df: DataFrame) -> DataFrame:
    # Проверка на NaN и заполнение средним значением
    df["distance_norm"].fillna(df["distance_norm"].mean(), inplace=True)

    # Преобразуем столбец в массив NumPy и применяем reshape
    df["distance_norm"] = stndart_scaler.fit_transform(
        df["distance_norm"].values.reshape(-1, 1)
    )
    return df

In [104]:

print(X_train.isnull().sum())
print(X_test.isnull().sum())

Airbags               0
distance_norm         0
Color_Black           0
Color_Blue            0
Color_Brown           0
                     ..
Category_Pickup       0
Category_Sedan        0
Category_Universal    0
Price                 0
Prod_year             0
Length: 97, dtype: int64
Airbags               0
distance_norm         0
Color_Black           0
Color_Blue            0
Color_Brown           0
                     ..
Category_Pickup       0
Category_Sedan        0
Category_Universal    0
Price                 0
Prod_year             0
Length: 97, dtype: int64

Обучение и оценка моделей с помощью различных алгоритмов¶

In [105]:

import math
from pandas import DataFrame
from sklearn import metrics





for model_name in models.keys():
    print(f"Model: {model_name}")
    X_train: DataFrame = X_train.copy()
    X_test: DataFrame = X_test.copy()

    if model_name == "mlp":
        X_train = std_temp(X_train)
        X_test = std_temp(X_test)

    fitted_model = models[model_name]["model"].fit(
        X_train.values, y_train.values.ravel()
    )
    y_train_pred = fitted_model.predict(X_train.values)
    y_test_pred = fitted_model.predict(X_test.values)
    models[model_name]["fitted"] = fitted_model
    models[model_name]["train_preds"] = y_train_pred
    models[model_name]["preds"] = y_test_pred
    models[model_name]["RMSE_train"] = math.sqrt(
        metrics.mean_squared_error(y_train, y_train_pred)
    )
    models[model_name]["RMSE_test"] = math.sqrt(
        metrics.mean_squared_error(y_test, y_test_pred)
    )
    models[model_name]["RMAE_test"] = math.sqrt(
        metrics.mean_absolute_error(y_test, y_test_pred)
    )
    models[model_name]["R2_test"] = metrics.r2_score(y_test, y_test_pred)

Model: linear
Model: linear_poly
Model: linear_interact
Model: ridge
Model: decision_tree
Model: knn
Model: random_forest

Вывод результатов оценки¶

In [106]:

reg_metrics = pd.DataFrame.from_dict(models, "index")[
    ["RMSE_train", "RMSE_test", "RMAE_test", "R2_test"]
]
reg_metrics.sort_values(by="RMSE_test").style.background_gradient(
    cmap="viridis", low=1, high=0.3, subset=["RMSE_train", "RMSE_test"]
).background_gradient(cmap="plasma", low=0.3, high=1, subset=["RMAE_test", "R2_test"])

Out[106]:

	RMSE_train	RMSE_test	RMAE_test	R2_test
linear	0.000000	0.000000	0.000005	1.000000
linear_interact	0.000000	0.000000	0.000071	1.000000
linear_poly	0.000001	0.000069	0.001155	1.000000
ridge	676.783751	14251.537610	25.290145	0.998871
decision_tree	418.103239	410021.797952	82.474628	0.065532
random_forest	1051.981912	413832.680119	82.077377	0.048080
knn	4091.598286	417393.077890	82.148464	0.031630

Вывод реального и "спрогнозированного" результата для обучающей и тестовой выборок¶

Получение лучшей модели

In [107]:

best_model = str(reg_metrics.sort_values(by="RMSE_test").iloc[0].name)

display(best_model)

'linear'

Вывод для обучающей выборки

In [ ]:

pd.concat(
    [
        X_train,
        y_train,
        pd.Series(
            models[best_model]["train_preds"],
            index=y_train.index,
            name="PricePred",
        ),
    ],
    axis=1,
).head(5)

Out[ ]:

	Airbags	distance_norm	Color_Black	Color_Blue	Color_Brown	Color_Carnelian red	Color_Golden	Color_Green	Color_Grey	Color_Orange	...	Category_Limousine	Category_Microbus	Category_Minivan	Category_Pickup	Category_Sedan	Category_Universal	Price	Prod_year	Price	DensityPred
ID
45664431	-1.523737	0.012512	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	1019	2013	1019	1019.0
45186186	1.254005	-0.768336	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	1.0	0.0	14113	2010	14113	14113.0
45757605	-0.134866	0.093264	0.0	1.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	1.0	0.0	15994	2010	15994	15994.0
45761763	-1.060780	-0.935158	1.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	0.0	0.0	24891	2011	24891	24891.0
45762505	1.254005	0.496836	0.0	0.0	0.0	0.0	0.0	0.0	0.0	0.0	...	0.0	0.0	0.0	0.0	1.0	0.0	706	2015	706	706.0

5 rows × 99 columns

Вывод для тестовой выборки