Add regression example for lec4

This commit is contained in:
Aleksey Filippov 2024-10-30 12:46:07 +04:00
parent 35b3491a77
commit 31f735cd41
3 changed files with 997 additions and 0 deletions

View File

@ -0,0 +1,18 @@
T;Al2O3;TiO2;Density
30;0;0;1,05696
55;0;0;1,04158
25;0,05;0;1,08438
30;0,05;0;1,08112
35;0,05;0;1,07781
40;0,05;0;1,07446
60;0,05;0;1,06053
35;0,3;0;1,17459
65;0,3;0;1,14812
45;0;0,05;1,07424
50;0;0,05;1,07075
55;0;0,05;1,06721
20;0;0,3;1,22417
30;0;0,3;1,2131
40;0;0,3;1,20265
60;0;0,3;1,18265
70;0;0,3;1,17261
1 T Al2O3 TiO2 Density
2 30 0 0 1,05696
3 55 0 0 1,04158
4 25 0,05 0 1,08438
5 30 0,05 0 1,08112
6 35 0,05 0 1,07781
7 40 0,05 0 1,07446
8 60 0,05 0 1,06053
9 35 0,3 0 1,17459
10 65 0,3 0 1,14812
11 45 0 0,05 1,07424
12 50 0 0,05 1,07075
13 55 0 0,05 1,06721
14 20 0 0,3 1,22417
15 30 0 0,3 1,2131
16 40 0 0,3 1,20265
17 60 0 0,3 1,18265
18 70 0 0,3 1,17261

View File

@ -0,0 +1,39 @@
T;Al2O3;TiO2;Density
20;0;0;1,0625
25;0;0;1,05979
35;0;0;1,05404
40;0;0;1,05103
45;0;0;1,04794
50;0;0;1,04477
60;0;0;1,03826
65;0;0;1,03484
70;0;0;1,03182
20;0,05;0;1,08755
45;0,05;0;1,07105
50;0,05;0;1,0676
55;0,05;0;1,06409
65;0,05;0;1,05691
70;0,05;0;1,05291
20;0,3;0;1,18861
25;0,3;0;1,18389
30;0,3;0;1,1792
40;0,3;0;1,17017
45;0,3;0;1,16572
50;0,3;0;1,16138
55;0,3;0;1,15668
60;0,3;0;1,15233
70;0,3;0;1,14414
20;0;0,05;1,09098
25;0;0,05;1,08775
30;0;0,05;1,08443
35;0;0,05;1,08108
40;0;0,05;1,07768
60;0;0,05;1,06362
65;0;0,05;1,05999
70;0;0,05;1,05601
25;0;0,3;1,2186
35;0;0,3;1,20776
45;0;0,3;1,19759
50;0;0,3;1,19268
55;0;0,3;1,18746
65;0;0,3;1,178
1 T Al2O3 TiO2 Density
2 20 0 0 1,0625
3 25 0 0 1,05979
4 35 0 0 1,05404
5 40 0 0 1,05103
6 45 0 0 1,04794
7 50 0 0 1,04477
8 60 0 0 1,03826
9 65 0 0 1,03484
10 70 0 0 1,03182
11 20 0,05 0 1,08755
12 45 0,05 0 1,07105
13 50 0,05 0 1,0676
14 55 0,05 0 1,06409
15 65 0,05 0 1,05691
16 70 0,05 0 1,05291
17 20 0,3 0 1,18861
18 25 0,3 0 1,18389
19 30 0,3 0 1,1792
20 40 0,3 0 1,17017
21 45 0,3 0 1,16572
22 50 0,3 0 1,16138
23 55 0,3 0 1,15668
24 60 0,3 0 1,15233
25 70 0,3 0 1,14414
26 20 0 0,05 1,09098
27 25 0 0,05 1,08775
28 30 0 0,05 1,08443
29 35 0 0,05 1,08108
30 40 0 0,05 1,07768
31 60 0 0,05 1,06362
32 65 0 0,05 1,05999
33 70 0 0,05 1,05601
34 25 0 0,3 1,2186
35 35 0 0,3 1,20776
36 45 0 0,3 1,19759
37 50 0 0,3 1,19268
38 55 0 0,3 1,18746
39 65 0 0,3 1,178

940
lec4_reg.ipynb Normal file
View File

@ -0,0 +1,940 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Загрузка данных"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Density</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.06250</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05979</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05404</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 20 0.0 0.0 1.06250\n",
"1 25 0.0 0.0 1.05979\n",
"2 35 0.0 0.0 1.05404"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Density</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>1.05696</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>55</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>1.04158</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" <td>1.08438</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 30 0.00 0.0 1.05696\n",
"1 55 0.00 0.0 1.04158\n",
"2 25 0.05 0.0 1.08438"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Both files are semicolon-separated and use a decimal comma, so the same\n",
"# parser options are shared by the training and the test sample.\n",
"csv_format = {\"sep\": \";\", \"decimal\": \",\"}\n",
"\n",
"density_train = pd.read_csv(\"data/density/density_train.csv\", **csv_format)\n",
"density_test = pd.read_csv(\"data/density/density_test.csv\", **csv_format)\n",
"\n",
"# Preview the first rows of each sample.\n",
"display(density_train.head(3), density_test.head(3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Формирование выборок"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 20 0.0 0.0\n",
"1 25 0.0 0.0\n",
"2 35 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 1.06250\n",
"1 1.05979\n",
"2 1.05404\n",
"Name: Density, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>55</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 30 0.00 0.0\n",
"1 55 0.00 0.0\n",
"2 25 0.05 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 1.05696\n",
"1 1.04158\n",
"2 1.08438\n",
"Name: Density, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Separate the target column from the features of the training sample.\n",
"# Note: density_train is rebound to the feature-only frame, so this cell\n",
"# cannot be re-run without reloading the data first.\n",
"density_y_train = density_train[\"Density\"]\n",
"density_train = density_train.drop(columns=[\"Density\"])\n",
"\n",
"# Same split for the test sample.\n",
"density_y_test = density_test[\"Density\"]\n",
"density_test = density_test.drop(columns=[\"Density\"])\n",
"\n",
"display(density_train.head(3), density_y_train.head(3))\n",
"display(density_test.head(3), density_y_test.head(3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
"\n",
"# Shared seed passed to every estimator below that accepts random_state.\n",
"random_state = 9\n",
"\n",
"# Candidate regressors keyed by a short name. Each value is a dict so that\n",
"# the fitted estimator, its predictions and its metrics can be stored next\n",
"# to the model under additional keys.\n",
"models = dict(\n",
"    linear=dict(model=linear_model.LinearRegression(n_jobs=-1)),\n",
"    # Full second-degree polynomial expansion followed by least squares.\n",
"    # PolynomialFeatures emits the constant column by default, hence\n",
"    # fit_intercept=False in the regression step.\n",
"    linear_poly=dict(\n",
"        model=make_pipeline(\n",
"            PolynomialFeatures(degree=2),\n",
"            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
"        )\n",
"    ),\n",
"    # Same idea, but only pairwise interaction terms (no squared features).\n",
"    linear_interact=dict(\n",
"        model=make_pipeline(\n",
"            PolynomialFeatures(interaction_only=True),\n",
"            linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
"        )\n",
"    ),\n",
"    ridge=dict(model=linear_model.RidgeCV()),\n",
"    decision_tree=dict(\n",
"        model=tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
"    ),\n",
"    knn=dict(model=neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)),\n",
"    random_forest=dict(\n",
"        model=ensemble.RandomForestRegressor(\n",
"            max_depth=7, random_state=random_state, n_jobs=-1\n",
"        )\n",
"    ),\n",
"    mlp=dict(\n",
"        model=neural_network.MLPRegressor(\n",
"            activation=\"tanh\",\n",
"            hidden_layer_sizes=(3,),\n",
"            max_iter=500,\n",
"            early_stopping=True,\n",
"            random_state=random_state,\n",
"        )\n",
"    ),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение функции для стандартизации значений в столбце \"T\" (температура) для MLP"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"from sklearn import preprocessing\n",
"\n",
"# Module-level scaler shared by every std_temp call; it keeps the statistics\n",
"# of the most recent fit.\n",
"stndart_scaler = preprocessing.StandardScaler()\n",
"\n",
"\n",
"def std_temp(df: DataFrame, fit: bool = True) -> DataFrame:\n",
"    \"\"\"Standardize the ``T`` column of ``df`` with the shared scaler.\n",
"\n",
"    The column is overwritten in place and the same frame is returned, so\n",
"    pass a copy if the original values are still needed.\n",
"\n",
"    Args:\n",
"        df: Frame with a numeric ``T`` column.\n",
"        fit: When True (the default, which is the original behaviour) the\n",
"            scaler is re-fitted on ``df`` before transforming. Pass False to\n",
"            reuse the statistics of the previous fit, e.g. to scale a test\n",
"            sample with the mean and standard deviation of the training\n",
"            sample.\n",
"\n",
"    Returns:\n",
"        ``df`` itself, with ``T`` replaced by its standardized values.\n",
"    \"\"\"\n",
"    # The scaler expects a 2-D array with one column per feature.\n",
"    temperature = df[\"T\"].to_numpy().reshape(-1, 1)\n",
"    if fit:\n",
"        scaled = stndart_scaler.fit_transform(temperature)\n",
"    else:\n",
"        scaled = stndart_scaler.transform(temperature)\n",
"    df[\"T\"] = scaled.reshape(df[\"T\"].shape)\n",
"    return df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение и оценка моделей с помощью различных алгоритмов"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n",
"Model: mlp\n"
]
}
],
"source": [
"import math\n",
"from pandas import DataFrame\n",
"from sklearn import metrics\n",
"\n",
"# Fit every candidate on the training sample and store the fitted estimator,\n",
"# its predictions and its quality metrics next to the model definition.\n",
"for model_name, model_info in models.items():\n",
"    print(f\"Model: {model_name}\")\n",
"    # Work on copies so that the scaling below never leaks into the shared\n",
"    # feature frames used by the other models.\n",
"    X_train: DataFrame = density_train.copy()\n",
"    X_test: DataFrame = density_test.copy()\n",
"\n",
"    if model_name == \"mlp\":\n",
"        # Fit the scaler on the training temperatures only and reuse those\n",
"        # statistics for the test sample. Re-fitting on the test sample would\n",
"        # put the two samples on different scales and use test-set statistics.\n",
"        X_train = std_temp(X_train)\n",
"        X_test[\"T\"] = stndart_scaler.transform(\n",
"            X_test[\"T\"].to_numpy().reshape(-1, 1)\n",
"        ).ravel()\n",
"\n",
"    fitted_model = model_info[\"model\"].fit(\n",
"        X_train.values, density_y_train.values.ravel()\n",
"    )\n",
"    y_train_pred = fitted_model.predict(X_train.values)\n",
"    y_test_pred = fitted_model.predict(X_test.values)\n",
"\n",
"    model_info[\"fitted\"] = fitted_model\n",
"    model_info[\"train_preds\"] = y_train_pred\n",
"    model_info[\"preds\"] = y_test_pred\n",
"    model_info[\"RMSE_train\"] = math.sqrt(\n",
"        metrics.mean_squared_error(density_y_train, y_train_pred)\n",
"    )\n",
"    model_info[\"RMSE_test\"] = math.sqrt(\n",
"        metrics.mean_squared_error(density_y_test, y_test_pred)\n",
"    )\n",
"    # NOTE: this is the square root of the mean absolute error; the key is\n",
"    # kept as is because the results table selects the column by this name.\n",
"    model_info[\"RMAE_test\"] = math.sqrt(\n",
"        metrics.mean_absolute_error(density_y_test, y_test_pred)\n",
"    )\n",
"    model_info[\"R2_test\"] = metrics.r2_score(density_y_test, y_test_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод результатов оценки"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\">\n",
"#T_b1edf_row0_col0, #T_b1edf_row0_col1, #T_b1edf_row4_col0 {\n",
" background-color: #26818e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row0_col2, #T_b1edf_row7_col3 {\n",
" background-color: #4e02a2;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row0_col3, #T_b1edf_row1_col3, #T_b1edf_row2_col3, #T_b1edf_row7_col2 {\n",
" background-color: #da5a6a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row1_col0, #T_b1edf_row1_col1 {\n",
" background-color: #26828e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row1_col2 {\n",
" background-color: #5b01a5;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row2_col0, #T_b1edf_row3_col0 {\n",
" background-color: #25838e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row2_col1 {\n",
" background-color: #25858e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row2_col2 {\n",
" background-color: #6700a8;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row3_col1 {\n",
" background-color: #23888e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row3_col2 {\n",
" background-color: #7401a8;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row3_col3, #T_b1edf_row4_col3 {\n",
" background-color: #d9586a;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row4_col1 {\n",
" background-color: #238a8d;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row4_col2 {\n",
" background-color: #7b02a8;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row5_col0 {\n",
" background-color: #20928c;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row5_col1 {\n",
" background-color: #1f988b;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row5_col2 {\n",
" background-color: #9613a1;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row5_col3 {\n",
" background-color: #d5546e;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row6_col0 {\n",
" background-color: #50c46a;\n",
" color: #000000;\n",
"}\n",
"#T_b1edf_row6_col1 {\n",
" background-color: #7ad151;\n",
" color: #000000;\n",
"}\n",
"#T_b1edf_row6_col2 {\n",
" background-color: #cd4a76;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row6_col3 {\n",
" background-color: #8104a7;\n",
" color: #f1f1f1;\n",
"}\n",
"#T_b1edf_row7_col0, #T_b1edf_row7_col1 {\n",
" background-color: #a8db34;\n",
" color: #000000;\n",
"}\n",
"</style>\n",
"<table id=\"T_b1edf\">\n",
" <thead>\n",
" <tr>\n",
" <th class=\"blank level0\" >&nbsp;</th>\n",
" <th id=\"T_b1edf_level0_col0\" class=\"col_heading level0 col0\" >RMSE_train</th>\n",
" <th id=\"T_b1edf_level0_col1\" class=\"col_heading level0 col1\" >RMSE_test</th>\n",
" <th id=\"T_b1edf_level0_col2\" class=\"col_heading level0 col2\" >RMAE_test</th>\n",
" <th id=\"T_b1edf_level0_col3\" class=\"col_heading level0 col3\" >R2_test</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row0\" class=\"row_heading level0 row0\" >linear_poly</th>\n",
" <td id=\"T_b1edf_row0_col0\" class=\"data row0 col0\" >0.000319</td>\n",
" <td id=\"T_b1edf_row0_col1\" class=\"data row0 col1\" >0.000362</td>\n",
" <td id=\"T_b1edf_row0_col2\" class=\"data row0 col2\" >0.016643</td>\n",
" <td id=\"T_b1edf_row0_col3\" class=\"data row0 col3\" >0.999965</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row1\" class=\"row_heading level0 row1\" >linear_interact</th>\n",
" <td id=\"T_b1edf_row1_col0\" class=\"data row1 col0\" >0.001131</td>\n",
" <td id=\"T_b1edf_row1_col1\" class=\"data row1 col1\" >0.001491</td>\n",
" <td id=\"T_b1edf_row1_col2\" class=\"data row1 col2\" >0.033198</td>\n",
" <td id=\"T_b1edf_row1_col3\" class=\"data row1 col3\" >0.999413</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row2\" class=\"row_heading level0 row2\" >linear</th>\n",
" <td id=\"T_b1edf_row2_col0\" class=\"data row2 col0\" >0.002464</td>\n",
" <td id=\"T_b1edf_row2_col1\" class=\"data row2 col1\" >0.003261</td>\n",
" <td id=\"T_b1edf_row2_col2\" class=\"data row2 col2\" >0.049891</td>\n",
" <td id=\"T_b1edf_row2_col3\" class=\"data row2 col3\" >0.997191</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row3\" class=\"row_heading level0 row3\" >random_forest</th>\n",
" <td id=\"T_b1edf_row3_col0\" class=\"data row3 col0\" >0.002716</td>\n",
" <td id=\"T_b1edf_row3_col1\" class=\"data row3 col1\" >0.005575</td>\n",
" <td id=\"T_b1edf_row3_col2\" class=\"data row3 col2\" >0.067298</td>\n",
" <td id=\"T_b1edf_row3_col3\" class=\"data row3 col3\" >0.991788</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row4\" class=\"row_heading level0 row4\" >decision_tree</th>\n",
" <td id=\"T_b1edf_row4_col0\" class=\"data row4 col0\" >0.000346</td>\n",
" <td id=\"T_b1edf_row4_col1\" class=\"data row4 col1\" >0.006433</td>\n",
" <td id=\"T_b1edf_row4_col2\" class=\"data row4 col2\" >0.076138</td>\n",
" <td id=\"T_b1edf_row4_col3\" class=\"data row4 col3\" >0.989067</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row5\" class=\"row_heading level0 row5\" >ridge</th>\n",
" <td id=\"T_b1edf_row5_col0\" class=\"data row5 col0\" >0.013989</td>\n",
" <td id=\"T_b1edf_row5_col1\" class=\"data row5 col1\" >0.015356</td>\n",
" <td id=\"T_b1edf_row5_col2\" class=\"data row5 col2\" >0.116380</td>\n",
" <td id=\"T_b1edf_row5_col3\" class=\"data row5 col3\" >0.937703</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row6\" class=\"row_heading level0 row6\" >knn</th>\n",
" <td id=\"T_b1edf_row6_col0\" class=\"data row6 col0\" >0.053108</td>\n",
" <td id=\"T_b1edf_row6_col1\" class=\"data row6 col1\" >0.056776</td>\n",
" <td id=\"T_b1edf_row6_col2\" class=\"data row6 col2\" >0.217611</td>\n",
" <td id=\"T_b1edf_row6_col3\" class=\"data row6 col3\" >0.148414</td>\n",
" </tr>\n",
" <tr>\n",
" <th id=\"T_b1edf_level0_row7\" class=\"row_heading level0 row7\" >mlp</th>\n",
" <td id=\"T_b1edf_row7_col0\" class=\"data row7 col0\" >0.079478</td>\n",
" <td id=\"T_b1edf_row7_col1\" class=\"data row7 col1\" >0.067910</td>\n",
" <td id=\"T_b1edf_row7_col2\" class=\"data row7 col2\" >0.247692</td>\n",
" <td id=\"T_b1edf_row7_col3\" class=\"data row7 col3\" >-0.218339</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n"
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x26429205520>"
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"metric_columns = [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"\n",
"# One row per model; keep only the scalar quality metrics.\n",
"reg_metrics = pd.DataFrame.from_dict(models, orient=\"index\")[metric_columns]\n",
"\n",
"# Rank by test RMSE (lowest first) and colour the two column groups with\n",
"# separate colour maps.\n",
"ranked_metrics = reg_metrics.sort_values(by=\"RMSE_test\")\n",
"styled_metrics = ranked_metrics.style.background_gradient(\n",
"    cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
")\n",
"styled_metrics.background_gradient(\n",
"    cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод реального и \"спрогнозированного\" результата для обучающей и тестовой выборок"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение лучшей модели"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'linear_poly'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# After sorting by test RMSE the best model is the first row of the table;\n",
"# its index label is the model name.\n",
"ranked_by_test_rmse = reg_metrics.sort_values(by=\"RMSE_test\")\n",
"best_model = str(ranked_by_test_rmse.index[0])\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для обучающей выборки"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Density</th>\n",
" <th>DensityPred</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>20</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.06250</td>\n",
" <td>1.063174</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>25</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05979</td>\n",
" <td>1.060117</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>35</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05404</td>\n",
" <td>1.053941</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>40</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.05103</td>\n",
" <td>1.050822</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>45</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>1.04794</td>\n",
" <td>1.047683</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Density DensityPred\n",
"0 20 0.0 0.0 1.06250 1.063174\n",
"1 25 0.0 0.0 1.05979 1.060117\n",
"2 35 0.0 0.0 1.05404 1.053941\n",
"3 40 0.0 0.0 1.05103 1.050822\n",
"4 45 0.0 0.0 1.04794 1.047683"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predictions of the best model for the training sample, aligned with the\n",
"# index of the true values.\n",
"train_pred = pd.Series(\n",
"    models[best_model][\"train_preds\"],\n",
"    index=density_y_train.index,\n",
"    name=\"DensityPred\",\n",
")\n",
"\n",
"# Features, true density and predicted density side by side.\n",
"pd.concat([density_train, density_y_train, train_pred], axis=1).head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для тестовой выборки"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>T</th>\n",
" <th>Al2O3</th>\n",
" <th>TiO2</th>\n",
" <th>Density</th>\n",
" <th>DensityPred</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>30</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>1.05696</td>\n",
" <td>1.057040</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>55</td>\n",
" <td>0.00</td>\n",
" <td>0.0</td>\n",
" <td>1.04158</td>\n",
" <td>1.041341</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>25</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" <td>1.08438</td>\n",
" <td>1.084063</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>30</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" <td>1.08112</td>\n",
" <td>1.080764</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>35</td>\n",
" <td>0.05</td>\n",
" <td>0.0</td>\n",
" <td>1.07781</td>\n",
" <td>1.077444</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" T Al2O3 TiO2 Density DensityPred\n",
"0 30 0.00 0.0 1.05696 1.057040\n",
"1 55 0.00 0.0 1.04158 1.041341\n",
"2 25 0.05 0.0 1.08438 1.084063\n",
"3 30 0.05 0.0 1.08112 1.080764\n",
"4 35 0.05 0.0 1.07781 1.077444"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Predictions of the best model for the test sample, aligned with the\n",
"# index of the true values.\n",
"test_pred = pd.Series(\n",
"    models[best_model][\"preds\"],\n",
"    index=density_y_test.index,\n",
"    name=\"DensityPred\",\n",
")\n",
"\n",
"# Features, true density and predicted density side by side.\n",
"pd.concat([density_test, density_y_test, test_pred], axis=1).head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}