{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Загрузка данных"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.06250 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05979 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05404 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 20 0.0 0.0 1.06250\n",
"1 25 0.0 0.0 1.05979\n",
"2 35 0.0 0.0 1.05404"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.05696 | \n",
"
\n",
" \n",
" 1 | \n",
" 55 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.04158 | \n",
"
\n",
" \n",
" 2 | \n",
" 25 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.08438 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density\n",
"0 30 0.00 0.0 1.05696\n",
"1 55 0.00 0.0 1.04158\n",
"2 25 0.05 0.0 1.08438"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"density_train = pd.read_csv(\"data/density/density_train.csv\", sep=\";\", decimal=\",\")\n",
"density_test = pd.read_csv(\"data/density/density_test.csv\", sep=\";\", decimal=\",\")\n",
"\n",
"display(density_train.head(3))\n",
"display(density_test.head(3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Формирование выборок"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 20 0.0 0.0\n",
"1 25 0.0 0.0\n",
"2 35 0.0 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 1.06250\n",
"1 1.05979\n",
"2 1.05404\n",
"Name: Density, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 55 | \n",
" 0.00 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 25 | \n",
" 0.05 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2\n",
"0 30 0.00 0.0\n",
"1 55 0.00 0.0\n",
"2 25 0.05 0.0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 1.05696\n",
"1 1.04158\n",
"2 1.08438\n",
"Name: Density, dtype: float64"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"density_y_train = density_train[\"Density\"]\n",
"density_train = density_train.drop([\"Density\"], axis=1)\n",
"\n",
"display(density_train.head(3))\n",
"display(density_y_train.head(3))\n",
"\n",
"density_y_test = density_test[\"Density\"]\n",
"density_test = density_test.drop([\"Density\"], axis=1)\n",
"\n",
"display(density_test.head(3))\n",
"display(density_y_test.head(3))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
]
},
{
"cell_type": "code",
"execution_count": 148,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
" \"mlp\": {\n",
" \"model\": neural_network.MLPRegressor(\n",
" activation=\"tanh\",\n",
" hidden_layer_sizes=(3,),\n",
" max_iter=500,\n",
" early_stopping=True,\n",
" random_state=random_state,\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Определение функции для стандартизации значений в столбце \"Температура\" для MLP"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [],
"source": [
"from pandas import DataFrame\n",
"from sklearn import preprocessing\n",
"\n",
"stndart_scaler = preprocessing.StandardScaler()\n",
"\n",
"def std_temp(df: DataFrame) -> DataFrame:\n",
" df[\"T\"] = stndart_scaler.fit_transform(\n",
" df[\"T\"].to_numpy().reshape(-1, 1)\n",
" ).reshape(df[\"T\"].shape)\n",
" return df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Обучение и оценка моделей с помощью различных алгоритмов"
]
},
{
"cell_type": "code",
"execution_count": 150,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n",
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n",
"Model: mlp\n"
]
}
],
"source": [
"import math\n",
"from pandas import DataFrame\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" X_train: DataFrame = density_train.copy()\n",
" X_test: DataFrame = density_test.copy()\n",
"\n",
" if model_name == \"mlp\":\n",
" X_train = std_temp(X_train)\n",
" X_test = std_temp(X_test)\n",
"\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" X_train.values, density_y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(X_train.values)\n",
" y_test_pred = fitted_model.predict(X_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"train_preds\"] = y_train_pred\n",
" models[model_name][\"preds\"] = y_test_pred\n",
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
" metrics.mean_squared_error(density_y_train, y_train_pred)\n",
" )\n",
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(density_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(density_y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(density_y_test, y_test_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод результатов оценки"
]
},
{
"cell_type": "code",
"execution_count": 151,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" RMSE_train | \n",
" RMSE_test | \n",
" RMAE_test | \n",
" R2_test | \n",
"
\n",
" \n",
" \n",
" \n",
" linear_poly | \n",
" 0.000319 | \n",
" 0.000362 | \n",
" 0.016643 | \n",
" 0.999965 | \n",
"
\n",
" \n",
" linear_interact | \n",
" 0.001131 | \n",
" 0.001491 | \n",
" 0.033198 | \n",
" 0.999413 | \n",
"
\n",
" \n",
" linear | \n",
" 0.002464 | \n",
" 0.003261 | \n",
" 0.049891 | \n",
" 0.997191 | \n",
"
\n",
" \n",
" random_forest | \n",
" 0.002716 | \n",
" 0.005575 | \n",
" 0.067298 | \n",
" 0.991788 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 0.000346 | \n",
" 0.006433 | \n",
" 0.076138 | \n",
" 0.989067 | \n",
"
\n",
" \n",
" ridge | \n",
" 0.013989 | \n",
" 0.015356 | \n",
" 0.116380 | \n",
" 0.937703 | \n",
"
\n",
" \n",
" knn | \n",
" 0.053108 | \n",
" 0.056776 | \n",
" 0.217611 | \n",
" 0.148414 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.079478 | \n",
" 0.067910 | \n",
" 0.247692 | \n",
" -0.218339 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 151,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Вывод реального и \"спрогнозированного\" результата для обучающей и тестовой выборок"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение лучшей модели"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'linear_poly'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = str(reg_metrics.sort_values(by=\"RMSE_test\").iloc[0].name)\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для обучающей выборки"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
" DensityPred | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 20 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.06250 | \n",
" 1.063174 | \n",
"
\n",
" \n",
" 1 | \n",
" 25 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05979 | \n",
" 1.060117 | \n",
"
\n",
" \n",
" 2 | \n",
" 35 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05404 | \n",
" 1.053941 | \n",
"
\n",
" \n",
" 3 | \n",
" 40 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.05103 | \n",
" 1.050822 | \n",
"
\n",
" \n",
" 4 | \n",
" 45 | \n",
" 0.0 | \n",
" 0.0 | \n",
" 1.04794 | \n",
" 1.047683 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density DensityPred\n",
"0 20 0.0 0.0 1.06250 1.063174\n",
"1 25 0.0 0.0 1.05979 1.060117\n",
"2 35 0.0 0.0 1.05404 1.053941\n",
"3 40 0.0 0.0 1.05103 1.050822\n",
"4 45 0.0 0.0 1.04794 1.047683"
]
},
"execution_count": 153,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [\n",
" density_train,\n",
" density_y_train,\n",
" pd.Series(\n",
" models[best_model][\"train_preds\"],\n",
" index=density_y_train.index,\n",
" name=\"DensityPred\",\n",
" ),\n",
" ],\n",
" axis=1,\n",
").head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для тестовой выборки"
]
},
{
"cell_type": "code",
"execution_count": 154,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" T | \n",
" Al2O3 | \n",
" TiO2 | \n",
" Density | \n",
" DensityPred | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 30 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.05696 | \n",
" 1.057040 | \n",
"
\n",
" \n",
" 1 | \n",
" 55 | \n",
" 0.00 | \n",
" 0.0 | \n",
" 1.04158 | \n",
" 1.041341 | \n",
"
\n",
" \n",
" 2 | \n",
" 25 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.08438 | \n",
" 1.084063 | \n",
"
\n",
" \n",
" 3 | \n",
" 30 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.08112 | \n",
" 1.080764 | \n",
"
\n",
" \n",
" 4 | \n",
" 35 | \n",
" 0.05 | \n",
" 0.0 | \n",
" 1.07781 | \n",
" 1.077444 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" T Al2O3 TiO2 Density DensityPred\n",
"0 30 0.00 0.0 1.05696 1.057040\n",
"1 55 0.00 0.0 1.04158 1.041341\n",
"2 25 0.05 0.0 1.08438 1.084063\n",
"3 30 0.05 0.0 1.08112 1.080764\n",
"4 35 0.05 0.0 1.07781 1.077444"
]
},
"execution_count": 154,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [\n",
" density_test,\n",
" density_y_test,\n",
" pd.Series(\n",
" models[best_model][\"preds\"],\n",
" index=density_y_test.index,\n",
" name=\"DensityPred\",\n",
" ),\n",
" ],\n",
" axis=1,\n",
").head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}