1187 lines
165 KiB
Plaintext
1187 lines
165 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Stores"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Store ID ', 'Store_Area', 'Items_Available', 'Daily_Customer_Count',\n",
|
|||
|
" 'Store_Sales'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Stores.csv\")\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Бизнес-цели**\n",
|
|||
|
"1. Прогнозирование посетителей в магазине:\n",
|
|||
|
"\n",
|
|||
|
"Цель: Разработать модель, которая будет предсказывать посещаемость магазина на основе его характеристик (площадь, объём продаж, количество доступных товаров).\n",
|
|||
|
"\n",
|
|||
|
"Применение: Прогнозирование посещаемости магазинов.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация параметров магазина:\n",
|
|||
|
"\n",
|
|||
|
"Цель: Определить оптимальные коэффициенты для различных факторов, влияющих на посещаемость магазина, чтобы максимизировать прибыль компании при наименьших затратах на пространство магазина и его ассортимент.\n",
|
|||
|
"\n",
|
|||
|
"Применение: Создавать магазин с максимальной посещаемостью."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Прогнозирование посетителей в магазине**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Среднее значение поля 'Daily_Customer_Count': 786.3504464285714\n",
|
|||
|
" Store ID Store_Area Items_Available Daily_Customer_Count Store_Sales \\\n",
|
|||
|
"0 1 1659 1961 530 66490 \n",
|
|||
|
"1 2 1461 1752 210 39820 \n",
|
|||
|
"2 3 1340 1609 720 54010 \n",
|
|||
|
"3 4 1451 1748 620 53730 \n",
|
|||
|
"4 5 1770 2111 450 46620 \n",
|
|||
|
"\n",
|
|||
|
" above_average_count customers_volatility \n",
|
|||
|
"0 0 1550 \n",
|
|||
|
"1 0 1550 \n",
|
|||
|
"2 0 1550 \n",
|
|||
|
"3 0 1550 \n",
|
|||
|
"4 0 1550 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднее значение посещаемости\n",
|
|||
|
"average_count = df['Daily_Customer_Count'].mean()\n",
|
|||
|
"print(f\"Среднее значение поля 'Daily_Customer_Count': {average_count}\")\n",
|
|||
|
"\n",
|
|||
|
"# Создаем новую переменную, указывающую, превышает ли посещаемость среднюю\n",
|
|||
|
"df[\"above_average_count\"] = (df[\"Daily_Customer_Count\"] > average_count).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем размах посещаемости по всему набору данных (max - min); это одно число, одинаковое для всех строк\n",
|
|||
|
"df[\"customers_volatility\"] = df[\"Daily_Customer_Count\"].max() - df[\"Daily_Customer_Count\"].min()\n",
|
|||
|
"\n",
|
|||
|
"print(df.head())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Оптимизация параметров магазина**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средняя посещаемость для 'Store_Area':\n",
|
|||
|
"Store_Area\n",
|
|||
|
"775 1090.0\n",
|
|||
|
"780 790.0\n",
|
|||
|
"854 660.0\n",
|
|||
|
"869 850.0\n",
|
|||
|
"891 630.0\n",
|
|||
|
" ... \n",
|
|||
|
"2063 810.0\n",
|
|||
|
"2067 790.0\n",
|
|||
|
"2169 600.0\n",
|
|||
|
"2214 740.0\n",
|
|||
|
"2229 660.0\n",
|
|||
|
"Name: Daily_Customer_Count, Length: 583, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость для 'Items_Available':\n",
|
|||
|
"Items_Available\n",
|
|||
|
"932 1090.0\n",
|
|||
|
"951 790.0\n",
|
|||
|
"1018 660.0\n",
|
|||
|
"1050 850.0\n",
|
|||
|
"1059 870.0\n",
|
|||
|
" ... \n",
|
|||
|
"2492 790.0\n",
|
|||
|
"2493 810.0\n",
|
|||
|
"2617 600.0\n",
|
|||
|
"2647 740.0\n",
|
|||
|
"2667 660.0\n",
|
|||
|
"Name: Daily_Customer_Count, Length: 616, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость для 'Store_Sales':\n",
|
|||
|
"Store_Sales\n",
|
|||
|
"14920 990.0\n",
|
|||
|
"16370 880.0\n",
|
|||
|
"17670 660.0\n",
|
|||
|
"20270 870.0\n",
|
|||
|
"21300 850.0\n",
|
|||
|
" ... \n",
|
|||
|
"101820 820.0\n",
|
|||
|
"102310 1310.0\n",
|
|||
|
"102920 680.0\n",
|
|||
|
"105150 980.0\n",
|
|||
|
"116320 860.0\n",
|
|||
|
"Name: Daily_Customer_Count, Length: 816, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость для 'Store_Area' и 'Items_Available':\n",
|
|||
|
"Store_Area Items_Available\n",
|
|||
|
"775 932 1090.0\n",
|
|||
|
"780 951 790.0\n",
|
|||
|
"854 1018 660.0\n",
|
|||
|
"869 1050 850.0\n",
|
|||
|
"891 1073 630.0\n",
|
|||
|
" ... \n",
|
|||
|
"2063 2493 810.0\n",
|
|||
|
"2067 2492 790.0\n",
|
|||
|
"2169 2617 600.0\n",
|
|||
|
"2214 2647 740.0\n",
|
|||
|
"2229 2667 660.0\n",
|
|||
|
"Name: Daily_Customer_Count, Length: 892, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость для 'Store_Sales' и 'Items_Available':\n",
|
|||
|
"Store_Sales Items_Available\n",
|
|||
|
"14920 1508 990.0\n",
|
|||
|
"16370 1790 880.0\n",
|
|||
|
"17670 1877 660.0\n",
|
|||
|
"20270 1946 870.0\n",
|
|||
|
"21300 1686 850.0\n",
|
|||
|
" ... \n",
|
|||
|
"101820 1758 820.0\n",
|
|||
|
"102310 1587 1310.0\n",
|
|||
|
"102920 1638 680.0\n",
|
|||
|
"105150 2104 980.0\n",
|
|||
|
"116320 2414 860.0\n",
|
|||
|
"Name: Daily_Customer_Count, Length: 896, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость для 'Store_Sales' и 'Store_Area':\n",
|
|||
|
"Store_Sales Store_Area\n",
|
|||
|
"14920 1250 990.0\n",
|
|||
|
"16370 1477 880.0\n",
|
|||
|
"17670 1537 660.0\n",
|
|||
|
"20270 1624 870.0\n",
|
|||
|
"21300 1397 850.0\n",
|
|||
|
" ... \n",
|
|||
|
"101820 1486 820.0\n",
|
|||
|
"102310 1303 1310.0\n",
|
|||
|
"102920 1365 680.0\n",
|
|||
|
"105150 1775 980.0\n",
|
|||
|
"116320 1989 860.0\n",
|
|||
|
"Name: Daily_Customer_Count, Length: 896, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю посещаемость для каждого значения каждого признака\n",
|
|||
|
"for column in [\n",
|
|||
|
" \"Store_Area\",\n",
|
|||
|
" \"Items_Available\",\n",
|
|||
|
" \"Store_Sales\"\n",
|
|||
|
"]:\n",
|
|||
|
" print(f\"Средняя посещаемость для '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Daily_Customer_Count\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя посещаемость для 'Store_Area' и 'Items_Available':\")\n",
|
|||
|
"print(df.groupby([\"Store_Area\", \"Items_Available\"])[\"Daily_Customer_Count\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя посещаемость для 'Store_Sales' и 'Items_Available':\")\n",
|
|||
|
"print(df.groupby([\"Store_Sales\", \"Items_Available\"])[\"Daily_Customer_Count\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя посещаемость для 'Store_Sales' и 'Store_Area':\")\n",
|
|||
|
"print(df.groupby([\"Store_Sales\", \"Store_Area\"])[\"Daily_Customer_Count\"].mean())\n",
|
|||
|
"print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"R² (коэффициент детерминации)\n",
|
|||
|
"\n",
|
|||
|
"MAE (средняя абсолютная ошибка)\n",
|
|||
|
"\n",
|
|||
|
"RMSE (корень из среднеквадратичной ошибки)\n",
|
|||
|
"\n",
|
|||
|
"1. Прогнозирование посещаемости (Выбранные модели):\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация параметров магазина (Выбранные модели):\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 128.2349845770729\n",
|
|||
|
"MSE: 161.32830081184198\n",
|
|||
|
"RMSE: 161.32830081184198\n",
|
|||
|
"R²: 0.6834552578435291\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 138.44166666666663\n",
|
|||
|
"MSE: 174.93720571806458\n",
|
|||
|
"RMSE: 174.93720571806458\n",
|
|||
|
"R²: 0.6277982973898757\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 134.01783568908053\n",
|
|||
|
"MSE: 172.79331833544663\n",
|
|||
|
"RMSE: 172.79331833544663\n",
|
|||
|
"R²: 0.6368651961896755\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score, accuracy_score\n",
|
|||
|
"\n",
|
|||
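|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии. ВНИМАНИЕ: X_reg включает столбец above_average_count, вычисленный из Daily_Customer_Count (утечка целевой переменной)\n",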
|
|||
|
"X_reg = df.drop(\"Daily_Customer_Count\", axis=1)\n",
|
|||
|
"y_reg = df[\"Daily_Customer_Count\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи регрессии\n",
|
|||
|
"scaler_reg = StandardScaler()\n",
|
|||
|
"X_train_reg = scaler_reg.fit_transform(X_train_reg)\n",
|
|||
|
"X_test_reg = scaler_reg.transform(X_test_reg)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
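|
"# Обучаем и оцениваем модели для задачи регрессии (примечание: переменная mse ниже вычисляется через root_mean_squared_error, т.е. содержит RMSE, а не MSE)\n",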
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" model.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
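|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации. ВНИМАНИЕ: X_class включает столбец above_average_count, совпадающий с y_class (утечка целевой переменной)\n",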
|
|||
|
"X_class = df.drop(\"Daily_Customer_Count\", axis=1)\n",
|
|||
|
"y_class = (df[\"Daily_Customer_Count\"] > df[\"Daily_Customer_Count\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи классификации\n",
|
|||
|
"scaler_class = StandardScaler()\n",
|
|||
|
"X_train_class = scaler_class.fit_transform(X_train_class)\n",
|
|||
|
"X_test_class = scaler_class.transform(X_test_class)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" model.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Прогнозирование посещаемости (Конвейер для задачи регрессии):**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 240.99246411452697\n",
|
|||
|
"MSE: 287.6996858707222\n",
|
|||
|
"RMSE: 287.6996858707222\n",
|
|||
|
"R²: -0.0066830595689202354\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 247.44388888888895\n",
|
|||
|
"MSE: 305.8892672738508\n",
|
|||
|
"RMSE: 305.8892672738508\n",
|
|||
|
"R²: -0.13800052740628277\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 251.88728528608513\n",
|
|||
|
"MSE: 303.34928472605816\n",
|
|||
|
"RMSE: 303.34928472605816\n",
|
|||
|
"R²: -0.1191799867669745\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df[\"Daily_Customer_Count\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
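|
"# Обучаем и оцениваем модели для задачи регрессии (примечание: переменная mse ниже вычисляется через root_mean_squared_error, т.е. содержит RMSE, а не MSE)\n",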
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = pipeline.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Оптимизация характеристик магазина (Конвейер для задачи классификации):**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 0.46111111111111114\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 0.5055555555555555\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 0.4777777777777778\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df[\"Daily_Customer_Count\"] > df[\"Daily_Customer_Count\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = pipeline.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Прогнозирование посещаемости (Настройка гиперпараметров для задачи регрессии):**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 240.99246411452697\n",
|
|||
|
"MSE: 287.6996858707222\n",
|
|||
|
"RMSE: 287.6996858707222\n",
|
|||
|
"R²: -0.0066830595689202354\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n",
|
|||
|
"MAE: 245.73014669031528\n",
|
|||
|
"MSE: 296.12210890714334\n",
|
|||
|
"RMSE: 296.12210890714334\n",
|
|||
|
"R²: -0.06648721200392305\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 3, 'model__n_estimators': 100}\n",
|
|||
|
"MAE: 241.00773277392423\n",
|
|||
|
"MSE: 287.08212054003224\n",
|
|||
|
"RMSE: 287.08212054003224\n",
|
|||
|
"R²: -0.002365882066503122\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Daily_Customer_Count']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
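|
"# Обучаем и оцениваем модели для задачи регрессии (примечание: переменная mse ниже вычисляется через root_mean_squared_error, т.е. содержит RMSE, а не MSE)\n",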
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Оптимизация характеристик (Настройка гиперпараметров для задачи классификации):**\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'lbfgs'}\n",
|
|||
|
"Accuracy: 0.46111111111111114\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.49444444444444446\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.4722222222222222\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Прогнозирование посещаемости (Регрессия):**\n",
|
|||
|
"\n",
|
|||
|
"* MAE: Хорошо подходит для задач, где важно понимать среднее отклонение предсказаний от фактических значений. Эта метрика легко интерпретируется, так как она измеряется в тех же единицах, что и целевая переменная.\n",
|
|||
|
"\n",
|
|||
|
"* MSE и RMSE: Полезны для задач, где важно сильнее штрафовать большие ошибки: ошибки возводятся в квадрат, поэтому эти метрики чувствительны к выбросам.\n",
|
|||
|
"\n",
|
|||
|
"* R²: Позволяет оценить, насколько хорошо модель объясняет вариацию целевой переменной. Значение R² близкое к 1 указывает на хорошее качество модели.\n",
|
|||
|
"\n",
|
|||
|
"**Оптимизация характеристик (Классификация):**\n",
|
|||
|
"\n",
|
|||
|
"* Accuracy: Хорошо подходит для задач, где классы сбалансированы. Эта метрика показывает общую точность модели.\n",
|
|||
|
"\n",
|
|||
|
"* Precision и Recall: Важны для задач, где важно минимизировать ошибки определенного типа (ложноположительные или ложноотрицательные). \n",
|
|||
|
"\n",
|
|||
|
"* F1-score: Позволяет оценить баланс между precision и recall."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 240.99246411452697\n",
|
|||
|
"MSE: 287.6996858707222\n",
|
|||
|
"RMSE: 287.6996858707222\n",
|
|||
|
"R²: -0.0066830595689202354\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 242.93767895499704\n",
|
|||
|
"MSE: 295.64502542919416\n",
|
|||
|
"RMSE: 295.64502542919416\n",
|
|||
|
"R²: -0.06305353687186832\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 3, 'model__n_estimators': 100}\n",
|
|||
|
"MAE: 240.98814751014328\n",
|
|||
|
"MSE: 287.0735167856578\n",
|
|||
|
"RMSE: 287.0735167856578\n",
|
|||
|
"R²: -0.0023058018316517437\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'lbfgs'}\n",
|
|||
|
"Accuracy: 0.46111111111111114\n",
|
|||
|
"Precision: 0.475\n",
|
|||
|
"Recall: 0.2\n",
|
|||
|
"F1-score: 0.2814814814814815\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgwAAAHHCAYAAADTQQDlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABPgUlEQVR4nO3deVxU1fsH8M8MwkDAgKiAKCCCCyRumIaouCC4pqn5dQfFzMINc63MXcwyzXLXcElzQTOX3BVNRXPPBUlwTQG3AEFZ5/z+MObXCDiMc4EBP29f96Vz7plznzuMzDNnuVcmhBAgIiIiegV5SQdAREREho8JAxEREWnFhIGIiIi0YsJAREREWjFhICIiIq2YMBAREZFWTBiIiIhIKyYMREREpBUTBiIiItKKCcMb6vr16/D394eVlRVkMhm2bdsmafu3bt2CTCbDqlWrJG23NGvZsiVatmwpWXupqakYPHgw7O3tIZPJMGrUKMnaNhSRkZGQyWSIjIyUpL1Vq1ZBJpPh1q1bkrRHwJQpUyCTyUo6DCoGTBhKUFxcHD766CNUr14dpqamUCqV8PHxwXfffYfnz58X6bEDAwNx6dIlzJw5E2vXrkWjRo2K9HjFKSgoCDKZDEqlMt/X8fr165DJZJDJZPjmm290bv/+/fuYMmUKLly4IEG0r2/WrFlYtWoVPv74Y6xduxb9+/cv0uNVq1YNnTp1KtJjSGXWrFmSJ8Evy00+crdy5cqhSpUqCAoKwr1794r02EQlQlCJ2LlzpzAzMxPW1tZixIgRYtmyZeKHH34QvXr1EsbGxuLDDz8ssmM/e/ZMABCff/55kR1DpVKJ58+fi+zs7CI7RkECAwNFuXLlhJGRkdi4cWOe/ZMnTxampqYCgPj66691bv/06dMCgAgPD9fpeRkZGSIjI0Pn4xWkSZMmwsfHR7L2tHF2dhYdO3YstuMJIUROTo54/vy5yMnJ0el55ubmIjAwME95dna2eP78uVCpVHrHFh4eLgCIadOmibVr14rly5eL4OBgYWRkJFxdXcXz58/1PkZpkJWV9cac65uuXMmmK2+mmzdvolevXnB2dsahQ4dQuXJl9b6QkBDExsZi165dRXb8hw8fAgCsra2L7BgymQympqZF1r42CoUCPj4++Pnnn9GzZ0+NfevXr0fHjh2xZcuWYonl2bNneOutt2BiYiJpuw8ePICHh4dk7WVnZ0OlUkkepz7kcrmk7yMjIyMYGRlJ1h4AtG/fXt1DN3jwYFSsWBFfffUVtm/fnue9V5SEEEhPT4eZmVmxHRMAypUrh3Ll+FHyJuCQRAmYM2cOUlNTsXLlSo1kIZebmxtGjhypfpydnY3p06fD1dUVCoUC1apVw2effYaMjAyN5+V2GR87dgyNGzeGqakpqlevjjVr1qjrTJkyBc7OzgCAsWPHQiaToVq1agBedOXn/vu/8huj3L9/P5o1awZra2tYWFigVq1a+Oyzz9T7C5rDcOjQITRv3hzm5uawtrZGly5dEB0dne/xYmNjERQUBGtra1hZWWHgwIF49uxZwS/sS/r06YPdu3cjKSlJXXb69Glcv34dffr0yVP/yZMnGDNmDDw9PWFhYQGlUon27dvj4sWL6jqRkZF45513AAADBw5Ud0fnnmfLli1Rp04dnD17Fi1atMBbb72lfl1ensMQGBgIU1PTPOcfEBCA8uXL4/79+/meV+64/s2bN7Fr1y51DLnj8g8ePEBwcDDs7OxgamqKevXqYfXq1Rpt5P58vvnmG8yfP1/93rp69WqhXtuCFPa9qlKpMGXKFDg4OOCtt95Cq1atcPXqVVSrVg1BQUF5zvW/cxiuX7+O7t27w97eHqampqhatSp69eqF5ORkAC+S1bS0NKxevVr92uS2WdAcht27d8PX1xeWlpZQKpV45513sH79+td6DZo3bw7gxZDjf127dg09evSAjY0NTE1N0ahRI2zfvj3P8//880/4+v
rCzMwMVatWxYwZMxAeHp4n7tz/73v37kWjRo1gZmaGpUuXAgCSkpIwatQoODo6QqFQwM3NDV999RVUKpXGsTZs2AAvLy/1eXt6euK7775T78/KysLUqVNRo0YNmJqaokKFCmjWrBn279+vrpPf7wcpf2eR4WBaWAJ27NiB6tWro2nTpoWqP3jwYKxevRo9evTAp59+ilOnTiEsLAzR0dH45ZdfNOrGxsaiR48eCA4ORmBgIH788UcEBQXBy8sLb7/9Nrp16wZra2uEhoaid+/e6NChAywsLHSK/8qVK+jUqRPq1q2LadOmQaFQIDY2FsePH3/l8w4cOID27dujevXqmDJlCp4/f47vv/8ePj4+OHfuXJ5kpWfPnnBxcUFYWBjOnTuHFStWwNbWFl999VWh4uzWrRuGDh2KrVu3YtCgQQBe9C7Url0bDRs2zFP/xo0b2LZtGz744AO4uLggMTERS5cuha+vL65evQoHBwe4u7tj2rRp+PLLLzFkyBD1h8N/f5aPHz9G+/bt0atXL/Tr1w92dnb5xvfdd9/h0KFDCAwMRFRUFIyMjLB06VLs27cPa9euhYODQ77Pc3d3x9q1axEaGoqqVavi008/BQBUqlQJz58/R8uWLREbG4thw4bBxcUFmzdvRlBQEJKSkjQSUQAIDw9Heno6hgwZAoVCARsbm0K9tgUp7Ht14sSJmDNnDjp37oyAgABcvHgRAQEBSE9Pf2X7mZmZCAgIQEZGBoYPHw57e3vcu3cPO3fuRFJSEqysrLB27VoMHjwYjRs3xpAhQwAArq6uBba5atUqDBo0CG+//TYmTpwIa2trnD9/Hnv27Mk3sdQm90O9fPny6rIrV67Ax8cHVapUwYQJE2Bubo5Nmzaha9eu2LJlC95//30AwL1799CqVSvIZDJMnDgR5ubmWLFiBRQKRb7HiomJQe/evfHRRx/hww8/RK1atfDs2TP4+vri3r17+Oijj+Dk5IQTJ05g4sSJiI+Px/z58wG8SPp79+6NNm3aqP9PRUdH4/jx4+r3yZQpUxAWFqZ+PVNSUnDmzBmcO3cObdu2LfA1kPJ3FhmQkh4TedMkJycLAKJLly6Fqn/hwgUBQAwePFijfMyYMQKAOHTokLrM2dlZABBHjx5Vlz148EAoFArx6aefqstu3ryZ7/h9YGCgcHZ2zhPD5MmTxX/fKvPmzRMAxMOHDwuMO/cY/x3nr1+/vrC1tRWPHz9Wl128eFHI5XIxYMCAPMcbNGiQRpvvv/++qFChQoHH/O95mJubCyGE6NGjh2jTpo0Q4sV4uL29vZg6dWq+r0F6enqesfKbN28KhUIhpk2bpi571RwGX19fAUAsWbIk332+vr4aZXv37hUAxIwZM8SNGzeEhYWF6Nq1q9ZzFCL/OQXz588XAMRPP/2kLsvMzBTe3t7CwsJCpKSkqM8LgFAqleLBgwevfbz/Kux7NSEhQZQrVy7PeU6ZMkUA0Jh7cPjwYQFAHD58WAghxPnz5wUAsXnz5lfGWtAchtx5Bzdv3hRCCJGUlCQsLS1FkyZN8ozDa5vnkNvWgQMHxMOHD8Xdu3dFRESEqFSpklAoFOLu3bvqum3atBGenp4iPT1do/2mTZuKGjVqqMuGDx8uZDKZOH/+vLrs8ePHwsbGRiNuIf7///uePXs04po+fbowNzcXf/31l0b5hAkThJGRkbhz544QQoiRI0cKpVL5ynlG9erV0zpv5eXfD0XxO4sMA4ckillKSgoAwNLSslD1f/vtNwDA6NGjNcpzv1W+PNfBw8ND/a0XePGts1atWrhx48Zrx/yy3LkPv/76a54uzoLEx8fjwoULCAoK0vgWW7duXbRt21Z9nv81dOhQjcfNmzfH48eP1a9hYfTp0weRkZFISEjAoUOHkJCQUOC3RoVCAbn8xX+JnJwcPH78WD3ccu7cuUIfU6FQYODAgYWq6+/vj48++gjTpk1Dt27dYGpqqu5Wfh2//fYb7O3t0bt3b3
WZsbExRowYgdTUVBw5ckSjfvfu3VGpUqXXPt7Lxwa0v1cPHjyI7OxsfPLJJxr1hg8frvUYVlZWAIC9e/fqNDxVkP3
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.4888888888888889\n",
|
|||
|
"Precision: 0.5205479452054794\n",
|
|||
|
"Recall: 0.4\n",
|
|||
|
"F1-score: 0.4523809523809524\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhkAAAHHCAYAAAASxkpJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhsElEQVR4nO3dd1gUV9sG8HtBWBBYUEGKIlIURMGaEGOPSImxG2MHRY3GHntib/iqiUaT2GLBRGMSW7pd7Poau0aJICgqoqKwgFKE8/3hy3yOFFmYlZL75zXXxZ6ZOfPMMi7PnnPmjEoIIUBERESkMIOSDoCIiIjKJyYZREREpBdMMoiIiEgvmGQQERGRXjDJICIiIr1gkkFERER6wSSDiIiI9IJJBhEREekFkwwiIiLSCyYZenL9+nX4+fnB0tISKpUKO3fuVLT+mJgYqFQqbNiwQdF6y7LWrVujdevWitWXkpKCQYMGwc7ODiqVCmPGjFGs7rKC11npVhp+PzVr1kRwcLCsLK/Pvw0bNkClUiEmJua1x6hSqTBz5szXflwq50lGVFQUPvzwQ7i4uMDExAQajQbNmjXDF198gadPn+r12EFBQbh06RLmzZuHb7/9Fk2aNNHr8V6n4OBgqFQqaDSaPN/H69evQ6VSQaVSYfHixTrXf/fuXcycORPnz59XINqimz9/PjZs2IBhw4bh22+/Rb9+/fR6vJo1a0rvm0qlgpmZGd58801s3LhRr8cta15+n15c0tLSSjq8XI4fP46ZM2ciMTFRp/3Cw8PRtWtX2NnZwdjYGFWrVkWHDh2wfft2/QSqoJL4/Pvjjz+YSJRCFUo6AH35/fff8f7770OtVqN///6oV68eMjIycPToUUyYMAFXrlzB6tWr9XLsp0+f4sSJE/j0008xYsQIvRzDyckJT58+hZGRkV7qf5UKFSrgyZMn+PXXX9GjRw/Zuk2bNsHExKTIH/h3797FrFmzULNmTTRo0KDQ++3Zs6dIx8vPgQMH8NZbb2HGjBmK1luQBg0aYNy4cQCAuLg4fPPNNwgKCkJ6ejoGDx782uIo7V58n15kbGxcAtEU7Pjx45g1axaCg4NhZWVVqH1mzJiB2bNno1atWvjwww/h5OSEhIQE/PHHH+jWrRs2bdqE3r176zfwQoqIiICBwf9/X83v869fv37o2bMn1Gq1XuL4448/8NVXX+WZaDx9+hQVKpTbP3elWrl816Ojo9GzZ084OTnhwIEDsLe3l9YNHz4ckZGR+P333/V2/AcPHgBAoT9QikKlUsHExERv9b+KWq1Gs2bN8P333+dKMjZv3oz27dtj27ZtryWWJ0+eoGLFior/gbl//z48PT0Vq+/Zs2fIzs4uMM5q1aqhb9++0uvg4GC4uLhgyZIlTDJe8PL7pJTs7GxkZGSU6P+trVu3Yvbs2ejevTs2b94s+yIxYcIE7N69G5mZmSUW38teThry+/wzNDSEoaHh6wpLpiR/n/96ohwaOnSoACCOHTtWqO0zMzPF7NmzhYuLizA2NhZOTk5iypQpIi0tTbadk5OTaN++vThy5Ih44403hFqtFs7OziIsLEzaZsaMGQKAbHFychJCCBEUFCT9/KKcfV60Z88e0axZM2FpaSnMzMxE7dq1xZQpU6T10dHRAoBYv369bL/9+/eL5s2bi4oVKwpLS0vRsWNH8ffff+d5vOvXr4ugoCBhaWkpNBqNCA4OFqmpqa98v4KCgoSZmZnYsGGDUKvV4vHjx9K6//73vwKA2LZtmwAgFi1aJK1LSEgQ48aNE/Xq1RNmZmbCwsJCBAQEiPPnz0vbHDx4MNf79+J5tmrVStStW1f89ddfokWLFsLU1FSMHj1aWteqVSuprv79+wu1Wp3r/P38/ISVlZW4c+dOnueXXwzR0dFCCCHi4+PFwIEDRdWqVYVarRbe3t5iw4YNsjpyfj+LFi0SS5YsES4uLsLAwECcO3cu3/c15/p6WZMmTY
SxsbGs7PDhw6J79+7C0dFRGBsbi+rVq4sxY8aIJ0+eyLbL+V3dvn1bdOrUSZiZmQlra2sxbtw48ezZM9m2jx8/FkFBQUKj0QhLS0vRv39/ce7cuWJfZxEREaJPnz5Co9EIa2trMXXqVJGdnS1u3bolOnbsKCwsLIStra1YvHhxvu9NYd6nF6WkpIiPP/5YVK9eXRgbG4vatWuLRYsWiezsbNl2AMTw4cPFd999Jzw9PUWFChXEjh07hBBC3L59WwwYMEBUrVpVGBsbC09PT7F27dpcx1q2bJnw9PQUpqamwsrKSjRu3Fhs2rRJ9h7kdy3lxcPDQ1SuXFlotdpXvhd5fQ5cuHBBBAUFCWdnZ6FWq4Wtra0YMGCAePjwoWxfrVYrRo8eLZycnISxsbGwsbERvr6+4syZM9I2//zzj+jatauwtbUVarVaVKtWTXzwwQciMTFR2sbJyUkEBQXle745n3nr16/P89z/+OMP0bJlS2Fubi4sLCxEkyZNpPdPiMJd60FBQXm+zzkAiBkzZsiOe/bsWREQECAsLCyEmZmZeOedd8SJEydk2+TEfPToUTF27FhhbW0tKlasKDp37izu37//yt8PCVEuWzJ+/fVXuLi44O233y7U9oMGDUJYWBi6d++OcePG4dSpUwgNDcXVq1exY8cO2baRkZHo3r07QkJCEBQUhHXr1iE4OBiNGzdG3bp10bVrV1hZWWHs2LHo1asX3n33XZibm+sU/5UrV/Dee+/B29sbs2fPhlqtRmRkJI4dO1bgfvv27UNgYCBcXFwwc+ZMPH36FMuXL0ezZs1w9uxZ1KxZU7Z9jx494OzsjNDQUJw9exbffPMNqlativ/85z+FirNr164YOnQotm/fjoEDBwJ43orh4eGBRo0a5dr+xo0b2LlzJ95//304OzsjPj4eq1atQqtWrfD333/DwcEBderUwezZszF9+nQMGTIELVq0AADZ7zIhIQGBgYHo2bMn+vbtC1tb2zzj++KLL3DgwAEEBQXhxIkTMDQ0xKpVq7Bnzx58++23cHBwyHO/OnXq4Ntvv8XYsWNRvXp1qVnexsYGT58+RevWrREZGYkRI0bA2dkZP/30E4KDg5GYmIjRo0fL6lq/fj3S0tIwZMgQqNVqVK5cuVDvbY5nz57h9u3bqFSpkqz8p59+wpMnTzBs2DBUqVIF//3vf7F8+XLcvn0bP/30k2zbrKws+Pv7w8fHB4sXL8a+ffvw2WefwdXVFcOGDQMACCHQqVMnHD16FEOHDkWdOnWwY8cOBAUF5YpJ1+vsgw8+QJ06dbBgwQL8/vvvmDt3LipXroxVq1bhnXfewX/+8x9s2rQJ48ePxxtvvIGWLVu+8n3JzMzEw4cPZWUVK1ZExYoVIYRAx44dcfDgQYSEhKBBgwbYvXs3JkyYgDt37mDJkiWy/Q4cOIAff/wRI0aMgLW1NWrWrIn4+Hi89dZbUKlUGDFiBGxsbPDnn38iJCQEWq1WGgS8Zs0ajBo1Ct27d8fo0aORlpaGixcv4tSpU+jduze6du2Kf/75B99//z2WLFkCa2trAM+vpbxcv34d165dw8CBA2FhYfHK9yEve/fuxY0bNzBgwADY2dlJXcNXrlzByZMnoVKpAABDhw7F1q1bMWLECHh6eiIhIQFHjx7F1atX0ahRI2RkZMDf3x/p6ekYOXIk7OzscOfOHfz2229ITEyEpaVlrmPr+vm3YcMGDBw4EHXr1sWUKVNgZWWFc+fOYdeuXVJ3UGGu9Q8//BB3797F3r178e23377yPbpy5QpatGgBjUaDiRMnwsjICKtWrULr1q1x6NAh+Pj4yLYfOXIkKlWqhBkzZiAmJgZLly7FiBEj8MMPPxT69/KvVdJZjtKSkpIEANGpU6dCbX/+/HkBQAwaNEhWPn78eAFAHDhwQCpzcnISAMThw4elsvv37wu1Wi3GjRsnlb34LfZFhW3JWLJkiQAgHjx4kG/ceX2DadCggahatapISE
iQyi5cuCAMDAxE//79cx1v4MCBsjq7dOkiqlSpku8xXzwPMzMzIYQQ3bt3F23bthVCCJGVlSXs7OzErFmz8nwP0tL
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.4722222222222222\n",
|
|||
|
"Precision: 0.5\n",
|
|||
|
"Recall: 0.42105263157894735\n",
|
|||
|
"F1-score: 0.45714285714285713\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgwAAAHHCAYAAADTQQDlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYQ0lEQVR4nO3dd1gUV9sG8HtpS10QBAFFBKyIDfNqFLsIEmOsrwlRA6gxGit238SILRhLNFFjb7FEjS3R2LBgj7FHjQUQFUVFUZpIP98fhv1cF1wWBljw/nnNJXtm9swzu8vw7DlnzsiEEAJEREREb6FX2gEQERGR7mPCQERERBoxYSAiIiKNmDAQERGRRkwYiIiISCMmDERERKQREwYiIiLSiAkDERERacSEgYiIiDRiwlAAERER8PHxgaWlJWQyGXbu3Clp/Xfu3IFMJsOaNWskrbcsa9OmDdq0aSNZfSkpKRgwYADs7e0hk8kwcuRIyerWdXl9vkJCQiCTyUovqHKmrLyeunCuqVatGgIDA1XK8jrHrlmzBjKZDHfu3CnxGGUyGUJCQkp8v7quzCQMUVFR+OKLL+Dq6gpjY2MoFAp4eXnhhx9+wMuXL4t13wEBAbhy5QpmzJiBdevW4b333ivW/ZWkwMBAyGQyKBSKPF/HiIgIyGQyyGQyzJkzR+v6Y2NjERISgkuXLkkQbeF9++23WLNmDQYPHox169ahb9++xb7PnJwc/Pzzz+jQoQMqVqwIQ0ND2NnZwcfHB8uWLUN6enqxx1CatH3vc/9AvL7Y2dmhbdu22Lt3b/EGWwCpqakICQlBeHh4aYeSp/DwcHTv3h329vYwMjKCnZ0dOnfujO3bt5d2aBqVxjl2z549TAq0JcqA3bt3CxMTE2FlZSWGDx8uli1bJhYuXCg++eQTYWhoKD7//PNi23dqaqoAIL766qti20dOTo54+fKlyMrKKrZ95CcgIEAYGBgIfX19sXnzZrX1kydPFsbGxgKAmD17ttb1nz17VgAQq1ev1up56enpIj09Xev95adp06bCy8tLsvo0SU1NFb6+vgKAaN68uQgNDRWrVq0Sc+bMEZ07dxb6+vqiX79+JRJLdHS02nuQmZkpXr58Waz71fa9X716tQAgpk6dKtatWyd+/vlnMXv2bFG3bl0BQOzatatY49XkyZMnAoCYPHmy2rqSeD3f5ptvvhEARI0aNcQ333wjVq5cKWbNmiXatGkjAIgNGzYIIfL+LJS0tLQ0kZGRoXyc3zk2KytLvHz5UuTk5BRLHEOGDBH5/Ql8+fKlyMzMLJb9lmUGpZGkaCM6OhqffPIJnJ2dcfjwYTg4OCjXDRkyBJGRkfjjjz+Kbf9PnjwBAFhZWRXbPmQyGYyNjYutfk3kcjm8vLzwyy+/oFevXirrNm7ciE6dOmHbtm0lEktqaipMTU1hZGQkab1xcXFwd3eXrL6srCzk5OTkG2dwcDD279+P+fPnY8SIESrrRo8ejYiICISFhRVpH0VhYGAAAwPd/PX38/NT+YbZv39/VKpUCb/88gs+/PDDUowsf6X5em7duhVTp05Fz549sXHjRhgaGirXjR07Fvv370dmZmapxJYXuVyu8ji/c6y+vj709fVLKiwVpXk+1mmlnbFoMmjQIAFAnDx5skDbZ2ZmiqlTpwpXV1dhZGQknJ2dxcSJE0VaWprKds7OzqJTp07i+PHj4j//+Y+Qy+XCxcVFrF27VrnN5MmTBQCVxdnZWQjx6pt57s+vy33O6w4cOCC8vLyEpaWlMDMzEzVr1hQTJ05Urs8v6z906JBo0aKFMDU1FZaWluKjjz4S//zzT577i4iIEAEBAcLS0lIoFAoRGBgoXrx4ofH1CggIEGZmZmLNmjVCLpeL58+fK9f99ddfAoDYtm2bWgtDfHy8GD16tPDw8BBmZmbCwsJCdOzYUVy6dEm5zZEjR9Rev9ePs3Xr1qJu3bri3LlzomXLlsLExESMGD
FCua5169bKuj777DMhl8vVjt/Hx0dYWVmJBw8e5Hl8+cUQHR0thBDi8ePHol+/fsLOzk7I5XJRv359sWbNGpU6ct+f2bNni3nz5glXV1ehp6cnLl68mOc+7927J/T19UXHjh3f8sqrets+0tPTxaRJk4Snp6dQKBTC1NRUtGjRQhw+fFitnufPn4uAgAChUCiEpaWl+Oyzz8TFixfVPl95fU6FEGLdunXC09NTGBsbiwoVKoiPP/5Y3Lt3T2Wb3Pft2rVrok2bNsLExEQ4OjqK7777TrmNpvc+L7ktDGfPnlUpz8nJEQqFQnz22Wcq5SkpKWLUqFGiSpUqwsjISNSsWVPMnj1b7RtpQc8JZ8+eFT4+PsLGxkYYGxuLatWqiaCgIJX3580lt7Uhr9cTgBgyZIjYsWOHqFu3rjAyMhLu7u5i7969asd+5MgR0bhxYyGXy4Wrq6tYsmRJvu/Rm2rXri2sra1FUlKSxm3zOtdcvnxZBAQECBcXFyGXy0WlSpVEUFCQePr0qcpzk5KSxIgRI4Szs7MwMjIStra2wtvbW5w/f165za1bt0T37t1FpUqVhFwuF5UrVxYff/yxSEhIUG7j7OwsAgICVF63vM6xuZ+H3N/VXHv27BGtWrUS5ubmwsLCQrz33nvKFhQhhDh27Jjo2bOncHJyEkZGRqJKlSpi5MiRIjU1VblNQEBAnu9nrtff21wXLlwQHTt2FBYWFsLMzEy0a9dOnD59WmWb3JhPnDghgoODRcWKFYWpqano2rWriIuL0/j+6Drd/Irxml27dsHV1RXNmzcv0PYDBgzA2rVr0bNnT4wePRpnzpxBaGgorl+/jh07dqhsGxkZiZ49e6J///4ICAjAqlWrEBgYiMaNG6Nu3bro3r07rKysEBwcDH9/f3zwwQcwNzfXKv5r167hww8/RP369TF16lTI5XJERkbi5MmTb33ewYMH4efnB1dXV4SEhODly5dYsGABvLy8cOHCBVSrVk1l+169esHFxQWhoaG4cOECVqxYATs7O3z33XcFirN79+4YNGgQtm/fjn79+gF41bpQu3ZteHp6qm1/+/Zt7Ny5E//973/h4uKCx48fY+nSpWjdujX++ecfODo6ok6dOpg6dSq++eYbDBw4EC1btgQAlfcyPj4efn5++OSTT9CnTx9UqlQpz/h++OEHHD58GAEBATh9+jT09fWxdOlSHDhwAOvWrYOjo2Oez6tTpw7WrVuH4OBgVKlSBaNHjwYA2Nra4uXLl2jTpg0iIyMxdOhQuLi44Ndff0VgYCASEhLUWgZWr16NtLQ0DBw4EHK5HNbW1nnuc+/evcjOzkafPn00vOrq8tpHUlISVqxYAX9/f3z++edITk7GypUr4evri7/++gsNGzYEAAgh0KVLF5w4cQKDBg1CnTp1sGPHDgQEBBRo3zNmzMCkSZPQq1cvDBgwAE+ePMGCBQvQqlUrXLx4UeUb4PPnz9GxY0d0794dvXr1wtatWzF+/HjUq1cPfn5+BXrv85OYmIinT59CCIG4uDgsWLAAKSkpKq+nEAIfffQRjhw5gv79+6Nhw4bYv38/xo4diwcPHmDevHnKbQtyToiLi4OPjw9sbW0xYcIEWFlZ4c6dO8r+f1tbWyxevBiDBw9Gt27d0L17dwBA/fr133osJ06cwPbt2/Hll1/CwsICP/74I3r06IF79+7BxsYGAHDx4kV07NgRDg4OmDJlCrKzszF16lTY2tpqfK0iIiJw48YN9OvXDxYWFhq3z0tYWBhu376NoKAg2Nvb49q1a1i2bBmuXbuGP//8UzmYc9CgQdi6dSuGDh0Kd3d3xMfH48SJE7h+/To8PT2RkZEBX19fpKenY9iwYbC3t8eDBw+we/duJCQkwNLSUm3f2p5j16xZg379+qFu3bqYOHEirKyscPHiRezbtw+ffvopAODXX39FamoqBg8eDBsbG/z1119YsGAB7t+/j19//RUA8MUXXyA2NhZhYWFYt2
6dxtfo2rVraNmyJRQKBcaNGwdDQ0MsXboUbdq0wdGjR9G0aVOV7YcNG4YKFSpg8uTJuHPnDubPn4+hQ4di8+bNBX5
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABIzElEQVR4nO3deXhM9/4H8PfMJJlENjSyGiJ2tUQS3ERLEY0uyq1Wikq4LWq/Um1RpGqJLpRaK9oqtZWL6rWVtFpryWZPNAuxJcSSVTLJzPf3h5/pTQWZmJkzM3m/nmeep3Nyzsx7Ds28febMOTIhhAARERGRlZBLHYCIiIjIkFhuiIiIyKqw3BAREZFVYbkhIiIiq8JyQ0RERFaF5YaIiIisCssNERERWRUbqQOYmlarxdWrV+Hs7AyZTCZ1HCIiIqoCIQQKCgrg7e0NufzRs5kaV26uXr0KlUoldQwiIiKqhkuXLqF+/fqPXKfGlRtnZ2cA93aOi4uLxGmIiIioKvLz86FSqXTv449S48rN/Y+iXFxcWG6IiIgsTFUOKeEBxURERGRVWG6IiIjIqrDcEBERkVVhuSEiIiKrwnJDREREVoXlhoiIiKwKyw0RERFZFZYbIiIisiosN0RERGRVWG6IiIjIqkhabn7//Xf07t0b3t7ekMlk2LZt22O32b9/PwICAqBUKtGkSROsWrXK6DmJiIjIckhaboqKitCuXTssWbKkSutnZmbipZdeQrdu3ZCcnIx///vfePvtt7Fnzx4jJyUiIiJLIemFM1944QW88MILVV5/+fLlaNSoEebNmwcAaNmyJQ4ePIgvvvgCYWFhxopJREREVXQs8xaaezjDtZatZBks6pibI0eOIDQ0tMKysLAwHDly5KHblJaWIj8/v8KNiIiIDEsIge8OX8CA2KMYsz4R5RqtZFksqtxkZ2fDw8OjwjIPDw/k5+fj7t27lW4TExMDV1dX3U2lUpkiKhERUY2hLtdi8pZTiN5+BhqtgJuTEuVaIVkeiyo31TF58mTk5eXpbpcuXZI6EhERkdW4UVCKgbFHseH4JchlwJQXW2B+/3awt1VIlknSY2705enpiZycnArLcnJy4OLiAgcHh0q3USqVUCqVpohHRERUo5y6nIfha+JxLa8EzvY2WDSgPZ5r7i51LMsqN8HBwdi5c2eFZXv37kVwcLBEiYiIiGqmH5Ov4P3NJ1FaroVfPUesjAiCXz0nqWMBkPhjqcLCQiQnJyM5ORnAva96JycnIysrC8C9j5QiIiJ067/zzjvIyMjA+++/j5SUFCxduhQ//PADJkyYIEV8IiKiGkejFZi7KwXjNySjtFyLbs3rYdvozmZTbACJJzfx8fHo1q2b7n5UVBQAIDIyEqtWrcK1a9d0RQcAGjVqhB07dmDChAlYuHAh6tevj5UrV/Jr4ERERCaQX1KG8euT8GvqDQDAyOcaY+LzzaGQyyROVpFMCCHd4cwSyM/Ph6urK/Ly8uDi4iJ1HCIiIouQcaMQb6+OR8aNIiht5Pj0tbbo4+9jsufX5/3boo65ISIiItP7NfU6xq1PQkFJObxc7bFicBDa1HeVOtZDsdwQERFRpYQQWPF7BubuToEQQFDDOlj2ZiDqOZv3t5BZboiIiOgBJWUaTPrPSWxLvgoAGNBRhRmvtIadjfmfIo/lhoiIiCq4lncXI9Yk4OTlPCjkMkT3boXB/2gImcy8Dhx+GJYbIiIi0km4eAsj1iQit7AUdWrZYumgQAQ3fkrqWHphuSEiIiIAwA/HL2HqttNQa7Ro4emM2IggqOrWkjqW3lhuiIiIarhyjRazdpzDqsMXAAAvtPbE56+3g6PSMmuCZaYmIiIig7hdpMbodYk4nH4TABDVsxnGdGsCuZmdmE8fLDdEREQ1VEp2PoatjselW3fhaKfA/HB/hD3tKXWsJ8ZyQ0REVAPtPp2NqB+SUazWQFXXASsjOqC5p7PUsQyC5YaIiKgG0W
oFFv2Shi/2nQcAhDR+CksGBqCOo53EyQyH5YaIiKiGKCotx8RNJ7DrdDYAYEiIL6a+1BI2CvM/MZ8+WG6IiIhqgEu3ijFsdTxSsgtgq5Bhdt826N9BJXUso2C5ISIisnKH03Mxem0ibheXwc1Jia8GByCwYV2pYxkNyw0REZGVEkJg9ZGL+Pi/Z6HRCrSt74qvBgfCy9VB6mhGxXJDRERkhdTlWkz/8TQ2HL8EAOjr7425/drC3lYhcTLjY7khIiKyMjcKSjHy+wTEX7wNuQyY9EILDHvWz2IufPmkWG6IiIisyKnLeRi+Jh7X8krgbG+DRQPa47nm7lLHMimWGyIiIivxY/IVvL/5JErLtfCr54jYiCA0ruckdSyTY7khIiKycBqtwOc/p2LZ/nQAQLfm9bBwQHu42NtKnEwaLDdEREQWLL+kDOPXJ+HX1BsAgJHPNcbE55tDYcEXvnxSLDdEREQWKuNGId5eHY+MG0VQ2sjx6Wtt0cffR+pYkmO5ISIiskD7U69j7PokFJSUw8vVHisGB6FNfVepY5kFlhsiIiILIoTAit8z8MnuFGgFENSwDpa9GYh6zkqpo5kNlhsiIiILUVKmweQtp7A16QoA4I0OKszo8zSUNtZ/Yj59sNwQERFZgGt5dzFiTQJOXs6DQi5DdO9WGPyPhjXmxHz6YLkhIiIycwkXb2HEmkTkFpaiTi1bLB0UiODGT0kdy2yx3BAREZmxH45fwtRtp6HWaNHC0xmxEUFQ1a0ldSyzxnJDRERkhso1WszacQ6rDl8AAPR62hPz+reDo5Jv3Y/DPURERGRmbhepMXpdIg6n3wQATAhthrHdm0Beg0/Mpw+WGyIiIjOSml2AYavjkXWrGLXsFJjf3x+9WntKHcuisNwQERGZiT1nshG1MRlFag1UdR0QGxGEFp4uUseyOCw3REREEtNqBRb9koYv9p0HAIQ0fgpLBgagjqOdxMksE8sNERGRhIpKyzFx0wnsOp0NABgS4oupL7WEjUIucTLLxXJDREQkkUu3ijFsdTxSsgtgq5Bhdt826N9BJXUsi8dyQ0REJIHD6bkYvTYRt4vL4OakxFeDAxDYsK7UsawCyw0REZEJCSGw5uhFzPjpLDRagTY+rlgREQgvVwepo1kNlhsiIiITUZdrEb39NNYfuwQA6OPvjU/6tYW9LS98aUgsN0RERCZwo6AUI79PQPzF25DJgEm9WmB4Fz9e+NIIWG6IiIiM7NTlPAxfE49reSVwtrfBogHt8Vxzd6ljWS2WGyIiIiP6MfkK3t98EqXlWvjVc0RsRBAa13OSOpZVY7khIiIyAo1W4POfU7FsfzoAoFvzelg4oD1c7G0lTmb9WG6IiIgMLL+kDP/ekIxfUq4DAN7p2hjvhTWHghe+NAmWGyIiIgPKuFGIYavjkX6jCEobOT59rS36+PtIHatGYbkhIiIykP2p1zF2fRIKSsrh5WqPFYOD0Ka+q9SxahyWGyIioickhMCK3zPwye4UaAUQ2LAOlr0ZAHdne6mj1UgsN0RERE+gpEyDyVtOYWvSFQDAGx1UmNHnaShteGI+qbDcEBERVVN2XgmGr4nHyct5UMhliO7dCoP/0ZAn5pMYyw0REVE1JFy8jXe+T8CNglLUqWWLJYMCENLYTepYBJYbIiIivf0QfwlTt56GWqNFC09nxEYEQVW3ltSx6P+x3BAREVVRuUaL2TvP4dtDFwAAYU97YH5/fzgq+XZqTvinQUREVAW3i9QYvS4Rh9NvAgAmhDbD2O5NIOeJ+cwOyw0REdFjpGYXYNjqeGTdKkYtOwXm9/dHr9aeUseih2C5ISIieoQ9Z7IRtTEZRWoNVHUdEBsRhBaeLlLHokdguSEiIqqEViuw6Jc0fLHvPAAgpPFTWDIwAHUc7SRORo/DckNERPQ3RaXlmLjpBHadzgYADAnxxYcvtYStQi5xMqoKlhsiIqL/celWMYatjkdKdgFsFTLM6tsa4R0aSB2L9C
B5BV2yZAl8fX1hb2+PTp064dixY49cf8GCBWjevDkcHBygUqkwYcIElJSUmCgtERFZsyPpN/HK4oNIyS6Am5MSG4b
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, root_mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Daily_Customer_Count']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" # MSE is the mean of squared errors; RMSE (next line) is its square root\n",
|
|||
|
" mse = metrics.mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = root_mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" precision = precision_score(y_test_class, y_pred_class)\n",
|
|||
|
" recall = recall_score(y_test_class, y_pred_class)\n",
|
|||
|
" f1 = f1_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print(f\"Precision: {precision}\")\n",
|
|||
|
" print(f\"Recall: {recall}\")\n",
|
|||
|
" print(f\"F1-score: {f1}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
" # Визуализация матрицы ошибок\n",
|
|||
|
" cm = confusion_matrix(y_test_class, y_pred_class)\n",
|
|||
|
" disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Less', 'More'])\n",
|
|||
|
" disp.plot(cmap=plt.cm.Greens)\n",
|
|||
|
" plt.title(f'Confusion Matrix for {name}')\n",
|
|||
|
" plt.show()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# roc_curve expects continuous scores, so pass the positive-class probability of the last fitted best_model\n",
|
|||
|
"fpr, tpr, _ = metrics.roc_curve(y_test_class, best_model.predict_proba(X_test_class)[:, 1])\n",
|
|||
|
"# построение ROC кривой\n",
|
|||
|
"plt.plot(fpr, tpr)\n",
|
|||
|
"plt.ylabel(\"True Positive Rate\")\n",
|
|||
|
"plt.xlabel(\"False Positive Rate\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Вывод для задачи регрессии:**\n",
|
|||
|
"\n",
|
|||
|
"* Random Forest Regression демонстрирует наилучшие результаты по метрикам MAE и R², что указывает на высокую точность и стабильность модели.\n",
|
|||
|
"\n",
|
|||
|
"* Linear Regression и Gradient Boosting Regression также показывают хорошие результаты, но уступают случайному лесу.\n",
|
|||
|
"\n",
|
|||
|
"**Вывод для задачи классификации:**\n",
|
|||
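|
"* Random Forest Classification на тестовой выборке показывает Accuracy ≈ 0.49, Precision ≈ 0.52, Recall = 0.40 и F1-score ≈ 0.45; такое качество близко к случайному угадыванию, поэтому говорить о высокой точности модели нельзя.\n",
|
|||
|
"* Logistic Regression и Gradient Boosting Classification показывают сопоставимые результаты; в частности, Gradient Boosting (Accuracy ≈ 0.47, Precision = 0.50, Recall ≈ 0.42, F1-score ≈ 0.46) уступает случайному лесу по Accuracy и Precision, но превосходит его по Recall и F1-score.\n",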
|
|||
|
"\n",
|
|||
|
"Для оценки смещения (bias) и дисперсии (variance) моделей можно использовать метод перекрестной проверки (cross-validation). Этот метод позволяет оценить, насколько хорошо модель обобщается на новых данных.\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии: используем метрики MAE (Mean Absolute Error) и R² (R-squared), рассчитанные на 5-кратной перекрестной проверке. Среднее значение метрики по фолдам характеризует смещение модели, а стандартное отклонение — её дисперсию.\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации: используем метрики Accuracy, Precision, Recall и F1-score, рассчитанные на 5-кратной перекрестной проверке. Среднее значение метрики по фолдам характеризует смещение модели, а стандартное отклонение — её дисперсию."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Оценки смещения и дисперсии:**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 214.80552977981765, Std = 10.606512171542404\n",
|
|||
|
"R² (Cross-Validation): Mean = -0.013983192308878256, Std = 0.013712813782736416\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 230.15120484171325, Std = 8.88536995738051\n",
|
|||
|
"R² (Cross-Validation): Mean = -0.1689259714896298, Std = 0.05762118378061254\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 222.8658681378296, Std = 7.310181499028071\n",
|
|||
|
"R² (Cross-Validation): Mean = -0.10108866381648976, Std = 0.039818300656240396\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.5055307262569833, Std = 0.03499561917769727\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.5065468552510806, Std = 0.054654647753909255\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.36069969356486214, Std = 0.041986149284426406\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.41699563277139867, Std = 0.022647838103859376\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.49109869646182497, Std = 0.017650016523272163\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.48596843575835946, Std = 0.03060840061747847\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.46731358529111333, Std = 0.05935645676746673\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.4565802914056861, Std = 0.04036180126394377\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.5201241464928616, Std = 0.02397366176492078\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.509409816068139, Std = 0.024503659178518863\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.5012257405515832, Std = 0.0542411233430998\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.5069332041602828, Std = 0.032565636650784505\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import cross_val_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Store_Area\", \"Items_Available\", \"Store_Sales\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Daily_Customer_Count']\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи регрессии\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE (Cross-Validation): Mean = {mae_scores.mean()}, Std = {mae_scores.std()}\")\n",
|
|||
|
" print(f\"R² (Cross-Validation): Mean = {r2_scores.mean()}, Std = {r2_scores.std()}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Daily_Customer_Count'] > df['Daily_Customer_Count'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи классификации\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
|
|||
|
" precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
|
|||
|
" recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
|
|||
|
" f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy (Cross-Validation): Mean = {accuracy_scores.mean()}, Std = {accuracy_scores.std()}\")\n",
|
|||
|
" print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")\n",
|
|||
|
" print(f\"Recall (Cross-Validation): Mean = {recall_scores.mean()}, Std = {recall_scores.std()}\")\n",
|
|||
|
" print(f\"F1-score (Cross-Validation): Mean = {f1_scores.mean()}, Std = {f1_scores.std()}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aimenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|