2024-11-23 11:55:22 +04:00
{
"cells": [
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 4,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',\n",
" 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',\n",
" 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',\n",
" 'Asthma', 'KidneyDisease', 'SkinCancer'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HeartDisease</th>\n",
" <th>BMI</th>\n",
" <th>Smoking</th>\n",
" <th>AlcoholDrinking</th>\n",
" <th>Stroke</th>\n",
" <th>PhysicalHealth</th>\n",
" <th>MentalHealth</th>\n",
" <th>DiffWalking</th>\n",
" <th>Sex</th>\n",
" <th>AgeCategory</th>\n",
" <th>Race</th>\n",
" <th>Diabetic</th>\n",
" <th>PhysicalActivity</th>\n",
" <th>GenHealth</th>\n",
" <th>SleepTime</th>\n",
" <th>Asthma</th>\n",
" <th>KidneyDisease</th>\n",
" <th>SkinCancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>16.60</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>55-59</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>5.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>No</td>\n",
" <td>20.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>7.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>26.58</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>20.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>8.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>24.21</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>6.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>23.71</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>40-44</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>8.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Yes</td>\n",
" <td>28.87</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>Black</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>12.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>21.63</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>15.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>70-74</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>4.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>No</td>\n",
" <td>31.64</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>9.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>No</td>\n",
" <td>26.45</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No, borderline diabetes</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>No</td>\n",
" <td>40.69</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Good</td>\n",
" <td>10.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
"0 No 16.60 Yes No No 3.0 \n",
"1 No 20.34 No No Yes 0.0 \n",
"2 No 26.58 Yes No No 20.0 \n",
"3 No 24.21 No No No 0.0 \n",
"4 No 23.71 No No No 28.0 \n",
"5 Yes 28.87 Yes No No 6.0 \n",
"6 No 21.63 No No No 15.0 \n",
"7 No 31.64 Yes No No 5.0 \n",
"8 No 26.45 No No No 0.0 \n",
"9 No 40.69 No No No 0.0 \n",
"\n",
" MentalHealth DiffWalking Sex AgeCategory Race \\\n",
"0 30.0 No Female 55-59 White \n",
"1 0.0 No Female 80 or older White \n",
"2 30.0 No Male 65-69 White \n",
"3 0.0 No Female 75-79 White \n",
"4 0.0 Yes Female 40-44 White \n",
"5 0.0 Yes Female 75-79 Black \n",
"6 0.0 No Female 70-74 White \n",
"7 0.0 Yes Female 80 or older White \n",
"8 0.0 No Female 80 or older White \n",
"9 0.0 Yes Male 65-69 White \n",
"\n",
" Diabetic PhysicalActivity GenHealth SleepTime Asthma \\\n",
"0 Yes Yes Very good 5.0 Yes \n",
"1 No Yes Very good 7.0 No \n",
"2 Yes Yes Fair 8.0 Yes \n",
"3 No No Good 6.0 No \n",
"4 No Yes Very good 8.0 No \n",
"5 No No Fair 12.0 No \n",
"6 No Yes Fair 4.0 Yes \n",
"7 Yes No Good 9.0 Yes \n",
"8 No, borderline diabetes No Fair 5.0 No \n",
"9 No Yes Good 10.0 No \n",
"\n",
" KidneyDisease SkinCancer \n",
"0 No Yes \n",
"1 No No \n",
"2 No No \n",
"3 No Yes \n",
"4 No No \n",
"5 No No \n",
"6 No Yes \n",
"7 No No \n",
"8 Yes No \n",
"9 No No "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Read the full survey table, then show its column names and the first ten rows.\n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\")\n",
"print(df.columns)\n",
"\n",
"display(df.head(n=10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Регрессия"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Цель: Разработать модель регрессии, которая будет предсказывать количество часов сна, которое человек получает в сутки, на основе его демографических данных, образа жизни и состояния здоровья.\n",
"\n",
"Применение:\n",
"\n",
"Медицинские учреждения: Модель может помочь врачам оценить качество сна пациента и разработать индивидуальные планы лечения и профилактики нарушений сна.\n",
"\n",
"Компании, разрабатывающие приложения для отслеживания сна: Модель может использоваться для улучшения своих продуктов и предоставления более точных рекомендаций.\n",
"\n",
"Исследования в области сна: Модель может помочь в изучении факторов, влияющих на качество сна."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сначала подготовим данные для работы - удалим выбросы."
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 12,
2024-11-23 11:55:22 +04:00
"metadata": {},
2024-12-11 20:59:23 +04:00
"outputs": [],
2024-11-23 11:55:22 +04:00
"source": [
"import pandas as pd\n",
"from scipy import stats\n",
"\n",
2024-12-11 20:59:23 +04:00
"data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(5000)\n",
2024-11-23 11:55:22 +04:00
"\n",
"# Cut-off, in standard deviations, used by the row filter that ends this cell.\n",
"threshold = 3\n",
"\n",
"# Numeric columns that are screened for outliers.\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
"\n",
"# Per-column standard scores for the loaded sample.\n",
"z_scores = stats.zscore(data[numeric_features])\n",
"\n",
2024-12-11 20:59:23 +04:00
"# Two-sided filter: keep a row only when every |z| is below the threshold.\n",
"# Comparing the signed score would never drop extreme low values (z < -threshold).\n",
"# .copy() makes the result an independent frame, so later column assignments\n",
"# do not write into a slice of the loaded table.\n",
"data = data[(abs(z_scores) < threshold).all(axis=1)].copy()"
2024-11-23 11:55:22 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Теперь перейдем к делению на выборки и созданию ориентира"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 13,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-12-11 20:59:23 +04:00
"Размер обучающей выборки: (3440, 16)\n",
"Размер тестовой выборки: (861, 16)\n",
"Baseline MAE: 0.9691505955757231\n",
"Baseline MSE: 1.5758684447764715\n",
"Baseline R²: -0.0006615872431061653\n"
2024-11-23 11:55:22 +04:00
]
}
],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"\n",
"# Predictors for the sleep-time regression: every column except the two targets.\n",
"features = [\n",
"    'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth',\n",
"    'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity',\n",
"    'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer',\n",
"]\n",
"target = 'SleepTime'\n",
"\n",
"# 80/20 hold-out split with a fixed seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    data[features],\n",
"    data[target],\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")\n",
"\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)\n",
"\n",
"# Baseline: predict the training-set mean for every test row.\n",
"baseline_predictions = [y_train.mean()] * len(y_test)\n",
"\n",
"for baseline_label, baseline_metric in (\n",
"    ('Baseline MAE:', mean_absolute_error),\n",
"    ('Baseline MSE:', mean_squared_error),\n",
"    ('Baseline R²:', r2_score),\n",
"):\n",
"    print(baseline_label, baseline_metric(y_test, baseline_predictions))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание конвейера и обучение моделей"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 14,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: Linear Regression trained.\n",
"Model: Decision Tree trained.\n",
"Model: Gradient Boosting trained.\n"
]
}
],
"source": [
"import pandas as pd\n",
"from scipy import stats\n",
"from sklearn.base import clone\n",
"from sklearn.model_selection import train_test_split, RandomizedSearchCV\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"\n",
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
"\n",
"# Template transformer: scale numeric columns, one-hot encode categorical ones.\n",
"# handle_unknown='ignore' keeps predict() from raising when the test split holds\n",
"# a category that never occurred in the training split.\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        ('num', StandardScaler(), numeric_features),\n",
"        ('cat', OneHotEncoder(handle_unknown='ignore'), categorical_features)])\n",
"\n",
"# Pipeline does not copy its steps, so each pipeline gets its own unfitted clone\n",
"# of the template instead of sharing (and refitting) one transformer object.\n",
"pipeline_linear_regression = Pipeline(steps=[\n",
"    ('preprocessor', clone(preprocessor)),\n",
"    ('regressor', LinearRegression())])\n",
"\n",
"pipeline_decision_tree = Pipeline(steps=[\n",
"    ('preprocessor', clone(preprocessor)),\n",
"    ('regressor', DecisionTreeRegressor(random_state=42))])\n",
"\n",
"pipeline_gradient_boosting = Pipeline(steps=[\n",
"    ('preprocessor', clone(preprocessor)),\n",
"    ('regressor', GradientBoostingRegressor(random_state=42))])\n",
"\n",
"# (display name, pipeline) pairs consumed by the training loop below.\n",
"pipelines = [\n",
"    ('Linear Regression', pipeline_linear_regression),\n",
"    ('Decision Tree', pipeline_decision_tree),\n",
"    ('Gradient Boosting', pipeline_gradient_boosting)\n",
"]\n",
"\n",
"for name, pipeline in pipelines:\n",
"    pipeline.fit(X_train, y_train)\n",
"    print(f\"Model: {name} trained.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оценка качества моделей"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 15,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: Linear Regression\n",
2024-12-11 20:59:23 +04:00
"MAE: 0.9720871556511324\n",
"MSE: 1.512023769950756\n",
"R²: 0.039879178618337674\n",
2024-11-23 11:55:22 +04:00
"\n",
"Model: Decision Tree\n",
2024-12-11 20:59:23 +04:00
"MAE: 1.4547038327526132\n",
"MSE: 3.6193379790940767\n",
"R²: -1.2982454524896956\n",
2024-11-23 11:55:22 +04:00
"\n",
"Model: Gradient Boosting\n",
2024-12-11 20:59:23 +04:00
"MAE: 0.9718200023112538\n",
"MSE: 1.5324871722382205\n",
"R²: 0.02688511132722371\n",
2024-11-23 11:55:22 +04:00
"\n"
]
}
],
"source": [
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"\n",
"# Report label -> scoring function, in the order the report prints them.\n",
"regression_metrics = (\n",
"    ('MAE:', mean_absolute_error),\n",
"    ('MSE:', mean_squared_error),\n",
"    ('R²:', r2_score),\n",
")\n",
"\n",
"# Score every fitted pipeline on the held-out split.\n",
"for name, pipeline in pipelines:\n",
"    y_pred = pipeline.predict(X_test)\n",
"    print(f\"Model: {name}\")\n",
"    for metric_label, metric_fn in regression_metrics:\n",
"        print(metric_label, metric_fn(y_test, y_pred))\n",
"    print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Линейная регрессия имеет низкое смещение, так как MAE и MSE близки к 1. Однако R² близок к 0, что указывает на то, что модель не очень хорошо объясняет дисперсию в данных. Это может быть связано с тем, что линейная модель не может хорошо аппроксимировать сложные зависимости в данных.\n",
"\n",
"Дерево решений имеет высокое смещение и дисперсию. Отрицательный R² указывает на то, что модель работает хуже, чем простое предсказание среднего значения. Это свидетельствует о переобучении и высокой дисперсии.\n",
"\n",
"Градиентный бустинг имеет низкое смещение, так как MAE и MSE близки к 1. R² также близок к 0, что указывает на то, что модель не очень хорошо объясняет дисперсию в данных. Однако по приведённым выше метрикам это не лучший результат: у линейной регрессии MSE немного ниже, а R² немного выше. Градиентный бустинг уверенно превосходит только дерево решений.\n",
"\n",
"Линейная регрессия и Градиентный бустинг имеют низкое смещение, но низкий R², что указывает на то, что они не могут хорошо объяснить дисперсию в данных.\n",
"\n",
"Дерево решений имеет высокую дисперсию и переобучение, что приводит к отрицательному R²."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Классификация"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Цель: Разработать модель, которая сможет предсказывать вероятность развития сердечно-сосудистых заболеваний (HeartDisease) у пациентов на основе их демографических данных, образа жизни и состояния здоровья.\n",
"\n",
"Применение: Модель может использоваться в медицинских учреждениях для раннего выявления пациентов с высоким риском сердечных заболеваний, что позволит назначить профилактические меры и улучшить результаты лечения."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Проведем деление на выборки и создание ориентира"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 16,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-12-11 20:59:23 +04:00
"Размер обучающей выборки: (3440, 16)\n",
"Размер тестовой выборки: (861, 16)\n"
2024-11-23 11:55:22 +04:00
]
}
],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# Predictors for the heart-disease classifier.\n",
"features = [\n",
"    'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth',\n",
"    'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity',\n",
"    'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer',\n",
"]\n",
"target = 'HeartDisease'\n",
"\n",
"# 80/20 hold-out split with a fixed seed.\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
"    data[features],\n",
"    data[target],\n",
"    test_size=0.2,\n",
"    random_state=42,\n",
")\n",
"\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 17,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HeartDisease\n",
2024-12-11 20:59:23 +04:00
"0 3900\n",
"1 3900\n",
2024-11-23 11:55:22 +04:00
"Name: count, dtype: int64\n",
"Лучшие гиперпараметры для логистической регрессии:\n",
2024-12-11 20:59:23 +04:00
"{'classifier__C': np.float64(0.26020637194111806), 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear'}\n",
"Accuracy: 0.7667\n",
"Precision: 0.7470\n",
"Recall: 0.8028\n",
"F1-Score: 0.7739\n",
"ROC-AUC: 0.8408\n",
2024-11-23 11:55:22 +04:00
"Лучшие гиперпараметры для случайного леса:\n",
2024-12-11 20:59:23 +04:00
"{'classifier__bootstrap': False, 'classifier__max_depth': np.int64(98), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 13, 'classifier__n_estimators': 413}\n",
"Accuracy: 0.9487\n",
"Precision: 0.9665\n",
"Recall: 0.9291\n",
"F1-Score: 0.9474\n",
"ROC-AUC: 0.9874\n"
]
},
{
"ename": "KeyboardInterrupt",
"evalue": "",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[17], line 125\u001b[0m\n\u001b[0;32m 120\u001b[0m gb_pipeline \u001b[38;5;241m=\u001b[39m Pipeline([\n\u001b[0;32m 121\u001b[0m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclassifier\u001b[39m\u001b[38;5;124m'\u001b[39m, GradientBoostingClassifier(random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m))\n\u001b[0;32m 122\u001b[0m ])\n\u001b[0;32m 124\u001b[0m gb_random_search \u001b[38;5;241m=\u001b[39m RandomizedSearchCV(gb_pipeline, param_distributions\u001b[38;5;241m=\u001b[39mgb_param_dist, n_iter\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m, cv\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m--> 125\u001b[0m \u001b[43mgb_random_search\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 127\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mЛу чшие гиперпараметры для градиентного бустинга:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 128\u001b[0m \u001b[38;5;28mprint\u001b[39m(gb_random_search\u001b[38;5;241m.\u001b[39mbest_params_)\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1019\u001b[0m, in \u001b[0;36mBaseSearchCV.fit\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m 1013\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format_results(\n\u001b[0;32m 1014\u001b[0m all_candidate_params, n_splits, all_out, all_more_results\n\u001b[0;32m 1015\u001b[0m )\n\u001b[0;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results\n\u001b[1;32m-> 1019\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevaluate_candidates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;66;03m# multimetric is determined here because in the case of a callable\u001b[39;00m\n\u001b[0;32m 1022\u001b[0m \u001b[38;5;66;03m# self.scoring the return type is only known after calling\u001b[39;00m\n\u001b[0;32m 1023\u001b[0m first_test_score \u001b[38;5;241m=\u001b[39m all_out[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_scores\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1960\u001b[0m, in \u001b[0;36mRandomizedSearchCV._run_search\u001b[1;34m(self, evaluate_candidates)\u001b[0m\n\u001b[0;32m 1958\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_search\u001b[39m(\u001b[38;5;28mself\u001b[39m, evaluate_candidates):\n\u001b[0;32m 1959\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Search n_iter candidates from param_distributions\"\"\"\u001b[39;00m\n\u001b[1;32m-> 1960\u001b[0m \u001b[43mevaluate_candidates\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1961\u001b[0m \u001b[43m \u001b[49m\u001b[43mParameterSampler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1962\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparam_distributions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_iter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\n\u001b[0;32m 1963\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1964\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:965\u001b[0m, in \u001b[0;36mBaseSearchCV.fit.<locals>.evaluate_candidates\u001b[1;34m(candidate_params, cv, more_results)\u001b[0m\n\u001b[0;32m 957\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[0;32m 959\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFitting \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;124m folds for each of \u001b[39m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;124m candidates,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 960\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m totalling \u001b[39m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m fits\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[0;32m 961\u001b[0m n_splits, n_candidates, n_candidates \u001b[38;5;241m*\u001b[39m n_splits\n\u001b[0;32m 962\u001b[0m )\n\u001b[0;32m 963\u001b[0m )\n\u001b[1;32m--> 965\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mparallel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 966\u001b[0m \u001b[43m \u001b[49m\u001b[43mdelayed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_fit_and_score\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 967\u001b[0m \u001b[43m \u001b[49m\u001b[43mclone\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_estimator\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 968\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 969\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 970\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 971\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 972\u001b[0m \u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 973\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msplit_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_splits\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 974\u001b[0m \u001b[43m \u001b[49m\u001b[43mcandidate_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcand_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_candidates\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 975\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_and_score_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 976\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 977\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mcand_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43msplit_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mproduct\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 978\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcandidate_params\u001b[49m\u0
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\utils\\parallel.py:74\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 69\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config()\n\u001b[0;32m 70\u001b[0m iterable_with_config \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 71\u001b[0m (_with_config(delayed_func, config), args, kwargs)\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m delayed_func, args, kwargs \u001b[38;5;129;01min\u001b[39;00m iterable\n\u001b[0;32m 73\u001b[0m )\n\u001b[1;32m---> 74\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43miterable_with_config\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:2007\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 2001\u001b[0m \u001b[38;5;66;03m# The first item from the output is blank, but it makes the interpreter\u001b[39;00m\n\u001b[0;32m 2002\u001b[0m \u001b[38;5;66;03m# progress until it enters the Try/Except block of the generator and\u001b[39;00m\n\u001b[0;32m 2003\u001b[0m \u001b[38;5;66;03m# reaches the first `yield` statement. This starts the asynchronous\u001b[39;00m\n\u001b[0;32m 2004\u001b[0m \u001b[38;5;66;03m# dispatch of the tasks to the workers.\u001b[39;00m\n\u001b[0;32m 2005\u001b[0m \u001b[38;5;28mnext\u001b[39m(output)\n\u001b[1;32m-> 2007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_generator \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:1650\u001b[0m, in \u001b[0;36mParallel._get_outputs\u001b[1;34m(self, iterator, pre_dispatch)\u001b[0m\n\u001b[0;32m 1647\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[0;32m 1649\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backend\u001b[38;5;241m.\u001b[39mretrieval_context():\n\u001b[1;32m-> 1650\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retrieve()\n\u001b[0;32m 1652\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mGeneratorExit\u001b[39;00m:\n\u001b[0;32m 1653\u001b[0m \u001b[38;5;66;03m# The generator has been garbage collected before being fully\u001b[39;00m\n\u001b[0;32m 1654\u001b[0m \u001b[38;5;66;03m# consumed. This aborts the remaining tasks if possible and warn\u001b[39;00m\n\u001b[0;32m 1655\u001b[0m \u001b[38;5;66;03m# the user if necessary.\u001b[39;00m\n\u001b[0;32m 1656\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
"File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:1762\u001b[0m, in \u001b[0;36mParallel._retrieve\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1757\u001b[0m \u001b[38;5;66;03m# If the next job is not ready for retrieval yet, we just wait for\u001b[39;00m\n\u001b[0;32m 1758\u001b[0m \u001b[38;5;66;03m# async callbacks to progress.\u001b[39;00m\n\u001b[0;32m 1759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ((\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m 1760\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mget_status(\n\u001b[0;32m 1761\u001b[0m timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout) \u001b[38;5;241m==\u001b[39m TASK_PENDING)):\n\u001b[1;32m-> 1762\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1763\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m 1765\u001b[0m \u001b[38;5;66;03m# We need to be careful: the job list can be filling up as\u001b[39;00m\n\u001b[0;32m 1766\u001b[0m \u001b[38;5;66;03m# we empty it and Python list are not thread-safe by\u001b[39;00m\n\u001b[0;32m 1767\u001b[0m \u001b[38;5;66;03m# default hence the use of the lock\u001b[39;00m\n",
"\u001b[1;31mKeyboardInterrupt\u001b[0m: "
2024-11-23 11:55:22 +04:00
]
}
],
"source": [
"import pandas as pd\n",
"from imblearn.over_sampling import SMOTE\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score\n",
"from scipy.stats import uniform, randint\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"\n",
"# Model inputs. NOTE(review): 'SleepTime' exists in the dataset but is not listed here - confirm this is intentional.\n",
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"target = 'HeartDisease'\n",
"\n",
"# Encode the target as integers. LabelEncoder assigns codes in sorted class order,\n",
"# so with 'No'/'Yes' labels this presumably gives No -> 0 and Yes -> 1 (the metrics below use pos_label=1).\n",
"label_encoder = LabelEncoder()\n",
"data[target] = label_encoder.fit_transform(data[target])\n",
"\n",
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
"\n",
"# One-hot encode categorical columns; categories not seen during fit are ignored instead of raising.\n",
"categorical_transformer = Pipeline(steps=[\n",
"    ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
"])\n",
"\n",
"# Standardize numeric columns.\n",
"numeric_transformer = Pipeline(steps=[\n",
"    ('scaler', StandardScaler())\n",
"])\n",
"\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        ('num', numeric_transformer, numeric_features),\n",
"        ('cat', categorical_transformer, categorical_features)\n",
"    ])\n",
"\n",
"# Split the raw rows FIRST so the held-out set never influences the scaler, the encoder or SMOTE.\n",
"# stratify keeps the original class ratio in both parts.\n",
"X_train_raw, X_test_raw, y_train_raw, y_test = train_test_split(\n",
"    data[features], data[target], test_size=0.2, random_state=42, stratify=data[target]\n",
")\n",
"\n",
"# Fit preprocessing on the training rows only, then apply the fitted transform to the test rows.\n",
"X_train_encoded = preprocessor.fit_transform(X_train_raw)\n",
"X_test = preprocessor.transform(X_test_raw)\n",
"\n",
"# Full encoded matrix and target, kept under their previous names for any code that still reads them.\n",
"# Do not train on X / y directly: they include the held-out rows.\n",
"X = preprocessor.transform(data[features])\n",
"y = data[target]\n",
"\n",
"# Oversample the minority class in the training data only; the test set keeps real, untouched rows.\n",
"smote = SMOTE(random_state=42)\n",
"X_resampled, y_resampled = smote.fit_resample(X_train_encoded, y_train_raw)\n",
"\n",
"print(pd.Series(y_resampled).value_counts())\n",
"\n",
"# The models are trained on the balanced training set and evaluated on the untouched test set.\n",
"X_train, y_train = X_resampled, y_resampled\n",
"\n",
"def evaluate_model(model, X_test, y_test):\n",
"    \"\"\"Score a fitted binary classifier on held-out data, print the scores and return them.\n",
"\n",
"    Args:\n",
"        model: fitted estimator exposing predict and predict_proba.\n",
"        X_test: feature matrix to score.\n",
"        y_test: true labels for X_test; label 1 is treated as the positive class.\n",
"\n",
"    Returns:\n",
"        dict with the keys 'accuracy', 'precision', 'recall', 'f1' and 'roc_auc'.\n",
"    \"\"\"\n",
"    predicted_labels = model.predict(X_test)\n",
"    # column 1 of predict_proba is used as the score of the positive class\n",
"    positive_scores = model.predict_proba(X_test)[:, 1]\n",
"\n",
"    metrics = {\n",
"        'accuracy': accuracy_score(y_test, predicted_labels),\n",
"        'precision': precision_score(y_test, predicted_labels, pos_label=1),\n",
"        'recall': recall_score(y_test, predicted_labels, pos_label=1),\n",
"        'f1': f1_score(y_test, predicted_labels, pos_label=1),\n",
"        'roc_auc': roc_auc_score(y_test, positive_scores),\n",
"    }\n",
"    # dicts preserve insertion order, so the values unpack in the order written above\n",
"    accuracy, precision, recall, f1, roc_auc = metrics.values()\n",
"\n",
"    print(f\"Accuracy: {accuracy:.4f}\")\n",
"    print(f\"Precision: {precision:.4f}\")\n",
"    print(f\"Recall: {recall:.4f}\")\n",
"    print(f\"F1-Score: {f1:.4f}\")\n",
"    print(f\"ROC-AUC: {roc_auc:.4f}\")\n",
"\n",
"    return metrics\n",
"\n",
"# Randomized hyperparameter search for logistic regression.\n",
"# C is sampled uniformly from [0, 4]; both listed solvers are tried with both penalties.\n",
"logreg_param_dist = {\n",
"    'classifier__C': uniform(0, 4),\n",
"    'classifier__penalty': ['l1', 'l2'],\n",
"    'classifier__solver': ['liblinear', 'saga'],\n",
"}\n",
"\n",
"# Single-step pipeline: the 'classifier__' prefix in the search space refers to this step name.\n",
"logreg_pipeline = Pipeline(steps=[('classifier', LogisticRegression(max_iter=1000, random_state=42))])\n",
"\n",
"# 50 sampled configurations x 5 folds, run on all available cores.\n",
"logreg_random_search = RandomizedSearchCV(\n",
"    estimator=logreg_pipeline,\n",
"    param_distributions=logreg_param_dist,\n",
"    n_iter=50,\n",
"    cv=5,\n",
"    random_state=42,\n",
"    n_jobs=-1,\n",
")\n",
"logreg_random_search.fit(X_train, y_train)\n",
"\n",
"# best_estimator_ is the pipeline refitted with the best configuration found\n",
"logreg_best_model = logreg_random_search.best_estimator_\n",
"\n",
"print(\"Лучшие гиперпараметры для логистической регрессии:\")\n",
"print(logreg_random_search.best_params_)\n",
"\n",
"logreg_results = evaluate_model(logreg_best_model, X_test, y_test)\n",
"\n",
"# Randomized hyperparameter search for the random forest.\n",
"# The max_depth candidates are drawn once, up front; seeding the draw keeps the search space\n",
"# (and therefore the whole search) identical from run to run. tolist() yields plain Python ints.\n",
"rf_param_dist = {\n",
"    'classifier__n_estimators': randint(100, 1000),\n",
"    'classifier__max_depth': [None] + randint(10, 100).rvs(10, random_state=42).tolist(),\n",
"    'classifier__min_samples_split': randint(2, 20),\n",
"    'classifier__min_samples_leaf': randint(1, 20),\n",
"    'classifier__bootstrap': [True, False]\n",
"}\n",
"\n",
"# Single-step pipeline: the 'classifier__' prefix in the search space refers to this step name.\n",
"rf_pipeline = Pipeline([\n",
"    ('classifier', RandomForestClassifier(random_state=42))\n",
"])\n",
"\n",
"# 50 sampled configurations x 5 folds, run on all available cores.\n",
"rf_random_search = RandomizedSearchCV(rf_pipeline, param_distributions=rf_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
"rf_random_search.fit(X_train, y_train)\n",
"\n",
"print(\"Лучшие гиперпараметры для случайного леса:\")\n",
"print(rf_random_search.best_params_)\n",
"\n",
"# best_estimator_ is the pipeline refitted with the best configuration found\n",
"rf_best_model = rf_random_search.best_estimator_\n",
"rf_results = evaluate_model(rf_best_model, X_test, y_test)\n",
"\n",
"# Randomized hyperparameter search for gradient boosting.\n",
"# The max_depth candidates are drawn once, up front; seeding the draw keeps the search space\n",
"# (and therefore the whole search) identical from run to run. tolist() yields plain Python ints.\n",
"gb_param_dist = {\n",
"    'classifier__n_estimators': randint(100, 1000),\n",
"    # scipy's uniform(loc, scale) samples from [loc, loc + scale]\n",
"    'classifier__learning_rate': uniform(0.01, 0.5),\n",
"    'classifier__max_depth': [None] + randint(10, 100).rvs(10, random_state=42).tolist(),\n",
"    'classifier__min_samples_split': randint(2, 20),\n",
"    'classifier__min_samples_leaf': randint(1, 20),\n",
"    'classifier__subsample': uniform(0.5, 0.5)\n",
"}\n",
"\n",
"# Single-step pipeline: the 'classifier__' prefix in the search space refers to this step name.\n",
"gb_pipeline = Pipeline([\n",
"    ('classifier', GradientBoostingClassifier(random_state=42))\n",
"])\n",
"\n",
"# 50 sampled configurations x 5 folds, run on all available cores.\n",
"gb_random_search = RandomizedSearchCV(gb_pipeline, param_distributions=gb_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
"gb_random_search.fit(X_train, y_train)\n",
"\n",
"print(\"Лучшие гиперпараметры для градиентного бустинга:\")\n",
"print(gb_random_search.best_params_)\n",
"\n",
"# best_estimator_ is the pipeline refitted with the best configuration found\n",
"gb_best_model = gb_random_search.best_estimator_\n",
"gb_results = evaluate_model(gb_best_model, X_test, y_test)\n",
"\n",
"# Side-by-side summary of the metrics collected for each tuned model.\n",
"print(\"\\nРезультаты моделей:\")\n",
"\n",
"# Same report for every model: a header line followed by one line per metric.\n",
"for model_label, model_results in (\n",
"    ('Logistic Regression', logreg_results),\n",
"    ('Random Forest', rf_results),\n",
"    ('Gradient Boosting', gb_results),\n",
"):\n",
"    print(f\"\\n{model_label}:\")\n",
"    for metric, value in model_results.items():\n",
"        print(f\"{metric.capitalize()}: {value:.4f}\")"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 11,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Logistic Regression Metrics:\n",
"Accuracy: 0.7398\n",
"Precision: 0.7239\n",
"Recall: 0.7564\n",
"F1-Score: 0.7398\n",
"ROC-AUC: 0.8338\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBhklEQVR4nO3deZyN9f//8ecxzGrGmBhjncXY921ky4iIiCSJsofK3oJKlkgkW2RJ2bMXSiEkYco6o+xjS7KExjYLM3P9/vCb83XMYg5nHJfP4367zc2c63pf1/U61xwzz/O+3u/rWAzDMAQAAGAS2ZxdAAAAgD0ILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQIL3joHTlyRA0bNlSuXLlksVi0YsUKh+7/xIkTslgsmj17tkP3a2bh4eEKDw932P6uXbumrl27KiAgQBaLRX379nXYvh8WmzZtksVi0aZNmxyyv9mzZ8tisejEiRMO2R+koUOHymKxOLsMOADhBZly9OhRde/eXSEhIXJ3d5ePj49q1aqliRMnKi4uLkuP3aFDB/3xxx8aOXKk5s2bp6pVq2bp8R6kjh07ymKxyMfHJ83zeOTIEVksFlksFo0dO9bu/f/zzz8aOnSoIiMjHVDtvfvoo480e/Zsvfbaa5o3b55eeeWVLD1eUFCQmjZtmqXHcJSPPvrI4YH8TilBKOUre/bsKliwoDp27KjTp09n6bGBLGEAd/H9998bHh4ehq+vr9G7d29jxowZxuTJk402bdoYOXLkMF599dUsO3ZsbKwhyXjvvfey7BjJyclGXFyckZiYmGXHSE+HDh2M7NmzGy4uLsbixYtTrR8yZIjh7u5uSDI++eQTu/e/Y8cOQ5Ixa9Ysu7ZLSEgwEhIS7D5eeqpXr27UqlXLYfu7m8DAQOOZZ555YMczDMNISkoy4uLijKSkJLu28/LyMjp06JBqeWJiohEXF2ckJyffd22zZs0yJBnDhw835s2bZ3zxxRdGly5dDBcXF6No0aJGXFzcfR/DDG7evPk/81wfddmdG53wsDt+/LjatGmjwMBAbdy4Ufnz57eue+ONNxQdHa3Vq1dn2fH//fdfSZKvr2+WHcNiscjd3T3L9n83bm5uqlWrlhYuXKjWrVvbrPv666/1zDPPaPny5Q+kltjYWHl6esrV1dWh+z1//rxKly7tsP0lJiYqOTnZ4XXej2zZsjn0deTi4iIXFxeH7U+SGjdubO257Nq1q/LkyaPRo0dr1apVqV57WckwDMXHx8vDw+OBHVOSsmfPruzZ+bP3KOCyETI0ZswYXbt2TV9++aVNcEkRGhqqPn36WB8nJibqww8/VNGiReXm5qagoCC9++67SkhIsNkupVt/y5YtCgsLk7u7u0JCQjR37lxrm6FDhyowMFCS9Pbbb8tisSgoKEjSrcstKd/fLq1r2j/99JNq164tX19f5cyZUyVKlNC7775rXZ/emJeNGzeqTp068vLykq+vr5o3b64DBw6kebzo6Gh17NhRvr6+ypUrlzp16qTY2Nj0T+wd2rZtqx9//FExMTHWZTt27NCRI0fUtm3bVO0vXbqkt956S+XKlVPOnDnl4+Ojxo0bKyoqytpm06ZNqlatmiSpU6dO1ksGKc8zPDxcZcuW1a5du/TEE0/I09PTel7uHPPSoUMHubu7p3r+jRo1Uu7cufXPP/+k+bxSxoEcP35cq1evttaQMo7j/Pnz6tKli/Llyyd3d3dVqFBBc+bMsdlHys9n7NixmjBhgvW1tX///kyd2/Rk9rWanJysoUOHqkCBAvL09FS9evW0f/9+BQUFqWPHjqme6+1jXo4cOaLnn39eAQEBcnd3V6FChdSmTRtdvnxZ0q3gfP36dc2ZM8d6blL2md6Ylx9//FF169aVt7e3fHx8VK1aNX399df3dA7q1Kkj6dZl4dsdPHhQrVq1kp+fn9zd3V
W1alWtWrUq1fZ79+5V3bp15eHhoUKFCmnEiBGaNWtWqrpT/r+vXbtWVatWlYeHh6ZPny5JiomJUd++fVW4cGG5ubkpNDRUo0ePVnJyss2xFi1apCpVqlifd7ly5TRx4kTr+ps3b2rYsGEqVqyY3N3d9dhjj6l27dr66aefrG3S+v3gyN9ZeHCIoMjQd999p5CQENWsWTNT7bt27ao5c+aoVatWevPNN/X7779r1KhROnDggL799lubttHR0WrVqpW6dOmiDh066KuvvlLHjh1VpUoVlSlTRi1btpSvr6/69eunl156SU2aNFHOnDntqn/fvn1q2rSpypcvr+HDh8vNzU3R0dHaunVrhtutX79ejRs3VkhIiIYOHaq4uDh99tlnqlWrlnbv3p0qOLVu3VrBwcEaNWqUdu/erZkzZ8rf31+jR4/OVJ0tW7ZUjx499M0336hz586SbvW6lCxZUpUrV07V/tixY1qxYoVeeOEFBQcH69y5c5o+fbrq1q2r/fv3q0CBAipVqpSGDx+uDz74QN26dbP+obr9Z3nx4kU1btxYbdq00csvv6x8+fKlWd/EiRO1ceNGdejQQREREXJxcdH06dO1bt06zZs3TwUKFEhzu1KlSmnevHnq16+fChUqpDfffFOSlDdvXsXFxSk8PFzR0dHq2bOngoODtXTpUnXs2FExMTE2oViSZs2apfj4eHXr1k1ubm7y8/PL1LlNT2Zfq4MGDdKYMWPUrFkzNWrUSFFRUWrUqJHi4+Mz3P+NGzfUqFEjJSQkqFevXgoICNDp06f1/fffKyYmRrly5dK8efPUtWtXhYWFqVu3bpKkokWLprvP2bNnq3PnzipTpowGDRokX19f7dmzR2vWrEkz5N5NSsDInTu3ddm+fftUq1YtFSxYUAMHDpSXl5eWLFmiFi1aaPny5XruueckSadPn1a9evVksVg0aNAgeXl5aebMmXJzc0vzWIcOHdJLL72k7t2769VXX1WJEiUUGxurunXr6vTp0+revbuKFCmibdu2adCgQTpz5owmTJgg6dYbkJdeekn169e3/p86cOCAtm7dan2dDB06VKNGjbKezytXrmjnzp3avXu3nnrqqXTPgSN/Z+EBcvZ1Kzy8Ll++bEgymjdvnqn2kZGRhiSja9euNsvfeustQ5KxceNG67LAwEBDkrF582brsvPnzxtubm7Gm2++aV12/PjxNMd7dOjQwQgMDExVw5AhQ4zbX9bjx483JBn//vtvunWnHOP2cSEVK1Y0/P39jYsXL1qXRUVFGdmyZTPat2+f6nidO3e22edzzz1nPPbYY+ke8/bn4eXlZRiGYbRq1cqoX7++YRi3xk8EBAQYw4YNS/McxMfHpxpbcfz4ccPNzc0YPny4dVlGY17q1q1rSDKmTZuW5rq6devaLFu7dq0hyRgxYoRx7NgxI2fOnEaLFi3u+hwNI+0xKBMmTDAkGfPnz7cuu3HjhlGjRg0jZ86cxpUrV6zPS5Lh4+NjnD9//p6Pd7vMvlbPnj1rZM+ePdXzHDp0qCHJZqzKzz//bEgyfv75Z8MwDGPPnj2GJGPp0qUZ1premJeUcSrHjx83DMMwYmJiDG9vb6N69eqpxm3cbVxMyr7Wr19v/Pvvv8apU6eMZcuWGXnz5jXc3NyMU6dOWdvWr1/fKFeunBEfH2+z/5o1axrFihWzLuvVq5dhsViMPXv2WJddvHjR8PPzs6nbMP7v//uaNWts6vrwww8NLy8v4/DhwzbLBw4caLi4uBh//fWXYRiG0adPH8PHxyfDcWkVKlS46zinO38/ZMXvLDwYXDZCuq5cuSJJ8vb2zlT7H374QZLUv39/m+Up77bvHBtTunRpa2+AdOvdeIkSJXTs2LF7rvlOKWNlVq5cmaobOj1nzpxRZGSkOnbsaPPuvnz58nrqqaesz/N2PXr0sHlcp04dXbx40XoOM6Nt27batGmTzp49q40bN+rs2bPpvpt2c3NTtmy3/vsmJS
Xp4sWL1ktiu3fvzvQx3dzc1KlTp0y1bdiwobp3767hw4erZcuWcnd3t3b934sffvhBAQEBeumll6zLcuTIod69e+v
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB7v0lEQVR4nO3dd3xN9/8H8NdN5GYvIhISMuzaMYOmCFGqghJb1FaqRitGjSpqU7VbYqXWl1IrLRU1UirEljQiZoIQWTLv/fz+yC+XK0MuNzlJ7uv5eNxHe9/3jPe9J3Hf+ZzPkAkhBIiIiIh0kJ7UCRARERFJhYUQERER6SwWQkRERKSzWAgRERGRzmIhRERERDqLhRARERHpLBZCREREpLNYCBEREZHOYiFEREREOouFEBG91aJFi+Di4gJ9fX00aNBA6nQKhUwmw6xZs7RyrKioKMhkMvj7+2vleAQEBQVBJpMhKChI6lSolGEhRMWev78/ZDKZ6lGmTBlUqlQJvr6+ePjwYa77CCGwdetWfPjhh7CysoKJiQnq1q2L7777DsnJyXmea9++ffj4449hY2MDuVyOihUrolevXvjrr78KlGtqaiqWLVuGZs2awdLSEkZGRqhevTrGjBmD8PDwd3r/Uvvjjz/wzTffoGXLlti0aRPmzZtXqOfz9fWFmZlZoZ5DWwICArB8+fJCPUd2UZX90NPTQ9myZfHxxx8jODi4UM9NpAtkXGuMijt/f38MHjwY3333HZydnZGamop//vkH/v7+cHJywrVr12BkZKTaXqFQoG/fvti1axdat26N7t27w8TEBKdOnUJAQABq166NY8eOoUKFCqp9hBD4/PPP4e/vj4YNG+Kzzz6DnZ0doqOjsW/fPoSEhODMmTNwd3fPM8/Y2Fh07NgRISEh+OSTT+Dp6QkzMzOEhYVhx44diImJQXp6eqF+VoXBz88PixYtQkpKCuRyeaGfz9fXF3v27EFSUlKhn+t1qampKFOmDMqUKVPgfT755BNcu3YNUVFRanEhBNLS0mBgYAB9ff33yisqKgrOzs7o06cPOnXqBIVCgfDwcKxevRopKSn4999/Ubdu3fc6R0mgVCqRnp4OuVwOPT3+DU9aJIiKuU2bNgkA4t9//1WLT548WQAQO3fuVIvPmzdPABCTJk3KcawDBw4IPT090bFjR7X4okWLBADx1VdfCaVSmWO/LVu2iHPnzuWbZ+fOnYWenp7Ys2dPjtdSU1PFxIkT892/oDIyMkRaWppWjlUQgwcPFqamplo7nlKpFC9fvszz9UGDBmn1fIWpc+fOokqVKoV6jjt37ggAYtGiRWrxI0eOCABi1KhRhXr+3CQlJRX5OYkKCwshKvbyKoQOHjwoAIh58+apYi9fvhTW1taievXqIiMjI9fjDR48WAAQwcHBqn3Kli0ratasKTIzM98px3/++UcAEMOGDSvQ9h4eHsLDwyNHfNCgQWpfrK9/CS5btky4uLgIPT098c8//wh9fX0xa9asHMe4deuWACBWrlypisXFxYlx48YJBwcHIZfLhaurq/jhhx+EQqHIN08AOR6bNm0SQmQVZN99951wcXERcrlcVKlSRUyZMkWkpqaqHaNKlSqic+fO4ujRo8LNzU0YGhqKZcuW5XnOghZCu3btEo0aNRJGRkaiXLlyol+/fuLBgwe5blerVi1haGgoPvjgA7F3794cn3P2e505c6bqeUJCghg3bpyoUqWKkMvlonz58sLT01OEhIQIIbKu4ZufTfYxs69b9meV7ebNm6Jnz57CxsZGGBkZierVq4upU6fm+z7zKoSSkpIEANGhQwe1eEGvdWxsrOjfv78wNzcXlpaWYuDAgSI0NDRH3tnXIyIiQnz88cfCzMxMdO3aVQghhEKhEMuWLRO1a9cWhoaGwtbWVgwfPlw8f/5c7Vz//vuv6NChgyhXrpwwMjISTk5OYvDgwWrb/Prrr6JRo0bCzMxMmJubiz
p16ojly5erXj9x4oQAIE6cOKG2X0F+DrLfw4MHD0TXrl2FqampsLGxERMnTnzn33kqPQreBkxUzGTfjrC2tlbFTp8+jbi4OIwbNy7PWxwDBw7Epk2bcPDgQTRv3hynT5/G8+fP8dVXX73zbYwDBw4AAAYMGPBO+7/Npk2bkJqaiuHDh8PQ0BD29vbw8PDArl27MHPmTLVtd+7cCX19ffTs2RMA8PLlS3h4eODhw4cYMWIEKleujLNnz2LKlCmIjo7Ot4/L1q1bsX79epw/fx4///wzAKhuDw4dOhSbN2/GZ599hokTJ+LcuXOYP38+bt68iX379qkdJywsDH369MGIESMwbNgw1KhR470+j+zbpU2aNMH8+fPx+PFjrFixAmfOnMGlS5dgZWUFADh06BB8fHxQt25dzJ8/H3FxcRgyZAgqVar01nOMHDkSe/bswZgxY1C7dm08e/YMp0+fxs2bN9GoUSNMmzYN8fHxePDgAZYtWwYA+fZtunLlClq3bg0DAwMMHz4cTk5OuH37Nn7//XfMnTtX488gt5//gl5rpVKJLl264Pz58xg1ahRq1qyJ/fv3Y9CgQbmeKzMzE15eXmjVqhUWL14MExMTAMCIESNU1+LLL7/EnTt38NNPP+HSpUs4c+YMDAwM8OTJE3To0AHly5eHn58frKysEBUVhb1796qO/+eff6JPnz5o164dFixYAAC4efMmzpw5g3HjxuX5GRT05wDIumXu5eWFZs2aYfHixTh27BiWLFkCV1dXjBo1SuPPn0oRqSsxorfJbhE6duyYePr0qbh//77Ys2ePKF++vDA0NBT3799Xbbt8+XIBQOzbty/P4z1//lwAEN27dxdCCLFixYq37vM23bp1EwBEXFxcgbbXtEXIwsJCPHnyRG3bdevWCQDi6tWravHatWuLtm3bqp7PmTNHmJqaivDwcLXt/Pz8hL6+vrh3716+uebWQpPdcjB06FC1+KRJkwQA8ddff6liVapUEQDE0aNH8z1Pfud7XXp6urC1tRV16tQRKSkpqnh2C+GMGTNUsbp16woHBweRmJioigUFBam13mTDGy1ClpaW4osvvsg317xujeXWIvThhx8Kc3NzcffuXbVtc7sVm9uxZs+eLZ4+fSpiYmLEqVOnRJMmTQQAsXv3btW2Bb3W//vf/wQAtRYXhUIh2rZtm2uLEADh5+endsxTp04JAGL79u1q8aNHj6rF9+3bl2uL7uvGjRsnLCws8m2debNFSJOfg+z38N1336kds2HDhsLNzS3Pc5JuYI8zKjE8PT1Rvnx5ODo64rPPPoOpqSkOHDgABwcH1TaJiYkAAHNz8zyPk/1aQkKC2n/z2+dttHGM/PTo0QPly5dXi3Xv3h1lypTBzp07VbFr167hxo0b8PHxUcV2796N1q1bw9raGrGxsaqHp6cnFAoF/v77b43zOXz4MABgwoQJavGJEycCyGqJeZ2zszO8vLw0Pk9uLly4gCdPnmD06NFqneQ7d+6MmjVrqs796NEjXL16FQMHDlRrqfHw8ChQ52IrKyucO3cOjx49eu+cnz59ir///huff/45KleurPaaTCYr0DFmzpyJ8uXLw87ODq1bt8bNmzexZMkSfPbZZ6ptCnqtjx49CgMDAwwbNky1r56eHr744os8z/9mq8nu3bthaWmJ9u3bq53Lzc0NZmZmOHHiBACoWmUOHjyIjIyMXI9tZWWF5ORk/PnnnwX6LICC/xy8buTIkWrPW7dujcjIyAKfk0onFkJUYqxatQp//vkn9uzZg06dOiE2NhaGhoZq22QXItkFUW7eLJYsLCzeus/baOMY+XF2ds4Rs7GxQbt27bBr1y5VbOfOnShTpgy6d++uiv333384evQoypcvr/bw9PQEADx58kTjfO7evQs9PT1UrVpVLW5nZwcrKyvcvXv3rfm/q+xj53Z7rWbNmqrXs//7Zo55xd60cOFCXLt2DY6OjmjatClmzZr1zl+a2fvVqV
PnnfYHgOHDh+PPP//E77//jvHjxyMlJQUKhUJtm4Je67t378Le3l51iytbXp9LmTJl1P7gyD5XfHw8bG1tc5wvKSl
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Random Forest Metrics:\n",
"Accuracy: 0.9122\n",
"Precision: 0.9571\n",
"Recall: 0.8590\n",
"F1-Score: 0.9054\n",
"ROC-AUC: 0.9773\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+LElEQVR4nO3deZyN5f/H8feZMftKjEHMguz7FrJFRERSSTJjSRSyFdrsFLJHJGuqIVnKt5ItWb4Ig0hMliRGjG1mzDAz9+8Pjzk/x8wwZ5xx3L6v5+NxHs257uu+z+e+59Z5z3Vf9zkWwzAMAQAAmISLswsAAACwB+EFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEF/xOOHDmipk2bKiAgQBaLRStWrHDo9o8fPy6LxaL58+c7dLtm1rBhQzVs2NBh24uPj1e3bt0UHBwsi8Wivn37OmzbZsF5BtxAeME98+eff+rVV19VeHi4PD095e/vr7p162rKlCm6evVqrr52RESE9u/fr9GjR2vRokWqXr16rr7evRQZGSmLxSJ/f/9Mj+ORI0dksVhksVg0YcIEu7f/zz//aNiwYYqOjnZAtTk3ZswYzZ8/Xz179tSiRYv08ssv5+rrhYaGWo+bxWKRj4+PatasqYULF+bq65rNrcfp5kdSUpKzy8tg69atGjZsmC5evOjsUnAX8ji7APxvWL16tZ577jl5eHioU6dOKl++vK5du6bNmzfrzTff1IEDBzR79uxcee2rV69q27Zteuedd9SrV69ceY2QkBBdvXpVbm5uubL9O8mTJ48SExP17bff6vnnn7dZtnjxYnl6eub4jeSff/7R8OHDFRoaqsqVK2d7vTVr1uTo9bKyfv16Pfrooxo6dKhDt3s7lStX1oABAyRJp0+f1pw5cxQREaHk5GS98sor96yO+93Nx+lm7u7uTqjm9rZu3arhw4crMjJSgYGBzi4HOUR4Qa47duyY2rdvr5CQEK1fv16FChWyLnv99dcVExOj1atX59rr//vvv5KUq/+jslgs8vT0zLXt34mHh4fq1q2rL7/8MkN4+eKLL/TUU09p2bJl96SWxMREeXt7O/yN6+zZsypbtqzDtpeSkqK0tLTb1lmkSBF17NjR+jwyMlLh4eGaNGkS4eUmtx4nR0lLS9O1a9ec+m8L9ycuGyHXjRs3TvHx8frss89sgku6EiVK6I033rA+T0lJ0ciRI1W8eHF5eHgoNDRUb7/9tpKTk23WCw0NVcuWLbV582bVrFlTnp6eCg8PtxnWHzZsmEJCQiRJb775piwWi0JDQyXdeCNK//lmw4YNk8VisWn76aef9NhjjykwMFC+vr4qVaqU3n77bevyrOYirF+/XvXq1ZOPj48CAwPVunVr/f7775m+XkxMjPWvwYCAAHXu3FmJiYlZH9hbdOjQQd9//73NcPjOnTt15MgRdejQIUP/uLg4DRw4UBUqVJCvr6/8/f3VvHlz7d2719pn48aNqlGjhiSpc+fO1ssB6fvZsGFDlS9fXrt27VL9+vXl7e1tPS63znmJiIiQp6dnhv1v1qyZ8ubNq3/++SfT/dq4caMsFouOHTum1atXW2s4fvy4pBuhpmvXripYsKA8PT1VqVIlLViwwGYb6b+fCRMmaPLkydZz6+DBg9k6tukKFCig0qVL688//7Rp/+WXX/Tcc8+pWLFi8vDwUNGiRdWvX78Ml/EiIyPl6+urU6dOqU2bNvL19VWBAgU0cOBApaam2vS9ePGiIiMjFRAQoMDAQEVERGR5qcOe8+zw4cPq2LGjAgICVKBAAb333nsyDEMnT55U69at5e/vr+DgYH300Ud2HZvbSUhI0IABA1S0aFF5eHioVKlSmjBhggzDsOlnsVjUq1cvLV68WOXKlZOHh4d++OEHSdKpU6fUpUsXFSxYUB4eHipXrpzmzp2b4bWmTZumcuXKyd
vbW3nz5lX16tX1xRdfWI/Bm2++KUkKCwvLcC7BPBh5Qa779ttvFR4erjp16mSrf7du3bRgwQK1a9dOAwYM0Pbt2zV27Fj9/vvvWr58uU3fmJgYtWvXTl27dlVERITmzp2ryMhIVatWTeXKlVPbtm0VGBiofv366cUXX1SLFi3k6+trV/0HDhxQy5YtVbFiRY0YMUIeHh6KiYnRli1bbrve2rVr1bx5c4WHh2vYsGG6evWqpk2bprp162r37t0ZgtPzzz+vsLAwjR07Vrt379acOXMUFBSkDz/8MFt1tm3bVj169NA333yjLl26SLox6lK6dGlVrVo1Q/+jR49qxYoVeu655xQWFqbY2FjNmjVLDRo00MGDB1W4cGGVKVNGI0aM0Pvvv6/u3burXr16kmTzuzx//ryaN2+u9u3bq2PHjipYsGCm9U2ZMkXr169XRESEtm3bJldXV82aNUtr1qzRokWLVLhw4UzXK1OmjBYtWqR+/frp4Ycftl6eKFCggK5evaqGDRsqJiZGvXr1UlhYmJYuXarIyEhdvHjRJhRL0rx585SUlKTu3bvLw8ND+fLly9axTZeSkqK///5befPmtWlfunSpEhMT1bNnTz300EPasWOHpk2bpr///ltLly616ZuamqpmzZqpVq1amjBhgtauXauPPvpIxYsXV8+ePSVJhmGodevW2rx5s3r06KEyZcpo+fLlioiIyFCTvefZCy+8oDJlyuiDDz7Q6tWrNWrUKOXLl0+zZs3S448/rg8//FCLFy/WwIEDVaNGDdWvX/+Ox+X69es6d+6cTZu3t7e8vb1lGIaefvppbdiwQV27dlXlypX1448/6s0339SpU6c0adIkm/XWr1+vJUuWqFevXsqfP79CQ0MVGxurRx991BpuChQooO+//15du3bV5cuXrZO3P/30U/Xp00ft2rXTG2+8oaSkJO3bt0/bt29Xhw4d1LZtWx0+fFhffvmlJk2apPz580u6cS7BZAwgF126dMmQZLRu3Tpb/aOjow1JRrdu3WzaBw4caEgy1q9fb20LCQkxJBmbNm2ytp09e9bw8PAwBgwYYG07duyYIckYP368zTYjIiKMkJCQDDUMHTrUuPmfxqRJkwxJxr///ptl3emvMW/ePGtb5cqVjaCgIOP8+fPWtr179xouLi5Gp06dMrxely5dbLb5zDPPGA899FCWr3nzfvj4+BiGYRjt2rUzGjdubBiGYaSmphrBwcHG8OHDMz0GSUlJRmpqaob98PDwMEaMGGFt27lzZ4Z9S9egQQNDkvHJJ59kuqxBgwY2bT/++KMhyRg1apRx9OhRw9fX12jTps0d99Ewbvy+n3rqKZu2yZMnG5KMzz//3Np27do1o3bt2oavr69x+fJl635JMvz9/Y2zZ89m+/WaNm1q/Pvvv8a///5r7N+/33j55ZcNScbrr79u0zcxMTHD+mPHjjUsFotx4sQJa1tERIQhyeb4GoZhVKlSxahWrZr1+YoVKwxJxrhx46xtKSkpRr169e76POvevbvNNh9++GHDYrEYH3zwgbX9woULhpeXlxEREZGt4yQpw2Po0KE2+zJq1Cib9dq1a2dYLBYjJibG2ibJcHFxMQ4cOGDTt2vXrkahQoWMc+fO2bS3b9/eCAgIsB7/1q1bG+XKlbttvePHjzckGceOHbvjvuH+xWUj5KrLly9Lkvz8/LLV/z//+Y8kqX///jbt6X9t3zo3pmzZstbRAOnGX1ClSpXS0aNHc1zzrdLnyqxcuVJpaWnZWuf06dOKjo5WZGSkzV/3FStW1BNPPGHdz5v16NHD5nm9evV0/vx56zHMjg4dOmjjxo06c+aM1q9frzNnzmR6yUi6MU/GxeXG/wJSU1N1/vx56yWx3bt3Z/s1PTw81Llz52z1bdq0qV599VWNGDFCbdu2laenp2bNmpXt17rVf/7zHwUHB+vFF1+0trm5ualPnz6Kj4/Xzz//bNP/2Wefteuv7DVr1qhAgQ
IqUKCAKlSooEWLFqlz584aP368TT8vLy/rzwkJCTp37pzq1KkjwzC0Z8+eDNvN7Hd98zn7n//8R3ny5LGOxEiSq6u
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB2CUlEQVR4nO3deXhM5/sG8Hsyksm+ENkIWew7sQaNJSS17xSRqL1Fba1YGju1L629JShfWylFpaWWIqUNsUtKhCBBiEQii8y8vz/8nHaahAxJTpK5P9eVq51nznJPJjFP3vOecxRCCAEiIiIiPWQgdwAiIiIiubARIiIiIr3FRoiIiIj0FhshIiIi0ltshIiIiEhvsREiIiIivcVGiIiIiPQWGyEiIiLSW2yEiIiISG+xESKiArFw4UK4ublBqVSiTp06cseRhYuLCwICAuSOQUT/wkaI9EJwcDAUCoX0VaJECZQpUwYBAQG4f/9+tusIIbBlyxZ88MEHsLa2hqmpKWrWrImZM2ciJSUlx33t3bsXH374IWxtbWFkZAQnJyf06tULv/32W66ypqWlYenSpWjUqBGsrKxgbGyMSpUqYeTIkYiMjHyn1y+3X375BV988QWaNm2KjRs3Yu7cufm6v4CAAK33W6VSoVKlSggKCkJaWlq+7rso+e/36d9fhw8fljteFg8ePMD06dMRHh4udxQqRkrIHYCoIM2cOROurq5IS0vDH3/8geDgYJw6dQpXrlyBsbGxtJxarUbfvn2xc+dONG/eHNOnT4epqSl+//13zJgxA7t27cKRI0dgb28vrSOEwMcff4zg4GDUrVsX48aNg4ODA2JjY7F37160bt0ap0+fhqenZ4754uPj4evri7CwMHTo0AF9+/aFubk5IiIisH37dqxbtw4ZGRn5+j3KD7/99hsMDAzw3XffwcjIqED2qVKp8O233wIAEhMTsW/fPsyaNQu3bt3C1q1bCyRDUfDv79O/1a5dW4Y0b/bgwQPMmDEDLi4uejuqSPlAEOmBjRs3CgDizz//1KpPnDhRABA7duzQqs+dO1cAEBMmTMiyrf379wsDAwPh6+urVV+4cKEAIMaMGSM0Gk2W9TZv3izOnj37xpzt27cXBgYGYvfu3VmeS0tLE+PHj3/j+rn18uVLkZ6enifbyo2BAwcKMzOzPNueRqMRL168yPF5f3//LPvTaDSicePGQqFQiLi4uDzLoovy5csLf39/Wfadney+T3kpJSUlT7f3559/CgBi48aNebpd0m88NEZ6rXnz5gCAW7duSbXU1FQsXLgQlSpVwrx587Ks07FjR/j7++Pw4cP4448/pHXmzZuHKlWqYNGiRVAoFFnW8/PzQ8OGDXPMcvbsWRw8eBCDBg1C9+7dszyvUqmwaNEi6XGLFi3QokWLLMsFBATAxcVFehwdHQ2FQoFFixZh2bJlcHd3h0qlwoULF1CiRAnMmDEjyzYiIiKgUCjwzTffSLVnz55hzJgxcHZ2hkqlQoUKFTB//nxoNJocXxMAKBQKbNy4ESkpKdJhl+DgYABAZmYmZs2aJWVycXHB5MmTkZ6errUNFxcXdOjQASEhIahfvz5MTEywdu3aN+43uxzNmjWDEAJRUVFS/c6dO/jkk09QuXJlmJiYoFSpUujZsyeio6O11n99ePX06dMYN24cSpcuDTMzM3Tt2hWPHz/WWlYIgdmzZ6Ns2bIwNTVFy5YtcfXq1WxzRUVFoWfPnihZsiRMTU3RuHFjHDx4UGuZ48ePQ6FQYOfOnZgxYwbKlCkDCwsL9OjRA4mJiUhPT8eYMWNgZ2cHc3NzDBw4MMv38H2sWrUK1atXh0qlgpOTEz799FM8e/ZMa5kWLVqgRo0aCAsLwwcffABTU1NMnjwZAJCeno5p06ahQoUKUKlUcHZ2xhdffJEl46+//opmzZrB2toa5ubmqFy5srSN48ePo0GDBgCAgQMHZvlZInpXPDRGeu31h52NjY
1UO3XqFBISEvDZZ5+hRInsf0UGDBiAjRs34sCBA2jcuDFOnTqFp0+fYsyYMVAqle+UZf/+/QBeNUz5YePGjUhLS8PQoUOhUqng6OgILy8v7Ny5E9OmTdNadseOHVAqlejZsycA4MWLF/Dy8sL9+/cxbNgwlCtXDmfOnMGkSZMQGxuLZcuW5bjfLVu2YN26dTh37px0COb14cHBgwdj06ZN6NGjB8aPH4+zZ89i3rx5uH79Ovbu3au1nYiICHz00UcYNmwYhgwZgsqVK+v8Pcju/f7zzz9x5swZ9OnTB2XLlkV0dDRWr16NFi1a4Nq1azA1NdXaxqhRo2BjY4Np06YhOjoay5Ytw8iRI7Fjxw5pmaCgIMyePRvt2rVDu3btcP78ebRt2zbLYc2HDx/C09MTL168wOjRo1GqVCls2rQJnTp1wu7du9G1a1et5efNmwcTExMEBgbi5s2b+Prrr2FoaAgDAwMkJCRg+vTp0iFfV1dXBAUF5er7Eh8fr/XY0NAQVlZWAIDp06djxowZ8Pb2xogRIxAREYHVq1fjzz//xOnTp2FoaCit9+TJE3z44Yfo06cP+vfvD3t7e2g0GnTq1AmnTp3C0KFDUbVqVVy+fBlLly5FZGQkfvzxRwDA1atX0aFDB9SqVQszZ86ESqXCzZs3cfr0aQBA1apVMXPmTAQFBWHo0KHSHzFvOtRMlCtyD0kRFYTXh8aOHDkiHj9+LGJiYsTu3btF6dKlhUqlEjExMdKyy5YtEwDE3r17c9ze06dPBQDRrVs3IYQQy5cvf+s6b9O1a1cBQCQkJORqeS8vL+Hl5ZWl7u/vL8qXLy89vn37tgAgLC0txaNHj7SWXbt2rQAgLl++rFWvVq2aaNWqlfR41qxZwszMTERGRmotFxgYKJRKpbh79+4bs2Z3CCY8PFwAEIMHD9aqT5gwQQAQv/32m1QrX768ACAOHz78xv38d3+PHz8Wjx8/Fjdv3hSLFi0SCoVC1KhRQ+vQZXaH2EJDQwUAsXnzZqn2+mfI29tba/2xY8cKpVIpnj17JoQQ4tGjR8LIyEi0b99ea7nJkycLAFqHxsaMGSMAiN9//12qPX/+XLi6ugoXFxehVquFEEIcO3ZMABA1atQQGRkZ0rIfffSRUCgU4sMPP9TK36RJE62fgTd9nwBk+Xr9c/X6tbRt21bKIoQQ33zzjQAgNmzYINW8vLwEALFmzRqtfWzZskUYGBhovUYhhFizZo0AIE6fPi2EEGLp0qUCgHj8+HGOeXlojPIDD42RXvH29kbp0qXh7OyMHj16wMzMDPv370fZsmWlZZ4/fw4AsLCwyHE7r59LSkrS+u+b1nmbvNjGm3Tv3h2lS5fWqnXr1g0lSpTQGs24cuUKrl27ht69e0u1Xbt2oXnz5rCxsUF8fLz05e3tDbVajZMnT+qc59ChQwCAcePGadXHjx8PAFkOD7m6usLHxyfX209JSUHp0qVRunRpVKhQARMmTEDTpk2xb98+rUOXJiYm0v+/fPkST548QYUKFWBtbY3z589n2e7QoUO11m/evDnUajXu3LkDADhy5AgyMjIwatQoreXGjBmTZVuHDh1Cw4YN0axZM6lmbm6OoUOHIjo6GteuXdNafsCAAVojMI0aNZIm6f9bo0aNEBMTg8zMzLd9m2BsbIxff/1V62vx4sVar2XMmDEwMPjn42LIkCGwtLTM8h6pVCoMHDhQq7Zr1y5UrVoVVapU0frZadWqFQDg2LFjAABra2sAwL59+956uJUoL/HQGOmVlStXolKlSkhMTMSGDRtw8uRJqFQqrWVeNyKvG6Ls/LdZsrS0fOs6b/Pvbbz+UMhLrq6uWWq2trZo3bo1du7ciVmzZgF4dVisRIkS6Natm7Tc33//jUuXLmVppF579OiRznnu3LkDAwMDVKhQQavu4OAAa2trqbF4U/43MTY2xk8//QQAuHfvHhYsWIBHjx5pNT7AP/O7Nm7ciPv370MIIT2XmJiYZb
vlypXTevz6MFtCQoL0ugCgYsWKWsuVLl1a65Dc62UbNWqUZR9Vq1aVnq9Ro0aO+359+MrZ2TlLXaPRIDExEaVKlcq
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gradient Boosting Metrics:\n",
"Accuracy: 0.9185\n",
"Precision: 0.9577\n",
"Recall: 0.8718\n",
"F1-Score: 0.9128\n",
"ROC-AUC: 0.9745\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAZ0lEQVR4nO3dd3xUVf7/8fcQSCGkgBACAiGhF5EmggEBRZogEBRRgdBEUEABV2BXpQiygitVQUTpKr2IutJVMEsPIIgm0lyl9/R2fn/wyyyTApmQMFy+r+fjMQ8y55577+feCck7Z869YzPGGAEAAFhEAVcXAAAA4AzCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCywpKipKLVu2lJ+fn2w2m1avXp2n2z9+/LhsNpvmzZuXp9u1smbNmqlZs2Z5tr2YmBj17dtXgYGBstlseu211/Js23e7rL6/Ro8eLZvN5rqi7jGcz3sb4QW59vvvv+ull15SSEiIPD095evrq9DQUE2dOlXx8fH5uu/w8HAdPHhQ48eP18KFC1W/fv183d+d1LNnT9lsNvn6+mZ5HqOiomSz2WSz2fT+++87vf2//vpLo0ePVmRkZB5Um3vvvvuu5s2bpwEDBmjhwoXq3r17vu8zLS1NCxYs0BNPPKHixYurUKFCCggIUMuWLTV79mwlJibmew2u5OxrP2/ePPv3WvojICBAzZs317fffpu/xeZAXFycRo8era1bt7q6FNxpBsiFdevWGS8vL+Pv728GDx5sZs+ebWbMmGG6du1qChUqZF588cV823dcXJyRZP7xj3/k2z7S0tJMfHy8SUlJybd9ZCc8PNwULFjQuLm5mSVLlmRaPmrUKOPp6WkkmUmTJjm9/V27dhlJZu7cuU6tl5iYaBITE53eX3YefvhhExoammfbu5W4uDjTqlUrI8k88sgjZsKECeazzz4z77//vmnfvr1xc3MzvXv3viO1HDt2LNNrkJycbOLj4/N1v86+9nPnzjWSzNixY83ChQvNggULzKRJk0yNGjWMJPPVV1/la723cu7cOSPJjBo1KtOyO3E+4ToFXZaaYFnHjh1T165dFRQUpM2bN6tUqVL2Za+88oqio6P19ddf59v+z507J0ny9/fPt33YbDZ5enrm2/ZvxcPDQ6Ghofriiy/UpUsXh2Wff/65nnzySa1YseKO1BIXF6fChQvL3d09T7d79uxZVa9ePc+2l5KSorS0tGzrHDJkiL777jtNmTJFr776qsOyYcOGKSoqShs2bLitfdyOggULqmDBu/NHcps2bRxGN/v06aOSJUvqiy++ULt27VxYWfbu5vOJPODq9ATr6d+/v5Fktm/fnqP+ycnJZuzYsSYkJMS4u7uboKAgM3LkSJOQkODQLygoyDz55JPmxx9/NA899JDx8PAwwcHBZv78+fY+o0aNMpIcHkFBQcaY6yMW6V/fKH2dG61fv96EhoYaPz8/4+3tbSpXrmxGjhxpX57VX8bGGLNp0ybTuHFjU7hwYePn52eeeuopc/jw4Sz3FxUVZcLDw42fn5/x9fU1PXv2NLGxsbc8X+Hh4cbb29vMmzfPeHh4mEuXLtmX7dy500gyK1asyDTycuHCBTNs2DBTs2ZN4+3tbXx8fEzr1q1NZGSkvc+WLVsynb8bj7Np06amRo0aZvfu3aZJkybGy8vLvPrqq/ZlTZs2tW+rR48exsPDI9Pxt2zZ0vj7+5s///wzy+PLroZjx44ZY4w5c+aM6d27twkICDAeHh6mVq1aZt68eQ7bSH99Jk2aZCZPnmxCQkJMgQIFzL59+7Lc58mTJ42bm5tp3br1Tc68o5vtIzEx0bz11lumbt26xtfX1xQuXNg0btzYbN68OdN2Ll26ZMLDw42vr6/x8/MzPXr0MPv27cv0/ZXV96kxxixcuNDUrVvXeHp6mqJFi5pnn33WnDx50q
FP+ut26NAh06xZM+Pl5WVKly5t3nvvPXufW732WUkfedm1a5dDe1pamvH19TU9evRwaI+JiTFDhw41ZcqUMe7u7qZy5cpm0qRJJi0tzaFfTn8m7Nq1y7Rs2dLcd999xtPT05QvX9706tXL4fXJ+EgfhcnqfEoyr7zyilm1apWpUaOGcXd3N9WrVzfffvttpmPfsmWLqVevnvHw8DAhISFm1qxZ2b5GuPOIpXDaV199pZCQED3yyCM56t+3b1/Nnz9fTz/9tIYNG6YdO3ZowoQJ+uWXX7Rq1SqHvtHR0Xr66afVp08fhYeH67PPPlPPnj1Vr1491ahRQ2FhYfL399eQIUP03HPPqW3btipSpIhT9R86dEjt2rVTrVq1NHbsWHl4eCg6Olrbt2+/6XobN25UmzZtFBISotGjRys+Pl7Tp09XaGio9u7dq/Llyzv079Kli4KDgzVhwgTt3btXc+bMUUBAgN57770c1RkWFqb+/ftr5cqV6t27t6Troy5Vq1ZV3bp1M/U/evSoVq9erWeeeUbBwcE6c+aMPv74YzVt2lSHDx9W6dKlVa1aNY0dO1Zvv/22+vXrpyZNmkiSw2t54cIFtWnTRl27dlW3bt1UsmTJLOubOnWqNm/erPDwcEVERMjNzU0ff/yx1q9fr4ULF6p06dJZrletWjUtXLhQQ4YMUZkyZTRs2DBJUokSJRQfH69mzZopOjpaAwcOVHBwsJYtW6aePXvq8uXLmUZM5s6dq4SEBPXr108eHh4qVqxYlvv89ttvlZqaqm7dut3irGeW1T6uXr2qOXPm6LnnntOLL76oa9eu6dNPP1WrVq20c+dO1a5dW5JkjFGHDh20bds29e/fX9WqVdOqVasUHh6eo32PHz9eb731lrp06aK+ffvq3Llzmj59uh599FHt27fPYfTx0qVLat26tcLCwtSlSxctX75cw4cP1wMPPKA2bdrk6LXPzpUrV3T+/HkZY3T27FlNnz5dMTExDufTGKOnnnpKW7ZsUZ8+fVS7dm199913+tvf/qY///xTkydPtvfNyc+Es2fPqmXLlipRooRGjBghf39/HT9+XCtXrpR0/ftl5syZGjBggDp16qSwsDBJUq1atW56LNu2bdPKlSv18ssvy8fHR9OmTVPnzp118uRJ3XfffZKkffv2qXXr1ipVqpTGjBmj1NRUjR07ViVKlMjBq4Y7wsXhCRZz5coVI8l06NAhR/0jIyONJNO3b1+H9tdff91IcvhLNSgoyEgyP/zwg73t7NmzxsPDwwwbNszeduNfxDfK6cjL5MmTjSRz7ty5bOvOauSldu3aJiAgwFy4cMHetn//flOgQAGHv0DT95dx/kSnTp3Mfffdl+0+bzwOb29vY4wxTz/9tHn88ceNMcakpqaawMBAM2bMmCzPQUJCgklNTc10HB4eHmbs2LH2tpvNe2jatKmRZGbNmpXlshtHXowx5rvvvjOSzLhx48zRo0dNkSJFTMeOHW95jMb8b6TtRlOmTDGSzKJFi+xtSUlJplGjRqZIkSLm6tWr9uOSZHx9fc3Zs2dvua8hQ4YYSQ6jUMZcn8dz7tw5++P8+fP2ZTfbR0pKSqb5P5cuXTIlS5Z0eN1Xr15tJJmJEyc6rNukSZNbjrwcP37cuLm5mfHjxzvs5+DBg6ZgwYIO7emv24IFCxyOLTAw0HTu3Nnelts5LxkfHh4emUbD0o913LhxDu1PP/20sdlsJjo62hiT858Jq1atynLU50Y3m/OS3ciLu7u7vRZjrv8flmSmT59ub2vfvr0pXLiww+hhVFSUKViwICMvdwmuNoJTrl69Kkny8fHJUf9vvvlGkjR06FCH9vS/tjPOjalevbr9L0Lp+l9XVapU0dGjR3Ndc0bpf62uWbNGaWlpOVrn1KlTioyMVM+ePR3+uq9Vq5aeeOIJ+3HeqH///g7PmzRpogsXLtjPYU48//zz2rp1q06fPq3Nmzfr9OnTev7557Ps6+HhoQIFrv
+XTk1N1YULF1SkSBFVqVJFe/fuzfE+PTw81KtXrxz1bdmypV566SWNHTtWYWFh8vT01Mcff5zjfWX0zTffKDAwUM8
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4PUlEQVR4nO3dd1hT5/sG8DuMsEEUEUSU4d6KEwcOFKp1V3GLVeuus19nHXXVurDuUcVZR13URasVJ2qL4haquAUURRAEIsn7+8MfqRFQgoHDuD/XxdXmyRl3EpCH97znHJkQQoCIiIioENKTOgARERGRVNgIERERUaHFRoiIiIgKLTZCREREVGixESIiIqJCi40QERERFVpshIiIiKjQYiNEREREhRYbISIiIiq02AgRkdYWLFgAFxcX6Ovro2bNmlLHyVVOTk7w9fVVPw4KCoJMJkNQUJBkmQoSvp+U29gIUb7j7+8PmUym/jIwMICDgwN8fX3x5MmTDNcRQmDLli1o2rQpihQpAlNTU1SrVg0//PADEhMTM93Xvn378MUXX8DGxgZyuRwlS5ZEt27d8Ndff2Upa3JyMpYsWYL69evDysoKxsbGKF++PEaMGIHw8PBsvX6p/fHHH/jf//6HRo0aYePGjZg7d26u7Pf06dPo1q0bHBwcIJfLYWVlhfr16+OHH35AdHR0rmSQ0ty5c7F///4sLXv//n2NnxGZTAZLS0vUrFkTy5cvh1KpzNmwWbBy5Ur4+/tLHYMIMt5rjPIbf39/9O/fHz/88AOcnZ2RnJyM8+fPw9/fH05OTrh+/TqMjY3VyyuVSvTs2RO7du1CkyZN0LlzZ5iamuL06dPYvn07KleujGPHjqFEiRLqdYQQ+Prrr+Hv749atWrhq6++gp2dHSIjI7Fv3z6EhITg7NmzcHd3zzRnTEwMvL29ERISgi+//BKenp4wNzdHWFgYduzYgaioKCgUihx9r3LCxIkTsWDBAiQlJUEul+fKPqdNm4ZZs2bBxcUF3bt3h4uLC5KTkxESEoI9e/bAxsYGd+/ezZUsTk5OaNasmfqXuEqlgkKhgFwuh55ezv1taW5ujq+++ipLzcP9+/fh7OyMHj16oE2bNgCAuLg4HD58GIcPH8b48eOxYMGCHMuaFVWrVoWNjU26kZ/cej+J1ARRPrNx40YBQPz9998a9QkTJggAYufOnRr1uXPnCgBi/Pjx6bYVEBAg9PT0hLe3t0Z9wYIFAoAYPXq0UKlU6dbbvHmzuHDhwkdztm3bVujp6Ynffvst3XPJycli3LhxH10/q96+fStSUlJ0sq2s6N+/vzAzM9PZ9lQqlXjz5k2mz+/YsUMAEN26dcvwdb569UpMnz79s/ahjTJlyoh+/frpZFvaMDMzy/J+7927JwCIBQsWaNRVKpWoW7euKFmyZA4k1E6VKlWEh4eH1DGIBBshyncya4QOHjwoAIi5c+eqa2/evBHW1taifPny4u3btxlur3///gKACA4OVq9TtGhRUbFiRZGampqtjOfPnxcAxKBBg7K0vIeHR4a/FPr16yfKlCmjfvz+L7glS5YIFxcXoaenJ86fPy/09fXFjBkz0m3j9u3bAoBYtmyZuhYbGytGjRolSpUqJeRyuXB1dRU//vijUCqVH80JIN3Xxo0bhRDvGrIffvhBuLi4CLlcLsqUKSMmTZokkpOTNbZRpkwZ0bZtW3H06FHh5uYmjIyMxJIlSzLdZ/ny5YWNjY14/fr1R7NldR8bNmwQzZs3F8WLFxdyuVxUqlRJrFy5Mt02VCqVmDVrlnBwcBAmJiaiWbNm4vr16+kaoRMnTggA4sSJExrrnz9/Xnh5eQlLS0thYmIimjZtKs6cOaOxzPTp0wUA8e+//4p+/foJKysrYWlpKXx9fUViYqJ6uYze9481RZk1QkII8eWXX4rSpUunq69YsUJUrlxZyOVyYW9vL4YNGyZiY2PTLbdr1y5Ru3ZtYWxsLI
oVKyZ69eolHj9+rLFMZGSk8PX1FQ4ODkIulws7OzvRvn17ce/ePSHEu8/nw9eT9v2f0fvp4eEhqlSpIm7cuCGaNWsmTExMRMmSJcX8+fPT5bt//75o166dMDU1FcWLFxejR48WR48ezfAzIhJCCINcGHQiyhX3798HAFhbW6trZ86cQWxsLEaNGgUDg4y/3fv27YuNGzfi4MGDaNCgAc6cOYOXL19i9OjR0NfXz1aWgIAAAECfPn2ytf6nbNy4EcnJyfjmm29gZGQEe3t7eHh4YNeuXZg+fbrGsjt37oS+vj66du0KAHjz5g08PDzw5MkTDB48GKVLl8a5c+cwadIkREZGws/PL9P9btmyBWvXrsXFixexfv16AFAfHhw4cCA2bdqEr776CuPGjcOFCxcwb9483Lp1C/v27dPYTlhYGHr06IHBgwdj0KBBqFChQob7Cw8PR3h4OAYOHAhzc3Ot3qPM9rFq1SpUqVIF7du3h4GBAX7//XcMGzYMKpUKw4cPV68/bdo0zJ49G23atEGbNm1w6dIltG7dOkuHM//66y988cUXcHNzw/Tp06Gnp4eNGzeiRYsWOH36NOrVq6exfLdu3eDs7Ix58+bh0qVLWL9+PWxtbTF//nwA7973gQMHol69evjmm28AAK6urp/M8ebNG8TExAAA4uPjceTIERw9ehSTJk3SWG7GjBmYOXMmPD09MXToUISFhWHVqlX4+++/cfbsWRgaGgL477B03bp1MW/ePERHR2Pp0qU4e/YsLl++jCJFigAAunTpghs3bmDkyJFwcnLCs2fP8Oeff+Lhw4dwcnKCn58fRo4cCXNzc0yZMgUANA5NZyQ2Nhbe3t7o3LkzunXrht9++w0TJkxAtWrV8MUXXwAAEhMT0aJFC0RGRmLUqFGws7PD9u3bceLEiU++V1SISd2JEWkrbUTo2LFj4vnz5+LRo0fit99+E8WLFxdGRkbi0aNH6mX9/PwEALFv375Mt/fy5UsBQHTu3FkIIcTSpUs/uc6ndOrUSQDI8C/qjGg7ImRpaSmePXumseyaNWsEAHHt2jWNeuXKlUWLFi3Uj2fNmiXMzMxEeHi4xnITJ04U+vr64uHDhx/N2q9fv3SHxkJDQwUAMXDgQI36+PHjBQDx119/qWtpowFHjx796H6EEOLAgQMCgPDz89Ooq1Qq8fz5c42v90f8PraPjA6ReXl5CRcXF/XjZ8+eCblcLtq2batxaHTy5MnpRmM+HMFQqVSiXLlywsvLS2PdN2/eCGdnZ9GqVSt1LW1E6Ouvv9bI06lTJ1GsWDGNWnYOjWX0NXToUI1caa+1devWGiOCy5cvFwDEhg0bhBBCKBQKYWtrK6pWrSqSkpLUy6WNxE6bNk0I8W60EZmMRr0vs0NjmY0IARCbN29W11JSUoSdnZ3o0qWLurZo0SIBQOzfv19dS0pKEhUrVuSIEGWKM9Eo3/L09ETx4sXh6OiIr776CmZmZggICECpUqXUy7x+/RoAYGFhkel20p6Lj4/X+O/H1vkUXWzjY7p06YLixYtr1Dp37gwDAwPs3LlTXbt+/Tpu3rwJHx8fdW337t1o0qQJrK2tERMTo/7y9PSEUqnEqVOntM5z+PBhAMDYsWM16uPGjQMAHDp0SKPu7OwMLy+vT2437X38cDQoLi4OxYsX1/gKDQ3N0j5MTEw0thMTEwMPDw9EREQgLi4OAHDs2DEoFAqMHDkSMplMvfzo0aM/mTk0NBT//vsvevbsiRcvXqjf38TERLRs2RKnTp2CSqXSWGfIkCEaj5s0aYIXL16oX392ffPNN/jzzz/x559/Ys+ePRg+fDjWrFmj8TmlvdbRo0drTE4eNGgQLC0t1Z/dP//8g2fPnmHYsGEaJyO0bdsWFStWVC9nYmICuVyOoKAgxMbGflb+95mbm6N3797qx3K5HPXq1UNERIS6dvToUTg4OKB9+/bqmrGxMQYNGqSzHFTw8NAY5VsrVqxA+fLlERcXhw
0bNuDUqVMwMjLSWCatEUlriDLyYbNkaWn5yXU+5f1tpB0u0CVnZ+d0NRsbG7Rs2RK7du3CrFmzALw7LGZgYIDOnTu
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import confusion_matrix, roc_curve, auc\n",
"\n",
"def plot_confusion_matrix(y_true, y_pred, title):\n",
"    \"\"\"Show the confusion matrix of y_true against y_pred as an annotated heatmap.\n",
"\n",
"    Args:\n",
"        y_true: true labels.\n",
"        y_pred: predicted labels.\n",
"        title: text placed above the heatmap.\n",
"    \"\"\"\n",
"    counts = confusion_matrix(y_true, y_pred)\n",
"    # heatmap returns the axes it drew on, so the labels can be set on it directly\n",
"    heatmap_ax = sns.heatmap(counts, annot=True, fmt='d', cmap='Blues', cbar=False)\n",
"    heatmap_ax.set_title(title)\n",
"    # axis labels are in Russian: x = predicted values, y = true values\n",
"    heatmap_ax.set_xlabel('Предсказанные значения')\n",
"    heatmap_ax.set_ylabel('Истинные значения')\n",
"    plt.show()\n",
"\n",
"def plot_roc_curve(y_true, y_pred_proba, title):\n",
"    \"\"\"Draw the ROC curve for the given scores, with the area under it shown in the legend.\n",
"\n",
"    Args:\n",
"        y_true: true binary labels.\n",
"        y_pred_proba: scores for the positive class.\n",
"        title: text placed above the plot.\n",
"    \"\"\"\n",
"    false_positive_rate, true_positive_rate, _ = roc_curve(y_true, y_pred_proba)\n",
"    roc_auc = auc(false_positive_rate, true_positive_rate)\n",
"\n",
"    # start a fresh figure and draw on its axes\n",
"    plt.figure()\n",
"    roc_ax = plt.gca()\n",
"    roc_ax.plot(false_positive_rate, true_positive_rate, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')\n",
"    # dashed diagonal from (0, 0) to (1, 1) as a visual reference line\n",
"    roc_ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
"    roc_ax.set_xlim([0.0, 1.0])\n",
"    # the upper y limit is slightly above 1 so the top of the curve is not clipped\n",
"    roc_ax.set_ylim([0.0, 1.05])\n",
"    roc_ax.set_xlabel('False Positive Rate')\n",
"    roc_ax.set_ylabel('True Positive Rate')\n",
"    roc_ax.set_title(title)\n",
"    roc_ax.legend(loc=\"lower right\")\n",
"    plt.show()\n",
"\n",
"def evaluate_and_plot_model(model, X_test, y_test, model_name):\n",
"    \"\"\"Print the test metrics of a fitted classifier and plot its confusion matrix and ROC curve.\n",
"\n",
"    Args:\n",
"        model: fitted estimator exposing predict and predict_proba.\n",
"        X_test: feature matrix to score.\n",
"        y_test: true labels for X_test; label 1 is treated as the positive class.\n",
"        model_name: display name used in the printed header and in both plot titles.\n",
"    \"\"\"\n",
"    predicted_labels = model.predict(X_test)\n",
"    # column 1 of predict_proba is used as the score of the positive class\n",
"    positive_scores = model.predict_proba(X_test)[:, 1]\n",
"\n",
"    accuracy = accuracy_score(y_test, predicted_labels)\n",
"    # the three label-based scores share one call signature\n",
"    precision, recall, f1 = (\n",
"        score_fn(y_test, predicted_labels, pos_label=1)\n",
"        for score_fn in (precision_score, recall_score, f1_score)\n",
"    )\n",
"    roc_auc = roc_auc_score(y_test, positive_scores)\n",
"\n",
"    print(f\"{model_name} Metrics:\")\n",
"    print(f\"Accuracy: {accuracy:.4f}\")\n",
"    print(f\"Precision: {precision:.4f}\")\n",
"    print(f\"Recall: {recall:.4f}\")\n",
"    print(f\"F1-Score: {f1:.4f}\")\n",
"    print(f\"ROC-AUC: {roc_auc:.4f}\")\n",
"\n",
"    plot_confusion_matrix(y_test, predicted_labels, f'Confusion Matrix for {model_name}')\n",
"    plot_roc_curve(y_test, positive_scores, f'ROC Curve for {model_name}')\n",
"\n",
"# Report and plot every tuned model on the same held-out data, in a fixed order.\n",
"for fitted_model, display_name in (\n",
"    (logreg_best_model, 'Logistic Regression'),\n",
"    (rf_best_model, 'Random Forest'),\n",
"    (gb_best_model, 'Gradient Boosting'),\n",
"):\n",
"    evaluate_and_plot_model(fitted_model, X_test, y_test, display_name)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}