{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',\n",
" 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',\n",
" 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',\n",
" 'Asthma', 'KidneyDisease', 'SkinCancer'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HeartDisease</th>\n",
" <th>BMI</th>\n",
" <th>Smoking</th>\n",
" <th>AlcoholDrinking</th>\n",
" <th>Stroke</th>\n",
" <th>PhysicalHealth</th>\n",
" <th>MentalHealth</th>\n",
" <th>DiffWalking</th>\n",
" <th>Sex</th>\n",
" <th>AgeCategory</th>\n",
" <th>Race</th>\n",
" <th>Diabetic</th>\n",
" <th>PhysicalActivity</th>\n",
" <th>GenHealth</th>\n",
" <th>SleepTime</th>\n",
" <th>Asthma</th>\n",
" <th>KidneyDisease</th>\n",
" <th>SkinCancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>16.60</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>55-59</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>5.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>No</td>\n",
" <td>20.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>7.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>26.58</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>20.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>8.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>24.21</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>6.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>23.71</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>40-44</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>8.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Yes</td>\n",
" <td>28.87</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>Black</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>12.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>21.63</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>15.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>70-74</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>4.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>No</td>\n",
" <td>31.64</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>9.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>No</td>\n",
" <td>26.45</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No, borderline diabetes</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>No</td>\n",
" <td>40.69</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Good</td>\n",
" <td>10.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
"0 No 16.60 Yes No No 3.0 \n",
"1 No 20.34 No No Yes 0.0 \n",
"2 No 26.58 Yes No No 20.0 \n",
"3 No 24.21 No No No 0.0 \n",
"4 No 23.71 No No No 28.0 \n",
"5 Yes 28.87 Yes No No 6.0 \n",
"6 No 21.63 No No No 15.0 \n",
"7 No 31.64 Yes No No 5.0 \n",
"8 No 26.45 No No No 0.0 \n",
"9 No 40.69 No No No 0.0 \n",
"\n",
" MentalHealth DiffWalking Sex AgeCategory Race \\\n",
"0 30.0 No Female 55-59 White \n",
"1 0.0 No Female 80 or older White \n",
"2 30.0 No Male 65-69 White \n",
"3 0.0 No Female 75-79 White \n",
"4 0.0 Yes Female 40-44 White \n",
"5 0.0 Yes Female 75-79 Black \n",
"6 0.0 No Female 70-74 White \n",
"7 0.0 Yes Female 80 or older White \n",
"8 0.0 No Female 80 or older White \n",
"9 0.0 Yes Male 65-69 White \n",
"\n",
" Diabetic PhysicalActivity GenHealth SleepTime Asthma \\\n",
"0 Yes Yes Very good 5.0 Yes \n",
"1 No Yes Very good 7.0 No \n",
"2 Yes Yes Fair 8.0 Yes \n",
"3 No No Good 6.0 No \n",
"4 No Yes Very good 8.0 No \n",
"5 No No Fair 12.0 No \n",
"6 No Yes Fair 4.0 Yes \n",
"7 Yes No Good 9.0 Yes \n",
"8 No, borderline diabetes No Fair 5.0 No \n",
"9 No Yes Good 10.0 No \n",
"\n",
" KidneyDisease SkinCancer \n",
"0 No Yes \n",
"1 No No \n",
"2 No No \n",
"3 No Yes \n",
"4 No No \n",
"5 No No \n",
"6 No Yes \n",
"7 No No \n",
"8 Yes No \n",
"9 No No "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd \n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\")\n",
"print(df.columns)\n",
"\n",
"display(df.head(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Регрессия"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Цель: Разработать модель регрессии, которая будет предсказывать количество часов сна, которое человек получает в сутки, на основе его демографических данных, образа жизни и состояния здоровья.\n",
"\n",
"Применение:\n",
"\n",
"Медицинские учреждения: Модель может помочь врачам оценить качество сна пациента и разработать индивидуальные планы лечения и профилактики нарушений сна.\n",
"\n",
"Компании, разрабатывающие приложения для отслеживания сна: Модель может использоваться для улучшения своих продуктов и предоставления более точных рекомендаций.\n",
"\n",
"Исследования в области сна: Модель может помочь в изучении факторов, влияющих на качество сна."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сначала подготовим данные для работы - удалим выбросы."
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размер данных до удаления выбросов: (929, 18)\n",
"Размер данных после удаления выбросов: (929, 18)\n"
]
}
],
"source": [
"import pandas as pd\n",
"from scipy import stats\n",
"\n",
"data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
"\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
"\n",
"z_scores = stats.zscore(data[numeric_features])\n",
"\n",
"threshold = 3\n",
"\n",
"data_cleaned = data[(z_scores < threshold).all(axis=1)]\n",
"data = data_cleaned\n",
"print(\"Размер данных до удаления выбросов:\", data.shape)\n",
"print(\"Размер данных после удаления выбросов:\", data_cleaned.shape)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Теперь перейдем к делению на выборки и созданию ориентира"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размер обучающей выборки: (255836, 16)\n",
"Размер тестовой выборки: (63959, 16)\n",
"Baseline MAE: 1.0154101277944922\n",
"Baseline MSE: 2.085820163563156\n",
"Baseline R²: -7.204157852269688e-05\n"
]
}
],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"\n",
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"target = 'SleepTime'\n",
"\n",
"global X_train, X_test, y_train, y_test\n",
"X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)\n",
"\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)\n",
"\n",
"baseline_predictions = [y_train.mean()] * len(y_test)\n",
"\n",
"print('Baseline MAE:', mean_absolute_error(y_test, baseline_predictions))\n",
"print('Baseline MSE:', mean_squared_error(y_test, baseline_predictions))\n",
"print('Baseline R²:', r2_score(y_test, baseline_predictions))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание конвейера и обучение моделей"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: Linear Regression trained.\n",
"Model: Decision Tree trained.\n",
"Model: Gradient Boosting trained.\n"
]
}
],
"source": [
"import pandas as pd\n",
"from scipy import stats\n",
"from sklearn.model_selection import train_test_split, RandomizedSearchCV\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.tree import DecisionTreeRegressor\n",
"from sklearn.ensemble import GradientBoostingRegressor\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"\n",
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
"\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('num', StandardScaler(), numeric_features),\n",
" ('cat', OneHotEncoder(), categorical_features)])\n",
"\n",
"pipeline_linear_regression = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('regressor', LinearRegression())])\n",
"\n",
"pipeline_decision_tree = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('regressor', DecisionTreeRegressor(random_state=42))])\n",
"\n",
"pipeline_gradient_boosting = Pipeline(steps=[\n",
" ('preprocessor', preprocessor),\n",
" ('regressor', GradientBoostingRegressor(random_state=42))])\n",
"\n",
"pipelines = [\n",
" ('Linear Regression', pipeline_linear_regression),\n",
" ('Decision Tree', pipeline_decision_tree),\n",
" ('Gradient Boosting', pipeline_gradient_boosting)\n",
"]\n",
"\n",
"for name, pipeline in pipelines:\n",
" pipeline.fit(X_train, y_train)\n",
" print(f\"Model: {name} trained.\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оценка качества моделей"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: Linear Regression\n",
"MAE: 0.999721882988516\n",
"MSE: 2.007024248723743\n",
"R²: 0.03770762552704621\n",
"\n",
"Model: Decision Tree\n",
"MAE: 1.405790088390023\n",
"MSE: 4.053338792508978\n",
"R²: -0.9434229624615185\n",
"\n",
"Model: Gradient Boosting\n",
"MAE: 0.9962143800804221\n",
"MSE: 1.9983219431838193\n",
"R²: 0.041880052575063775\n",
"\n"
]
}
],
"source": [
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
"\n",
"for name, pipeline in pipelines:\n",
" y_pred = pipeline.predict(X_test)\n",
" print(f\"Model: {name}\")\n",
" print('MAE:', mean_absolute_error(y_test, y_pred))\n",
" print('MSE:', mean_squared_error(y_test, y_pred))\n",
" print('R²:', r2_score(y_test, y_pred))\n",
" print()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Линейная регрессия имеет низкое смещение, так как MAE и MSE близки к 1. Однако, R² близок к 0, что указывает на то, что модель не очень хорошо объясняет дисперсию в данных. Это может быть связано с тем, что линейная модель не может хорошо аппроксимировать сложные зависимости в данных.\n",
"\n",
"Дерево решений имеет высокое смещение и дисперсию. Отрицательный R² указывает на то, что модель работает хуже, чем простое предсказание среднего значения. Это свидетельствует о переобучении и высокой дисперсии.\n",
"\n",
"Градиентный бустинг имеет низкое смещение, так как MAE и MSE близки к 1. R² также близок к 0, что указывает на то, что модель не очень хорошо объясняет дисперсию в данных. Однако, это лучший результат среди всех моделей, что указывает на то, что градиентный бустинг лучше справляется с данными, чем линейная регрессия.\n",
"\n",
"Линейная регрессия и Градиентный бустинг имеют низкое смещение, но низкий R², что указывает на то, что они не могут хорошо объяснить дисперсию в данных.\n",
"\n",
"Дерево решений имеет высокую дисперсию и переобучение, что приводит к отрицательному R²."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Классификация"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Цель: Разработать модель, которая сможет предсказывать вероятность развития сердечно-сосудистых заболеваний (HeartDisease) у пациентов на основе их демографических данных, образа жизни и состояния здоровья.\n",
"\n",
"Применение: Модель может использоваться в медицинских учреждениях для раннего выявления пациентов с высоким риском сердечных заболеваний, что позволит назначить профилактические меры и улучшить результаты лечения."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Проведем деление на выборки"
2024-11-09 13:23:43 +04:00
]
},
{
"cell_type": "code",
"execution_count": 63,
2024-11-09 13:23:43 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HeartDisease\n",
"0 796\n",
"1 796\n",
"Name: count, dtype: int64\n",
"Размер обучающей выборки: (1273, 49)\n",
"Размер тестовой выборки: (319, 49)\n"
2024-11-09 13:23:43 +04:00
]
}
],
"source": [
"import pandas as pd\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"target = 'HeartDisease'\n",
"\n",
"label_encoder = LabelEncoder()\n",
"data[target] = label_encoder.fit_transform(data[target])\n",
"\n",
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
"\n",
"categorical_transformer = Pipeline(steps=[\n",
" ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
"])\n",
"\n",
"numeric_transformer = Pipeline(steps=[\n",
" ('scaler', StandardScaler())\n",
"])\n",
"\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('num', numeric_transformer, numeric_features),\n",
" ('cat', categorical_transformer, categorical_features)\n",
" ])\n",
"\n",
"X = preprocessor.fit_transform(data[features])\n",
"y = data[target]\n",
"\n",
"smote = SMOTE(random_state=42)\n",
"X_resampled, y_resampled = smote.fit_resample(X, y)\n",
"\n",
"print(pd.Series(y_resampled).value_counts())\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n",
2024-11-09 13:23:43 +04:00
"\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)"
]
},
{
"cell_type": "code",
"execution_count": 64,
2024-11-09 13:23:43 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Лучшие гиперпараметры для логистической регрессии:\n",
"{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n",
"Accuracy: 0.7398\n",
"Precision: 0.7239\n",
"Recall: 0.7564\n",
"F1-Score: 0.7398\n",
"ROC-AUC: 0.8338\n",
"Лучшие гиперпараметры для случайного леса:\n",
"{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(25), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
2024-11-09 13:23:43 +04:00
"Accuracy: 0.9122\n",
"Precision: 0.9571\n",
"Recall: 0.8590\n",
"F1-Score: 0.9054\n",
"ROC-AUC: 0.9773\n",
"Лучшие гиперпараметры для градиентного бустинга:\n",
"{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(52), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
2024-11-09 13:23:43 +04:00
"Accuracy: 0.9185\n",
"Precision: 0.9577\n",
"Recall: 0.8718\n",
"F1-Score: 0.9128\n",
"ROC-AUC: 0.9745\n",
"\n",
"Результаты моделей:\n",
"\n",
"Logistic Regression:\n",
"Accuracy: 0.7398\n",
"Precision: 0.7239\n",
"Recall: 0.7564\n",
"F1: 0.7398\n",
"Roc_auc: 0.8338\n",
"\n",
"Random Forest:\n",
"Accuracy: 0.9122\n",
"Precision: 0.9571\n",
"Recall: 0.8590\n",
"F1: 0.9054\n",
"Roc_auc: 0.9773\n",
"\n",
"Gradient Boosting:\n",
"Accuracy: 0.9185\n",
"Precision: 0.9577\n",
"Recall: 0.8718\n",
"F1: 0.9128\n",
"Roc_auc: 0.9745\n"
]
}
],
"source": [
"import pandas as pd\n",
"from imblearn.over_sampling import SMOTE\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LogisticRegression\n",
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score\n",
"from scipy.stats import uniform, randint\n",
"from sklearn.model_selection import RandomizedSearchCV\n",
"\n",
"def evaluate_model(model, X_test, y_test):\n",
" y_pred = model.predict(X_test)\n",
" y_pred_proba = model.predict_proba(X_test)[:, 1]\n",
" \n",
" accuracy = accuracy_score(y_test, y_pred)\n",
" precision = precision_score(y_test, y_pred, pos_label=1) \n",
" recall = recall_score(y_test, y_pred, pos_label=1) \n",
" f1 = f1_score(y_test, y_pred, pos_label=1) \n",
" roc_auc = roc_auc_score(y_test, y_pred_proba)\n",
" \n",
" print(f\"Accuracy: {accuracy:.4f}\")\n",
" print(f\"Precision: {precision:.4f}\")\n",
" print(f\"Recall: {recall:.4f}\")\n",
" print(f\"F1-Score: {f1:.4f}\")\n",
" print(f\"ROC-AUC: {roc_auc:.4f}\")\n",
" \n",
" return {\n",
" 'accuracy': accuracy,\n",
" 'precision': precision,\n",
" 'recall': recall,\n",
" 'f1': f1,\n",
" 'roc_auc': roc_auc\n",
" }\n",
"\n",
"logreg_param_dist = {\n",
" 'classifier__C': uniform(loc=0, scale=4),\n",
" 'classifier__penalty': ['l1', 'l2'],\n",
" 'classifier__solver': ['liblinear', 'saga']\n",
"}\n",
"\n",
"logreg_pipeline = Pipeline([\n",
" ('classifier', LogisticRegression(max_iter=1000, random_state=42))\n",
"])\n",
"\n",
"logreg_random_search = RandomizedSearchCV(logreg_pipeline, param_distributions=logreg_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
"logreg_random_search.fit(X_train, y_train)\n",
"\n",
"print(\"Лучшие гиперпараметры для логистической регрессии:\")\n",
"print(logreg_random_search.best_params_)\n",
"\n",
"logreg_best_model = logreg_random_search.best_estimator_\n",
"logreg_results = evaluate_model(logreg_best_model, X_test, y_test)\n",
"\n",
"rf_param_dist = {\n",
" 'classifier__n_estimators': randint(100, 1000),\n",
" 'classifier__max_depth': [None] + list(randint(10, 100).rvs(10)),\n",
" 'classifier__min_samples_split': randint(2, 20),\n",
" 'classifier__min_samples_leaf': randint(1, 20),\n",
" 'classifier__bootstrap': [True, False]\n",
"}\n",
"\n",
"rf_pipeline = Pipeline([\n",
" ('classifier', RandomForestClassifier(random_state=42))\n",
"])\n",
"\n",
"rf_random_search = RandomizedSearchCV(rf_pipeline, param_distributions=rf_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
"rf_random_search.fit(X_train, y_train)\n",
"\n",
"print(\"Лучшие гиперпараметры для случайного леса:\")\n",
"print(rf_random_search.best_params_)\n",
"\n",
"rf_best_model = rf_random_search.best_estimator_\n",
"rf_results = evaluate_model(rf_best_model, X_test, y_test)\n",
"\n",
"gb_param_dist = {\n",
" 'classifier__n_estimators': randint(100, 1000),\n",
" 'classifier__learning_rate': uniform(0.01, 0.5),\n",
" 'classifier__max_depth': [None] + list(randint(10, 100).rvs(10)),\n",
" 'classifier__min_samples_split': randint(2, 20),\n",
" 'classifier__min_samples_leaf': randint(1, 20),\n",
" 'classifier__subsample': uniform(0.5, 0.5)\n",
"}\n",
"\n",
"gb_pipeline = Pipeline([\n",
" ('classifier', GradientBoostingClassifier(random_state=42))\n",
"])\n",
"\n",
"gb_random_search = RandomizedSearchCV(gb_pipeline, param_distributions=gb_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
"gb_random_search.fit(X_train, y_train)\n",
"\n",
"print(\"Лучшие гиперпараметры для градиентного бустинга:\")\n",
"print(gb_random_search.best_params_)\n",
"\n",
"gb_best_model = gb_random_search.best_estimator_\n",
"gb_results = evaluate_model(gb_best_model, X_test, y_test)\n",
"\n",
"print(\"\\nРезультаты моделей:\")\n",
"print(\"\\nLogistic Regression:\")\n",
"for metric, value in logreg_results.items():\n",
" print(f\"{metric.capitalize()}: {value:.4f}\")\n",
"\n",
"print(\"\\nRandom Forest:\")\n",
"for metric, value in rf_results.items():\n",
" print(f\"{metric.capitalize()}: {value:.4f}\")\n",
"\n",
"print(\"\\nGradient Boosting:\")\n",
"for metric, value in gb_results.items():\n",
" print(f\"{metric.capitalize()}: {value:.4f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Logistic Regression Metrics:\n",
"Accuracy: 0.7398\n",
"Precision: 0.7239\n",
"Recall: 0.7564\n",
"F1-Score: 0.7398\n",
"ROC-AUC: 0.8338\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBhklEQVR4nO3deZyN9f//8ecxzGrGmBhjncXY921ky4iIiCSJsofK3oJKlkgkW2RJ2bMXSiEkYco6o+xjS7KExjYLM3P9/vCb83XMYg5nHJfP4367zc2c63pf1/U61xwzz/O+3u/rWAzDMAQAAGAS2ZxdAAAAgD0ILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQIL3joHTlyRA0bNlSuXLlksVi0YsUKh+7/xIkTslgsmj17tkP3a2bh4eEKDw932P6uXbumrl27KiAgQBaLRX379nXYvh8WmzZtksVi0aZNmxyyv9mzZ8tisejEiRMO2R+koUOHymKxOLsMOADhBZly9OhRde/eXSEhIXJ3d5ePj49q1aqliRMnKi4uLkuP3aFDB/3xxx8aOXKk5s2bp6pVq2bp8R6kjh07ymKxyMfHJ83zeOTIEVksFlksFo0dO9bu/f/zzz8aOnSoIiMjHVDtvfvoo480e/Zsvfbaa5o3b55eeeWVLD1eUFCQmjZtmqXHcJSPPvrI4YH8TilBKOUre/bsKliwoDp27KjTp09n6bGBLGEAd/H9998bHh4ehq+vr9G7d29jxowZxuTJk402bdoYOXLkMF599dUsO3ZsbKwhyXjvvfey7BjJyclGXFyckZiYmGXHSE+HDh2M7NmzGy4uLsbixYtTrR8yZIjh7u5uSDI++eQTu/e/Y8cOQ5Ixa9Ysu7ZLSEgwEhIS7D5eeqpXr27UqlXLYfu7m8DAQOOZZ555YMczDMNISkoy4uLijKSkJLu28/LyMjp06JBqeWJiohEXF2ckJyffd22zZs0yJBnDhw835s2bZ3zxxRdGly5dDBcXF6No0aJGXFzcfR/DDG7evPk/81wfddmdG53wsDt+/LjatGmjwMBAbdy4Ufnz57eue+ONNxQdHa3Vq1dn2fH//fdfSZKvr2+WHcNiscjd3T3L9n83bm5uqlWrlhYuXKjWrVvbrPv666/1zDPPaPny5Q+kltjYWHl6esrV1dWh+z1//rxKly7tsP0lJiYqOTnZ4XXej2zZsjn0deTi4iIXFxeH7U+SGjdubO257Nq1q/LkyaPRo0dr1apVqV57WckwDMXHx8vDw+OBHVOSsmfPruzZ+bP3KOCyETI0ZswYXbt2TV9++aVNcEkRGhqqPn36WB8nJibqww8/VNGiReXm5qagoCC9++67SkhIsNkupVt/y5YtCgsLk7u7u0JCQjR37lxrm6FDhyowMFCS9Pbbb8tisSgoKEjSrcstKd/fLq1r2j/99JNq164tX19f5cyZUyVKlNC7775rXZ/emJeNGzeqTp068vLykq+vr5o3b64DBw6kebzo6Gh17NhRvr6+ypUrlzp16qTY2Nj0T+wd2rZtqx9//FExMTHWZTt27NCRI0fUtm3bVO0vXbqkt956S+XKlVPOnDnl4+Ojxo0bKyoqytpm06ZNqlatmiSpU6dO1ksGKc8zPDxcZcuW1a5du/TEE0/I09PTel7uHPPSoUMHubu7p3r+jRo1Uu7cufXPP/+k+bxSxoEcP35cq1evttaQMo7j/Pnz6tKli/Llyyd3d3dVqFBBc+bMsdlHys9n7NixmjBhgvW1tX///kyd2/Rk9rWanJysoUOHqkCBAvL09FS9evW0f/9+BQUFqWPHjqme6+1jXo4cOaLnn39eAQEBcnd3V6FChdSmTRtdvnxZ0q3gfP36dc2ZM8d6blL2md6Ylx9//FF169aVt7e3fHx8VK1aNX399df3dA7q1Kkj6dZl4dsdPHhQrVq1kp+fn9zd3V
W1alWtWrUq1fZ79+5V3bp15eHhoUKFCmnEiBGaNWtWqrpT/r+vXbtWVatWlYeHh6ZPny5JiomJUd++fVW4cGG5ubkpNDRUo0ePVnJyss2xFi1apCpVqlifd7ly5TRx4kTr+ps3b2rYsGEqVqyY3N3d9dhjj6l27dr66aefrG3S+v3gyN9ZeHCIoMjQd999p5CQENWsWTNT7bt27ao5c+aoVatWevPNN/X7779r1KhROnDggL799lubttHR0WrVqpW6dOmiDh066KuvvlLHjh1VpUoVlSlTRi1btpSvr6/69eunl156SU2aNFHOnDntqn/fvn1q2rSpypcvr+HDh8vNzU3R0dHaunVrhtutX79ejRs3VkhIiIYOHaq4uDh99tlnqlWrlnbv3p0qOLVu3VrBwcEaNWqUdu/erZkzZ8rf31+jR4/OVJ0tW7ZUjx499M0336hz586SbvW6lCxZUpUrV07V/tixY1qxYoVeeOEFBQcH69y5c5o+fbrq1q2r/fv3q0CBAipVqpSGDx+uDz74QN26dbP+obr9Z3nx4kU1btxYbdq00csvv6x8+fKlWd/EiRO1ceNGdejQQREREXJxcdH06dO1bt06zZs3TwUKFEhzu1KlSmnevHnq16+fChUqpDfffFOSlDdvXsXFxSk8PFzR0dHq2bOngoODtXTpUnXs2FExMTE2oViSZs2apfj4eHXr1k1ubm7y8/PL1LlNT2Zfq4MGDdKYMWPUrFkzNWrUSFFRUWrUqJHi4+Mz3P+NGzfUqFEjJSQkqFevXgoICNDp06f1/fffKyYmRrly5dK8efPUtWtXhYWFqVu3bpKkokWLprvP2bNnq3PnzipTpowGDRokX19f7dmzR2vWrEkz5N5NSsDInTu3ddm+fftUq1YtFSxYUAMHDpSXl5eWLFmiFi1aaPny5XruueckSadPn1a9evVksVg0aNAgeXl5aebMmXJzc0vzWIcOHdJLL72k7t2769VXX1WJEiUUGxurunXr6vTp0+revbuKFCmibdu2adCgQTpz5owmTJgg6dYbkJdeekn169e3/p86cOCAtm7dan2dDB06VKNGjbKezytXrmjnzp3avXu3nnrqqXTPgSN/Z+EBcvZ1Kzy8Ll++bEgymjdvnqn2kZGRhiSja9euNsvfeustQ5KxceNG67LAwEBDkrF582brsvPnzxtubm7Gm2++aV12/PjxNMd7dOjQwQgMDExVw5AhQ4zbX9bjx483JBn//vtvunWnHOP2cSEVK1Y0/P39jYsXL1qXRUVFGdmyZTPat2+f6nidO3e22edzzz1nPPbYY+ke8/bn4eXlZRiGYbRq1cqoX7++YRi3xk8EBAQYw4YNS/McxMfHpxpbcfz4ccPNzc0YPny4dVlGY17q1q1rSDKmTZuW5rq6devaLFu7dq0hyRgxYoRx7NgxI2fOnEaLFi3u+hwNI+0xKBMmTDAkGfPnz7cuu3HjhlGjRg0jZ86cxpUrV6zPS5Lh4+NjnD9//p6Pd7vMvlbPnj1rZM+ePdXzHDp0qCHJZqzKzz//bEgyfv75Z8MwDGPPnj2GJGPp0qUZ1premJeUcSrHjx83DMMwYmJiDG9vb6N69eqpxm3cbVxMyr7Wr19v/Pvvv8apU6eMZcuWGXnz5jXc3NyMU6dOWdvWr1/fKFeunBEfH2+z/5o1axrFihWzLuvVq5dhsViMPXv2WJddvHjR8PPzs6nbMP7v//uaNWts6vrwww8NLy8v4/DhwzbLBw4caLi4uBh//fWXYRiG0adPH8PHxyfDcWkVKlS46zinO38/ZMXvLDwYXDZCuq5cuSJJ8vb2zlT7H374QZLUv39/m+Up77bvHBtTunRpa2+AdOvdeIkSJXTs2LF7rvlOKWNlVq5cmaobOj1nzpxRZGSkOnbsaPPuvnz58nrqqaesz/N2PXr0sHlcp04dXbx40XoOM6Nt27batGmTzp49q40bN+rs2bPpvpt2c3NTtmy3/vsmJS
Xp4sWL1ktiu3fvzvQx3dzc1KlTp0y1bdiwobp3767hw4erZcuWcnd3t3b934sffvhBAQEBeumll6zLcuTIod69e+v
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB7v0lEQVR4nO3dd3xN9/8H8NdN5GYvIhISMuzaMYOmCFGqghJb1FaqRitGjSpqU7VbYqXWl1IrLRU1UirEljQiZoIQWTLv/fz+yC+XK0MuNzlJ7uv5eNxHe9/3jPe9J3Hf+ZzPkAkhBIiIiIh0kJ7UCRARERFJhYUQERER6SwWQkRERKSzWAgRERGRzmIhRERERDqLhRARERHpLBZCREREpLNYCBEREZHOYiFEREREOouFEBG91aJFi+Di4gJ9fX00aNBA6nQKhUwmw6xZs7RyrKioKMhkMvj7+2vleAQEBQVBJpMhKChI6lSolGEhRMWev78/ZDKZ6lGmTBlUqlQJvr6+ePjwYa77CCGwdetWfPjhh7CysoKJiQnq1q2L7777DsnJyXmea9++ffj4449hY2MDuVyOihUrolevXvjrr78KlGtqaiqWLVuGZs2awdLSEkZGRqhevTrGjBmD8PDwd3r/Uvvjjz/wzTffoGXLlti0aRPmzZtXqOfz9fWFmZlZoZ5DWwICArB8+fJCPUd2UZX90NPTQ9myZfHxxx8jODi4UM9NpAtkXGuMijt/f38MHjwY3333HZydnZGamop//vkH/v7+cHJywrVr12BkZKTaXqFQoG/fvti1axdat26N7t27w8TEBKdOnUJAQABq166NY8eOoUKFCqp9hBD4/PPP4e/vj4YNG+Kzzz6DnZ0doqOjsW/fPoSEhODMmTNwd3fPM8/Y2Fh07NgRISEh+OSTT+Dp6QkzMzOEhYVhx44diImJQXp6eqF+VoXBz88PixYtQkpKCuRyeaGfz9fXF3v27EFSUlKhn+t1qampKFOmDMqUKVPgfT755BNcu3YNUVFRanEhBNLS0mBgYAB9ff33yisqKgrOzs7o06cPOnXqBIVCgfDwcKxevRopKSn4999/Ubdu3fc6R0mgVCqRnp4OuVwOPT3+DU9aJIiKuU2bNgkA4t9//1WLT548WQAQO3fuVIvPmzdPABCTJk3KcawDBw4IPT090bFjR7X4okWLBADx1VdfCaVSmWO/LVu2iHPnzuWbZ+fOnYWenp7Ys2dPjtdSU1PFxIkT892/oDIyMkRaWppWjlUQgwcPFqamplo7nlKpFC9fvszz9UGDBmn1fIWpc+fOokqVKoV6jjt37ggAYtGiRWrxI0eOCABi1KhRhXr+3CQlJRX5OYkKCwshKvbyKoQOHjwoAIh58+apYi9fvhTW1taievXqIiMjI9fjDR48WAAQwcHBqn3Kli0ratasKTIzM98px3/++UcAEMOGDSvQ9h4eHsLDwyNHfNCgQWpfrK9/CS5btky4uLgIPT098c8//wh9fX0xa9asHMe4deuWACBWrlypisXFxYlx48YJBwcHIZfLhaurq/jhhx+EQqHIN08AOR6bNm0SQmQVZN99951wcXERcrlcVKlSRUyZMkWkpqaqHaNKlSqic+fO4ujRo8LNzU0YGhqKZcuW5XnOghZCu3btEo0aNRJGRkaiXLlyol+/fuLBgwe5blerVi1haGgoPvjgA7F3794cn3P2e505c6bqeUJCghg3bpyoUqWKkMvlonz58sLT01OEhIQIIbKu4ZufTfYxs69b9meV7ebNm6Jnz57CxsZGGBkZierVq4upU6fm+z7zKoSSkpIEANGhQwe1eEGvdWxsrOjfv78wNzcXlpaWYuDAgSI0NDRH3tnXIyIiQnz88cfCzMxMdO3aVQghhEKhEMuWLRO1a9cWhoaGwtbWVgwfPlw8f/5c7Vz//vuv6NChgyhXrpwwMjISTk5OYvDgwWrb/Prrr6JRo0bCzMxMmJubiz
p16ojly5erXj9x4oQAIE6cOKG2X0F+DrLfw4MHD0TXrl2FqampsLGxERMnTnzn33kqPQreBkxUzGTfjrC2tlbFTp8+jbi4OIwbNy7PWxwDBw7Epk2bcPDgQTRv3hynT5/G8+fP8dVXX73zbYwDBw4AAAYMGPBO+7/Npk2bkJqaiuHDh8PQ0BD29vbw8PDArl27MHPmTLVtd+7cCX19ffTs2RMA8PLlS3h4eODhw4cYMWIEKleujLNnz2LKlCmIjo7Ot4/L1q1bsX79epw/fx4///wzAKhuDw4dOhSbN2/GZ599hokTJ+LcuXOYP38+bt68iX379qkdJywsDH369MGIESMwbNgw1KhR470+j+zbpU2aNMH8+fPx+PFjrFixAmfOnMGlS5dgZWUFADh06BB8fHxQt25dzJ8/H3FxcRgyZAgqVar01nOMHDkSe/bswZgxY1C7dm08e/YMp0+fxs2bN9GoUSNMmzYN8fHxePDgAZYtWwYA+fZtunLlClq3bg0DAwMMHz4cTk5OuH37Nn7//XfMnTtX488gt5//gl5rpVKJLl264Pz58xg1ahRq1qyJ/fv3Y9CgQbmeKzMzE15eXmjVqhUWL14MExMTAMCIESNU1+LLL7/EnTt38NNPP+HSpUs4c+YMDAwM8OTJE3To0AHly5eHn58frKysEBUVhb1796qO/+eff6JPnz5o164dFixYAAC4efMmzpw5g3HjxuX5GRT05wDIumXu5eWFZs2aYfHixTh27BiWLFkCV1dXjBo1SuPPn0oRqSsxorfJbhE6duyYePr0qbh//77Ys2ePKF++vDA0NBT3799Xbbt8+XIBQOzbty/P4z1//lwAEN27dxdCCLFixYq37vM23bp1EwBEXFxcgbbXtEXIwsJCPHnyRG3bdevWCQDi6tWravHatWuLtm3bqp7PmTNHmJqaivDwcLXt/Pz8hL6+vrh3716+uebWQpPdcjB06FC1+KRJkwQA8ddff6liVapUEQDE0aNH8z1Pfud7XXp6urC1tRV16tQRKSkpqnh2C+GMGTNUsbp16woHBweRmJioigUFBam13mTDGy1ClpaW4osvvsg317xujeXWIvThhx8Kc3NzcffuXbVtc7sVm9uxZs+eLZ4+fSpiYmLEqVOnRJMmTQQAsXv3btW2Bb3W//vf/wQAtRYXhUIh2rZtm2uLEADh5+endsxTp04JAGL79u1q8aNHj6rF9+3bl2uL7uvGjRsnLCws8m2debNFSJOfg+z38N1336kds2HDhsLNzS3Pc5JuYI8zKjE8PT1Rvnx5ODo64rPPPoOpqSkOHDgABwcH1TaJiYkAAHNz8zyPk/1aQkKC2n/z2+dttHGM/PTo0QPly5dXi3Xv3h1lypTBzp07VbFr167hxo0b8PHxUcV2796N1q1bw9raGrGxsaqHp6cnFAoF/v77b43zOXz4MABgwoQJavGJEycCyGqJeZ2zszO8vLw0Pk9uLly4gCdPnmD06NFqneQ7d+6MmjVrqs796NEjXL16FQMHDlRrqfHw8ChQ52IrKyucO3cOjx49eu+cnz59ir///huff/45KleurPaaTCYr0DFmzpyJ8uXLw87ODq1bt8bNmzexZMkSfPbZZ6ptCnqtjx49CgMDAwwbNky1r56eHr744os8z/9mq8nu3bthaWmJ9u3bq53Lzc0NZmZmOHHiBACoWmUOHjyIjIyMXI9tZWWF5ORk/PnnnwX6LICC/xy8buTIkWrPW7dujcjIyAKfk0onFkJUYqxatQp//vkn9uzZg06dOiE2NhaGhoZq22QXItkFUW7eLJYsLCzeus/baOMY+XF2ds4Rs7GxQbt27bBr1y5VbOfOnShTpgy6d++uiv333384evQoypcvr/bw9PQEADx58kTjfO7evQs9PT1UrVpVLW5nZwcrKyvcvXv3rfm/q+xj53Z7rWbNmqrXs//7Zo55xd60cOFCXLt2DY6OjmjatClmzZr1zl+a2fvVqV
PnnfYHgOHDh+PPP//E77//jvHjxyMlJQUKhUJtm4Je67t378Le3l51iytbXp9LmTJl1P7gyD5XfHw8bG1tc5wvKSl
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Random Forest Metrics:\n",
"Accuracy: 0.9122\n",
"Precision: 0.9571\n",
"Recall: 0.8590\n",
"F1-Score: 0.9054\n",
"ROC-AUC: 0.9773\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+LElEQVR4nO3deZyN5f/H8feZMftKjEHMguz7FrJFRERSSTJjSRSyFdrsFLJHJGuqIVnKt5ItWb4Ig0hMliRGjG1mzDAz9+8Pjzk/x8wwZ5xx3L6v5+NxHs257uu+z+e+59Z5z3Vf9zkWwzAMAQAAmISLswsAAACwB+EFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEF/xOOHDmipk2bKiAgQBaLRStWrHDo9o8fPy6LxaL58+c7dLtm1rBhQzVs2NBh24uPj1e3bt0UHBwsi8Wivn37OmzbZsF5BtxAeME98+eff+rVV19VeHi4PD095e/vr7p162rKlCm6evVqrr52RESE9u/fr9GjR2vRokWqXr16rr7evRQZGSmLxSJ/f/9Mj+ORI0dksVhksVg0YcIEu7f/zz//aNiwYYqOjnZAtTk3ZswYzZ8/Xz179tSiRYv08ssv5+rrhYaGWo+bxWKRj4+PatasqYULF+bq65rNrcfp5kdSUpKzy8tg69atGjZsmC5evOjsUnAX8ji7APxvWL16tZ577jl5eHioU6dOKl++vK5du6bNmzfrzTff1IEDBzR79uxcee2rV69q27Zteuedd9SrV69ceY2QkBBdvXpVbm5uubL9O8mTJ48SExP17bff6vnnn7dZtnjxYnl6eub4jeSff/7R8OHDFRoaqsqVK2d7vTVr1uTo9bKyfv16Pfrooxo6dKhDt3s7lStX1oABAyRJp0+f1pw5cxQREaHk5GS98sor96yO+93Nx+lm7u7uTqjm9rZu3arhw4crMjJSgYGBzi4HOUR4Qa47duyY2rdvr5CQEK1fv16FChWyLnv99dcVExOj1atX59rr//vvv5KUq/+jslgs8vT0zLXt34mHh4fq1q2rL7/8MkN4+eKLL/TUU09p2bJl96SWxMREeXt7O/yN6+zZsypbtqzDtpeSkqK0tLTb1lmkSBF17NjR+jwyMlLh4eGaNGkS4eUmtx4nR0lLS9O1a9ec+m8L9ycuGyHXjRs3TvHx8frss89sgku6EiVK6I033rA+T0lJ0ciRI1W8eHF5eHgoNDRUb7/9tpKTk23WCw0NVcuWLbV582bVrFlTnp6eCg8PtxnWHzZsmEJCQiRJb775piwWi0JDQyXdeCNK//lmw4YNk8VisWn76aef9NhjjykwMFC+vr4qVaqU3n77bevyrOYirF+/XvXq1ZOPj48CAwPVunVr/f7775m+XkxMjPWvwYCAAHXu3FmJiYlZH9hbdOjQQd9//73NcPjOnTt15MgRdejQIUP/uLg4DRw4UBUqVJCvr6/8/f3VvHlz7d2719pn48aNqlGjhiSpc+fO1ssB6fvZsGFDlS9fXrt27VL9+vXl7e1tPS63znmJiIiQp6dnhv1v1qyZ8ubNq3/++SfT/dq4caMsFouOHTum1atXW2s4fvy4pBuhpmvXripYsKA8PT1VqVIlLViwwGYb6b+fCRMmaPLkydZz6+DBg9k6tukKFCig0qVL688//7Rp/+WXX/Tcc8+pWLFi8vDwUNGiRdWvX78Ml/EiIyPl6+urU6dOqU2bNvL19VWBAgU0cOBApaam2vS9ePGiIiMjFRAQoMDAQEVERGR5qcOe8+zw4cPq2LGjAgICVKBAAb333nsyDEMnT55U69at5e/vr+DgYH300Ud2HZvbSUhI0IABA1S0aFF5eHioVKlSmjBhggzDsOlnsVjUq1cvLV68WOXKlZOHh4d++OEHSdKpU6fUpUsXFSxYUB4eHipXrpzmzp2b4bWmTZumcuXKyd
vbW3nz5lX16tX1xRdfWI/Bm2++KUkKCwvLcC7BPBh5Qa779ttvFR4erjp16mSrf7du3bRgwQK1a9dOAwYM0Pbt2zV27Fj9/vvvWr58uU3fmJgYtWvXTl27dlVERITmzp2ryMhIVatWTeXKlVPbtm0VGBiofv366cUXX1SLFi3k6+trV/0HDhxQy5YtVbFiRY0YMUIeHh6KiYnRli1bbrve2rVr1bx5c4WHh2vYsGG6evWqpk2bprp162r37t0ZgtPzzz+vsLAwjR07Vrt379acOXMUFBSkDz/8MFt1tm3bVj169NA333yjLl26SLox6lK6dGlVrVo1Q/+jR49qxYoVeu655xQWFqbY2FjNmjVLDRo00MGDB1W4cGGVKVNGI0aM0Pvvv6/u3burXr16kmTzuzx//ryaN2+u9u3bq2PHjipYsGCm9U2ZMkXr169XRESEtm3bJldXV82aNUtr1qzRokWLVLhw4UzXK1OmjBYtWqR+/frp4Ycftl6eKFCggK5evaqGDRsqJiZGvXr1UlhYmJYuXarIyEhdvHjRJhRL0rx585SUlKTu3bvLw8ND+fLly9axTZeSkqK///5befPmtWlfunSpEhMT1bNnTz300EPasWOHpk2bpr///ltLly616ZuamqpmzZqpVq1amjBhgtauXauPPvpIxYsXV8+ePSVJhmGodevW2rx5s3r06KEyZcpo+fLlioiIyFCTvefZCy+8oDJlyuiDDz7Q6tWrNWrUKOXLl0+zZs3S448/rg8//FCLFy/WwIEDVaNGDdWvX/+Ox+X69es6d+6cTZu3t7e8vb1lGIaefvppbdiwQV27dlXlypX1448/6s0339SpU6c0adIkm/XWr1+vJUuWqFevXsqfP79CQ0MVGxurRx991BpuChQooO+//15du3bV5cuXrZO3P/30U/Xp00ft2rXTG2+8oaSkJO3bt0/bt29Xhw4d1LZtWx0+fFhffvmlJk2apPz580u6cS7BZAwgF126dMmQZLRu3Tpb/aOjow1JRrdu3WzaBw4caEgy1q9fb20LCQkxJBmbNm2ytp09e9bw8PAwBgwYYG07duyYIckYP368zTYjIiKMkJCQDDUMHTrUuPmfxqRJkwxJxr///ptl3emvMW/ePGtb5cqVjaCgIOP8+fPWtr179xouLi5Gp06dMrxely5dbLb5zDPPGA899FCWr3nzfvj4+BiGYRjt2rUzGjdubBiGYaSmphrBwcHG8OHDMz0GSUlJRmpqaob98PDwMEaMGGFt27lzZ4Z9S9egQQNDkvHJJ59kuqxBgwY2bT/++KMhyRg1apRx9OhRw9fX12jTps0d99Ewbvy+n3rqKZu2yZMnG5KMzz//3Np27do1o3bt2oavr69x+fJl635JMvz9/Y2zZ89m+/WaNm1q/Pvvv8a///5r7N+/33j55ZcNScbrr79u0zcxMTHD+mPHjjUsFotx4sQJa1tERIQhyeb4GoZhVKlSxahWrZr1+YoVKwxJxrhx46xtKSkpRr169e76POvevbvNNh9++GHDYrEYH3zwgbX9woULhpeXlxEREZGt4yQpw2Po0KE2+zJq1Cib9dq1a2dYLBYjJibG2ibJcHFxMQ4cOGDTt2vXrkahQoWMc+fO2bS3b9/eCAgIsB7/1q1bG+XKlbttvePHjzckGceOHbvjvuH+xWUj5KrLly9Lkvz8/LLV/z//+Y8kqX///jbt6X9t3zo3pmzZstbRAOnGX1ClSpXS0aNHc1zzrdLnyqxcuVJpaWnZWuf06dOKjo5WZGSkzV/3FStW1BNPPGHdz5v16NHD5nm9evV0/vx56zHMjg4dOmjjxo06c+aM1q9frzNnzmR6yUi6MU/GxeXG/wJSU1N1/vx56yWx3bt3Z/s1PTw81Llz52z1bdq0qV599VWNGDFCbdu2laenp2bNmpXt17rVf/7zHwUHB+vFF1+0trm5ualPnz6Kj4/Xzz//bNP/2Wefteuv7DVr1qhAgQ
IqUKCAKlSooEWLFqlz584aP368TT8vLy/rzwkJCTp37pzq1KkjwzC0Z8+eDNvN7Hd98zn7n//8R3ny5LGOxEiSq6u
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB2CUlEQVR4nO3deXhM5/sG8Hsyksm+ENkIWew7sQaNJSS17xSRqL1Fba1YGju1L629JShfWylFpaWWIqUNsUtKhCBBiEQii8y8vz/8nHaahAxJTpK5P9eVq51nznJPJjFP3vOecxRCCAEiIiIiPWQgdwAiIiIiubARIiIiIr3FRoiIiIj0FhshIiIi0ltshIiIiEhvsREiIiIivcVGiIiIiPQWGyEiIiLSW2yEiIiISG+xESKiArFw4UK4ublBqVSiTp06cseRhYuLCwICAuSOQUT/wkaI9EJwcDAUCoX0VaJECZQpUwYBAQG4f/9+tusIIbBlyxZ88MEHsLa2hqmpKWrWrImZM2ciJSUlx33t3bsXH374IWxtbWFkZAQnJyf06tULv/32W66ypqWlYenSpWjUqBGsrKxgbGyMSpUqYeTIkYiMjHyn1y+3X375BV988QWaNm2KjRs3Yu7cufm6v4CAAK33W6VSoVKlSggKCkJaWlq+7rso+e/36d9fhw8fljteFg8ePMD06dMRHh4udxQqRkrIHYCoIM2cOROurq5IS0vDH3/8geDgYJw6dQpXrlyBsbGxtJxarUbfvn2xc+dONG/eHNOnT4epqSl+//13zJgxA7t27cKRI0dgb28vrSOEwMcff4zg4GDUrVsX48aNg4ODA2JjY7F37160bt0ap0+fhqenZ4754uPj4evri7CwMHTo0AF9+/aFubk5IiIisH37dqxbtw4ZGRn5+j3KD7/99hsMDAzw3XffwcjIqED2qVKp8O233wIAEhMTsW/fPsyaNQu3bt3C1q1bCyRDUfDv79O/1a5dW4Y0b/bgwQPMmDEDLi4uejuqSPlAEOmBjRs3CgDizz//1KpPnDhRABA7duzQqs+dO1cAEBMmTMiyrf379wsDAwPh6+urVV+4cKEAIMaMGSM0Gk2W9TZv3izOnj37xpzt27cXBgYGYvfu3VmeS0tLE+PHj3/j+rn18uVLkZ6enifbyo2BAwcKMzOzPNueRqMRL168yPF5f3//LPvTaDSicePGQqFQiLi4uDzLoovy5csLf39/Wfadney+T3kpJSUlT7f3559/CgBi48aNebpd0m88NEZ6rXnz5gCAW7duSbXU1FQsXLgQlSpVwrx587Ks07FjR/j7++Pw4cP4448/pHXmzZuHKlWqYNGiRVAoFFnW8/PzQ8OGDXPMcvbsWRw8eBCDBg1C9+7dszyvUqmwaNEi6XGLFi3QokWLLMsFBATAxcVFehwdHQ2FQoFFixZh2bJlcHd3h0qlwoULF1CiRAnMmDEjyzYiIiKgUCjwzTffSLVnz55hzJgxcHZ2hkqlQoUKFTB//nxoNJocXxMAKBQKbNy4ESkpKdJhl+DgYABAZmYmZs2aJWVycXHB5MmTkZ6errUNFxcXdOjQASEhIahfvz5MTEywdu3aN+43uxzNmjWDEAJRUVFS/c6dO/jkk09QuXJlmJiYoFSpUujZsyeio6O11n99ePX06dMYN24cSpcuDTMzM3Tt2hWPHz/WWlYIgdmzZ6Ns2bIwNTVFy5YtcfXq1WxzRUVFoWfPnihZsiRMTU3RuHFjHDx4UGuZ48ePQ6FQYOfOnZgxYwbKlCkDCwsL9OjRA4mJiUhPT8eYMWNgZ2cHc3NzDBw4MMv38H2sWrUK1atXh0qlgpOTEz799FM8e/ZMa5kWLVqgRo0aCAsLwwcffABTU1NMnjwZAJCeno5p06ahQoUKUKlUcHZ2xhdffJEl46+//opmzZrB2toa5ubmqFy5srSN48ePo0GDBgCAgQMHZvlZInpXPDRGeu31h52NjY
1UO3XqFBISEvDZZ5+hRInsf0UGDBiAjRs34sCBA2jcuDFOnTqFp0+fYsyYMVAqle+UZf/+/QBeNUz5YePGjUhLS8PQoUOhUqng6OgILy8v7Ny5E9OmTdNadseOHVAqlejZsycA4MWLF/Dy8sL9+/cxbNgwlCtXDmfOnMGkSZMQGxuLZcuW5bjfLVu2YN26dTh37px0COb14cHBgwdj06ZN6NGjB8aPH4+zZ89i3rx5uH79Ovbu3au1nYiICHz00UcYNmwYhgwZgsqVK+v8Pcju/f7zzz9x5swZ9OnTB2XLlkV0dDRWr16NFi1a4Nq1azA1NdXaxqhRo2BjY4Np06YhOjoay5Ytw8iRI7Fjxw5pmaCgIMyePRvt2rVDu3btcP78ebRt2zbLYc2HDx/C09MTL168wOjRo1GqVCls2rQJnTp1wu7du9G1a1et5efNmwcTExMEBgbi5s2b+Prrr2FoaAgDAwMkJCRg+vTp0iFfV1dXBAUF5er7Eh8fr/XY0NAQVlZWAIDp06djxowZ8Pb2xogRIxAREYHVq1fjzz//xOnTp2FoaCit9+TJE3z44Yfo06cP+vfvD3t7e2g0GnTq1AmnTp3C0KFDUbVqVVy+fBlLly5FZGQkfvzxRwDA1atX0aFDB9SqVQszZ86ESqXCzZs3cfr0aQBA1apVMXPmTAQFBWHo0KHSHzFvOtRMlCtyD0kRFYTXh8aOHDkiHj9+LGJiYsTu3btF6dKlhUqlEjExMdKyy5YtEwDE3r17c9ze06dPBQDRrVs3IYQQy5cvf+s6b9O1a1cBQCQkJORqeS8vL+Hl5ZWl7u/vL8qXLy89vn37tgAgLC0txaNHj7SWXbt2rQAgLl++rFWvVq2aaNWqlfR41qxZwszMTERGRmotFxgYKJRKpbh79+4bs2Z3CCY8PFwAEIMHD9aqT5gwQQAQv/32m1QrX768ACAOHz78xv38d3+PHz8Wjx8/Fjdv3hSLFi0SCoVC1KhRQ+vQZXaH2EJDQwUAsXnzZqn2+mfI29tba/2xY8cKpVIpnj17JoQQ4tGjR8LIyEi0b99ea7nJkycLAFqHxsaMGSMAiN9//12qPX/+XLi6ugoXFxehVquFEEIcO3ZMABA1atQQGRkZ0rIfffSRUCgU4sMPP9TK36RJE62fgTd9nwBk+Xr9c/X6tbRt21bKIoQQ33zzjQAgNmzYINW8vLwEALFmzRqtfWzZskUYGBhovUYhhFizZo0AIE6fPi2EEGLp0qUCgHj8+HGOeXlojPIDD42RXvH29kbp0qXh7OyMHj16wMzMDPv370fZsmWlZZ4/fw4AsLCwyHE7r59LSkrS+u+b1nmbvNjGm3Tv3h2lS5fWqnXr1g0lSpTQGs24cuUKrl27ht69e0u1Xbt2oXnz5rCxsUF8fLz05e3tDbVajZMnT+qc59ChQwCAcePGadXHjx8PAFkOD7m6usLHxyfX209JSUHp0qVRunRpVKhQARMmTEDTpk2xb98+rUOXJiYm0v+/fPkST548QYUKFWBtbY3z589n2e7QoUO11m/evDnUajXu3LkDADhy5AgyMjIwatQoreXGjBmTZVuHDh1Cw4YN0axZM6lmbm6OoUOHIjo6GteuXdNafsCAAVojMI0aNZIm6f9bo0aNEBMTg8zMzLd9m2BsbIxff/1V62vx4sVar2XMmDEwMPjn42LIkCGwtLTM8h6pVCoMHDhQq7Zr1y5UrVoVVapU0frZadWqFQDg2LFjAABra2sAwL59+956uJUoL/HQGOmVlStXolKlSkhMTMSGDRtw8uRJqFQqrWVeNyKvG6Ls/LdZsrS0fOs6b/Pvbbz+UMhLrq6uWWq2trZo3bo1du7ciVmzZgF4dVisRIkS6Natm7Tc33//jUuXLmVppF579OiRznnu3LkDAwMDVKhQQavu4OAAa2trqbF4U/43MTY2xk8//QQAuHfvHhYsWIBHjx5pNT7AP/O7Nm7ciPv370MIIT2XmJiYZb
vlypXTevz6MFtCQoL0ugCgYsWKWsuVLl1a65Dc62UbNWqUZR9Vq1aVnq9Ro0aO+359+MrZ2TlLXaPRIDExEaVKlcq
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Gradient Boosting Metrics:\n",
"Accuracy: 0.9185\n",
"Precision: 0.9577\n",
"Recall: 0.8718\n",
"F1-Score: 0.9128\n",
"ROC-AUC: 0.9745\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAZ0lEQVR4nO3dd3xUVf7/8fcQSCGkgBACAiGhF5EmggEBRZogEBRRgdBEUEABV2BXpQiygitVQUTpKr2IutJVMEsPIIgm0lyl9/R2fn/wyyyTApmQMFy+r+fjMQ8y55577+feCck7Z869YzPGGAEAAFhEAVcXAAAA4AzCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCywpKipKLVu2lJ+fn2w2m1avXp2n2z9+/LhsNpvmzZuXp9u1smbNmqlZs2Z5tr2YmBj17dtXgYGBstlseu211/Js23e7rL6/Ro8eLZvN5rqi7jGcz3sb4QW59vvvv+ull15SSEiIPD095evrq9DQUE2dOlXx8fH5uu/w8HAdPHhQ48eP18KFC1W/fv183d+d1LNnT9lsNvn6+mZ5HqOiomSz2WSz2fT+++87vf2//vpLo0ePVmRkZB5Um3vvvvuu5s2bpwEDBmjhwoXq3r17vu8zLS1NCxYs0BNPPKHixYurUKFCCggIUMuWLTV79mwlJibmew2u5OxrP2/ePPv3WvojICBAzZs317fffpu/xeZAXFycRo8era1bt7q6FNxpBsiFdevWGS8vL+Pv728GDx5sZs+ebWbMmGG6du1qChUqZF588cV823dcXJyRZP7xj3/k2z7S0tJMfHy8SUlJybd9ZCc8PNwULFjQuLm5mSVLlmRaPmrUKOPp6WkkmUmTJjm9/V27dhlJZu7cuU6tl5iYaBITE53eX3YefvhhExoammfbu5W4uDjTqlUrI8k88sgjZsKECeazzz4z77//vmnfvr1xc3MzvXv3viO1HDt2LNNrkJycbOLj4/N1v86+9nPnzjWSzNixY83ChQvNggULzKRJk0yNGjWMJPPVV1/la723cu7cOSPJjBo1KtOyO3E+4ToFXZaaYFnHjh1T165dFRQUpM2bN6tUqVL2Za+88oqio6P19ddf59v+z507J0ny9/fPt33YbDZ5enrm2/ZvxcPDQ6Ghofriiy/UpUsXh2Wff/65nnzySa1YseKO1BIXF6fChQvL3d09T7d79uxZVa9ePc+2l5KSorS0tGzrHDJkiL777jtNmTJFr776qsOyYcOGKSoqShs2bLitfdyOggULqmDBu/NHcps2bRxGN/v06aOSJUvqiy++ULt27VxYWfbu5vOJPODq9ATr6d+/v5Fktm/fnqP+ycnJZuzYsSYkJMS4u7uboKAgM3LkSJOQkODQLygoyDz55JPmxx9/NA899JDx8PAwwcHBZv78+fY+o0aNMpIcHkFBQcaY6yMW6V/fKH2dG61fv96EhoYaPz8/4+3tbSpXrmxGjhxpX57VX8bGGLNp0ybTuHFjU7hwYePn52eeeuopc/jw4Sz3FxUVZcLDw42fn5/x9fU1PXv2NLGxsbc8X+Hh4cbb29vMmzfPeHh4mEuXLtmX7dy500gyK1asyDTycuHCBTNs2DBTs2ZN4+3tbXx8fEzr1q1NZGSkvc+WLVsynb8bj7Np06amRo0aZvfu3aZJkybGy8vLvPrqq/ZlTZs2tW+rR48exsPDI9Pxt2zZ0vj7+5s///wzy+PLroZjx44ZY4w5c+aM6d27twkICDAeHh6mVq1aZt68eQ7bSH99Jk2aZCZPnmxCQkJMgQIFzL59+7Lc58mTJ42bm5tp3br1Tc68o5vtIzEx0bz11lumbt26xtfX1xQuXNg0btzYbN68OdN2Ll26ZMLDw42vr6/x8/MzPXr0MPv27cv0/ZXV96kxxixcuNDUrVvXeHp6mqJFi5pnn33WnDx50q
FP+ut26NAh06xZM+Pl5WVKly5t3nvvPXufW732WUkfedm1a5dDe1pamvH19TU9evRwaI+JiTFDhw41ZcqUMe7u7qZy5cpm0qRJJi0tzaFfTn8m7Nq1y7Rs2dLcd999xtPT05QvX9706tXL4fXJ+EgfhcnqfEoyr7zyilm1apWpUaOGcXd3N9WrVzfffvttpmPfsmWLqVevnvHw8DAhISFm1qxZ2b5GuPOIpXDaV199pZCQED3yyCM56t+3b1/Nnz9fTz/9tIYNG6YdO3ZowoQJ+uWXX7Rq1SqHvtHR0Xr66afVp08fhYeH67PPPlPPnj1Vr1491ahRQ2FhYfL399eQIUP03HPPqW3btipSpIhT9R86dEjt2rVTrVq1NHbsWHl4eCg6Olrbt2+/6XobN25UmzZtFBISotGjRys+Pl7Tp09XaGio9u7dq/Llyzv079Kli4KDgzVhwgTt3btXc+bMUUBAgN57770c1RkWFqb+/ftr5cqV6t27t6Troy5Vq1ZV3bp1M/U/evSoVq9erWeeeUbBwcE6c+aMPv74YzVt2lSHDx9W6dKlVa1aNY0dO1Zvv/22+vXrpyZNmkiSw2t54cIFtWnTRl27dlW3bt1UsmTJLOubOnWqNm/erPDwcEVERMjNzU0ff/yx1q9fr4ULF6p06dJZrletWjUtXLhQQ4YMUZkyZTRs2DBJUokSJRQfH69mzZopOjpaAwcOVHBwsJYtW6aePXvq8uXLmUZM5s6dq4SEBPXr108eHh4qVqxYlvv89ttvlZqaqm7dut3irGeW1T6uXr2qOXPm6LnnntOLL76oa9eu6dNPP1WrVq20c+dO1a5dW5JkjFGHDh20bds29e/fX9WqVdOqVasUHh6eo32PHz9eb731lrp06aK+ffvq3Llzmj59uh599FHt27fPYfTx0qVLat26tcLCwtSlSxctX75cw4cP1wMPPKA2bdrk6LXPzpUrV3T+/HkZY3T27FlNnz5dMTExDufTGKOnnnpKW7ZsUZ8+fVS7dm199913+tvf/qY///xTkydPtvfNyc+Es2fPqmXLlipRooRGjBghf39/HT9+XCtXrpR0/ftl5syZGjBggDp16qSwsDBJUq1atW56LNu2bdPKlSv18ssvy8fHR9OmTVPnzp118uRJ3XfffZKkffv2qXXr1ipVqpTGjBmj1NRUjR07ViVKlMjBq4Y7wsXhCRZz5coVI8l06NAhR/0jIyONJNO3b1+H9tdff91IcvhLNSgoyEgyP/zwg73t7NmzxsPDwwwbNszeduNfxDfK6cjL5MmTjSRz7ty5bOvOauSldu3aJiAgwFy4cMHetn//flOgQAGHv0DT95dx/kSnTp3Mfffdl+0+bzwOb29vY4wxTz/9tHn88ceNMcakpqaawMBAM2bMmCzPQUJCgklNTc10HB4eHmbs2LH2tpvNe2jatKmRZGbNmpXlshtHXowx5rvvvjOSzLhx48zRo0dNkSJFTMeOHW95jMb8b6TtRlOmTDGSzKJFi+xtSUlJplGjRqZIkSLm6tWr9uOSZHx9fc3Zs2dvua8hQ4YYSQ6jUMZcn8dz7tw5++P8+fP2ZTfbR0pKSqb5P5cuXTIlS5Z0eN1Xr15tJJmJEyc6rNukSZNbjrwcP37cuLm5mfHjxzvs5+DBg6ZgwYIO7emv24IFCxyOLTAw0HTu3Nnelts5LxkfHh4emUbD0o913LhxDu1PP/20sdlsJjo62hiT858Jq1atynLU50Y3m/OS3ciLu7u7vRZjrv8flmSmT59ub2vfvr0pXLiww+hhVFSUKViwICMvdwmuNoJTrl69Kkny8fHJUf9vvvlGkjR06FCH9vS/tjPOjalevbr9L0Lp+l9XVapU0dGjR3Ndc0bpf62uWbNGaWlpOVrn1KlTioyMVM+ePR3+uq9Vq5aeeOIJ+3HeqH///g7PmzRpogsXLtjPYU48//zz2rp1q06fPq3Nmzfr9OnTev7557Ps6+HhoQIFrv
+XTk1N1YULF1SkSBFVqVJFe/fuzfE+PTw81KtXrxz1bdmypV566SWNHTtWYWFh8vT01Mcff5zjfWX0zTffKDAwUM8
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4PUlEQVR4nO3dd1hT5/sG8DuMsEEUEUSU4d6KEwcOFKp1V3GLVeuus19nHXXVurDuUcVZR13URasVJ2qL4haquAUURRAEIsn7+8MfqRFQgoHDuD/XxdXmyRl3EpCH97znHJkQQoCIiIioENKTOgARERGRVNgIERERUaHFRoiIiIgKLTZCREREVGixESIiIqJCi40QERERFVpshIiIiKjQYiNEREREhRYbISIiIiq02AgRkdYWLFgAFxcX6Ovro2bNmlLHyVVOTk7w9fVVPw4KCoJMJkNQUJBkmQoSvp+U29gIUb7j7+8PmUym/jIwMICDgwN8fX3x5MmTDNcRQmDLli1o2rQpihQpAlNTU1SrVg0//PADEhMTM93Xvn378MUXX8DGxgZyuRwlS5ZEt27d8Ndff2Upa3JyMpYsWYL69evDysoKxsbGKF++PEaMGIHw8PBsvX6p/fHHH/jf//6HRo0aYePGjZg7d26u7Pf06dPo1q0bHBwcIJfLYWVlhfr16+OHH35AdHR0rmSQ0ty5c7F///4sLXv//n2NnxGZTAZLS0vUrFkTy5cvh1KpzNmwWbBy5Ur4+/tLHYMIMt5rjPIbf39/9O/fHz/88AOcnZ2RnJyM8+fPw9/fH05OTrh+/TqMjY3VyyuVSvTs2RO7du1CkyZN0LlzZ5iamuL06dPYvn07KleujGPHjqFEiRLqdYQQ+Prrr+Hv749atWrhq6++gp2dHSIjI7Fv3z6EhITg7NmzcHd3zzRnTEwMvL29ERISgi+//BKenp4wNzdHWFgYduzYgaioKCgUihx9r3LCxIkTsWDBAiQlJUEul+fKPqdNm4ZZs2bBxcUF3bt3h4uLC5KTkxESEoI9e/bAxsYGd+/ezZUsTk5OaNasmfqXuEqlgkKhgFwuh55ezv1taW5ujq+++ipLzcP9+/fh7OyMHj16oE2bNgCAuLg4HD58GIcPH8b48eOxYMGCHMuaFVWrVoWNjU26kZ/cej+J1ARRPrNx40YBQPz9998a9QkTJggAYufOnRr1uXPnCgBi/Pjx6bYVEBAg9PT0hLe3t0Z9wYIFAoAYPXq0UKlU6dbbvHmzuHDhwkdztm3bVujp6Ynffvst3XPJycli3LhxH10/q96+fStSUlJ0sq2s6N+/vzAzM9PZ9lQqlXjz5k2mz+/YsUMAEN26dcvwdb569UpMnz79s/ahjTJlyoh+/frpZFvaMDMzy/J+7927JwCIBQsWaNRVKpWoW7euKFmyZA4k1E6VKlWEh4eH1DGIBBshyncya4QOHjwoAIi5c+eqa2/evBHW1taifPny4u3btxlur3///gKACA4OVq9TtGhRUbFiRZGampqtjOfPnxcAxKBBg7K0vIeHR4a/FPr16yfKlCmjfvz+L7glS5YIFxcXoaenJ86fPy/09fXFjBkz0m3j9u3bAoBYtmyZuhYbGytGjRolSpUqJeRyuXB1dRU//vijUCqVH80JIN3Xxo0bhRDvGrIffvhBuLi4CLlcLsqUKSMmTZokkpOTNbZRpkwZ0bZtW3H06FHh5uYmjIyMxJIlSzLdZ/ny5YWNjY14/fr1R7NldR8bNmwQzZs3F8WLFxdyuVxUqlRJrFy5Mt02VCqVmDVrlnBwcBAmJiaiWbNm4vr16+kaoRMnTggA4sSJExrrnz9/Xnh5eQlLS0thYmIimjZtKs6cOaOxzPTp0wUA8e+//4p+/foJKysrYWlpKXx9fUViYqJ6uYze9481RZk1QkII8eWXX4rSpUunq69YsUJUrlxZyOVyYW9vL4YNGyZiY2PTLbdr1y5Ru3ZtYWxsLI
oVKyZ69eolHj9+rLFMZGSk8PX1FQ4ODkIulws7OzvRvn17ce/ePSHEu8/nw9eT9v2f0fvp4eEhqlSpIm7cuCGaNWsmTExMRMmSJcX8+fPT5bt//75o166dMDU1FcWLFxejR48WR48ezfAzIhJCCINcGHQiyhX3798HAFhbW6trZ86cQWxsLEaNGgUDg4y/3fv27YuNGzfi4MGDaNCgAc6cOYOXL19i9OjR0NfXz1aWgIAAAECfPn2ytf6nbNy4EcnJyfjmm29gZGQEe3t7eHh4YNeuXZg+fbrGsjt37oS+vj66du0KAHjz5g08PDzw5MkTDB48GKVLl8a5c+cwadIkREZGws/PL9P9btmyBWvXrsXFixexfv16AFAfHhw4cCA2bdqEr776CuPGjcOFCxcwb9483Lp1C/v27dPYTlhYGHr06IHBgwdj0KBBqFChQob7Cw8PR3h4OAYOHAhzc3Ot3qPM9rFq1SpUqVIF7du3h4GBAX7//XcMGzYMKpUKw4cPV68/bdo0zJ49G23atEGbNm1w6dIltG7dOkuHM//66y988cUXcHNzw/Tp06Gnp4eNGzeiRYsWOH36NOrVq6exfLdu3eDs7Ix58+bh0qVLWL9+PWxtbTF//nwA7973gQMHol69evjmm28AAK6urp/M8ebNG8TExAAA4uPjceTIERw9ehSTJk3SWG7GjBmYOXMmPD09MXToUISFhWHVqlX4+++/cfbsWRgaGgL477B03bp1MW/ePERHR2Pp0qU4e/YsLl++jCJFigAAunTpghs3bmDkyJFwcnLCs2fP8Oeff+Lhw4dwcnKCn58fRo4cCXNzc0yZMgUANA5NZyQ2Nhbe3t7o3LkzunXrht9++w0TJkxAtWrV8MUXXwAAEhMT0aJFC0RGRmLUqFGws7PD9u3bceLEiU++V1SISd2JEWkrbUTo2LFj4vnz5+LRo0fit99+E8WLFxdGRkbi0aNH6mX9/PwEALFv375Mt/fy5UsBQHTu3FkIIcTSpUs/uc6ndOrUSQDI8C/qjGg7ImRpaSmePXumseyaNWsEAHHt2jWNeuXKlUWLFi3Uj2fNmiXMzMxEeHi4xnITJ04U+vr64uHDhx/N2q9fv3SHxkJDQwUAMXDgQI36+PHjBQDx119/qWtpowFHjx796H6EEOLAgQMCgPDz89Ooq1Qq8fz5c42v90f8PraPjA6ReXl5CRcXF/XjZ8+eCblcLtq2batxaHTy5MnpRmM+HMFQqVSiXLlywsvLS2PdN2/eCGdnZ9GqVSt1LW1E6Ouvv9bI06lTJ1GsWDGNWnYOjWX0NXToUI1caa+1devWGiOCy5cvFwDEhg0bhBBCKBQKYWtrK6pWrSqSkpLUy6WNxE6bNk0I8W60EZmMRr0vs0NjmY0IARCbN29W11JSUoSdnZ3o0qWLurZo0SIBQOzfv19dS0pKEhUrVuSIEGWKM9Eo3/L09ETx4sXh6OiIr776CmZmZggICECpUqXUy7x+/RoAYGFhkel20p6Lj4/X+O/H1vkUXWzjY7p06YLixYtr1Dp37gwDAwPs3LlTXbt+/Tpu3rwJHx8fdW337t1o0qQJrK2tERMTo/7y9PSEUqnEqVOntM5z+PBhAMDYsWM16uPGjQMAHDp0SKPu7OwMLy+vT2437X38cDQoLi4OxYsX1/gKDQ3N0j5MTEw0thMTEwMPDw9EREQgLi4OAHDs2DEoFAqMHDkSMplMvfzo0aM/mTk0NBT//vsvevbsiRcvXqjf38TERLRs2RKnTp2CSqXSWGfIkCEaj5s0aYIXL16oX392ffPNN/jzzz/x559/Ys+ePRg+fDjWrFmj8TmlvdbRo0drTE4eNGgQLC0t1Z/dP//8g2fPnmHYsGEaJyO0bdsWFStWVC9nYmICuVyOoKAgxMbGflb+95mbm6N3797qx3K5HPXq1UNERIS6dvToUTg4OKB9+/bqmrGxMQYNGqSzHFTw8NAY5VsrVqxA+fLlERcXhw
0bNuDUqVMwMjLSWCatEUlriDLyYbNkaWn5yXU+5f1tpB0u0CVnZ+d0NRsbG7Rs2RK7du3CrFmzALw7LGZgYIDOnTu
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.metrics import confusion_matrix, roc_curve, auc\n",
"\n",
"def plot_confusion_matrix(y_true, y_pred, title):\n",
"    \"\"\"Draw the confusion matrix of the predictions as an annotated heatmap.\n",
"\n",
"    Args:\n",
"        y_true: Ground-truth class labels.\n",
"        y_pred: Predicted class labels, aligned with ``y_true``.\n",
"        title: Text placed above the heatmap.\n",
"    \"\"\"\n",
"    cm = confusion_matrix(y_true, y_pred)\n",
"    # Use a dedicated figure/axes so the heatmap never lands on axes left open\n",
"    # by an earlier plot (plot_roc_curve likewise starts its own figure).\n",
"    _, ax = plt.subplots()\n",
"    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False, ax=ax)\n",
"    ax.set_title(title)\n",
"    # Axis labels are in Russian: 'Predicted values' (x) and 'True values' (y).\n",
"    ax.set_xlabel('Предсказанные значения')\n",
"    ax.set_ylabel('Истинные значения')\n",
"    plt.show()\n",
"\n",
"def plot_roc_curve(y_true, y_pred_proba, title):\n",
"    \"\"\"Plot the ROC curve for the given scores and show its AUC in the legend.\n",
"\n",
"    Args:\n",
"        y_true: Ground-truth labels.\n",
"        y_pred_proba: Scores handed to ``roc_curve``; the caller in this cell\n",
"            passes column 1 of ``predict_proba``.\n",
"        title: Text placed above the plot.\n",
"    \"\"\"\n",
"    false_pos_rate, true_pos_rate, _ = roc_curve(y_true, y_pred_proba)\n",
"    area = auc(false_pos_rate, true_pos_rate)\n",
"\n",
"    # Start a fresh figure and draw on its axes explicitly.\n",
"    ax = plt.figure().gca()\n",
"    ax.plot(false_pos_rate, true_pos_rate, color='darkorange', lw=2,\n",
"            label=f'ROC curve (area = {area:.2f})')\n",
"    # Dashed diagonal from (0, 0) to (1, 1) as a visual reference line.\n",
"    ax.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
"    ax.set_xlim([0.0, 1.0])\n",
"    ax.set_ylim([0.0, 1.05])\n",
"    ax.set_xlabel('False Positive Rate')\n",
"    ax.set_ylabel('True Positive Rate')\n",
"    ax.set_title(title)\n",
"    ax.legend(loc=\"lower right\")\n",
"    plt.show()\n",
"\n",
"def evaluate_and_plot_model(model, X_test, y_test, model_name):\n",
"    \"\"\"Print classification metrics for a model and plot its diagnostics.\n",
"\n",
"    Prints accuracy, precision, recall, F1 and ROC-AUC, then shows the\n",
"    confusion matrix and the ROC curve.\n",
"\n",
"    Args:\n",
"        model: Object exposing ``predict`` and ``predict_proba``.\n",
"        X_test: Features passed to the model.\n",
"        y_test: True labels compared against the predictions.\n",
"        model_name: Label used in the printed header and in the plot titles.\n",
"    \"\"\"\n",
"    predicted_labels = model.predict(X_test)\n",
"    # Column 1 of predict_proba is taken as the positive-class score\n",
"    # (assumes it corresponds to label 1 -- TODO confirm against model.classes_).\n",
"    positive_scores = model.predict_proba(X_test)[:, 1]\n",
"\n",
"    # All metrics are computed before anything is printed; dict order is the\n",
"    # print order.\n",
"    metrics = {\n",
"        'Accuracy': accuracy_score(y_test, predicted_labels),\n",
"        'Precision': precision_score(y_test, predicted_labels, pos_label=1),\n",
"        'Recall': recall_score(y_test, predicted_labels, pos_label=1),\n",
"        'F1-Score': f1_score(y_test, predicted_labels, pos_label=1),\n",
"        'ROC-AUC': roc_auc_score(y_test, positive_scores),\n",
"    }\n",
"\n",
"    print(f\"{model_name} Metrics:\")\n",
"    for metric_label, metric_value in metrics.items():\n",
"        print(f\"{metric_label}: {metric_value:.4f}\")\n",
"\n",
"    plot_confusion_matrix(y_test, predicted_labels, f'Confusion Matrix for {model_name}')\n",
"    plot_roc_curve(y_test, positive_scores, f'ROC Curve for {model_name}')\n",
"\n",
"# Evaluate each tuned model on the same test split, in a fixed order.\n",
"for tuned_model, display_name in (\n",
"    (logreg_best_model, 'Logistic Regression'),\n",
"    (rf_best_model, 'Random Forest'),\n",
"    (gb_best_model, 'Gradient Boosting'),\n",
"):\n",
"    evaluate_and_plot_model(tuned_model, X_test, y_test, display_name)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}