1005 lines
223 KiB
Plaintext
1005 lines
223 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',\n",
|
|||
|
" 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',\n",
|
|||
|
" 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',\n",
|
|||
|
" 'Asthma', 'KidneyDisease', 'SkinCancer'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>HeartDisease</th>\n",
|
|||
|
" <th>BMI</th>\n",
|
|||
|
" <th>Smoking</th>\n",
|
|||
|
" <th>AlcoholDrinking</th>\n",
|
|||
|
" <th>Stroke</th>\n",
|
|||
|
" <th>PhysicalHealth</th>\n",
|
|||
|
" <th>MentalHealth</th>\n",
|
|||
|
" <th>DiffWalking</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>AgeCategory</th>\n",
|
|||
|
" <th>Race</th>\n",
|
|||
|
" <th>Diabetic</th>\n",
|
|||
|
" <th>PhysicalActivity</th>\n",
|
|||
|
" <th>GenHealth</th>\n",
|
|||
|
" <th>SleepTime</th>\n",
|
|||
|
" <th>Asthma</th>\n",
|
|||
|
" <th>KidneyDisease</th>\n",
|
|||
|
" <th>SkinCancer</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>16.60</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>3.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>55-59</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Very good</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>20.34</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>80 or older</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Very good</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>26.58</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>20.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Male</td>\n",
|
|||
|
" <td>65-69</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>24.21</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>75-79</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>23.71</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>28.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>40-44</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Very good</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>28.87</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>75-79</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>12.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>6</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>21.63</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>70-74</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>7</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>31.64</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>80 or older</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>9.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>8</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>26.45</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>80 or older</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No, borderline diabetes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>9</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>40.69</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Male</td>\n",
|
|||
|
" <td>65-69</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>10.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
|
|||
|
"0 No 16.60 Yes No No 3.0 \n",
|
|||
|
"1 No 20.34 No No Yes 0.0 \n",
|
|||
|
"2 No 26.58 Yes No No 20.0 \n",
|
|||
|
"3 No 24.21 No No No 0.0 \n",
|
|||
|
"4 No 23.71 No No No 28.0 \n",
|
|||
|
"5 Yes 28.87 Yes No No 6.0 \n",
|
|||
|
"6 No 21.63 No No No 15.0 \n",
|
|||
|
"7 No 31.64 Yes No No 5.0 \n",
|
|||
|
"8 No 26.45 No No No 0.0 \n",
|
|||
|
"9 No 40.69 No No No 0.0 \n",
|
|||
|
"\n",
|
|||
|
" MentalHealth DiffWalking Sex AgeCategory Race \\\n",
|
|||
|
"0 30.0 No Female 55-59 White \n",
|
|||
|
"1 0.0 No Female 80 or older White \n",
|
|||
|
"2 30.0 No Male 65-69 White \n",
|
|||
|
"3 0.0 No Female 75-79 White \n",
|
|||
|
"4 0.0 Yes Female 40-44 White \n",
|
|||
|
"5 0.0 Yes Female 75-79 Black \n",
|
|||
|
"6 0.0 No Female 70-74 White \n",
|
|||
|
"7 0.0 Yes Female 80 or older White \n",
|
|||
|
"8 0.0 No Female 80 or older White \n",
|
|||
|
"9 0.0 Yes Male 65-69 White \n",
|
|||
|
"\n",
|
|||
|
" Diabetic PhysicalActivity GenHealth SleepTime Asthma \\\n",
|
|||
|
"0 Yes Yes Very good 5.0 Yes \n",
|
|||
|
"1 No Yes Very good 7.0 No \n",
|
|||
|
"2 Yes Yes Fair 8.0 Yes \n",
|
|||
|
"3 No No Good 6.0 No \n",
|
|||
|
"4 No Yes Very good 8.0 No \n",
|
|||
|
"5 No No Fair 12.0 No \n",
|
|||
|
"6 No Yes Fair 4.0 Yes \n",
|
|||
|
"7 Yes No Good 9.0 Yes \n",
|
|||
|
"8 No, borderline diabetes No Fair 5.0 No \n",
|
|||
|
"9 No Yes Good 10.0 No \n",
|
|||
|
"\n",
|
|||
|
" KidneyDisease SkinCancer \n",
|
|||
|
"0 No Yes \n",
|
|||
|
"1 No No \n",
|
|||
|
"2 No No \n",
|
|||
|
"3 No Yes \n",
|
|||
|
"4 No No \n",
|
|||
|
"5 No No \n",
|
|||
|
"6 No Yes \n",
|
|||
|
"7 No No \n",
|
|||
|
"8 Yes No \n",
|
|||
|
"9 No No "
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd \n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\")\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"\n",
|
|||
|
"display(df.head(10))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Регрессия"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Цель: Разработать модель регрессии, которая будет предсказывать количество часов сна, которое человек получает в сутки, на основе его демографических данных, образа жизни и состояния здоровья.\n",
|
|||
|
"\n",
|
|||
|
"Применение:\n",
|
|||
|
"\n",
|
|||
|
"Медицинские учреждения: Модель может помочь врачам оценить качество сна пациента и разработать индивидуальные планы лечения и профилактики нарушений сна.\n",
|
|||
|
"\n",
|
|||
|
"Компании, разрабатывающие приложения для отслеживания сна: Модель может использоваться для улучшения своих продуктов и предоставления более точных рекомендаций.\n",
|
|||
|
"\n",
|
|||
|
"Исследования в области сна: Модель может помочь в изучении факторов, влияющих на качество сна."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Сначала подготовим данные для работы - удалим выбросы."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Размер данных до удаления выбросов: (929, 18)\n",
|
|||
|
"Размер данных после удаления выбросов: (929, 18)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from scipy import stats\n",
|
|||
|
"\n",
|
|||
|
"data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
|
|||
|
"\n",
|
|||
|
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
|
|||
|
"\n",
|
|||
|
"z_scores = stats.zscore(data[numeric_features])\n",
|
|||
|
"\n",
|
|||
|
"threshold = 3\n",
|
|||
|
"\n",
|
|||
|
"data_cleaned = data[(z_scores < threshold).all(axis=1)]\n",
|
|||
|
"data = data_cleaned\n",
|
|||
|
"print(\"Размер данных до удаления выбросов:\", data.shape)\n",
|
|||
|
"print(\"Размер данных после удаления выбросов:\", data_cleaned.shape)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Теперь перейдем к делению на выборки и созданию ориентира"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Размер обучающей выборки: (255836, 16)\n",
|
|||
|
"Размер тестовой выборки: (63959, 16)\n",
|
|||
|
"Baseline MAE: 1.0154101277944922\n",
|
|||
|
"Baseline MSE: 2.085820163563156\n",
|
|||
|
"Baseline R²: -7.204157852269688e-05\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|||
|
"target = 'SleepTime'\n",
|
|||
|
"\n",
|
|||
|
"global X_train, X_test, y_train, y_test\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
|
|||
|
"print(\"Размер тестовой выборки:\", X_test.shape)\n",
|
|||
|
"\n",
|
|||
|
"baseline_predictions = [y_train.mean()] * len(y_test)\n",
|
|||
|
"\n",
|
|||
|
"print('Baseline MAE:', mean_absolute_error(y_test, baseline_predictions))\n",
|
|||
|
"print('Baseline MSE:', mean_squared_error(y_test, baseline_predictions))\n",
|
|||
|
"print('Baseline R²:', r2_score(y_test, baseline_predictions))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Создание конвейера и обучение моделей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Linear Regression trained.\n",
|
|||
|
"Model: Decision Tree trained.\n",
|
|||
|
"Model: Gradient Boosting trained.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from scipy import stats\n",
|
|||
|
"from sklearn.model_selection import train_test_split, RandomizedSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.tree import DecisionTreeRegressor\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|||
|
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
|
|||
|
"\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numeric_features),\n",
|
|||
|
" ('cat', OneHotEncoder(), categorical_features)])\n",
|
|||
|
"\n",
|
|||
|
"pipeline_linear_regression = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('regressor', LinearRegression())])\n",
|
|||
|
"\n",
|
|||
|
"pipeline_decision_tree = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('regressor', DecisionTreeRegressor(random_state=42))])\n",
|
|||
|
"\n",
|
|||
|
"pipeline_gradient_boosting = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('regressor', GradientBoostingRegressor(random_state=42))])\n",
|
|||
|
"\n",
|
|||
|
"pipelines = [\n",
|
|||
|
" ('Linear Regression', pipeline_linear_regression),\n",
|
|||
|
" ('Decision Tree', pipeline_decision_tree),\n",
|
|||
|
" ('Gradient Boosting', pipeline_gradient_boosting)\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"for name, pipeline in pipelines:\n",
|
|||
|
" pipeline.fit(X_train, y_train)\n",
|
|||
|
" print(f\"Model: {name} trained.\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Оценка качества моделей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 0.999721882988516\n",
|
|||
|
"MSE: 2.007024248723743\n",
|
|||
|
"R²: 0.03770762552704621\n",
|
|||
|
"\n",
|
|||
|
"Model: Decision Tree\n",
|
|||
|
"MAE: 1.405790088390023\n",
|
|||
|
"MSE: 4.053338792508978\n",
|
|||
|
"R²: -0.9434229624615185\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting\n",
|
|||
|
"MAE: 0.9962143800804221\n",
|
|||
|
"MSE: 1.9983219431838193\n",
|
|||
|
"R²: 0.041880052575063775\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"for name, pipeline in pipelines:\n",
|
|||
|
" y_pred = pipeline.predict(X_test)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print('MAE:', mean_absolute_error(y_test, y_pred))\n",
|
|||
|
" print('MSE:', mean_squared_error(y_test, y_pred))\n",
|
|||
|
" print('R²:', r2_score(y_test, y_pred))\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Линейная регрессия имеет низкое смещение, так как MAE и MSE близки к 1. Однако, R² близок к 0, что указывает на то, что модель не очень хорошо объясняет дисперсию в данных. Это может быть связано с тем, что линейная модель не может хорошо аппроксимировать сложные зависимости в данных.\n",
|
|||
|
"\n",
|
|||
|
"Дерево решений имеет высокое смещение и дисперсию. Отрицательный R² указывает на то, что модель работает хуже, чем простое предсказание среднего значения. Это свидетельствует о переобучении и высокой дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг имеет низкое смещение, так как MAE и MSE близки к 1. R² также близок к 0, что указывает на то, что модель не очень хорошо объясняет дисперсию в данных. Однако, это лучший результат среди всех моделей, что указывает на то, что градиентный бустинг лучше справляется с данными, чем линейная регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия и Градиентный бустинг имеют низкое смещение, но низкий R², что указывает на то, что они не могут хорошо объяснить дисперсию в данных.\n",
|
|||
|
"\n",
|
|||
|
"Дерево решений имеет высокую дисперсию и переобучение, что приводит к отрицательному R²."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Классификация"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Цель: Разработать модель, которая сможет предсказывать вероятность развития сердечно-сосудистых заболеваний (HeartDisease) у пациентов на основе их демографических данных, образа жизни и состояния здоровья.\n",
|
|||
|
"\n",
|
|||
|
"Применение: Модель может использоваться в медицинских учреждениях для раннего выявления пациентов с высоким риском сердечных заболеваний, что позволит назначить профилактические меры и улучшить результаты лечения."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Проведем деление на выборки и создание ориентира"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Размер обучающей выборки: (743, 16)\n",
|
|||
|
"Размер тестовой выборки: (186, 16)\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"\n",
|
|||
|
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|||
|
"target = 'HeartDisease'\n",
|
|||
|
"\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
|
|||
|
"print(\"Размер тестовой выборки:\", X_test.shape)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"HeartDisease\n",
|
|||
|
"0 796\n",
|
|||
|
"1 796\n",
|
|||
|
"Name: count, dtype: int64\n",
|
|||
|
"Размер обучающей выборки: (1273, 49)\n",
|
|||
|
"Размер тестовой выборки: (319, 49)\n",
|
|||
|
"Лучшие гиперпараметры для логистической регрессии:\n",
|
|||
|
"{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.7398\n",
|
|||
|
"Precision: 0.7239\n",
|
|||
|
"Recall: 0.7564\n",
|
|||
|
"F1-Score: 0.7398\n",
|
|||
|
"ROC-AUC: 0.8338\n",
|
|||
|
"Лучшие гиперпараметры для случайного леса:\n",
|
|||
|
"{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(32), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
|
|||
|
"Accuracy: 0.9122\n",
|
|||
|
"Precision: 0.9571\n",
|
|||
|
"Recall: 0.8590\n",
|
|||
|
"F1-Score: 0.9054\n",
|
|||
|
"ROC-AUC: 0.9773\n",
|
|||
|
"Лучшие гиперпараметры для градиентного бустинга:\n",
|
|||
|
"{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(96), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
|
|||
|
"Accuracy: 0.9185\n",
|
|||
|
"Precision: 0.9577\n",
|
|||
|
"Recall: 0.8718\n",
|
|||
|
"F1-Score: 0.9128\n",
|
|||
|
"ROC-AUC: 0.9745\n",
|
|||
|
"\n",
|
|||
|
"Результаты моделей:\n",
|
|||
|
"\n",
|
|||
|
"Logistic Regression:\n",
|
|||
|
"Accuracy: 0.7398\n",
|
|||
|
"Precision: 0.7239\n",
|
|||
|
"Recall: 0.7564\n",
|
|||
|
"F1: 0.7398\n",
|
|||
|
"Roc_auc: 0.8338\n",
|
|||
|
"\n",
|
|||
|
"Random Forest:\n",
|
|||
|
"Accuracy: 0.9122\n",
|
|||
|
"Precision: 0.9571\n",
|
|||
|
"Recall: 0.8590\n",
|
|||
|
"F1: 0.9054\n",
|
|||
|
"Roc_auc: 0.9773\n",
|
|||
|
"\n",
|
|||
|
"Gradient Boosting:\n",
|
|||
|
"Accuracy: 0.9185\n",
|
|||
|
"Precision: 0.9577\n",
|
|||
|
"Recall: 0.8718\n",
|
|||
|
"F1: 0.9128\n",
|
|||
|
"Roc_auc: 0.9745\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from imblearn.over_sampling import SMOTE\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder, LabelEncoder\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score\n",
|
|||
|
"from scipy.stats import uniform, randint\n",
|
|||
|
"from sklearn.model_selection import RandomizedSearchCV\n",
|
|||
|
"\n",
|
|||
|
"features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|||
|
"target = 'HeartDisease'\n",
|
|||
|
"\n",
|
|||
|
"label_encoder = LabelEncoder()\n",
|
|||
|
"data[target] = label_encoder.fit_transform(data[target])\n",
|
|||
|
"\n",
|
|||
|
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
|
|||
|
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
|
|||
|
"\n",
|
|||
|
"categorical_transformer = Pipeline(steps=[\n",
|
|||
|
" ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"numeric_transformer = Pipeline(steps=[\n",
|
|||
|
" ('scaler', StandardScaler())\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', numeric_transformer, numeric_features),\n",
|
|||
|
" ('cat', categorical_transformer, categorical_features)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"X = preprocessor.fit_transform(data[features])\n",
|
|||
|
"y = data[target]\n",
|
|||
|
"\n",
|
|||
|
"smote = SMOTE(random_state=42)\n",
|
|||
|
"X_resampled, y_resampled = smote.fit_resample(X, y)\n",
|
|||
|
"\n",
|
|||
|
"print(pd.Series(y_resampled).value_counts())\n",
|
|||
|
"\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"def evaluate_model(model, X_test, y_test):\n",
|
|||
|
" y_pred = model.predict(X_test)\n",
|
|||
|
" y_pred_proba = model.predict_proba(X_test)[:, 1]\n",
|
|||
|
" \n",
|
|||
|
" accuracy = accuracy_score(y_test, y_pred)\n",
|
|||
|
" precision = precision_score(y_test, y_pred, pos_label=1) \n",
|
|||
|
" recall = recall_score(y_test, y_pred, pos_label=1) \n",
|
|||
|
" f1 = f1_score(y_test, y_pred, pos_label=1) \n",
|
|||
|
" roc_auc = roc_auc_score(y_test, y_pred_proba)\n",
|
|||
|
" \n",
|
|||
|
" print(f\"Accuracy: {accuracy:.4f}\")\n",
|
|||
|
" print(f\"Precision: {precision:.4f}\")\n",
|
|||
|
" print(f\"Recall: {recall:.4f}\")\n",
|
|||
|
" print(f\"F1-Score: {f1:.4f}\")\n",
|
|||
|
" print(f\"ROC-AUC: {roc_auc:.4f}\")\n",
|
|||
|
" \n",
|
|||
|
" return {\n",
|
|||
|
" 'accuracy': accuracy,\n",
|
|||
|
" 'precision': precision,\n",
|
|||
|
" 'recall': recall,\n",
|
|||
|
" 'f1': f1,\n",
|
|||
|
" 'roc_auc': roc_auc\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
"logreg_param_dist = {\n",
|
|||
|
" 'classifier__C': uniform(loc=0, scale=4),\n",
|
|||
|
" 'classifier__penalty': ['l1', 'l2'],\n",
|
|||
|
" 'classifier__solver': ['liblinear', 'saga']\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"logreg_pipeline = Pipeline([\n",
|
|||
|
" ('classifier', LogisticRegression(max_iter=1000, random_state=42))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"logreg_random_search = RandomizedSearchCV(logreg_pipeline, param_distributions=logreg_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
|
|||
|
"logreg_random_search.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Лучшие гиперпараметры для логистической регрессии:\")\n",
|
|||
|
"print(logreg_random_search.best_params_)\n",
|
|||
|
"\n",
|
|||
|
"logreg_best_model = logreg_random_search.best_estimator_\n",
|
|||
|
"logreg_results = evaluate_model(logreg_best_model, X_test, y_test)\n",
|
|||
|
"\n",
|
|||
|
"rf_param_dist = {\n",
|
|||
|
" 'classifier__n_estimators': randint(100, 1000),\n",
|
|||
|
" 'classifier__max_depth': [None] + list(randint(10, 100).rvs(10)),\n",
|
|||
|
" 'classifier__min_samples_split': randint(2, 20),\n",
|
|||
|
" 'classifier__min_samples_leaf': randint(1, 20),\n",
|
|||
|
" 'classifier__bootstrap': [True, False]\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"rf_pipeline = Pipeline([\n",
|
|||
|
" ('classifier', RandomForestClassifier(random_state=42))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"rf_random_search = RandomizedSearchCV(rf_pipeline, param_distributions=rf_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
|
|||
|
"rf_random_search.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Лучшие гиперпараметры для случайного леса:\")\n",
|
|||
|
"print(rf_random_search.best_params_)\n",
|
|||
|
"\n",
|
|||
|
"rf_best_model = rf_random_search.best_estimator_\n",
|
|||
|
"rf_results = evaluate_model(rf_best_model, X_test, y_test)\n",
|
|||
|
"\n",
|
|||
|
"gb_param_dist = {\n",
|
|||
|
" 'classifier__n_estimators': randint(100, 1000),\n",
|
|||
|
" 'classifier__learning_rate': uniform(0.01, 0.5),\n",
|
|||
|
" 'classifier__max_depth': [None] + list(randint(10, 100).rvs(10)),\n",
|
|||
|
" 'classifier__min_samples_split': randint(2, 20),\n",
|
|||
|
" 'classifier__min_samples_leaf': randint(1, 20),\n",
|
|||
|
" 'classifier__subsample': uniform(0.5, 0.5)\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"gb_pipeline = Pipeline([\n",
|
|||
|
" ('classifier', GradientBoostingClassifier(random_state=42))\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"gb_random_search = RandomizedSearchCV(gb_pipeline, param_distributions=gb_param_dist, n_iter=50, cv=5, random_state=42, n_jobs=-1)\n",
|
|||
|
"gb_random_search.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Лучшие гиперпараметры для градиентного бустинга:\")\n",
|
|||
|
"print(gb_random_search.best_params_)\n",
|
|||
|
"\n",
|
|||
|
"gb_best_model = gb_random_search.best_estimator_\n",
|
|||
|
"gb_results = evaluate_model(gb_best_model, X_test, y_test)\n",
|
|||
|
"\n",
|
|||
|
"print(\"\\nРезультаты моделей:\")\n",
|
|||
|
"print(\"\\nLogistic Regression:\")\n",
|
|||
|
"for metric, value in logreg_results.items():\n",
|
|||
|
" print(f\"{metric.capitalize()}: {value:.4f}\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"\\nRandom Forest:\")\n",
|
|||
|
"for metric, value in rf_results.items():\n",
|
|||
|
" print(f\"{metric.capitalize()}: {value:.4f}\")\n",
|
|||
|
"\n",
|
|||
|
"print(\"\\nGradient Boosting:\")\n",
|
|||
|
"for metric, value in gb_results.items():\n",
|
|||
|
" print(f\"{metric.capitalize()}: {value:.4f}\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Logistic Regression Metrics:\n",
|
|||
|
"Accuracy: 0.7398\n",
|
|||
|
"Precision: 0.7239\n",
|
|||
|
"Recall: 0.7564\n",
|
|||
|
"F1-Score: 0.7398\n",
|
|||
|
"ROC-AUC: 0.8338\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBhklEQVR4nO3deZyN9f//8ecxzGrGmBhjncXY921ky4iIiCSJsofK3oJKlkgkW2RJ2bMXSiEkYco6o+xjS7KExjYLM3P9/vCb83XMYg5nHJfP4367zc2c63pf1/U61xwzz/O+3u/rWAzDMAQAAGAS2ZxdAAAAgD0ILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQILwAAwFQIL3joHTlyRA0bNlSuXLlksVi0YsUKh+7/xIkTslgsmj17tkP3a2bh4eEKDw932P6uXbumrl27KiAgQBaLRX379nXYvh8WmzZtksVi0aZNmxyyv9mzZ8tisejEiRMO2R+koUOHymKxOLsMOADhBZly9OhRde/eXSEhIXJ3d5ePj49q1aqliRMnKi4uLkuP3aFDB/3xxx8aOXKk5s2bp6pVq2bp8R6kjh07ymKxyMfHJ83zeOTIEVksFlksFo0dO9bu/f/zzz8aOnSoIiMjHVDtvfvoo480e/Zsvfbaa5o3b55eeeWVLD1eUFCQmjZtmqXHcJSPPvrI4YH8TilBKOUre/bsKliwoDp27KjTp09n6bGBLGEAd/H9998bHh4ehq+vr9G7d29jxowZxuTJk402bdoYOXLkMF599dUsO3ZsbKwhyXjvvfey7BjJyclGXFyckZiYmGXHSE+HDh2M7NmzGy4uLsbixYtTrR8yZIjh7u5uSDI++eQTu/e/Y8cOQ5Ixa9Ysu7ZLSEgwEhIS7D5eeqpXr27UqlXLYfu7m8DAQOOZZ555YMczDMNISkoy4uLijKSkJLu28/LyMjp06JBqeWJiohEXF2ckJyffd22zZs0yJBnDhw835s2bZ3zxxRdGly5dDBcXF6No0aJGXFzcfR/DDG7evPk/81wfddmdG53wsDt+/LjatGmjwMBAbdy4Ufnz57eue+ONNxQdHa3Vq1dn2fH//fdfSZKvr2+WHcNiscjd3T3L9n83bm5uqlWrlhYuXKjWrVvbrPv666/1zDPPaPny5Q+kltjYWHl6esrV1dWh+z1//rxKly7tsP0lJiYqOTnZ4XXej2zZsjn0deTi4iIXFxeH7U+SGjdubO257Nq1q/LkyaPRo0dr1apVqV57WckwDMXHx8vDw+OBHVOSsmfPruzZ+bP3KOCyETI0ZswYXbt2TV9++aVNcEkRGhqqPn36WB8nJibqww8/VNGiReXm5qagoCC9++67SkhIsNkupVt/y5YtCgsLk7u7u0JCQjR37lxrm6FDhyowMFCS9Pbbb8tisSgoKEjSrcstKd/fLq1r2j/99JNq164tX19f5cyZUyVKlNC7775rXZ/emJeNGzeqTp068vLykq+vr5o3b64DBw6kebzo6Gh17NhRvr6+ypUrlzp16qTY2Nj0T+wd2rZtqx9//FExMTHWZTt27NCRI0fUtm3bVO0vXbqkt956S+XKlVPOnDnl4+Ojxo0bKyoqytpm06ZNqlatmiSpU6dO1ksGKc8zPDxcZcuW1a5du/TEE0/I09PTel7uHPPSoUMHubu7p3r+jRo1Uu7cufXPP/+k+bxSxoEcP35cq1evttaQMo7j/Pnz6tKli/Llyyd3d3dVqFBBc+bMsdlHys9n7NixmjBhgvW1tX///kyd2/Rk9rWanJysoUOHqkCBAvL09FS9evW0f/9+BQUFqWPHjqme6+1jXo4cOaLnn39eAQEBcnd3V6FChdSmTRtdvnxZ0q3gfP36dc2ZM8d6blL2md6Ylx9//FF169aVt7e3fHx8VK1aNX399df3dA7q1Kkj6dZl4dsdPHhQrVq1kp+fn9zd3VW1alWtWrUq1fZ79+5V3bp15eHhoUKFCmnEiBGaNWtWqrpT/r+vXbtWVatWlYeHh6ZPny5JiomJUd++fVW4cGG5ubkpNDRUo0ePVnJyss2xFi1apCpVqlifd7ly5TRx4kTr+ps3b2rYsGEqVqyY3N3d9dhjj6l27dr66aefrG3S+v3gyN9ZeHCIoMjQd999p5CQENWsWTNT7bt27ao5c+aoVatWevPNN/X7779r1KhROnDggL799lubttHR0WrVqpW6dOmiDh066KuvvlLHjh1VpUoVlSlTRi1btpSvr6/69eunl156SU2aNFHOnDntqn/fvn1q2rSpypcvr+HDh8vNzU3R0dHaunVrhtutX79ejRs3VkhIiIYOHaq4uDh99tlnqlWrlnbv3p0qOLVu3VrBwcEaNWqUdu/erZkzZ8rf31+jR4/OVJ0tW7ZUjx499M0336hz586SbvW6lCxZUpUrV07V/tixY1qxYoVeeOEFBQcH69y5c5o+fbrq1q2r/fv3q0CBAipVqpSGDx+uDz74QN26dbP+obr9Z3nx4kU1btxYbdq00csvv6x8+fKlWd/EiRO1ceNGdejQQREREXJxcdH06dO1bt06zZs3TwUKFEhzu1KlSmnevHnq16+fChUqpDfffFOSlDdvXsXFxSk8PFzR0dHq2bOngoODtXTpUnXs2FExMTE2oViSZs2apfj4eHXr1k1ubm7y8/PL1LlNT2Zfq4MGDdKYMWPUrFkzNWrUSFFRUWrUqJHi4+Mz3P+NGzfUqFEjJSQkqFevXgoICNDp06f1/fffKyYmRrly5dK8efPUtWtXhYWFqVu3bpKkokWLprvP2bNnq3PnzipTpowGDRokX19f7dmzR2vWrEkz5N5NSsDInTu3ddm+fftUq1YtFSxYUAMHDpSXl5eWLFmiFi1aaPny5XruueckSadPn1a9evVksVg0aNAgeXl5aebMmXJzc0vzWIcOHdJLL72k7t2769VXX1WJEiUUGxurunXr6vTp0+revbuKFCmibdu2adCgQTpz5owmTJgg6dYbkJdeekn169e3/p86cOCAtm7dan2dDB06VKNGjbKezytXrmjnzp3avXu3nnrqqXTPgSN/Z+EBcvZ1Kzy8Ll++bEgymjdvnqn2kZGRhiSja9euNsvfeustQ5KxceNG67LAwEBDkrF582brsvPnzxtubm7Gm2++aV12/PjxNMd7dOjQwQgMDExVw5AhQ4zbX9bjx483JBn//vtvunWnHOP2cSEVK1Y0/P39jYsXL1qXRUVFGdmyZTPat2+f6nidO3e22edzzz1nPPbYY+ke8/bn4eXlZRiGYbRq1cqoX7++YRi3xk8EBAQYw4YNS/McxMfHpxpbcfz4ccPNzc0YPny4dVlGY17q1q1rSDKmTZuW5rq6devaLFu7dq0hyRgxYoRx7NgxI2fOnEaLFi3u+hwNI+0xKBMmTDAkGfPnz7cuu3HjhlGjRg0jZ86cxpUrV6zPS5Lh4+NjnD9//p6Pd7vMvlbPnj1rZM+ePdXzHDp0qCHJZqzKzz//bEgyfv75Z8MwDGPPnj2GJGPp0qUZ1premJeUcSrHjx83DMMwYmJiDG9vb6N69eqpxm3cbVxMyr7Wr19v/Pvvv8apU6eMZcuWGXnz5jXc3NyMU6dOWdvWr1/fKFeunBEfH2+z/5o1axrFihWzLuvVq5dhsViMPXv2WJddvHjR8PPzs6nbMP7v//uaNWts6vrwww8NLy8v4/DhwzbLBw4caLi4uBh//fWXYRiG0adPH8PHxyfDcWkVKlS46zinO38/ZMXvLDwYXDZCuq5cuSJJ8vb2zlT7H374QZLUv39/m+Up77bvHBtTunRpa2+AdOvdeIkSJXTs2LF7rvlOKWNlVq5cmaobOj1nzpxRZGSkOnbsaPPuvnz58nrqqaesz/N2PXr0sHlcp04dXbx40XoOM6Nt27batGmTzp49q40bN+rs2bPpvpt2c3NTtmy3/vsmJSXp4sWL1ktiu3fvzvQx3dzc1KlTp0y1bdiwobp3767hw4erZcuWcnd3t3b934sffvhBAQEBeumll6zLcuTIod69e+v
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB7v0lEQVR4nO3dd3xN9/8H8NdN5GYvIhISMuzaMYOmCFGqghJb1FaqRitGjSpqU7VbYqXWl1IrLRU1UirEljQiZoIQWTLv/fz+yC+XK0MuNzlJ7uv5eNxHe9/3jPe9J3Hf+ZzPkAkhBIiIiIh0kJ7UCRARERFJhYUQERER6SwWQkRERKSzWAgRERGRzmIhRERERDqLhRARERHpLBZCREREpLNYCBEREZHOYiFEREREOouFEBG91aJFi+Di4gJ9fX00aNBA6nQKhUwmw6xZs7RyrKioKMhkMvj7+2vleAQEBQVBJpMhKChI6lSolGEhRMWev78/ZDKZ6lGmTBlUqlQJvr6+ePjwYa77CCGwdetWfPjhh7CysoKJiQnq1q2L7777DsnJyXmea9++ffj4449hY2MDuVyOihUrolevXvjrr78KlGtqaiqWLVuGZs2awdLSEkZGRqhevTrGjBmD8PDwd3r/Uvvjjz/wzTffoGXLlti0aRPmzZtXqOfz9fWFmZlZoZ5DWwICArB8+fJCPUd2UZX90NPTQ9myZfHxxx8jODi4UM9NpAtkXGuMijt/f38MHjwY3333HZydnZGamop//vkH/v7+cHJywrVr12BkZKTaXqFQoG/fvti1axdat26N7t27w8TEBKdOnUJAQABq166NY8eOoUKFCqp9hBD4/PPP4e/vj4YNG+Kzzz6DnZ0doqOjsW/fPoSEhODMmTNwd3fPM8/Y2Fh07NgRISEh+OSTT+Dp6QkzMzOEhYVhx44diImJQXp6eqF+VoXBz88PixYtQkpKCuRyeaGfz9fXF3v27EFSUlKhn+t1qampKFOmDMqUKVPgfT755BNcu3YNUVFRanEhBNLS0mBgYAB9ff33yisqKgrOzs7o06cPOnXqBIVCgfDwcKxevRopKSn4999/Ubdu3fc6R0mgVCqRnp4OuVwOPT3+DU9aJIiKuU2bNgkA4t9//1WLT548WQAQO3fuVIvPmzdPABCTJk3KcawDBw4IPT090bFjR7X4okWLBADx1VdfCaVSmWO/LVu2iHPnzuWbZ+fOnYWenp7Ys2dPjtdSU1PFxIkT892/oDIyMkRaWppWjlUQgwcPFqamplo7nlKpFC9fvszz9UGDBmn1fIWpc+fOokqVKoV6jjt37ggAYtGiRWrxI0eOCABi1KhRhXr+3CQlJRX5OYkKCwshKvbyKoQOHjwoAIh58+apYi9fvhTW1taievXqIiMjI9fjDR48WAAQwcHBqn3Kli0ratasKTIzM98px3/++UcAEMOGDSvQ9h4eHsLDwyNHfNCgQWpfrK9/CS5btky4uLgIPT098c8//wh9fX0xa9asHMe4deuWACBWrlypisXFxYlx48YJBwcHIZfLhaurq/jhhx+EQqHIN08AOR6bNm0SQmQVZN99951wcXERcrlcVKlSRUyZMkWkpqaqHaNKlSqic+fO4ujRo8LNzU0YGhqKZcuW5XnOghZCu3btEo0aNRJGRkaiXLlyol+/fuLBgwe5blerVi1haGgoPvjgA7F3794cn3P2e505c6bqeUJCghg3bpyoUqWKkMvlonz58sLT01OEhIQIIbKu4ZufTfYxs69b9meV7ebNm6Jnz57CxsZGGBkZierVq4upU6fm+z7zKoSSkpIEANGhQwe1eEGvdWxsrOjfv78wNzcXlpaWYuDAgSI0NDRH3tnXIyIiQnz88cfCzMxMdO3aVQghhEKhEMuWLRO1a9cWhoaGwtbWVgwfPlw8f/5c7Vz//vuv6NChgyhXrpwwMjISTk5OYvDgwWrb/Prrr6JRo0bCzMxMmJubizp16ojly5erXj9x4oQAIE6cOKG2X0F+DrLfw4MHD0TXrl2FqampsLGxERMnTnzn33kqPQreBkxUzGTfjrC2tlbFTp8+jbi4OIwbNy7PWxwDBw7Epk2bcPDgQTRv3hynT5/G8+fP8dVXX73zbYwDBw4AAAYMGPBO+7/Npk2bkJqaiuHDh8PQ0BD29vbw8PDArl27MHPmTLVtd+7cCX19ffTs2RMA8PLlS3h4eODhw4cYMWIEKleujLNnz2LKlCmIjo7Ot4/L1q1bsX79epw/fx4///wzAKhuDw4dOhSbN2/GZ599hokTJ+LcuXOYP38+bt68iX379qkdJywsDH369MGIESMwbNgw1KhR470+j+zbpU2aNMH8+fPx+PFjrFixAmfOnMGlS5dgZWUFADh06BB8fHxQt25dzJ8/H3FxcRgyZAgqVar01nOMHDkSe/bswZgxY1C7dm08e/YMp0+fxs2bN9GoUSNMmzYN8fHxePDgAZYtWwYA+fZtunLlClq3bg0DAwMMHz4cTk5OuH37Nn7//XfMnTtX488gt5//gl5rpVKJLl264Pz58xg1ahRq1qyJ/fv3Y9CgQbmeKzMzE15eXmjVqhUWL14MExMTAMCIESNU1+LLL7/EnTt38NNPP+HSpUs4c+YMDAwM8OTJE3To0AHly5eHn58frKysEBUVhb1796qO/+eff6JPnz5o164dFixYAAC4efMmzpw5g3HjxuX5GRT05wDIumXu5eWFZs2aYfHixTh27BiWLFkCV1dXjBo1SuPPn0oRqSsxorfJbhE6duyYePr0qbh//77Ys2ePKF++vDA0NBT3799Xbbt8+XIBQOzbty/P4z1//lwAEN27dxdCCLFixYq37vM23bp1EwBEXFxcgbbXtEXIwsJCPHnyRG3bdevWCQDi6tWravHatWuLtm3bqp7PmTNHmJqaivDwcLXt/Pz8hL6+vrh3716+uebWQpPdcjB06FC1+KRJkwQA8ddff6liVapUEQDE0aNH8z1Pfud7XXp6urC1tRV16tQRKSkpqnh2C+GMGTNUsbp16woHBweRmJioigUFBam13mTDGy1ClpaW4osvvsg317xujeXWIvThhx8Kc3NzcffuXbVtc7sVm9uxZs+eLZ4+fSpiYmLEqVOnRJMmTQQAsXv3btW2Bb3W//vf/wQAtRYXhUIh2rZtm2uLEADh5+endsxTp04JAGL79u1q8aNHj6rF9+3bl2uL7uvGjRsnLCws8m2debNFSJOfg+z38N1336kds2HDhsLNzS3Pc5JuYI8zKjE8PT1Rvnx5ODo64rPPPoOpqSkOHDgABwcH1TaJiYkAAHNz8zyPk/1aQkKC2n/z2+dttHGM/PTo0QPly5dXi3Xv3h1lypTBzp07VbFr167hxo0b8PHxUcV2796N1q1bw9raGrGxsaqHp6cnFAoF/v77b43zOXz4MABgwoQJavGJEycCyGqJeZ2zszO8vLw0Pk9uLly4gCdPnmD06NFqneQ7d+6MmjVrqs796NEjXL16FQMHDlRrqfHw8ChQ52IrKyucO3cOjx49eu+cnz59ir///huff/45KleurPaaTCYr0DFmzpyJ8uXLw87ODq1bt8bNmzexZMkSfPbZZ6ptCnqtjx49CgMDAwwbNky1r56eHr744os8z/9mq8nu3bthaWmJ9u3bq53Lzc0NZmZmOHHiBACoWmUOHjyIjIyMXI9tZWWF5ORk/PnnnwX6LICC/xy8buTIkWrPW7dujcjIyAKfk0onFkJUYqxatQp//vkn9uzZg06dOiE2NhaGhoZq22QXItkFUW7eLJYsLCzeus/baOMY+XF2ds4Rs7GxQbt27bBr1y5VbOfOnShTpgy6d++uiv333384evQoypcvr/bw9PQEADx58kTjfO7evQs9PT1UrVpVLW5nZwcrKyvcvXv3rfm/q+xj53Z7rWbNmqrXs//7Zo55xd60cOFCXLt2DY6OjmjatClmzZr1zl+a2fvVqVPnnfYHgOHDh+PPP//E77//jvHjxyMlJQUKhUJtm4Je67t378Le3l51iytbXp9LmTJl1P7gyD5XfHw8bG1tc5wvKSl
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Random Forest Metrics:\n",
|
|||
|
"Accuracy: 0.9122\n",
|
|||
|
"Precision: 0.9571\n",
|
|||
|
"Recall: 0.8590\n",
|
|||
|
"F1-Score: 0.9054\n",
|
|||
|
"ROC-AUC: 0.9773\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+LElEQVR4nO3deZyN5f/H8feZMftKjEHMguz7FrJFRERSSTJjSRSyFdrsFLJHJGuqIVnKt5ItWb4Ig0hMliRGjG1mzDAz9+8Pjzk/x8wwZ5xx3L6v5+NxHs257uu+z+e+59Z5z3Vf9zkWwzAMAQAAmISLswsAAACwB+EFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEFAACYCuEF/xOOHDmipk2bKiAgQBaLRStWrHDo9o8fPy6LxaL58+c7dLtm1rBhQzVs2NBh24uPj1e3bt0UHBwsi8Wivn37OmzbZsF5BtxAeME98+eff+rVV19VeHi4PD095e/vr7p162rKlCm6evVqrr52RESE9u/fr9GjR2vRokWqXr16rr7evRQZGSmLxSJ/f/9Mj+ORI0dksVhksVg0YcIEu7f/zz//aNiwYYqOjnZAtTk3ZswYzZ8/Xz179tSiRYv08ssv5+rrhYaGWo+bxWKRj4+PatasqYULF+bq65rNrcfp5kdSUpKzy8tg69atGjZsmC5evOjsUnAX8ji7APxvWL16tZ577jl5eHioU6dOKl++vK5du6bNmzfrzTff1IEDBzR79uxcee2rV69q27Zteuedd9SrV69ceY2QkBBdvXpVbm5uubL9O8mTJ48SExP17bff6vnnn7dZtnjxYnl6eub4jeSff/7R8OHDFRoaqsqVK2d7vTVr1uTo9bKyfv16Pfrooxo6dKhDt3s7lStX1oABAyRJp0+f1pw5cxQREaHk5GS98sor96yO+93Nx+lm7u7uTqjm9rZu3arhw4crMjJSgYGBzi4HOUR4Qa47duyY2rdvr5CQEK1fv16FChWyLnv99dcVExOj1atX59rr//vvv5KUq/+jslgs8vT0zLXt34mHh4fq1q2rL7/8MkN4+eKLL/TUU09p2bJl96SWxMREeXt7O/yN6+zZsypbtqzDtpeSkqK0tLTb1lmkSBF17NjR+jwyMlLh4eGaNGkS4eUmtx4nR0lLS9O1a9ec+m8L9ycuGyHXjRs3TvHx8frss89sgku6EiVK6I033rA+T0lJ0ciRI1W8eHF5eHgoNDRUb7/9tpKTk23WCw0NVcuWLbV582bVrFlTnp6eCg8PtxnWHzZsmEJCQiRJb775piwWi0JDQyXdeCNK//lmw4YNk8VisWn76aef9NhjjykwMFC+vr4qVaqU3n77bevyrOYirF+/XvXq1ZOPj48CAwPVunVr/f7775m+XkxMjPWvwYCAAHXu3FmJiYlZH9hbdOjQQd9//73NcPjOnTt15MgRdejQIUP/uLg4DRw4UBUqVJCvr6/8/f3VvHlz7d2719pn48aNqlGjhiSpc+fO1ssB6fvZsGFDlS9fXrt27VL9+vXl7e1tPS63znmJiIiQp6dnhv1v1qyZ8ubNq3/++SfT/dq4caMsFouOHTum1atXW2s4fvy4pBuhpmvXripYsKA8PT1VqVIlLViwwGYb6b+fCRMmaPLkydZz6+DBg9k6tukKFCig0qVL688//7Rp/+WXX/Tcc8+pWLFi8vDwUNGiRdWvX78Ml/EiIyPl6+urU6dOqU2bNvL19VWBAgU0cOBApaam2vS9ePGiIiMjFRAQoMDAQEVERGR5qcOe8+zw4cPq2LGjAgICVKBAAb333nsyDEMnT55U69at5e/vr+DgYH300Ud2HZvbSUhI0IABA1S0aFF5eHioVKlSmjBhggzDsOlnsVjUq1cvLV68WOXKlZOHh4d++OEHSdKpU6fUpUsXFSxYUB4eHipXrpzmzp2b4bWmTZumcuXKydvbW3nz5lX16tX1xRdfWI/Bm2++KUkKCwvLcC7BPBh5Qa779ttvFR4erjp16mSrf7du3bRgwQK1a9dOAwYM0Pbt2zV27Fj9/vvvWr58uU3fmJgYtWvXTl27dlVERITmzp2ryMhIVatWTeXKlVPbtm0VGBiofv366cUXX1SLFi3k6+trV/0HDhxQy5YtVbFiRY0YMUIeHh6KiYnRli1bbrve2rVr1bx5c4WHh2vYsGG6evWqpk2bprp162r37t0ZgtPzzz+vsLAwjR07Vrt379acOXMUFBSkDz/8MFt1tm3bVj169NA333yjLl26SLox6lK6dGlVrVo1Q/+jR49qxYoVeu655xQWFqbY2FjNmjVLDRo00MGDB1W4cGGVKVNGI0aM0Pvvv6/u3burXr16kmTzuzx//ryaN2+u9u3bq2PHjipYsGCm9U2ZMkXr169XRESEtm3bJldXV82aNUtr1qzRokWLVLhw4UzXK1OmjBYtWqR+/frp4Ycftl6eKFCggK5evaqGDRsqJiZGvXr1UlhYmJYuXarIyEhdvHjRJhRL0rx585SUlKTu3bvLw8ND+fLly9axTZeSkqK///5befPmtWlfunSpEhMT1bNnTz300EPasWOHpk2bpr///ltLly616ZuamqpmzZqpVq1amjBhgtauXauPPvpIxYsXV8+ePSVJhmGodevW2rx5s3r06KEyZcpo+fLlioiIyFCTvefZCy+8oDJlyuiDDz7Q6tWrNWrUKOXLl0+zZs3S448/rg8//FCLFy/WwIEDVaNGDdWvX/+Ox+X69es6d+6cTZu3t7e8vb1lGIaefvppbdiwQV27dlXlypX1448/6s0339SpU6c0adIkm/XWr1+vJUuWqFevXsqfP79CQ0MVGxurRx991BpuChQooO+//15du3bV5cuXrZO3P/30U/Xp00ft2rXTG2+8oaSkJO3bt0/bt29Xhw4d1LZtWx0+fFhffvmlJk2apPz580u6cS7BZAwgF126dMmQZLRu3Tpb/aOjow1JRrdu3WzaBw4caEgy1q9fb20LCQkxJBmbNm2ytp09e9bw8PAwBgwYYG07duyYIckYP368zTYjIiKMkJCQDDUMHTrUuPmfxqRJkwxJxr///ptl3emvMW/ePGtb5cqVjaCgIOP8+fPWtr179xouLi5Gp06dMrxely5dbLb5zDPPGA899FCWr3nzfvj4+BiGYRjt2rUzGjdubBiGYaSmphrBwcHG8OHDMz0GSUlJRmpqaob98PDwMEaMGGFt27lzZ4Z9S9egQQNDkvHJJ59kuqxBgwY2bT/++KMhyRg1apRx9OhRw9fX12jTps0d99Ewbvy+n3rqKZu2yZMnG5KMzz//3Np27do1o3bt2oavr69x+fJl635JMvz9/Y2zZ89m+/WaNm1q/Pvvv8a///5r7N+/33j55ZcNScbrr79u0zcxMTHD+mPHjjUsFotx4sQJa1tERIQhyeb4GoZhVKlSxahWrZr1+YoVKwxJxrhx46xtKSkpRr169e76POvevbvNNh9++GHDYrEYH3zwgbX9woULhpeXlxEREZGt4yQpw2Po0KE2+zJq1Cib9dq1a2dYLBYjJibG2ibJcHFxMQ4cOGDTt2vXrkahQoWMc+fO2bS3b9/eCAgIsB7/1q1bG+XKlbttvePHjzckGceOHbvjvuH+xWUj5KrLly9Lkvz8/LLV/z//+Y8kqX///jbt6X9t3zo3pmzZstbRAOnGX1ClSpXS0aNHc1zzrdLnyqxcuVJpaWnZWuf06dOKjo5WZGSkzV/3FStW1BNPPGHdz5v16NHD5nm9evV0/vx56zHMjg4dOmjjxo06c+aM1q9frzNnzmR6yUi6MU/GxeXG/wJSU1N1/vx56yWx3bt3Z/s1PTw81Llz52z1bdq0qV599VWNGDFCbdu2laenp2bNmpXt17rVf/7zHwUHB+vFF1+0trm5ualPnz6Kj4/Xzz//bNP/2Wefteuv7DVr1qhAgQIqUKCAKlSooEWLFqlz584aP368TT8vLy/rzwkJCTp37pzq1KkjwzC0Z8+eDNvN7Hd98zn7n//8R3ny5LGOxEiSq6u
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB2CUlEQVR4nO3deXhM5/sG8Hsyksm+ENkIWew7sQaNJSS17xSRqL1Fba1YGju1L629JShfWylFpaWWIqUNsUtKhCBBiEQii8y8vz/8nHaahAxJTpK5P9eVq51nznJPJjFP3vOecxRCCAEiIiIiPWQgdwAiIiIiubARIiIiIr3FRoiIiIj0FhshIiIi0ltshIiIiEhvsREiIiIivcVGiIiIiPQWGyEiIiLSW2yEiIiISG+xESKiArFw4UK4ublBqVSiTp06cseRhYuLCwICAuSOQUT/wkaI9EJwcDAUCoX0VaJECZQpUwYBAQG4f/9+tusIIbBlyxZ88MEHsLa2hqmpKWrWrImZM2ciJSUlx33t3bsXH374IWxtbWFkZAQnJyf06tULv/32W66ypqWlYenSpWjUqBGsrKxgbGyMSpUqYeTIkYiMjHyn1y+3X375BV988QWaNm2KjRs3Yu7cufm6v4CAAK33W6VSoVKlSggKCkJaWlq+7rso+e/36d9fhw8fljteFg8ePMD06dMRHh4udxQqRkrIHYCoIM2cOROurq5IS0vDH3/8geDgYJw6dQpXrlyBsbGxtJxarUbfvn2xc+dONG/eHNOnT4epqSl+//13zJgxA7t27cKRI0dgb28vrSOEwMcff4zg4GDUrVsX48aNg4ODA2JjY7F37160bt0ap0+fhqenZ4754uPj4evri7CwMHTo0AF9+/aFubk5IiIisH37dqxbtw4ZGRn5+j3KD7/99hsMDAzw3XffwcjIqED2qVKp8O233wIAEhMTsW/fPsyaNQu3bt3C1q1bCyRDUfDv79O/1a5dW4Y0b/bgwQPMmDEDLi4uejuqSPlAEOmBjRs3CgDizz//1KpPnDhRABA7duzQqs+dO1cAEBMmTMiyrf379wsDAwPh6+urVV+4cKEAIMaMGSM0Gk2W9TZv3izOnj37xpzt27cXBgYGYvfu3VmeS0tLE+PHj3/j+rn18uVLkZ6enifbyo2BAwcKMzOzPNueRqMRL168yPF5f3//LPvTaDSicePGQqFQiLi4uDzLoovy5csLf39/Wfadney+T3kpJSUlT7f3559/CgBi48aNebpd0m88NEZ6rXnz5gCAW7duSbXU1FQsXLgQlSpVwrx587Ks07FjR/j7++Pw4cP4448/pHXmzZuHKlWqYNGiRVAoFFnW8/PzQ8OGDXPMcvbsWRw8eBCDBg1C9+7dszyvUqmwaNEi6XGLFi3QokWLLMsFBATAxcVFehwdHQ2FQoFFixZh2bJlcHd3h0qlwoULF1CiRAnMmDEjyzYiIiKgUCjwzTffSLVnz55hzJgxcHZ2hkqlQoUKFTB//nxoNJocXxMAKBQKbNy4ESkpKdJhl+DgYABAZmYmZs2aJWVycXHB5MmTkZ6errUNFxcXdOjQASEhIahfvz5MTEywdu3aN+43uxzNmjWDEAJRUVFS/c6dO/jkk09QuXJlmJiYoFSpUujZsyeio6O11n99ePX06dMYN24cSpcuDTMzM3Tt2hWPHz/WWlYIgdmzZ6Ns2bIwNTVFy5YtcfXq1WxzRUVFoWfPnihZsiRMTU3RuHFjHDx4UGuZ48ePQ6FQYOfOnZgxYwbKlCkDCwsL9OjRA4mJiUhPT8eYMWNgZ2cHc3NzDBw4MMv38H2sWrUK1atXh0qlgpOTEz799FM8e/ZMa5kWLVqgRo0aCAsLwwcffABTU1NMnjwZAJCeno5p06ahQoUKUKlUcHZ2xhdffJEl46+//opmzZrB2toa5ubmqFy5srSN48ePo0GDBgCAgQMHZvlZInpXPDRGeu31h52NjY1UO3XqFBISEvDZZ5+hRInsf0UGDBiAjRs34sCBA2jcuDFOnTqFp0+fYsyYMVAqle+UZf/+/QBeNUz5YePGjUhLS8PQoUOhUqng6OgILy8v7Ny5E9OmTdNadseOHVAqlejZsycA4MWLF/Dy8sL9+/cxbNgwlCtXDmfOnMGkSZMQGxuLZcuW5bjfLVu2YN26dTh37px0COb14cHBgwdj06ZN6NGjB8aPH4+zZ89i3rx5uH79Ovbu3au1nYiICHz00UcYNmwYhgwZgsqVK+v8Pcju/f7zzz9x5swZ9OnTB2XLlkV0dDRWr16NFi1a4Nq1azA1NdXaxqhRo2BjY4Np06YhOjoay5Ytw8iRI7Fjxw5pmaCgIMyePRvt2rVDu3btcP78ebRt2zbLYc2HDx/C09MTL168wOjRo1GqVCls2rQJnTp1wu7du9G1a1et5efNmwcTExMEBgbi5s2b+Prrr2FoaAgDAwMkJCRg+vTp0iFfV1dXBAUF5er7Eh8fr/XY0NAQVlZWAIDp06djxowZ8Pb2xogRIxAREYHVq1fjzz//xOnTp2FoaCit9+TJE3z44Yfo06cP+vfvD3t7e2g0GnTq1AmnTp3C0KFDUbVqVVy+fBlLly5FZGQkfvzxRwDA1atX0aFDB9SqVQszZ86ESqXCzZs3cfr0aQBA1apVMXPmTAQFBWHo0KHSHzFvOtRMlCtyD0kRFYTXh8aOHDkiHj9+LGJiYsTu3btF6dKlhUqlEjExMdKyy5YtEwDE3r17c9ze06dPBQDRrVs3IYQQy5cvf+s6b9O1a1cBQCQkJORqeS8vL+Hl5ZWl7u/vL8qXLy89vn37tgAgLC0txaNHj7SWXbt2rQAgLl++rFWvVq2aaNWqlfR41qxZwszMTERGRmotFxgYKJRKpbh79+4bs2Z3CCY8PFwAEIMHD9aqT5gwQQAQv/32m1QrX768ACAOHz78xv38d3+PHz8Wjx8/Fjdv3hSLFi0SCoVC1KhRQ+vQZXaH2EJDQwUAsXnzZqn2+mfI29tba/2xY8cKpVIpnj17JoQQ4tGjR8LIyEi0b99ea7nJkycLAFqHxsaMGSMAiN9//12qPX/+XLi6ugoXFxehVquFEEIcO3ZMABA1atQQGRkZ0rIfffSRUCgU4sMPP9TK36RJE62fgTd9nwBk+Xr9c/X6tbRt21bKIoQQ33zzjQAgNmzYINW8vLwEALFmzRqtfWzZskUYGBhovUYhhFizZo0AIE6fPi2EEGLp0qUCgHj8+HGOeXlojPIDD42RXvH29kbp0qXh7OyMHj16wMzMDPv370fZsmWlZZ4/fw4AsLCwyHE7r59LSkrS+u+b1nmbvNjGm3Tv3h2lS5fWqnXr1g0lSpTQGs24cuUKrl27ht69e0u1Xbt2oXnz5rCxsUF8fLz05e3tDbVajZMnT+qc59ChQwCAcePGadXHjx8PAFkOD7m6usLHxyfX209JSUHp0qVRunRpVKhQARMmTEDTpk2xb98+rUOXJiYm0v+/fPkST548QYUKFWBtbY3z589n2e7QoUO11m/evDnUajXu3LkDADhy5AgyMjIwatQoreXGjBmTZVuHDh1Cw4YN0axZM6lmbm6OoUOHIjo6GteuXdNafsCAAVojMI0aNZIm6f9bo0aNEBMTg8zMzLd9m2BsbIxff/1V62vx4sVar2XMmDEwMPjn42LIkCGwtLTM8h6pVCoMHDhQq7Zr1y5UrVoVVapU0frZadWqFQDg2LFjAABra2sAwL59+956uJUoL/HQGOmVlStXolKlSkhMTMSGDRtw8uRJqFQqrWVeNyKvG6Ls/LdZsrS0fOs6b/Pvbbz+UMhLrq6uWWq2trZo3bo1du7ciVmzZgF4dVisRIkS6Natm7Tc33//jUuXLmVppF579OiRznnu3LkDAwMDVKhQQavu4OAAa2trqbF4U/43MTY2xk8//QQAuHfvHhYsWIBHjx5pNT7AP/O7Nm7ciPv370MIIT2XmJiYZbvlypXTevz6MFtCQoL0ugCgYsWKWsuVLl1a65Dc62UbNWqUZR9Vq1aVnq9Ro0aO+359+MrZ2TlLXaPRIDExEaVKlcq
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Gradient Boosting Metrics:\n",
|
|||
|
"Accuracy: 0.9185\n",
|
|||
|
"Precision: 0.9577\n",
|
|||
|
"Recall: 0.8718\n",
|
|||
|
"F1-Score: 0.9128\n",
|
|||
|
"ROC-AUC: 0.9745\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAAHHCAYAAAB3K7g2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABAZ0lEQVR4nO3dd3xUVf7/8fcQSCGkgBACAiGhF5EmggEBRZogEBRRgdBEUEABV2BXpQiygitVQUTpKr2IutJVMEsPIIgm0lyl9/R2fn/wyyyTApmQMFy+r+fjMQ8y55577+feCck7Z869YzPGGAEAAFhEAVcXAAAA4AzCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCwAAsBTCCywpKipKLVu2lJ+fn2w2m1avXp2n2z9+/LhsNpvmzZuXp9u1smbNmqlZs2Z5tr2YmBj17dtXgYGBstlseu211/Js23e7rL6/Ro8eLZvN5rqi7jGcz3sb4QW59vvvv+ull15SSEiIPD095evrq9DQUE2dOlXx8fH5uu/w8HAdPHhQ48eP18KFC1W/fv183d+d1LNnT9lsNvn6+mZ5HqOiomSz2WSz2fT+++87vf2//vpLo0ePVmRkZB5Um3vvvvuu5s2bpwEDBmjhwoXq3r17vu8zLS1NCxYs0BNPPKHixYurUKFCCggIUMuWLTV79mwlJibmew2u5OxrP2/ePPv3WvojICBAzZs317fffpu/xeZAXFycRo8era1bt7q6FNxpBsiFdevWGS8vL+Pv728GDx5sZs+ebWbMmGG6du1qChUqZF588cV823dcXJyRZP7xj3/k2z7S0tJMfHy8SUlJybd9ZCc8PNwULFjQuLm5mSVLlmRaPmrUKOPp6WkkmUmTJjm9/V27dhlJZu7cuU6tl5iYaBITE53eX3YefvhhExoammfbu5W4uDjTqlUrI8k88sgjZsKECeazzz4z77//vmnfvr1xc3MzvXv3viO1HDt2LNNrkJycbOLj4/N1v86+9nPnzjWSzNixY83ChQvNggULzKRJk0yNGjWMJPPVV1/la723cu7cOSPJjBo1KtOyO3E+4ToFXZaaYFnHjh1T165dFRQUpM2bN6tUqVL2Za+88oqio6P19ddf59v+z507J0ny9/fPt33YbDZ5enrm2/ZvxcPDQ6Ghofriiy/UpUsXh2Wff/65nnzySa1YseKO1BIXF6fChQvL3d09T7d79uxZVa9ePc+2l5KSorS0tGzrHDJkiL777jtNmTJFr776qsOyYcOGKSoqShs2bLitfdyOggULqmDBu/NHcps2bRxGN/v06aOSJUvqiy++ULt27VxYWfbu5vOJPODq9ATr6d+/v5Fktm/fnqP+ycnJZuzYsSYkJMS4u7uboKAgM3LkSJOQkODQLygoyDz55JPmxx9/NA899JDx8PAwwcHBZv78+fY+o0aNMpIcHkFBQcaY6yMW6V/fKH2dG61fv96EhoYaPz8/4+3tbSpXrmxGjhxpX57VX8bGGLNp0ybTuHFjU7hwYePn52eeeuopc/jw4Sz3FxUVZcLDw42fn5/x9fU1PXv2NLGxsbc8X+Hh4cbb29vMmzfPeHh4mEuXLtmX7dy500gyK1asyDTycuHCBTNs2DBTs2ZN4+3tbXx8fEzr1q1NZGSkvc+WLVsynb8bj7Np06amRo0aZvfu3aZJkybGy8vLvPrqq/ZlTZs2tW+rR48exsPDI9Pxt2zZ0vj7+5s///wzy+PLroZjx44ZY4w5c+aM6d27twkICDAeHh6mVq1aZt68eQ7bSH99Jk2aZCZPnmxCQkJMgQIFzL59+7Lc58mTJ42bm5tp3br1Tc68o5vtIzEx0bz11lumbt26xtfX1xQuXNg0btzYbN68OdN2Ll26ZMLDw42vr6/x8/MzPXr0MPv27cv0/ZXV96kxxixcuNDUrVvXeHp6mqJFi5pnn33WnDx50qFP+ut26NAh06xZM+Pl5WVKly5t3nvvPXufW732WUkfedm1a5dDe1pamvH19TU9evRwaI+JiTFDhw41ZcqUMe7u7qZy5cpm0qRJJi0tzaFfTn8m7Nq1y7Rs2dLcd999xtPT05QvX9706tXL4fXJ+EgfhcnqfEoyr7zyilm1apWpUaOGcXd3N9WrVzfffvttpmPfsmWLqVevnvHw8DAhISFm1qxZ2b5GuPOIpXDaV199pZCQED3yyCM56t+3b1/Nnz9fTz/9tIYNG6YdO3ZowoQJ+uWXX7Rq1SqHvtHR0Xr66afVp08fhYeH67PPPlPPnj1Vr1491ahRQ2FhYfL399eQIUP03HPPqW3btipSpIhT9R86dEjt2rVTrVq1NHbsWHl4eCg6Olrbt2+/6XobN25UmzZtFBISotGjRys+Pl7Tp09XaGio9u7dq/Llyzv079Kli4KDgzVhwgTt3btXc+bMUUBAgN57770c1RkWFqb+/ftr5cqV6t27t6Troy5Vq1ZV3bp1M/U/evSoVq9erWeeeUbBwcE6c+aMPv74YzVt2lSHDx9W6dKlVa1aNY0dO1Zvv/22+vXrpyZNmkiSw2t54cIFtWnTRl27dlW3bt1UsmTJLOubOnWqNm/erPDwcEVERMjNzU0ff/yx1q9fr4ULF6p06dJZrletWjUtXLhQQ4YMUZkyZTRs2DBJUokSJRQfH69mzZopOjpaAwcOVHBwsJYtW6aePXvq8uXLmUZM5s6dq4SEBPXr108eHh4qVqxYlvv89ttvlZqaqm7dut3irGeW1T6uXr2qOXPm6LnnntOLL76oa9eu6dNPP1WrVq20c+dO1a5dW5JkjFGHDh20bds29e/fX9WqVdOqVasUHh6eo32PHz9eb731lrp06aK+ffvq3Llzmj59uh599FHt27fPYfTx0qVLat26tcLCwtSlSxctX75cw4cP1wMPPKA2bdrk6LXPzpUrV3T+/HkZY3T27FlNnz5dMTExDufTGKOnnnpKW7ZsUZ8+fVS7dm199913+tvf/qY///xTkydPtvfNyc+Es2fPqmXLlipRooRGjBghf39/HT9+XCtXrpR0/ftl5syZGjBggDp16qSwsDBJUq1atW56LNu2bdPKlSv18ssvy8fHR9OmTVPnzp118uRJ3XfffZKkffv2qXXr1ipVqpTGjBmj1NRUjR07ViVKlMjBq4Y7wsXhCRZz5coVI8l06NAhR/0jIyONJNO3b1+H9tdff91IcvhLNSgoyEgyP/zwg73t7NmzxsPDwwwbNszeduNfxDfK6cjL5MmTjSRz7ty5bOvOauSldu3aJiAgwFy4cMHetn//flOgQAGHv0DT95dx/kSnTp3Mfffdl+0+bzwOb29vY4wxTz/9tHn88ceNMcakpqaawMBAM2bMmCzPQUJCgklNTc10HB4eHmbs2LH2tpvNe2jatKmRZGbNmpXlshtHXowx5rvvvjOSzLhx48zRo0dNkSJFTMeOHW95jMb8b6TtRlOmTDGSzKJFi+xtSUlJplGjRqZIkSLm6tWr9uOSZHx9fc3Zs2dvua8hQ4YYSQ6jUMZcn8dz7tw5++P8+fP2ZTfbR0pKSqb5P5cuXTIlS5Z0eN1Xr15tJJmJEyc6rNukSZNbjrwcP37cuLm5mfHjxzvs5+DBg6ZgwYIO7emv24IFCxyOLTAw0HTu3Nnelts5LxkfHh4emUbD0o913LhxDu1PP/20sdlsJjo62hiT858Jq1atynLU50Y3m/OS3ciLu7u7vRZjrv8flmSmT59ub2vfvr0pXLiww+hhVFSUKViwICMvdwmuNoJTrl69Kkny8fHJUf9vvvlGkjR06FCH9vS/tjPOjalevbr9L0Lp+l9XVapU0dGjR3Ndc0bpf62uWbNGaWlpOVrn1KlTioyMVM+ePR3+uq9Vq5aeeOIJ+3HeqH///g7PmzRpogsXLtjPYU48//zz2rp1q06fPq3Nmzfr9OnTev7557Ps6+HhoQIFrv+XTk1N1YULF1SkSBFVqVJFe/fuzfE+PTw81KtXrxz1bdmypV566SWNHTtWYWFh8vT01Mcff5zjfWX0zTffKDAwUM8
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAkIAAAHHCAYAAABTMjf2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4PUlEQVR4nO3dd1hT5/sG8DuMsEEUEUSU4d6KEwcOFKp1V3GLVeuus19nHXXVurDuUcVZR13URasVJ2qL4haquAUURRAEIsn7+8MfqRFQgoHDuD/XxdXmyRl3EpCH97znHJkQQoCIiIioENKTOgARERGRVNgIERERUaHFRoiIiIgKLTZCREREVGixESIiIqJCi40QERERFVpshIiIiKjQYiNEREREhRYbISIiIiq02AgRkdYWLFgAFxcX6Ovro2bNmlLHyVVOTk7w9fVVPw4KCoJMJkNQUJBkmQoSvp+U29gIUb7j7+8PmUym/jIwMICDgwN8fX3x5MmTDNcRQmDLli1o2rQpihQpAlNTU1SrVg0//PADEhMTM93Xvn378MUXX8DGxgZyuRwlS5ZEt27d8Ndff2Upa3JyMpYsWYL69evDysoKxsbGKF++PEaMGIHw8PBsvX6p/fHHH/jf//6HRo0aYePGjZg7d26u7Pf06dPo1q0bHBwcIJfLYWVlhfr16+OHH35AdHR0rmSQ0ty5c7F///4sLXv//n2NnxGZTAZLS0vUrFkTy5cvh1KpzNmwWbBy5Ur4+/tLHYMIMt5rjPIbf39/9O/fHz/88AOcnZ2RnJyM8+fPw9/fH05OTrh+/TqMjY3VyyuVSvTs2RO7du1CkyZN0LlzZ5iamuL06dPYvn07KleujGPHjqFEiRLqdYQQ+Prrr+Hv749atWrhq6++gp2dHSIjI7Fv3z6EhITg7NmzcHd3zzRnTEwMvL29ERISgi+//BKenp4wNzdHWFgYduzYgaioKCgUihx9r3LCxIkTsWDBAiQlJUEul+fKPqdNm4ZZs2bBxcUF3bt3h4uLC5KTkxESEoI9e/bAxsYGd+/ezZUsTk5OaNasmfqXuEqlgkKhgFwuh55ezv1taW5ujq+++ipLzcP9+/fh7OyMHj16oE2bNgCAuLg4HD58GIcPH8b48eOxYMGCHMuaFVWrVoWNjU26kZ/cej+J1ARRPrNx40YBQPz9998a9QkTJggAYufOnRr1uXPnCgBi/Pjx6bYVEBAg9PT0hLe3t0Z9wYIFAoAYPXq0UKlU6dbbvHmzuHDhwkdztm3bVujp6Ynffvst3XPJycli3LhxH10/q96+fStSUlJ0sq2s6N+/vzAzM9PZ9lQqlXjz5k2mz+/YsUMAEN26dcvwdb569UpMnz79s/ahjTJlyoh+/frpZFvaMDMzy/J+7927JwCIBQsWaNRVKpWoW7euKFmyZA4k1E6VKlWEh4eH1DGIBBshyncya4QOHjwoAIi5c+eqa2/evBHW1taifPny4u3btxlur3///gKACA4OVq9TtGhRUbFiRZGampqtjOfPnxcAxKBBg7K0vIeHR4a/FPr16yfKlCmjfvz+L7glS5YIFxcXoaenJ86fPy/09fXFjBkz0m3j9u3bAoBYtmyZuhYbGytGjRolSpUqJeRyuXB1dRU//vijUCqVH80JIN3Xxo0bhRDvGrIffvhBuLi4CLlcLsqUKSMmTZokkpOTNbZRpkwZ0bZtW3H06FHh5uYmjIyMxJIlSzLdZ/ny5YWNjY14/fr1R7NldR8bNmwQzZs3F8WLFxdyuVxUqlRJrFy5Mt02VCqVmDVrlnBwcBAmJiaiWbNm4vr16+kaoRMnTggA4sSJExrrnz9/Xnh5eQlLS0thYmIimjZtKs6cOaOxzPTp0wUA8e+//4p+/foJKysrYWlpKXx9fUViYqJ6uYze9481RZk1QkII8eWXX4rSpUunq69YsUJUrlxZyOVyYW9vL4YNGyZiY2PTLbdr1y5Ru3ZtYWxsLIoVKyZ69eolHj9+rLFMZGSk8PX1FQ4ODkIulws7OzvRvn17ce/ePSHEu8/nw9eT9v2f0fvp4eEhqlSpIm7cuCGaNWsmTExMRMmSJcX8+fPT5bt//75o166dMDU1FcWLFxejR48WR48ezfAzIhJCCINcGHQiyhX3798HAFhbW6trZ86cQWxsLEaNGgUDg4y/3fv27YuNGzfi4MGDaNCgAc6cOYOXL19i9OjR0NfXz1aWgIAAAECfPn2ytf6nbNy4EcnJyfjmm29gZGQEe3t7eHh4YNeuXZg+fbrGsjt37oS+vj66du0KAHjz5g08PDzw5MkTDB48GKVLl8a5c+cwadIkREZGws/PL9P9btmyBWvXrsXFixexfv16AFAfHhw4cCA2bdqEr776CuPGjcOFCxcwb9483Lp1C/v27dPYTlhYGHr06IHBgwdj0KBBqFChQob7Cw8PR3h4OAYOHAhzc3Ot3qPM9rFq1SpUqVIF7du3h4GBAX7//XcMGzYMKpUKw4cPV68/bdo0zJ49G23atEGbNm1w6dIltG7dOkuHM//66y988cUXcHNzw/Tp06Gnp4eNGzeiRYsWOH36NOrVq6exfLdu3eDs7Ix58+bh0qVLWL9+PWxtbTF//nwA7973gQMHol69evjmm28AAK6urp/M8ebNG8TExAAA4uPjceTIERw9ehSTJk3SWG7GjBmYOXMmPD09MXToUISFhWHVqlX4+++/cfbsWRgaGgL477B03bp1MW/ePERHR2Pp0qU4e/YsLl++jCJFigAAunTpghs3bmDkyJFwcnLCs2fP8Oeff+Lhw4dwcnKCn58fRo4cCXNzc0yZMgUANA5NZyQ2Nhbe3t7o3LkzunXrht9++w0TJkxAtWrV8MUXXwAAEhMT0aJFC0RGRmLUqFGws7PD9u3bceLEiU++V1SISd2JEWkrbUTo2LFj4vnz5+LRo0fit99+E8WLFxdGRkbi0aNH6mX9/PwEALFv375Mt/fy5UsBQHTu3FkIIcTSpUs/uc6ndOrUSQDI8C/qjGg7ImRpaSmePXumseyaNWsEAHHt2jWNeuXKlUWLFi3Uj2fNmiXMzMxEeHi4xnITJ04U+vr64uHDhx/N2q9fv3SHxkJDQwUAMXDgQI36+PHjBQDx119/qWtpowFHjx796H6EEOLAgQMCgPDz89Ooq1Qq8fz5c42v90f8PraPjA6ReXl5CRcXF/XjZ8+eCblcLtq2batxaHTy5MnpRmM+HMFQqVSiXLlywsvLS2PdN2/eCGdnZ9GqVSt1LW1E6Ouvv9bI06lTJ1GsWDGNWnYOjWX0NXToUI1caa+1devWGiOCy5cvFwDEhg0bhBBCKBQKYWtrK6pWrSqSkpLUy6WNxE6bNk0I8W60EZmMRr0vs0NjmY0IARCbN29W11JSUoSdnZ3o0qWLurZo0SIBQOzfv19dS0pKEhUrVuSIEGWKM9Eo3/L09ETx4sXh6OiIr776CmZmZggICECpUqXUy7x+/RoAYGFhkel20p6Lj4/X+O/H1vkUXWzjY7p06YLixYtr1Dp37gwDAwPs3LlTXbt+/Tpu3rwJHx8fdW337t1o0qQJrK2tERMTo/7y9PSEUqnEqVOntM5z+PBhAMDYsWM16uPGjQMAHDp0SKPu7OwMLy+vT2437X38cDQoLi4OxYsX1/gKDQ3N0j5MTEw0thMTEwMPDw9EREQgLi4OAHDs2DEoFAqMHDkSMplMvfzo0aM/mTk0NBT//vsvevbsiRcvXqjf38TERLRs2RKnTp2CSqXSWGfIkCEaj5s0aYIXL16oX392ffPNN/jzzz/x559/Ys+ePRg+fDjWrFmj8TmlvdbRo0drTE4eNGgQLC0t1Z/dP//8g2fPnmHYsGEaJyO0bdsWFStWVC9nYmICuVyOoKAgxMbGflb+95mbm6N3797qx3K5HPXq1UNERIS6dvToUTg4OKB9+/bqmrGxMQYNGqSzHFTw8NAY5VsrVqxA+fLlERcXhw0bNuDUqVMwMjLSWCatEUlriDLyYbNkaWn5yXU+5f1tpB0u0CVnZ+d0NRsbG7Rs2RK7du3CrFmzALw7LGZgYIDOnTu
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.metrics import confusion_matrix, roc_curve, auc\n",
|
|||
|
"\n",
|
|||
|
"def plot_confusion_matrix(y_true, y_pred, title):\n",
|
|||
|
" cm = confusion_matrix(y_true, y_pred)\n",
|
|||
|
" sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', cbar=False)\n",
|
|||
|
" plt.title(title)\n",
|
|||
|
" plt.xlabel('Предсказанные значения')\n",
|
|||
|
" plt.ylabel('Истинные значения')\n",
|
|||
|
" plt.show()\n",
|
|||
|
"\n",
|
|||
|
"def plot_roc_curve(y_true, y_pred_proba, title):\n",
|
|||
|
" fpr, tpr, _ = roc_curve(y_true, y_pred_proba)\n",
|
|||
|
" roc_auc = auc(fpr, tpr)\n",
|
|||
|
" plt.figure()\n",
|
|||
|
" plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')\n",
|
|||
|
" plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')\n",
|
|||
|
" plt.xlim([0.0, 1.0])\n",
|
|||
|
" plt.ylim([0.0, 1.05])\n",
|
|||
|
" plt.xlabel('False Positive Rate')\n",
|
|||
|
" plt.ylabel('True Positive Rate')\n",
|
|||
|
" plt.title(title)\n",
|
|||
|
" plt.legend(loc=\"lower right\")\n",
|
|||
|
" plt.show()\n",
|
|||
|
"\n",
|
|||
|
"def evaluate_and_plot_model(model, X_test, y_test, model_name):\n",
|
|||
|
" y_pred = model.predict(X_test)\n",
|
|||
|
" y_pred_proba = model.predict_proba(X_test)[:, 1]\n",
|
|||
|
" \n",
|
|||
|
" accuracy = accuracy_score(y_test, y_pred)\n",
|
|||
|
" precision = precision_score(y_test, y_pred, pos_label=1)\n",
|
|||
|
" recall = recall_score(y_test, y_pred, pos_label=1)\n",
|
|||
|
" f1 = f1_score(y_test, y_pred, pos_label=1)\n",
|
|||
|
" roc_auc = roc_auc_score(y_test, y_pred_proba)\n",
|
|||
|
" \n",
|
|||
|
" print(f\"{model_name} Metrics:\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy:.4f}\")\n",
|
|||
|
" print(f\"Precision: {precision:.4f}\")\n",
|
|||
|
" print(f\"Recall: {recall:.4f}\")\n",
|
|||
|
" print(f\"F1-Score: {f1:.4f}\")\n",
|
|||
|
" print(f\"ROC-AUC: {roc_auc:.4f}\")\n",
|
|||
|
" \n",
|
|||
|
" plot_confusion_matrix(y_test, y_pred, f'Confusion Matrix for {model_name}')\n",
|
|||
|
" plot_roc_curve(y_test, y_pred_proba, f'ROC Curve for {model_name}')\n",
|
|||
|
"\n",
|
|||
|
"evaluate_and_plot_model(logreg_best_model, X_test, y_test, 'Logistic Regression')\n",
|
|||
|
"evaluate_and_plot_model(rf_best_model, X_test, y_test, 'Random Forest')\n",
|
|||
|
"evaluate_and_plot_model(gb_best_model, X_test, y_test, 'Gradient Boosting')"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aimenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|