From 67a9ef177de1d97cde74c2c746c07d1ccbb1ef1e Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9F=D0=BE=D0=BB=D0=B8=D0=BD=D0=B0=20=D0=A7=D1=83=D0=B1?=
 =?UTF-8?q?=D1=8B=D0=BA=D0=B8=D0=BD=D0=B0?= <polinakill04@gmail.com>
Date: Thu, 12 Dec 2024 19:07:11 +0400
Subject: [PATCH] =?UTF-8?q?4=20=D0=BB=D0=B0=D0=B1=D1=83=20=D0=B2=D0=B5?=
 =?UTF-8?q?=D1=80=D0=BD=D1=83=D0=BB=D0=B0?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lab_4/lab4.ipynb | 150 ++++++++++++++++++++++++++++-------------------
 1 file changed, 90 insertions(+), 60 deletions(-)

diff --git a/lab_4/lab4.ipynb b/lab_4/lab4.ipynb
index 7d6646e..a5fa605 100644
--- a/lab_4/lab4.ipynb
+++ b/lab_4/lab4.ipynb
@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [
     {
@@ -365,14 +365,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 34,
    "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Размер данных до удаления выбросов: (929, 18)\n",
+      "Размер данных после удаления выбросов: (929, 18)\n"
+     ]
+    }
+   ],
    "source": [
     "import pandas as pd\n",
     "from scipy import stats\n",
     "\n",
-    "data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(5000)\n",
+    "data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
     "\n",
     "numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
     "\n",
@@ -380,7 +389,10 @@
     "\n",
     "threshold = 3\n",
     "\n",
-    "data = data[(z_scores < threshold).all(axis=1)]"
+    "data_cleaned = data[(z_scores < threshold).all(axis=1)]\n",
+    "print(\"Размер данных до удаления выбросов:\", data.shape)  # report the unfiltered size before `data` is rebound\n",
+    "print(\"Размер данных после удаления выбросов:\", data_cleaned.shape)\n",
+    "data = data_cleaned  # rebind only after both sizes are printed; later cells read `data`"
    ]
   },
   {
@@ -392,22 +404,23 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 19,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Размер обучающей выборки: (3440, 16)\n",
-      "Размер тестовой выборки: (861, 16)\n",
-      "Baseline MAE: 0.9691505955757231\n",
-      "Baseline MSE: 1.5758684447764715\n",
-      "Baseline R²: -0.0006615872431061653\n"
+      "Размер обучающей выборки: (255836, 16)\n",
+      "Размер тестовой выборки: (63959, 16)\n",
+      "Baseline MAE: 1.0154101277944922\n",
+      "Baseline MSE: 2.085820163563156\n",
+      "Baseline R²: -7.204157852269688e-05\n"
      ]
     }
    ],
    "source": [
+    "import pandas as pd\n",
     "from sklearn.model_selection import train_test_split\n",
     "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
     "\n",
@@ -436,7 +449,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 21,
    "metadata": {},
    "outputs": [
     {
@@ -450,13 +463,16 @@
     }
    ],
    "source": [
-    "\n",
+    "import pandas as pd\n",
+    "from scipy import stats\n",
+    "from sklearn.model_selection import train_test_split, RandomizedSearchCV\n",
     "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
     "from sklearn.compose import ColumnTransformer\n",
     "from sklearn.pipeline import Pipeline\n",
     "from sklearn.linear_model import LinearRegression\n",
     "from sklearn.tree import DecisionTreeRegressor\n",
     "from sklearn.ensemble import GradientBoostingRegressor\n",
+    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
     "\n",
     "categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
     "numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
@@ -498,7 +514,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 15,
+   "execution_count": 23,
    "metadata": {},
    "outputs": [
     {
@@ -506,19 +522,19 @@
      "output_type": "stream",
      "text": [
       "Model: Linear Regression\n",
-      "MAE: 0.9720871556511324\n",
-      "MSE: 1.512023769950756\n",
-      "R²: 0.039879178618337674\n",
+      "MAE: 0.999721882988516\n",
+      "MSE: 2.007024248723743\n",
+      "R²: 0.03770762552704621\n",
       "\n",
       "Model: Decision Tree\n",
-      "MAE: 1.4547038327526132\n",
-      "MSE: 3.6193379790940767\n",
-      "R²: -1.2982454524896956\n",
+      "MAE: 1.405790088390023\n",
+      "MSE: 4.053338792508978\n",
+      "R²: -0.9434229624615185\n",
       "\n",
       "Model: Gradient Boosting\n",
-      "MAE: 0.9718200023112538\n",
-      "MSE: 1.5324871722382205\n",
-      "R²: 0.02688511132722371\n",
+      "MAE: 0.9962143800804221\n",
+      "MSE: 1.9983219431838193\n",
+      "R²: 0.041880052575063775\n",
       "\n"
      ]
     }
@@ -582,12 +598,13 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "Размер обучающей выборки: (3440, 16)\n",
-      "Размер тестовой выборки: (861, 16)\n"
+      "Размер обучающей выборки: (743, 16)\n",
+      "Размер тестовой выборки: (186, 16)\n"
      ]
     }
    ],
    "source": [
+    "import pandas as pd\n",
     "from sklearn.model_selection import train_test_split\n",
     "\n",
     "features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
@@ -601,7 +618,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {
@@ -609,42 +626,55 @@
      "output_type": "stream",
      "text": [
       "HeartDisease\n",
-      "0    3900\n",
-      "1    3900\n",
+      "0    796\n",
+      "1    796\n",
       "Name: count, dtype: int64\n",
+      "Размер обучающей выборки: (1273, 49)\n",
+      "Размер тестовой выборки: (319, 49)\n",
       "Лучшие гиперпараметры для логистической регрессии:\n",
-      "{'classifier__C': np.float64(0.26020637194111806), 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear'}\n",
-      "Accuracy: 0.7667\n",
-      "Precision: 0.7470\n",
-      "Recall: 0.8028\n",
-      "F1-Score: 0.7739\n",
-      "ROC-AUC: 0.8408\n",
+      "{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n",
+      "Accuracy: 0.7398\n",
+      "Precision: 0.7239\n",
+      "Recall: 0.7564\n",
+      "F1-Score: 0.7398\n",
+      "ROC-AUC: 0.8338\n",
       "Лучшие гиперпараметры для случайного леса:\n",
-      "{'classifier__bootstrap': False, 'classifier__max_depth': np.int64(98), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 13, 'classifier__n_estimators': 413}\n",
-      "Accuracy: 0.9487\n",
-      "Precision: 0.9665\n",
-      "Recall: 0.9291\n",
-      "F1-Score: 0.9474\n",
-      "ROC-AUC: 0.9874\n"
-     ]
-    },
-    {
-     "ename": "KeyboardInterrupt",
-     "evalue": "",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[1;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[17], line 125\u001b[0m\n\u001b[0;32m    120\u001b[0m gb_pipeline \u001b[38;5;241m=\u001b[39m Pipeline([\n\u001b[0;32m    121\u001b[0m     (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclassifier\u001b[39m\u001b[38;5;124m'\u001b[39m, GradientBoostingClassifier(random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m))\n\u001b[0;32m    122\u001b[0m ])\n\u001b[0;32m    124\u001b[0m gb_random_search \u001b[38;5;241m=\u001b[39m RandomizedSearchCV(gb_pipeline, param_distributions\u001b[38;5;241m=\u001b[39mgb_param_dist, n_iter\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m, cv\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m--> 125\u001b[0m \u001b[43mgb_random_search\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    127\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mЛучшие гиперпараметры для градиентного бустинга:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m    128\u001b[0m \u001b[38;5;28mprint\u001b[39m(gb_random_search\u001b[38;5;241m.\u001b[39mbest_params_)\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1466\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1469\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1470\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1471\u001b[0m     )\n\u001b[0;32m   1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1019\u001b[0m, in \u001b[0;36mBaseSearchCV.fit\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m   1013\u001b[0m     results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format_results(\n\u001b[0;32m   1014\u001b[0m         all_candidate_params, n_splits, all_out, all_more_results\n\u001b[0;32m   1015\u001b[0m     )\n\u001b[0;32m   1017\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m results\n\u001b[1;32m-> 1019\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevaluate_candidates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1021\u001b[0m \u001b[38;5;66;03m# multimetric is determined here because in the case of a callable\u001b[39;00m\n\u001b[0;32m   1022\u001b[0m \u001b[38;5;66;03m# self.scoring the return type is only known after calling\u001b[39;00m\n\u001b[0;32m   1023\u001b[0m first_test_score \u001b[38;5;241m=\u001b[39m all_out[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_scores\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1960\u001b[0m, in \u001b[0;36mRandomizedSearchCV._run_search\u001b[1;34m(self, evaluate_candidates)\u001b[0m\n\u001b[0;32m   1958\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_search\u001b[39m(\u001b[38;5;28mself\u001b[39m, evaluate_candidates):\n\u001b[0;32m   1959\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Search n_iter candidates from param_distributions\"\"\"\u001b[39;00m\n\u001b[1;32m-> 1960\u001b[0m     \u001b[43mevaluate_candidates\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1961\u001b[0m \u001b[43m        \u001b[49m\u001b[43mParameterSampler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1962\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparam_distributions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_iter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\n\u001b[0;32m   1963\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1964\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:965\u001b[0m, in \u001b[0;36mBaseSearchCV.fit.<locals>.evaluate_candidates\u001b[1;34m(candidate_params, cv, more_results)\u001b[0m\n\u001b[0;32m    957\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m    958\u001b[0m     \u001b[38;5;28mprint\u001b[39m(\n\u001b[0;32m    959\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFitting \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;124m folds for each of \u001b[39m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;124m candidates,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    960\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m totalling \u001b[39m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m fits\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[0;32m    961\u001b[0m             n_splits, n_candidates, n_candidates \u001b[38;5;241m*\u001b[39m n_splits\n\u001b[0;32m    962\u001b[0m         )\n\u001b[0;32m    963\u001b[0m     )\n\u001b[1;32m--> 965\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mparallel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    966\u001b[0m \u001b[43m    \u001b[49m\u001b[43mdelayed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_fit_and_score\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    967\u001b[0m \u001b[43m        \u001b[49m\u001b[43mclone\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_estimator\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    968\u001b[0m \u001b[43m        \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    969\u001b[0m \u001b[43m        \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    970\u001b[0m \u001b[43m        
\u001b[49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    971\u001b[0m \u001b[43m        \u001b[49m\u001b[43mtest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    972\u001b[0m \u001b[43m        \u001b[49m\u001b[43mparameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    973\u001b[0m \u001b[43m        \u001b[49m\u001b[43msplit_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msplit_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_splits\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    974\u001b[0m \u001b[43m        \u001b[49m\u001b[43mcandidate_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcand_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_candidates\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    975\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_and_score_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    976\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    977\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mcand_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43msplit_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mproduct\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    978\u001b[0m 
\u001b[43m        \u001b[49m\u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcandidate_params\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    979\u001b[0m \u001b[43m        \u001b[49m\u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrouted_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplitter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    980\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    981\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    983\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(out) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m    984\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    985\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo fits were performed. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    986\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWas the CV iterator empty? \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    987\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWere there no candidates?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    988\u001b[0m     )\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\utils\\parallel.py:74\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m     69\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config()\n\u001b[0;32m     70\u001b[0m iterable_with_config \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m     71\u001b[0m     (_with_config(delayed_func, config), args, kwargs)\n\u001b[0;32m     72\u001b[0m     \u001b[38;5;28;01mfor\u001b[39;00m delayed_func, args, kwargs \u001b[38;5;129;01min\u001b[39;00m iterable\n\u001b[0;32m     73\u001b[0m )\n\u001b[1;32m---> 74\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43miterable_with_config\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:2007\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m   2001\u001b[0m \u001b[38;5;66;03m# The first item from the output is blank, but it makes the interpreter\u001b[39;00m\n\u001b[0;32m   2002\u001b[0m \u001b[38;5;66;03m# progress until it enters the Try/Except block of the generator and\u001b[39;00m\n\u001b[0;32m   2003\u001b[0m \u001b[38;5;66;03m# reaches the first `yield` statement. This starts the asynchronous\u001b[39;00m\n\u001b[0;32m   2004\u001b[0m \u001b[38;5;66;03m# dispatch of the tasks to the workers.\u001b[39;00m\n\u001b[0;32m   2005\u001b[0m \u001b[38;5;28mnext\u001b[39m(output)\n\u001b[1;32m-> 2007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_generator \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:1650\u001b[0m, in \u001b[0;36mParallel._get_outputs\u001b[1;34m(self, iterator, pre_dispatch)\u001b[0m\n\u001b[0;32m   1647\u001b[0m     \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[0;32m   1649\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backend\u001b[38;5;241m.\u001b[39mretrieval_context():\n\u001b[1;32m-> 1650\u001b[0m         \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retrieve()\n\u001b[0;32m   1652\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mGeneratorExit\u001b[39;00m:\n\u001b[0;32m   1653\u001b[0m     \u001b[38;5;66;03m# The generator has been garbage collected before being fully\u001b[39;00m\n\u001b[0;32m   1654\u001b[0m     \u001b[38;5;66;03m# consumed. This aborts the remaining tasks if possible and warn\u001b[39;00m\n\u001b[0;32m   1655\u001b[0m     \u001b[38;5;66;03m# the user if necessary.\u001b[39;00m\n\u001b[0;32m   1656\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n",
-      "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:1762\u001b[0m, in \u001b[0;36mParallel._retrieve\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m   1757\u001b[0m \u001b[38;5;66;03m# If the next job is not ready for retrieval yet, we just wait for\u001b[39;00m\n\u001b[0;32m   1758\u001b[0m \u001b[38;5;66;03m# async callbacks to progress.\u001b[39;00m\n\u001b[0;32m   1759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ((\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m   1760\u001b[0m     (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mget_status(\n\u001b[0;32m   1761\u001b[0m         timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout) \u001b[38;5;241m==\u001b[39m TASK_PENDING)):\n\u001b[1;32m-> 1762\u001b[0m     \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1763\u001b[0m     \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m   1765\u001b[0m \u001b[38;5;66;03m# We need to be careful: the job list can be filling up as\u001b[39;00m\n\u001b[0;32m   1766\u001b[0m \u001b[38;5;66;03m# we empty it and Python list are not thread-safe by\u001b[39;00m\n\u001b[0;32m   1767\u001b[0m \u001b[38;5;66;03m# default hence the use of the lock\u001b[39;00m\n",
-      "\u001b[1;31mKeyboardInterrupt\u001b[0m: "
+      "{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(32), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
+      "Accuracy: 0.9122\n",
+      "Precision: 0.9571\n",
+      "Recall: 0.8590\n",
+      "F1-Score: 0.9054\n",
+      "ROC-AUC: 0.9773\n",
+      "Лучшие гиперпараметры для градиентного бустинга:\n",
+      "{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(96), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
+      "Accuracy: 0.9185\n",
+      "Precision: 0.9577\n",
+      "Recall: 0.8718\n",
+      "F1-Score: 0.9128\n",
+      "ROC-AUC: 0.9745\n",
+      "\n",
+      "Результаты моделей:\n",
+      "\n",
+      "Logistic Regression:\n",
+      "Accuracy: 0.7398\n",
+      "Precision: 0.7239\n",
+      "Recall: 0.7564\n",
+      "F1: 0.7398\n",
+      "Roc_auc: 0.8338\n",
+      "\n",
+      "Random Forest:\n",
+      "Accuracy: 0.9122\n",
+      "Precision: 0.9571\n",
+      "Recall: 0.8590\n",
+      "F1: 0.9054\n",
+      "Roc_auc: 0.9773\n",
+      "\n",
+      "Gradient Boosting:\n",
+      "Accuracy: 0.9185\n",
+      "Precision: 0.9577\n",
+      "Recall: 0.8718\n",
+      "F1: 0.9128\n",
+      "Roc_auc: 0.9745\n"
      ]
     }
    ],
@@ -797,7 +827,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": null,
    "metadata": {},
    "outputs": [
     {