From beb0ee0ab82ed82534d8fc1a9b79c8fb3dc18a09 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=D0=9F=D0=BE=D0=BB=D0=B8=D0=BD=D0=B0=20=D0=A7=D1=83=D0=B1?=
 =?UTF-8?q?=D1=8B=D0=BA=D0=B8=D0=BD=D0=B0?= <polinakill04@gmail.com>
Date: Sat, 9 Nov 2024 13:27:56 +0400
Subject: [PATCH] =?UTF-8?q?=D0=B5=D1=89=D0=B5=20=D0=B8=D0=B7=D0=BC=D0=B5?=
 =?UTF-8?q?=D0=BD=D0=B5=D0=BD=D0=B8=D1=8F=20=D0=BD=D0=B5=20=D0=B2=D1=81?=
 =?UTF-8?q?=D0=B5=20=D0=B7=D0=B0=D0=BB=D0=B8=D0=BB=D0=BE=D1=81=D1=8C=20?=
 =?UTF-8?q?=D0=BF=D0=BE=D1=87=D0=B5=D0=BC=D1=83=20=D1=82=D0=BE=3F=3F?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 lab_4/lab4.ipynb | 123 ++++++++++++++++++++++-------------------------
 1 file changed, 58 insertions(+), 65 deletions(-)

diff --git a/lab_4/lab4.ipynb b/lab_4/lab4.ipynb
index a5fa605..e0eabee 100644
--- a/lab_4/lab4.ipynb
+++ b/lab_4/lab4.ipynb
@@ -586,39 +586,12 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Проведем деление на выборки и создание ориентира"
+    "Проведем деление на выборки"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Размер обучающей выборки: (743, 16)\n",
-      "Размер тестовой выборки: (186, 16)\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
-    "target = 'HeartDisease'\n",
-    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(data[features], data[target], test_size=0.2, random_state=42)\n",
-    "\n",
-    "print(\"Размер обучающей выборки:\", X_train.shape)\n",
-    "print(\"Размер тестовой выборки:\", X_test.shape)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 63,
    "metadata": {},
    "outputs": [
     {
@@ -630,7 +603,60 @@
       "1    796\n",
       "Name: count, dtype: int64\n",
       "Размер обучающей выборки: (1273, 49)\n",
-      "Размер тестовой выборки: (319, 49)\n",
+      "Размер тестовой выборки: (319, 49)\n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "from imblearn.over_sampling import SMOTE\n",
+    "from sklearn.compose import ColumnTransformer\n",
+    "from sklearn.model_selection import train_test_split\n",
+    "from sklearn.pipeline import Pipeline\n",
+    "from sklearn.preprocessing import LabelEncoder, OneHotEncoder, StandardScaler\n",
+    "\n",
+    "features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
+    "target = 'HeartDisease'\n",
+    "\n",
+    "# Encode the target labels as integers; this overwrites the target column of `data` in place.\n",
+    "label_encoder = LabelEncoder()\n",
+    "data[target] = label_encoder.fit_transform(data[target])\n",
+    "\n",
+    "categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
+    "numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
+    "\n",
+    "categorical_transformer = Pipeline(steps=[('onehot', OneHotEncoder(handle_unknown='ignore'))])\n",
+    "numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])\n",
+    "\n",
+    "preprocessor = ColumnTransformer(\n",
+    "    transformers=[\n",
+    "        ('num', numeric_transformer, numeric_features),\n",
+    "        ('cat', categorical_transformer, categorical_features)\n",
+    "    ])\n",
+    "\n",
+    "X = preprocessor.fit_transform(data[features])\n",
+    "y = data[target]\n",
+    "\n",
+    "# NOTE(review): the preprocessor and SMOTE are fit on the full dataset before train_test_split, so X_test contains synthetic rows and shares fitted statistics with X_train; reported metrics are likely optimistic. Consider splitting first and resampling only the training part.\n",
+    "smote = SMOTE(random_state=42)\n",
+    "X_resampled, y_resampled = smote.fit_resample(X, y)\n",
+    "print(pd.Series(y_resampled).value_counts())\n",
+    "\n",
+    "X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n",
+    "\n",
+    "print(\"Размер обучающей выборки:\", X_train.shape)\n",
+    "print(\"Размер тестовой выборки:\", X_test.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 64,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
       "Лучшие гиперпараметры для логистической регрессии:\n",
       "{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n",
       "Accuracy: 0.7398\n",
@@ -639,14 +665,14 @@
       "F1-Score: 0.7398\n",
       "ROC-AUC: 0.8338\n",
       "Лучшие гиперпараметры для случайного леса:\n",
-      "{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(32), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
+      "{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(25), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n",
       "Accuracy: 0.9122\n",
       "Precision: 0.9571\n",
       "Recall: 0.8590\n",
       "F1-Score: 0.9054\n",
       "ROC-AUC: 0.9773\n",
       "Лучшие гиперпараметры для градиентного бустинга:\n",
-      "{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(96), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
+      "{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(52), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n",
       "Accuracy: 0.9185\n",
       "Precision: 0.9577\n",
       "Recall: 0.8718\n",
@@ -692,39 +718,6 @@
     "from scipy.stats import uniform, randint\n",
     "from sklearn.model_selection import RandomizedSearchCV\n",
     "\n",
-    "features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
-    "target = 'HeartDisease'\n",
-    "\n",
-    "label_encoder = LabelEncoder()\n",
-    "data[target] = label_encoder.fit_transform(data[target])\n",
-    "\n",
-    "categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n",
-    "numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n",
-    "\n",
-    "categorical_transformer = Pipeline(steps=[\n",
-    "    ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
-    "])\n",
-    "\n",
-    "numeric_transformer = Pipeline(steps=[\n",
-    "    ('scaler', StandardScaler())\n",
-    "])\n",
-    "\n",
-    "preprocessor = ColumnTransformer(\n",
-    "    transformers=[\n",
-    "        ('num', numeric_transformer, numeric_features),\n",
-    "        ('cat', categorical_transformer, categorical_features)\n",
-    "    ])\n",
-    "\n",
-    "X = preprocessor.fit_transform(data[features])\n",
-    "y = data[target]\n",
-    "\n",
-    "smote = SMOTE(random_state=42)\n",
-    "X_resampled, y_resampled = smote.fit_resample(X, y)\n",
-    "\n",
-    "print(pd.Series(y_resampled).value_counts())\n",
-    "\n",
-    "X_train, X_test, y_train, y_test = train_test_split(X_resampled, y_resampled, test_size=0.2, random_state=42)\n",
-    "\n",
     "def evaluate_model(model, X_test, y_test):\n",
     "    y_pred = model.predict(X_test)\n",
     "    y_pred_proba = model.predict_proba(X_test)[:, 1]\n",