From 67a9ef177de1d97cde74c2c746c07d1ccbb1ef1e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=D0=9F=D0=BE=D0=BB=D0=B8=D0=BD=D0=B0=20=D0=A7=D1=83=D0=B1?= =?UTF-8?q?=D1=8B=D0=BA=D0=B8=D0=BD=D0=B0?= Date: Thu, 12 Dec 2024 19:07:11 +0400 Subject: [PATCH] =?UTF-8?q?4=20=D0=BB=D0=B0=D0=B1=D1=83=20=D0=B2=D0=B5?= =?UTF-8?q?=D1=80=D0=BD=D1=83=D0=BB=D0=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lab_4/lab4.ipynb | 150 ++++++++++++++++++++++++++++------------------- 1 file changed, 90 insertions(+), 60 deletions(-) diff --git a/lab_4/lab4.ipynb b/lab_4/lab4.ipynb index 7d6646e..a5fa605 100644 --- a/lab_4/lab4.ipynb +++ b/lab_4/lab4.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -365,14 +365,23 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 34, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Размер данных до удаления выбросов: (929, 18)\n", + "Размер данных после удаления выбросов: (929, 18)\n" + ] + } + ], "source": [ "import pandas as pd\n", "from scipy import stats\n", "\n", - "data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(5000)\n", + "data = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n", "\n", "numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n", "\n", @@ -380,7 +389,10 @@ "\n", "threshold = 3\n", "\n", - "data = data[(z_scores < threshold).all(axis=1)]" + "data_cleaned = data[(z_scores < threshold).all(axis=1)]\n", + "data = data_cleaned\n", + "print(\"Размер данных до удаления выбросов:\", data.shape)\n", + "print(\"Размер данных после удаления выбросов:\", data_cleaned.shape)" ] }, { @@ -392,22 +404,23 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Размер обучающей выборки: (3440, 16)\n", - "Размер тестовой выборки: (861, 16)\n", - "Baseline MAE: 0.9691505955757231\n", - "Baseline MSE: 1.5758684447764715\n", - "Baseline R²: -0.0006615872431061653\n" + "Размер обучающей выборки: (255836, 16)\n", + "Размер тестовой выборки: (63959, 16)\n", + "Baseline MAE: 1.0154101277944922\n", + "Baseline MSE: 2.085820163563156\n", + "Baseline R²: -7.204157852269688e-05\n" ] } ], "source": [ + "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", "\n", @@ -436,7 +449,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 21, "metadata": {}, "outputs": [ { @@ -450,13 +463,16 @@ } ], "source": [ - "\n", + "import pandas as pd\n", + "from scipy import stats\n", + "from sklearn.model_selection import train_test_split, RandomizedSearchCV\n", "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", "from sklearn.compose import ColumnTransformer\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.tree import DecisionTreeRegressor\n", "from sklearn.ensemble import GradientBoostingRegressor\n", + "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n", "\n", "categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n", "numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth']\n", @@ -498,7 +514,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 23, "metadata": {}, "outputs": [ { @@ -506,19 +522,19 @@ "output_type": "stream", "text": [ "Model: Linear Regression\n", - "MAE: 0.9720871556511324\n", - "MSE: 1.512023769950756\n", - "R²: 0.039879178618337674\n", + "MAE: 0.999721882988516\n", + "MSE: 2.007024248723743\n", + "R²: 0.03770762552704621\n", "\n", "Model: Decision Tree\n", - "MAE: 1.4547038327526132\n", - "MSE: 3.6193379790940767\n", - "R²: -1.2982454524896956\n", + "MAE: 1.405790088390023\n", + "MSE: 4.053338792508978\n", + "R²: -0.9434229624615185\n", "\n", "Model: Gradient Boosting\n", - "MAE: 0.9718200023112538\n", - "MSE: 1.5324871722382205\n", - "R²: 0.02688511132722371\n", + "MAE: 0.9962143800804221\n", + "MSE: 1.9983219431838193\n", + "R²: 0.041880052575063775\n", "\n" ] } @@ -582,12 +598,13 @@ "name": "stdout", "output_type": "stream", "text": [ - "Размер обучающей выборки: (3440, 16)\n", - "Размер тестовой выборки: (861, 16)\n" + "Размер обучающей выборки: (743, 16)\n", + "Размер тестовой выборки: (186, 16)\n" ] } ], "source": [ + "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "\n", "features = ['BMI', 'Smoking', 'AlcoholDrinking', 'Stroke', 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer']\n", @@ -601,7 +618,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -609,42 +626,55 @@ "output_type": "stream", "text": [ "HeartDisease\n", - "0 3900\n", - "1 3900\n", + "0 796\n", + "1 796\n", "Name: count, dtype: int64\n", + "Размер обучающей выборки: (1273, 49)\n", + "Размер тестовой выборки: (319, 49)\n", "Лучшие гиперпараметры для логистической регрессии:\n", - "{'classifier__C': np.float64(0.26020637194111806), 'classifier__penalty': 'l2', 'classifier__solver': 'liblinear'}\n", - "Accuracy: 0.7667\n", - "Precision: 0.7470\n", - "Recall: 0.8028\n", - "F1-Score: 0.7739\n", - "ROC-AUC: 0.8408\n", + "{'classifier__C': np.float64(0.7272998688284025), 'classifier__penalty': 'l1', 'classifier__solver': 'liblinear'}\n", + "Accuracy: 0.7398\n", + "Precision: 0.7239\n", + "Recall: 0.7564\n", + "F1-Score: 0.7398\n", + "ROC-AUC: 0.8338\n", "Лучшие гиперпараметры для случайного леса:\n", - "{'classifier__bootstrap': False, 'classifier__max_depth': np.int64(98), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 13, 'classifier__n_estimators': 413}\n", - "Accuracy: 0.9487\n", - "Precision: 0.9665\n", - "Recall: 0.9291\n", - "F1-Score: 0.9474\n", - "ROC-AUC: 0.9874\n" - ] - }, - { - "ename": "KeyboardInterrupt", - "evalue": "", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[17], line 125\u001b[0m\n\u001b[0;32m 120\u001b[0m gb_pipeline \u001b[38;5;241m=\u001b[39m Pipeline([\n\u001b[0;32m 121\u001b[0m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mclassifier\u001b[39m\u001b[38;5;124m'\u001b[39m, GradientBoostingClassifier(random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m))\n\u001b[0;32m 122\u001b[0m ])\n\u001b[0;32m 124\u001b[0m gb_random_search \u001b[38;5;241m=\u001b[39m RandomizedSearchCV(gb_pipeline, param_distributions\u001b[38;5;241m=\u001b[39mgb_param_dist, n_iter\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m50\u001b[39m, cv\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, random_state\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m42\u001b[39m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m--> 125\u001b[0m \u001b[43mgb_random_search\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 127\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mЛучшие гиперпараметры для градиентного бустинга:\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 128\u001b[0m \u001b[38;5;28mprint\u001b[39m(gb_random_search\u001b[38;5;241m.\u001b[39mbest_params_)\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1019\u001b[0m, in \u001b[0;36mBaseSearchCV.fit\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m 1013\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format_results(\n\u001b[0;32m 1014\u001b[0m all_candidate_params, n_splits, all_out, all_more_results\n\u001b[0;32m 1015\u001b[0m )\n\u001b[0;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results\n\u001b[1;32m-> 1019\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevaluate_candidates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;66;03m# multimetric is determined here because in the case of a callable\u001b[39;00m\n\u001b[0;32m 1022\u001b[0m \u001b[38;5;66;03m# self.scoring the return type is only known after calling\u001b[39;00m\n\u001b[0;32m 1023\u001b[0m first_test_score \u001b[38;5;241m=\u001b[39m all_out[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_scores\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1960\u001b[0m, in \u001b[0;36mRandomizedSearchCV._run_search\u001b[1;34m(self, evaluate_candidates)\u001b[0m\n\u001b[0;32m 1958\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_search\u001b[39m(\u001b[38;5;28mself\u001b[39m, evaluate_candidates):\n\u001b[0;32m 1959\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Search n_iter candidates from param_distributions\"\"\"\u001b[39;00m\n\u001b[1;32m-> 1960\u001b[0m \u001b[43mevaluate_candidates\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1961\u001b[0m \u001b[43m \u001b[49m\u001b[43mParameterSampler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1962\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparam_distributions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_iter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\n\u001b[0;32m 1963\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1964\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:965\u001b[0m, in \u001b[0;36mBaseSearchCV.fit..evaluate_candidates\u001b[1;34m(candidate_params, cv, more_results)\u001b[0m\n\u001b[0;32m 957\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mverbose \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m:\n\u001b[0;32m 958\u001b[0m \u001b[38;5;28mprint\u001b[39m(\n\u001b[0;32m 959\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFitting \u001b[39m\u001b[38;5;132;01m{0}\u001b[39;00m\u001b[38;5;124m folds for each of \u001b[39m\u001b[38;5;132;01m{1}\u001b[39;00m\u001b[38;5;124m candidates,\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 960\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m totalling \u001b[39m\u001b[38;5;132;01m{2}\u001b[39;00m\u001b[38;5;124m fits\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(\n\u001b[0;32m 961\u001b[0m n_splits, n_candidates, n_candidates \u001b[38;5;241m*\u001b[39m n_splits\n\u001b[0;32m 962\u001b[0m )\n\u001b[0;32m 963\u001b[0m )\n\u001b[1;32m--> 965\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mparallel\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 966\u001b[0m \u001b[43m \u001b[49m\u001b[43mdelayed\u001b[49m\u001b[43m(\u001b[49m\u001b[43m_fit_and_score\u001b[49m\u001b[43m)\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 967\u001b[0m \u001b[43m \u001b[49m\u001b[43mclone\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbase_estimator\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 968\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 969\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 970\u001b[0m \u001b[43m \u001b[49m\u001b[43mtrain\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 971\u001b[0m \u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtest\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 972\u001b[0m \u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparameters\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 973\u001b[0m \u001b[43m \u001b[49m\u001b[43msplit_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43msplit_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_splits\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 974\u001b[0m \u001b[43m \u001b[49m\u001b[43mcandidate_progress\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcand_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mn_candidates\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 975\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_and_score_kwargs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 976\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 977\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mfor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mcand_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mparameters\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43msplit_idx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m(\u001b[49m\u001b[43mtrain\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtest\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01min\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mproduct\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 978\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcandidate_params\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 979\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43menumerate\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mrouted_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplitter\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msplit\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 980\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 981\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 983\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(out) \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m1\u001b[39m:\n\u001b[0;32m 984\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 985\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNo fits were performed. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 986\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWas the CV iterator empty? \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 987\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mWere there no candidates?\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 988\u001b[0m )\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\sklearn\\utils\\parallel.py:74\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 69\u001b[0m config \u001b[38;5;241m=\u001b[39m get_config()\n\u001b[0;32m 70\u001b[0m iterable_with_config \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 71\u001b[0m (_with_config(delayed_func, config), args, kwargs)\n\u001b[0;32m 72\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m delayed_func, args, kwargs \u001b[38;5;129;01min\u001b[39;00m iterable\n\u001b[0;32m 73\u001b[0m )\n\u001b[1;32m---> 74\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__call__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43miterable_with_config\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:2007\u001b[0m, in \u001b[0;36mParallel.__call__\u001b[1;34m(self, iterable)\u001b[0m\n\u001b[0;32m 2001\u001b[0m \u001b[38;5;66;03m# The first item from the output is blank, but it makes the interpreter\u001b[39;00m\n\u001b[0;32m 2002\u001b[0m \u001b[38;5;66;03m# progress until it enters the Try/Except block of the generator and\u001b[39;00m\n\u001b[0;32m 2003\u001b[0m \u001b[38;5;66;03m# reaches the first `yield` statement. This starts the asynchronous\u001b[39;00m\n\u001b[0;32m 2004\u001b[0m \u001b[38;5;66;03m# dispatch of the tasks to the workers.\u001b[39;00m\n\u001b[0;32m 2005\u001b[0m \u001b[38;5;28mnext\u001b[39m(output)\n\u001b[1;32m-> 2007\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m output \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mreturn_generator \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28;43mlist\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43moutput\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:1650\u001b[0m, in \u001b[0;36mParallel._get_outputs\u001b[1;34m(self, iterator, pre_dispatch)\u001b[0m\n\u001b[0;32m 1647\u001b[0m \u001b[38;5;28;01myield\u001b[39;00m\n\u001b[0;32m 1649\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_backend\u001b[38;5;241m.\u001b[39mretrieval_context():\n\u001b[1;32m-> 1650\u001b[0m \u001b[38;5;28;01myield from\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_retrieve()\n\u001b[0;32m 1652\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mGeneratorExit\u001b[39;00m:\n\u001b[0;32m 1653\u001b[0m \u001b[38;5;66;03m# The generator has been garbage collected before being fully\u001b[39;00m\n\u001b[0;32m 1654\u001b[0m \u001b[38;5;66;03m# consumed. This aborts the remaining tasks if possible and warn\u001b[39;00m\n\u001b[0;32m 1655\u001b[0m \u001b[38;5;66;03m# the user if necessary.\u001b[39;00m\n\u001b[0;32m 1656\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_exception \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mTrue\u001b[39;00m\n", - "File \u001b[1;32mc:\\storage\\university\\3 course\\AIM\\AIM-PIbd-32-Chubykina-P-P\\aimenv\\Lib\\site-packages\\joblib\\parallel.py:1762\u001b[0m, in \u001b[0;36mParallel._retrieve\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1757\u001b[0m \u001b[38;5;66;03m# If the next job is not ready for retrieval yet, we just wait for\u001b[39;00m\n\u001b[0;32m 1758\u001b[0m \u001b[38;5;66;03m# async callbacks to progress.\u001b[39;00m\n\u001b[0;32m 1759\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ((\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs) \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m0\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m\n\u001b[0;32m 1760\u001b[0m (\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_jobs[\u001b[38;5;241m0\u001b[39m]\u001b[38;5;241m.\u001b[39mget_status(\n\u001b[0;32m 1761\u001b[0m timeout\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtimeout) \u001b[38;5;241m==\u001b[39m TASK_PENDING)):\n\u001b[1;32m-> 1762\u001b[0m \u001b[43mtime\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msleep\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1763\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n\u001b[0;32m 1765\u001b[0m \u001b[38;5;66;03m# We need to be careful: the job list can be filling up as\u001b[39;00m\n\u001b[0;32m 1766\u001b[0m \u001b[38;5;66;03m# we empty it and Python list are not thread-safe by\u001b[39;00m\n\u001b[0;32m 1767\u001b[0m \u001b[38;5;66;03m# default hence the use of the lock\u001b[39;00m\n", - "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + "{'classifier__bootstrap': True, 'classifier__max_depth': np.int64(32), 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 6, 'classifier__n_estimators': 317}\n", + "Accuracy: 0.9122\n", + "Precision: 0.9571\n", + "Recall: 0.8590\n", + "F1-Score: 0.9054\n", + "ROC-AUC: 0.9773\n", + "Лучшие гиперпараметры для градиентного бустинга:\n", + "{'classifier__learning_rate': np.float64(0.17269984907963387), 'classifier__max_depth': np.int64(96), 'classifier__min_samples_leaf': 8, 'classifier__min_samples_split': 8, 'classifier__n_estimators': 294, 'classifier__subsample': np.float64(0.8288064461501716)}\n", + "Accuracy: 0.9185\n", + "Precision: 0.9577\n", + "Recall: 0.8718\n", + "F1-Score: 0.9128\n", + "ROC-AUC: 0.9745\n", + "\n", + "Результаты моделей:\n", + "\n", + "Logistic Regression:\n", + "Accuracy: 0.7398\n", + "Precision: 0.7239\n", + "Recall: 0.7564\n", + "F1: 0.7398\n", + "Roc_auc: 0.8338\n", + "\n", + "Random Forest:\n", + "Accuracy: 0.9122\n", + "Precision: 0.9571\n", + "Recall: 0.8590\n", + "F1: 0.9054\n", + "Roc_auc: 0.9773\n", + "\n", + "Gradient Boosting:\n", + "Accuracy: 0.9185\n", + "Precision: 0.9577\n", + "Recall: 0.8718\n", + "F1: 0.9128\n", + "Roc_auc: 0.9745\n" ] } ], @@ -797,7 +827,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [ {