From 75bf27d6c9d60d49fd24dd0f97e50861876f8a56 Mon Sep 17 00:00:00 2001 From: ujijrujijr Date: Fri, 15 Nov 2024 21:25:43 +0400 Subject: [PATCH] =?UTF-8?q?=D0=9B=D0=B0=D0=B1=204?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lab_4/Lab4.ipynb | 872 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 872 insertions(+) create mode 100644 lab_4/Lab4.ipynb diff --git a/lab_4/Lab4.ipynb b/lab_4/Lab4.ipynb new file mode 100644 index 0000000..44f818e --- /dev/null +++ b/lab_4/Lab4.ipynb @@ -0,0 +1,872 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "import pandas as pd\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn.metrics import mean_squared_error\n", + "import matplotlib.pyplot as plt\n", + "from scipy import stats\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.tree import DecisionTreeRegressor\n", + "from sklearn.ensemble import GradientBoostingRegressor\n", + "from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n", + "\n", + "df = pd.read_csv(\"..//static//csv//balanced_neo.csv\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## **1-я бизнес-цель (регрессия)**: \n", + "\n", + "Предсказание скорости космического объекта для принятия решения о том, насколько опасным он может быть и стоит ли вести за ним наблюдения" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Целевой признак: скорость космического объекта relative_velocity\n", + "\n", + "Вход: минимальный радиус est_diameter_min, максимальный радиус est_diameter_max, яркость объекта 
# %% [markdown]
# Target: object velocity `relative_velocity`.
#
# Inputs: `est_diameter_min`, `est_diameter_max`, `absolute_magnitude`
# (brightness) and `miss_distance` (distance from Earth).
#
# Achievable quality level: predictions must be off by no more than 10000 km/s
# on average (unit as stated by the notebook author -- TODO confirm against the
# dataset description). The check uses MAE (mean absolute error).

# %%
# Imports for the regression part. Names that are not used below are kept on
# purpose: later cells of the notebook may rely on them.
import numpy as np
import pandas as pd
import seaborn as sns
import sklearn.preprocessing as preproc
from mlxtend.evaluate import bias_variance_decomp
from sklearn import metrics
from sklearn.base import clone
from sklearn.compose import ColumnTransformer, TransformedTargetRegressor
from sklearn.ensemble import (
    GradientBoostingClassifier,
    GradientBoostingRegressor,
    RandomForestClassifier,
)
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    mean_absolute_error,
    mean_squared_error,
    roc_auc_score,
)
from sklearn.model_selection import GridSearchCV, cross_val_predict, train_test_split
from sklearn.neural_network import MLPRegressor
from sklearn.pipeline import Pipeline
# StandardScaler lives in sklearn.preprocessing; importing it from
# sklearn.discriminant_analysis only worked because that module happens to
# import it internally.
from sklearn.preprocessing import StandardScaler

# %%
# Configuration: everything a reader might want to tune.
REG_FEATURES = ['est_diameter_min', 'est_diameter_max', 'absolute_magnitude', 'miss_distance']
REG_TARGET = 'relative_velocity'
SEED = 42
# The acceptance criterion above is stated in MAE, so candidates are ranked by
# (negated) MAE rather than by RMSE.
REG_SCORING = 'neg_mean_absolute_error'
# Bootstrap rounds for the bias/variance decomposition. Every round refits the
# model, so the slow learners (MLP, boosting) get fewer rounds.
DECOMP_ROUNDS_FAST = 200
DECOMP_ROUNDS_SLOW = 30
# Feature-scaling candidates; "passthrough" means "no scaling".
ALL_SCALERS = [StandardScaler(), preproc.MinMaxScaler(), preproc.MaxAbsScaler(), "passthrough"]

# %%
# Load the data and split it into train / test parts.
df = pd.read_csv("..//static//csv//balanced_neo.csv")
data = df[REG_FEATURES + [REG_TARGET]]

X = data.drop(REG_TARGET, axis=1)
y = data[REG_TARGET]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=SEED)

# All features are numeric, so there is no categorical preprocessing.


# %%
def make_regression_pipeline(model):
    """Build the imputer -> scaler -> model pipeline used by every regressor.

    The imputer and the scaler are separate steps so that a grid search over
    the ``scaler`` step never removes the median imputation. No
    ColumnTransformer is used: ``bias_variance_decomp`` feeds plain NumPy
    arrays, for which selecting columns by name is not supported.

    Args:
        model: an unfitted scikit-learn regressor.

    Returns:
        An unfitted ``Pipeline`` with steps ``imputer``, ``scaler``, ``model``.
    """
    return Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
        ("model", model),
    ])


def tune_and_evaluate_regressor(model, param_grid, *, scalers=ALL_SCALERS, cv=5,
                                decomp_rounds=DECOMP_ROUNDS_FAST):
    """Grid-search a regressor, report test MAE and a bias/variance estimate.

    Reads the module-level ``X_train``, ``X_test``, ``y_train``, ``y_test``.

    Args:
        model: an unfitted scikit-learn regressor.
        param_grid: extra grid entries, keyed relative to the pipeline
            (for example ``{"model__alpha": [...]}``).
        scalers: candidates for the ``scaler`` step.
        cv: number of cross-validation folds.
        decomp_rounds: bootstrap rounds for ``bias_variance_decomp``.

    Returns:
        Tuple ``(grid_search, best_model, y_pred)``: the fitted search, its
        best pipeline (fitted on the whole training set) and that pipeline's
        predictions for ``X_test``.
    """
    search = GridSearchCV(
        make_regression_pipeline(model),
        {"scaler": list(scalers), **param_grid},
        cv=cv,
        scoring=REG_SCORING,
        n_jobs=-1,
    )
    search.fit(X_train, y_train)
    print("Лучшие гиперпараметры: ", search.best_params_)

    best = search.best_estimator_
    predictions = best.predict(X_test)
    print(f'Cредняя абсолютная ошибка (MAE) = {mean_absolute_error(y_test, predictions)}')

    # bias_variance_decomp refits the estimator it is given on every bootstrap
    # sample, so it gets a clone; `best` stays fitted on the full training set.
    _, bias_squared, variance = bias_variance_decomp(
        clone(best),
        X_train.values, y_train.values,
        X_test.values, y_test.values,
        loss='mse', num_rounds=decomp_rounds, random_seed=123,
    )
    # With loss='mse' the reported bias term is the squared bias (it is on the
    # scale of the MSE, not of the MAE).
    print("Смещение (в квадрате): ", bias_squared)
    print("Дисперсия: ", variance)
    return search, best, predictions


# %% [markdown]
# ## Linear regression

# %%
grid_search, best_model, y_pred = tune_and_evaluate_regressor(LinearRegression(), {})

# %% [markdown]
# ## Ridge regression

# %%
# alpha=0 is left out: it is plain least squares (already covered by
# LinearRegression above) and the Ridge documentation advises against it.
grid_search, best_model, y_pred = tune_and_evaluate_regressor(
    Ridge(),
    {"model__alpha": [0.1, 0.5, 1.0, 1.5, 2.0, 5.0, 10.0]},  # regularisation strength
)

# %% [markdown]
# Ridge regression gave almost the same results as linear regression.

# %% [markdown]
# ## MLP

# %%
# The recorded run hit ConvergenceWarning after 500 iterations and the
# decomposition was interrupted by hand. Two mitigations:
#   * early_stopping=True stops a fit once the validation score stalls;
#   * the target is standardised for the network and predictions are mapped
#     back to the original scale (presumably the raw target scale is what
#     slows convergence -- TODO confirm on a fresh run).
mlp_model = TransformedTargetRegressor(
    regressor=MLPRegressor(random_state=SEED, max_iter=500, early_stopping=True),
    transformer=StandardScaler(),
)
grid_search, best_model, y_pred = tune_and_evaluate_regressor(
    mlp_model,
    {
        "model__regressor__hidden_layer_sizes": [(50,), (100,)],  # layer layouts
        "model__regressor__alpha": [0.0001, 0.001],               # L2 penalty
    },
    scalers=[StandardScaler(), preproc.MinMaxScaler()],
    decomp_rounds=DECOMP_ROUNDS_SLOW,
)

# %% [markdown]
# ## Gradient boosting (an ensemble of decision trees)

# %%
# Tree splits do not depend on monotone rescaling of a feature, so searching
# over scalers would only multiply the number of fits; the seed makes the run
# repeatable. Bias and variance are estimated the same way as for the other
# models instead of mean(residual) / var(predictions).
grid_search, best_model, y_pred = tune_and_evaluate_regressor(
    GradientBoostingRegressor(random_state=SEED),
    {
        "model__n_estimators": [100, 200, 300],
        "model__learning_rate": [0.1, 0.2],   # shrinkage per tree
        "model__max_depth": [3, 5, 7],        # maximum depth of each tree
    },
    scalers=["passthrough"],
    decomp_rounds=DECOMP_ROUNDS_SLOW,
)

# %% [markdown]
# **Conclusion**:
#
# In the recorded run none of the four regression models reached the required
# error level.
# %% [markdown]
# This means that either more expressive models are needed or the required
# error cannot be reached with the available data.
#
# Of all the models, gradient boosting showed the lowest error.

# %% [markdown]
# ## **Business goal 2 (classification):**
#
# Decide whether a space object is hazardous, to improve the safety of Earth.

# %% [markdown]
# Target: object hazard flag `hazardous`.
#
# Inputs: minimum diameter `est_diameter_min`, maximum diameter
# `est_diameter_max`, brightness `absolute_magnitude`, velocity
# `relative_velocity`.

# %% [markdown]
# Achievable quality level: the model must be at least 90% accurate. The check
# uses the Accuracy metric.

# %%
# Imports for the classification part. Names that are not used below are kept
# on purpose: later cells of the notebook may rely on them.
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn import metrics
from sklearn.compose import ColumnTransformer
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    mean_squared_error,
    roc_auc_score,
)
from sklearn.model_selection import GridSearchCV, cross_val_predict, train_test_split
from sklearn.pipeline import Pipeline
# StandardScaler lives in sklearn.preprocessing; importing it from
# sklearn.discriminant_analysis only worked because that module happens to
# import it internally.
from sklearn.preprocessing import OneHotEncoder, StandardScaler

# %%
CLF_FEATURES = ['est_diameter_min', 'est_diameter_max', 'absolute_magnitude', 'relative_velocity']
CLF_TARGET = 'hazardous'
SEED = 42

# Load the data.
df = pd.read_csv("..//static//csv//balanced_neo.csv")
data = df[CLF_FEATURES + [CLF_TARGET]]

X = data.drop(CLF_TARGET, axis=1)
y = data[CLF_TARGET]

# Stratify so that train and test keep the same class proportions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SEED, stratify=y
)

# Numeric preprocessing: median imputation followed by standardisation.
num_imputer = SimpleImputer(strategy="median")
num_scaler = StandardScaler()
preprocessing_num = Pipeline(
    [
        ("imputer", num_imputer),
        ("scaler", num_scaler),
    ]
)

# Overall preprocessing (numeric columns only).
preprocessing = ColumnTransformer(
    [
        ("nums", preprocessing_num, X.columns),
    ]
)


# %%
def tune_and_evaluate_classifier(classifier, param_grid, roc_label, *, cv=5):
    """Grid-search a classifier and report its quality on the test split.

    Prints the best hyper-parameters, test ROC AUC and accuracy, draws the ROC
    curve and the confusion matrix, and prints the cross-validated accuracy of
    the selected candidate. Reads the module-level ``preprocessing``,
    ``X_train``, ``X_test``, ``y_train`` and ``y_test``.

    Args:
        classifier: an unfitted scikit-learn classifier with ``predict_proba``.
        param_grid: grid keyed with the ``classifier__`` prefix.
        roc_label: model name inserted into the printed ROC line.
        cv: number of cross-validation folds.

    Returns:
        Tuple ``(grid_search, best_model, y_pred, y_pred_proba)``.
    """
    pipeline = Pipeline([
        ('preprocessing', preprocessing),
        ('classifier', classifier),
    ])

    # Candidates are ranked by accuracy, the metric named in the quality target.
    search = GridSearchCV(pipeline, param_grid, cv=cv, scoring='accuracy', n_jobs=-1)
    search.fit(X_train, y_train)
    print("Лучшие гиперпараметры: ", search.best_params_)

    best = search.best_estimator_

    # Column 1 of predict_proba belongs to classes_[1], the larger label.
    proba = best.predict_proba(X_test)[:, 1]
    auc = roc_auc_score(y_test, proba)
    print(f'ROC у {roc_label} = {auc}')

    predicted = best.predict(X_test)
    print(f'Точность = {accuracy_score(y_test, predicted)}')

    # ROC curve.
    fpr, tpr, _ = metrics.roc_curve(y_test, proba)
    fig, ax = plt.subplots()
    ax.plot(fpr, tpr)
    ax.set(title='ROC curve', xlabel='False Positive Rate', ylabel='True Positive Rate')
    plt.show()

    # Confusion matrix; rows/columns follow best.classes_, which is what the
    # tick labels below assume (safe first, hazardous second).
    conf_matrix = confusion_matrix(y_test, predicted, labels=best.classes_)
    fig, ax = plt.subplots(figsize=(8, 6))
    sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', ax=ax,
                xticklabels=['Предсказанный "безопасный"', 'Предсказанный "опасный"'],
                yticklabels=['Действительно "безопасный"', 'Действительно "опасный"'])
    ax.set(title='Confusion Matrix', ylabel='Actual', xlabel='Predicted')
    plt.show()

    # Cross-validated accuracy of the selected candidate: mean over folds and
    # spread between folds. These are stability figures, not a bias/variance
    # decomposition, and they describe the winner only (not an average over
    # every grid candidate).
    cv_mean = search.cv_results_['mean_test_score'][search.best_index_]
    cv_std = search.cv_results_['std_test_score'][search.best_index_]
    print(f"Средняя точность на кросс-валидации (лучшая модель): {cv_mean}")
    print(f"Стандартное отклонение точности по фолдам: {cv_std}")

    return search, best, predicted, proba


# %% [markdown]
# ## Logistic regression

# %%
grid_search, best_model, y_pred, y_pred_proba = tune_and_evaluate_classifier(
    # A seed makes liblinear/saga repeatable; a larger max_iter gives saga room
    # to converge.
    LogisticRegression(random_state=SEED, max_iter=1000),
    {
        # Inverse regularisation strength: smaller means stronger penalty.
        'classifier__C': [0.1, 0.5, 1],
        # Penalty type.
        'classifier__penalty': ['l1', 'l2'],
        # Optimisation algorithm.
        'classifier__solver': ['liblinear', 'saga'],
    },
    'логистической регрессии',
)

# %% [markdown]
# ## Random forest (an ensemble of decision trees)

# %%
grid_search, best_model, y_pred, y_pred_proba = tune_and_evaluate_classifier(
    RandomForestClassifier(random_state=SEED),
    {
        # Number of trees in the forest.
        'classifier__n_estimators': [50, 100, 200],
        # Maximum depth of a tree.
        'classifier__max_depth': [10, 20, 30],
        # Minimum number of samples in a leaf.
        'classifier__min_samples_leaf': [1, 2, 4],
    },
    'метода случайного леса',
)

# %% [markdown]
# ## Gradient boosting
"iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABC8klEQVR4nO3dfXzO9eLH8fe12a4N29Da5maam9zlZm7iIIlWVEecTuWUg9TRKTd12lEhLCWcinSinJRUv4rqqJxoDoqDdMisFCY3i7CxsDtsdl3f3x/LVzuGXXNd1/e6rr2ej8f1eHy+332/1/XeF13vvrc2wzAMAQAABIggqwMAAAC4E+UGAAAEFMoNAAAIKJQbAAAQUCg3AAAgoFBuAABAQKHcAACAgFLN6gDe5nQ6dfDgQUVERMhms1kdBwAAVIBhGMrPz1e9evUUFHThfTNVrtwcPHhQ8fHxVscAAACVsH//fjVo0OCCy1S5chMRESGpdONERkZanAYAAFREXl6e4uPjze/xC6ly5ebMoajIyEjKDQAAfqYip5RwQjEAAAgolBsAABBQKDcAACCgUG4AAEBAodwAAICAQrkBAAABhXIDAAACCuUGAAAEFMoNAAAIKJQbAAAQUCwtN//5z3/Ur18/1atXTzabTR9//PFF11m9erU6dOggu92upk2basGCBR7PCQAA/Iel5aawsFDt2rXTnDlzKrT83r17dcstt6hXr15KT0/XX/7yF/3pT3/S8uXLPZwUAAD4C0sfnHnTTTfppptuqvDyc+fOVaNGjTRjxgxJUsuWLbVu3Tq98MIL6tOnj6diAuU6nH9KxSVOq2MAgM8JrRakmIgwyz7fr54KvmHDBiUlJZWZ16dPH/3lL3857zpFRUUqKioyp/Py8jwVDwHktMOpBesz9dl3h1TiNBQeElzm5//de9SiZADg+zo0rKXFI7pb9vl+VW6ysrIUGxtbZl5sbKzy8vJ08uRJhYeHn7POtGnTNHnyZG9FhJ/bkZWnvrPWurSOvRrn5QPAr4UEW/vfRb8qN5Uxbtw4JScnm9N5eXmKj4+3MBF80f6jJ9Tj2S/K/dldnRuqQ8NaCg8tu/cmNjJMVyfU8UY8AIAL/KrcxMXFKTs7u8y87OxsRUZGlrvXRpLsdrvsdrs34sEPFZc41ePZz5WdV1Rmfv1a4fr3I9eqht2v/okAAORn5aZr165atmxZmXkrVqxQ165dLUoEf3fDC2vKFJuklrGaN6SjbDabhakAAJfC0nJTUFCgXbt2mdN79+5Venq66tSpo4YNG2rcuHE6cOCA3nrrLUnSAw88oNmzZ+uxxx7Tvffeq88//1zvv/++li5datWvAD/2RcZh/fjzCXM6Y0pf2asFX2ANAIA/sPSMn6+//lrt27dX+/btJUnJyclq3769Jk2aJEk6dOiQ9u3bZy7fqFEjLV26VCtWrFC7du00Y8YMvfbaa1wGDpdt/vGYhr2xyZz+bnIfig0ABAibYRiG1SG8KS8vT1FRUcrNzVVkZKTVceBlmTmFuu751WXmvXVvZ13b7HJrAgEAKsSV72+uYUWV8c3+4+cUm+E9GlFsACDA+NUJxcCl6D9nvTm+ql6kFo/oxqEoAAhAlBtUCb9+TMJNreP0yh87WpgGAOBJHJZCwDtZ7NCAX+21mX5bWwvTAAA8jT03CGh3zt2gjZllnwMVVT3EojQAAG9gzw0CVkFRyTnF5pOR1j3IDQDgHey5QcC691f3sfkm5UZFhbPHBgCqAvbcICDtPlJQZq8NxQYAqg7KDQJOUYlD189YY06n/qWHhWkAAN5GuUHAmf352eeVtakfpRZx3IkaAKoSyg0CyoL1e/XSr8rNklGcQAwAVQ0nFCMg7D96Qj2e/aLMvGd/31Y2m82iRAAAq7DnBgEhff/xMtPjbmqhOzo1sCYMAMBS7LlBQHjsw28lSS3iIpT6l2stTgMAsBJ7buD3Hnh7s06edkiS6kaFWZwGAGA1yg382pe
7cpT6fZY5PXcwD8QEgKqOcgO/dbSwWHe/9l9z+t+PXCt7tWALEwEAfAHlBn6rw9MrzHFKv1ZqFhthYRoAgK+g3MDvNb68hoZ1b2R1DACAj6DcwO/sP3pCCWOXmtMf/LmrhWkAAL6GcgO/k/x+epnpOjVCrQkCAPBJlBv4leISpzZlHjOndzzdl7sQAwDKoNzAr+SePG2O1z7WS2EhXB0FACiLcgO/8t2BXHMcX6e6hUkAAL6KcgO/MmzBJqsjAAB8HOUGfuWyX04eHtY9wdogAACfRbmB3ygoKtHPhcWSpP6J9S1OAwDwVZQb+I2l3x40xy3iuBsxAKB8lBv4jaOFpVdKBdnEVVIAgPOi3MAv5J06rb+l7pAkdW8abXEaAIAvo9zA5x3KPam2T/7bnL7xqjgL0wAAfB3lBj6v67TPzXHnhDoa/JsrLEwDAPB1lBv4tLc3ZJrjGqHBeu/+31gXBgDgF6pZHQA4n9fW7tGUpdvN6a8n3KDgIJ4jBQC4MPbcwCcVFpWUKTbvDf+NwkO5QgoAcHHsuYFP+ve2LHO86P7fqEvjyyxMAwDwJ+y5gU965pe9NmEhQRQbAIBLKDfwOUUlDuUUlD5mIeGyGhanAQD4G8oNfM7/fbXPHL/yx44WJgEA+CPKDXzO059uM8eNotlzAwBwDeUGPmXLvmPmeHTvphYmAQD4K8oNfMrvXv7SHI/sRbkBALiOcgOfkfjU2edHPdqnOU/+BgBUCuUGPuFYYbGOnzhtTo+4romFaQAA/oxyA5/Q/ukV5viHZ26SzcZjFgAAlUO5geUWbjx76XdifC2FBPPXEgBQeXyLwHKT/3X20u+PRnSzMAkAIBBQbmC5OjVCJUkP9GzC4SgAwCWj3MBSye+n68Dxk5Kk3i1iLE4DAAgElBtY5ouMw1qcdsCcbh4bYWEaAECgoNzAMsPe2GSONz5xvaKqh1iYBgAQKCg3sMTJYoc5vqtzQ8VEhFmYBgAQSCg3sMQTH209O76lpYVJAACBhnIDS2zY87Mkqaa9mmraq1mcBgAQSCg38Lqfjp3QodxTkqSPR3a3OA0AINBQbuB11/ztC3PcOLqGhUkAAIGIcgOvOu1wmuPOjeooKIib9gEA3MvycjNnzhwlJCQoLCxMXbp00caNGy+4/KxZs9S8eXOFh4crPj5ejzzyiE6dOuWltLhUa384Yo5fG9rJwiQAgEBlablZtGiRkpOTlZKSorS0NLVr1059+vTR4cOHy13+3Xff1dixY5WSkqLt27fr9ddf16JFizR+/HgvJ0dlFJU4dO+Cr83pGqGcSAwAcD9Ly83MmTM1fPhwDRs2TK1atdLcuXNVvXp1zZ8/v9zlv/zyS3Xv3l133323EhISdOONN+quu+664N6eoqIi5eXllXnBGuP+efby78f7tlAwh6QAAB5gWbkpLi7W5s2blZSUdDZMUJCSkpK0YcOGctfp1q2bNm/ebJaZPXv2aNmyZbr55pvP+znTpk1TVFSU+YqPj3fvLwKXRYWH6IGeja2OAQAIUJYdF8jJyZHD4VBsbGyZ+bGxsdqxY0e569x9993KycnRNddcI8MwVFJSogceeOCCh6XGjRun5ORkczovL4+CY4GDx09q8ZbS50gN7XoFT/8GAHiM5ScUu2L16tWaOnWqXn75ZaWlpWnx4sVaunSpnn766fOuY7fbFRkZWeYF7+s2/XNzfHWjOhYmAQAEOsv23ERHRys4OFjZ2dll5mdnZysuLq7cdSZOnKjBgwfrT3/6kySpTZs2Kiws1P33368nnnhCQUF+1dWqlMsj7DqSX6QWcRHqceXlVscBAAQwy9pAaGioOnbsqFWrVpnznE6nVq1apa5du5a7zokTJ84pMMHBwZIkwzA8FxaX5JP0AzqSXyRJmnlnorVhAAABz9JrcZOTkzV06FB16tRJnTt31qxZs1RYWKhhw4ZJkoYMGaL69etr2rRpkqR+/fpp5syZat++vbp06aJdu3Zp4sSJ6te
vn1ly4FtOFJfo4YXp5nR8nXDrwgAAqgRLy83AgQN15MgRTZo0SVlZWUpMTFRqaqp5kvG+ffvK7KmZMGGCbDabJkyYoAMHDujyyy9Xv3799Mwzz1j1K+AinlzyvTl+897OiggLsTANAKAqsBlV7HhOXl6eoqKilJuby8nFXpAwdqk5zpx+i4VJAAD+zJXvb87AhcccP1Fsjt8YdrWFSQAAVQnlBh6xN6dQiU+tMKd7NI22MA0AoCqh3MAj/rIo3Rz3uSpW1YL5qwYA8A6+ceB23/50XN/sPy5Jalinuubc3cHaQACAKoVyA7f78ecT5vjd4V3YawMA8Cq+deB2723cJ0nq2vgyNahd3eI0AICqhnIDt8vMKZQknXY4LU4CAKiKKDdwK4fT0MHcU5Kkv97Y3OI0AICqiHIDt3phxU5z3Cy2poVJAABVFeUGbjX7i12SpMiwarqspt3iNACAqohyA7eK/qXQvDAw0dogAIAqi3IDj6hfm6d/AwCsQbmB2xiGoZyCIqtjAACqOMoN3GbaZzvMcU17NQuTAACqMsoN3CL/1Gm9+p89kqSQYBs37wMAWIZyg0tWUFSiNk/+25x+c1hnC9MAAKo6yg0u2Ze7csxxl0Z11K1ptIVpAABVHeUGl2x6aum5NhH2alr0564WpwEAVHWUG1yyE0UOSdLlkdy0DwBgPcoNLonDaSgrr/RZUo/yLCkAgA+g3OCSFJU4zHHHK2pbmAQAgFKUG1wSp3F2XDOMe9sAAKxHucEl+duvbtwXHGSzMAkAAKUoN7gkb3/1ozm2Vwu2MAkAAKUoN6i0o4XF5vjjkd0tTAIAwFmUG1Tajqw8c5wYX8u6IAAA/ArlBpWyMztfQ+dvtDoGAADn4PIWuOyH7Hzd+MJ/zOnH+nJ/GwCA72DPDVy2asdhc/zQ9VfqwZ5NLEwDAEBZ7LmBy0KCSztx54Q6Sr6hmcVpAAAoiz03cNlHW36SJMVFhVmcBACAc1Fu4LLYiNJSc9rhtDgJAADnotzAZQ6j9JkLvZrHWJwEAIBzXVK5OXXqlLtywE9s/SlXqzOOlE7wtAUAgA9yudw4nU49/fTTql+/vmrWrKk9e/ZIkiZOnKjXX3/d7QHhW/rNXmeOuzW5zMIkAACUz+VyM2XKFC1YsEDPPvusQkNDzfmtW7fWa6+95tZw8C17jhSY4wm3tFSD2tUtTAMAQPlcLjdvvfWWXn31VQ0aNEjBwWcflNiuXTvt2LHjAmvC3x08fvYw5L3dG1mYBACA83O53Bw4cEBNmzY9Z77T6dTp06fdEgq+qcRZenVUi7gIBQVxwg0AwDe5XG5atWqltWvXnjP/ww8/VPv27d0SCr5p0ab9krgEHADg21y+Q/GkSZM0dOhQHThwQE6nU4sXL1ZGRobeeustffrpp57ICB9gGIY++y5LknSy2GFxGgAAzs/lPTf9+/fXv/71L61cuVI1atTQpEmTtH37dv3rX//SDTfc4ImM8AH/99995vixvi0sTAIAwIVV6tlSPXr00IoVK9ydBT5s4sffSZLaxddS/8R6FqcBAOD8XN5z07hxY/3888/nzD9+/LgaN27sllDwLfmnzp4o/scuDWWzcTIxAMB3uVxuMjMz5XCce85FUVGRDhw44JZQ8C0fbzn759qvHXttAAC+rcKHpZYsWWKOly9frqioKHPa4XBo1apVSkhIcGs4+IaZK3ZKkjpdUVthIcEXWRoAAGtVuNwMGDBAkmSz2TR06NAyPwsJCVFCQoJmzJjh1nDwDcdOlB6W+m3buhYnAQDg4ipcbpy/3MCtUaNG2rRpk6Kjoz0WCr7jx58LzfE1V15uYRIAACrG5aul9u7d64kc8FE9n1ttjpvG1LQuCAAAFVSpS8ELCwu1Zs0a7du3T8XFxWV+9tBDD7klGKw3d81uc8zl3wAAf+FyudmyZYtuvvlmnThxQoWFhapTp45ycnJUvXp1xcTEUG4CyMa9R83xrIGJ1gUBAMAFLl8K/sg
jj6hfv346duyYwsPD9dVXX+nHH39Ux44d9fzzz3siIyxy8PhJSdKzv2/LvW0AAH7D5XKTnp6uv/71rwoKClJwcLCKiooUHx+vZ599VuPHj/dERlhkR1a+JMmQYXESAAAqzuVyExISoqCg0tViYmK0b1/pM4eioqK0f/9+96aDZU6dPnujxtb1oy6wJAAAvsXlc27at2+vTZs26corr1TPnj01adIk5eTk6O2331br1q09kREW+P5grjluHM1VUgAA/+HynpupU6eqbt3Sm7k988wzql27th588EEdOXJE//jHP9weENb4OvOYJCkqPEThodyVGADgP1zec9OpUydzHBMTo9TUVLcGgm8oKCqRJF1VL9LiJAAAuMblPTfnk5aWpt/+9rcurzdnzhwlJCQoLCxMXbp00caNGy+4/PHjxzVy5EjVrVtXdrtdzZo107JlyyobG+UwDEMvfb5LkpQQXcPiNAAAuMalcrN8+XKNGTNG48eP1549eyRJO3bs0IABA3T11Vebj2ioqEWLFik5OVkpKSlKS0tTu3bt1KdPHx0+fLjc5YuLi3XDDTcoMzNTH374oTIyMjRv3jzVr1/fpc/FhTmcZ6+OujqhtoVJAABwXYUPS73++usaPny46tSpo2PHjum1117TzJkzNXr0aA0cOFDfffedWrZs6dKHz5w5U8OHD9ewYcMkSXPnztXSpUs1f/58jR079pzl58+fr6NHj+rLL79USEiIJF30SeRFRUUqKioyp/Py8lzKWNX1ah5jdQQAAFxS4T03L774ov72t78pJydH77//vnJycvTyyy9r69atmjt3rsvFpri4WJs3b1ZSUtLZMEFBSkpK0oYNG8pdZ8mSJeratatGjhyp2NhYtW7dWlOnTpXD4Sh3eUmaNm2aoqKizFd8fLxLOQEAgH+pcLnZvXu37rjjDknSbbfdpmrVqum5555TgwYNKvXBOTk5cjgcio2NLTM/NjZWWVlZ5a6zZ88effjhh3I4HFq2bJkmTpyoGTNmaMqUKef9nHHjxik3N9d8cS+ei1u69ZA5Dg7izsQAAP9S4cNSJ0+eVPXq1SVJNptNdrvdvCTcW5xOp2JiYvTqq68qODhYHTt21IEDB/Tcc88pJSWl3HXsdrvsdrtXc/q7bYdKD93FRtoVERZicRoAAFzj0qXgr732mmrWLL2hW0lJiRYsWKDo6Ogyy1T0wZnR0dEKDg5WdnZ2mfnZ2dmKi4srd526desqJCREwcFn77vSsmVLZWVlqbi4WKGhoa78OriIfm15EjgAwP9UuNw0bNhQ8+bNM6fj4uL09ttvl1nGZrNVuNyEhoaqY8eOWrVqlQYMGCCpdM/MqlWrNGrUqHLX6d69u9599105nU7zERA7d+5U3bp1KTZu9I81pVfC8axMAIA/qnC5yczMdPuHJycna+jQoerUqZM6d+6sWbNmqbCw0Lx6asiQIapfv76mTZsmSXrwwQc1e/ZsPfzwwxo9erR++OEHTZ06tcKFChc3+PX/muOiEtcu7QcAwBe4fIdidxo4cKCOHDmiSZMmKSsrS4mJiUpNTTVPMt63b5+5h0aS4uPjtXz5cj3yyCNq27at6tevr4cffliPP/64Vb9CQMk/dVprf8gxp8ff7NoVcAAA+AKbYRjGxRcLHHl5eYqKilJubq4iI3m0wK+t/eGIBr9eeofonVNuUmg1t93AGgCAS+LK9zffXjDNW7vXHFNsAAD+im8wmC6vWXrJ/HXNL7c4CQAAlUe5wTm6Nr7M6ggAAFRapcrN7t27NWHCBN11113mQy4/++wzff/9924NB+8pKnHon2k/WR0DAIBL5nK5WbNmjdq0aaP//ve/Wrx4sQoKCiRJ33zzzXnvEgzf13xCqjkOCwm+wJIAAPg2l8vN2LFjNWXKFK1YsaLMjfN69+6tr776yq3h4B1rdh4pM31rO+5MDADwXy7f52br1q169913z5kfExOjnJycctaAr5u6dLs5zpx+i4V
JAAC4dC7vualVq5YOHTp0zvwtW7aofv36bgkF78rIzpckXdM0+iJLAgDg+1wuN3/4wx/0+OOPKysrSzabTU6nU+vXr9eYMWM0ZMgQT2SEB2XmFJrjEdc1sTAJAADu4XK5mTp1qlq0aKH4+HgVFBSoVatWuvbaa9WtWzdNmDDBExnhIU6noaSZayRJIcE2dWPPDQAgALh8zk1oaKjmzZuniRMn6rvvvlNBQYHat2+vK6+80hP54EGp32epxFn69I0ujbi3DQAgMLhcbtatW6drrrlGDRs2VMOGDT2RCV5gGIZGvJNmTr82tJOFaQAAcB+XD0v17t1bjRo10vjx47Vt2zZPZIIX9J6xxhxP/V0b7m0DAAgYLpebgwcP6q9//avWrFmj1q1bKzExUc8995x++om72/qL/FOntfdXJxLf1TnewjQAALiXy+UmOjpao0aN0vr167V7927dcccdevPNN5WQkKDevXt7IiPcLP9UiTnePfVm2Ww2C9MAAOBel/TgzEaNGmns2LGaPn262rRpozVr1lx8JVjO8ctJxKHVghQcRLEBAASWSpeb9evXa8SIEapbt67uvvtutW7dWkuXLnVnNnjI1z8elSQVlzgtTgIAgPu5fLXUuHHjtHDhQh08eFA33HCDXnzxRfXv31/Vq1f3RD54QInDsDoCAAAe43K5+c9//qNHH31Ud955p6KjuembPzpzMvF1zS+3OAkAAO7ncrlZv369J3LAi3ZmF0iScgqKLE4CAID7VajcLFmyRDfddJNCQkK0ZMmSCy576623uiUYPMPpNLT/6AlJ3JUYABCYKlRuBgwYoKysLMXExGjAgAHnXc5ms8nhcLgrGzzg3jc3mU8B79U8xuI0AAC4X4XKjdPpLHcM/7M644gkqU39KF1zJedMAQACj8uXgr/11lsqKjr3XI3i4mK99dZbbgkFz3h/035zPOPOdhYmAQDAc1wuN8OGDVNubu458/Pz8zVs2DC3hIJnrNyebY6vjKlpYRIAADzH5XJjGEa5t+v/6aefFBUV5ZZQ8IyfC4slSde3iOGRCwCAgFXhS8Hbt28vm80mm82m66+/XtWqnV3V4XBo79696tu3r0dCwj02/3hMEve3AQAEtgqXmzNXSaWnp6tPnz6qWfPsYY3Q0FAlJCTo97//vdsDwj1Svztkji+PsFuYBAAAz6pwuUlJSZEkJSQkaODAgQoLC/NYKLjfkm8OmuOklrEWJgEAwLNcvkPx0KFDPZEDHrZsa5YkaXiPRqoWfEkPgwcAwKdVqNzUqVNHO3fuVHR0tGrXrn3Bk1GPHj3qtnBwj7R9x8xxv3b1LEwCAIDnVajcvPDCC4qIiDDHXGnjX46fKDbHbRvUsi4IAABeUKFy8+tDUffcc4+nssBDVmw7LElq14BL9QEAgc/lky/S0tK0detWc/qTTz7RgAEDNH78eBUXF19gTVhlyy+HpfbmFFqcBAAAz3O53Pz5z3/Wzp07JUl79uzRwIEDVb16dX3wwQd67LHH3B4Ql6bE4TRv3vfs7W0tTgMAgOe5XG527typxMRESdIHH3ygnj176t1339WCBQv0z3/+0935cInmrtmtI/mlzwJrVZfDUgCAwFepxy+ceTL4ypUrdfPNN0uS4uPjlZOT4950uGQb9vwsSWpQO1wNL6tucRoAADzP5XLTqVMnTZkyRW+//bbWrFmjW265RZK0d+9excZyczhfs35XabkZkFjf4iQAAHiHy+Vm1qxZSktL06hRo/TEE0+oadOmkqQPP/xQ3bp1c3tAVF5BUYk5bhdfy7ogAAB4kct3KG7btm2Zq6XOeO655xQcHOyWUHCPHYfyzHEvHpYJAKgiXC43Z2zevFnbt2+XJLVq1UodOnRwWyi4x+1zN0iSosJDeOQCAKDKcLncHD58WAMHDtSaNWtUq1YtSdLx48fVq1cvLVy4UJdfzh4CX/DlrrMnd7fl5n0AgCrE5f+dHz16tAoKCvT999/r6NGjOnr
0qL777jvl5eXpoYce8kRGVMKK7dnm+K17O1uYBAAA73J5z01qaqpWrlypli1bmvNatWqlOXPm6MYbb3RrOFTevp9PSJJubVePZ4EBAKoUl/fcOJ1OhYSEnDM/JCTEvP8NrPfdwVxJUu8WMRYnAQDAu1wuN71799bDDz+sgwcPmvMOHDigRx55RNdff71bw6HyCk6VXgbOJeAAgKrG5XIze/Zs5eXlKSEhQU2aNFGTJk3UqFEj5eXl6aWXXvJERrjIMAwVFjskScEckgIAVDEun3MTHx+vtLQ0rVq1yrwUvGXLlkpKSnJ7OFTOzuwCc1zDzr2HAABVi0vlZtGiRVqyZImKi4t1/fXXa/To0Z7KhUuw+8jZcnNZTbuFSQAA8L4Kl5tXXnlFI0eO1JVXXqnw8HAtXrxYu3fv1nPPPefJfKiEX9+ZGACAqqbC59zMnj1bKSkpysjIUHp6ut588029/PLLnsyGSvghO19//3yXJOmebgnWhgEAwAIVLjd79uzR0KFDzem7775bJSUlOnTokEeCoXIeWphujod1T7AsBwAAVqlwuSkqKlKNGjXOrhgUpNDQUJ08edIjweC6U6cd2pFVekjq9o4NdMVlNS6yBgAAgcelE4onTpyo6tWrm9PFxcV65plnFBV19tlFM2fOdF86uCTtx2MyDKl6aLCm39bG6jgAAFiiwuXm2muvVUZGRpl53bp10549e8xpbvNvrVU7DkuSroypyVPAAQBVVoXLzerVqz0YA+6QmVMoSfrpGIcKAQBVl0/87/2cOXOUkJCgsLAwdenSRRs3bqzQegsXLpTNZtOAAQM8G9APZGTlm3tu7urc0OI0AABYx/Jys2jRIiUnJyslJUVpaWlq166d+vTpo8OHD19wvczMTI0ZM0Y9evTwUlLf9tuX1prjwV2vsDAJAADWsrzczJw5U8OHD9ewYcPUqlUrzZ07V9WrV9f8+fPPu47D4dCgQYM0efJkNW7c2ItpfdOxwmKddhiSpHYNohQbGWZxIgAArGNpuSkuLtbmzZvLPJcqKChISUlJ2rBhw3nXe+qppxQTE6P77rvvop9RVFSkvLy8Mq9A8+2BXHO8eER3C5MAAGA9S8tNTk6OHA6HYmNjy8yPjY1VVlZWueusW7dOr7/+uubNm1ehz5g2bZqioqLMV3x8/CXn9jUHfnUCcXAQV6wBAKq2SpWbtWvX6o9//KO6du2qAwcOSJLefvttrVu3zq3h/ld+fr4GDx6sefPmKTo6ukLrjBs3Trm5ueZr//79Hs3obYZhaPxHWyVJSS1jLE4DAID1XLqJnyT985//1ODBgzVo0CBt2bJFRUVFkqTc3FxNnTpVy5Ytq/B7RUdHKzg4WNnZ2WXmZ2dnKy4u7pzld+/erczMTPXr18+c53Q6S3+RatWUkZGhJk2alFnHbrfLbg/cJ2P/+gng/drVszAJAAC+weU9N1OmTNHcuXM1b948hYSEmPO7d++utLQ0l94rNDRUHTt21KpVq8x5TqdTq1atUteuXc9ZvkWLFtq6davS09PN16233qpevXopPT09IA85XcwP2WfLTf/E+hYmAQDAN7i85yYjI0PXXnvtOfOjoqJ0/PhxlwMkJydr6NCh6tSpkzp37qxZs2apsLBQw4YNkyQNGTJE9evX17Rp0xQWFqbWrVuXWb9WrVqSdM78quBEcYkefKe0UHJvGwAASrlcbuLi4rRr1y4lJCSUmb9u3bpKXZY9cOBAHTlyRJMmTVJWVpYSExOVmppqnmS8b98+BQVZfsW6T9qRlW+Ox93cwsIkAAD4DpfLzfDhw/Xwww9r/vz5stlsOnjwoDZs2KAxY8Zo4sSJlQoxatQojRo1qtyfXeyxDwsWLKjUZwaSmAi7IsNCLr4gAABVgMvlZuzYsXI6nbr++ut14sQJXXvttbLb7RozZoxGjx7tiYw4D6P0vn0KCwm2NggAAD7E5XJjs9n0xBNP6NFHH9WuXbtUUFCgVq1aqWbNmp7
IhwtIWfKdJMnhNCxOAgCA73C53JwRGhqqVq1auTMLXLT7cOlTwA8c5yngAACc4XK56dWrl2y2898F9/PPP7+kQLg4p9PQgi8zdfK0Q5L0zwe7WZwIAADf4XK5SUxMLDN9+vRppaen67vvvtPQoUPdlQsX0G/2On1/sPQZWVfG1FTHK2pbnAgAAN/hcrl54YUXyp3/5JNPqqCgoNyfwX2OFRabxUaSpv++jYVpAADwPW67gcwf//hHzZ8/311vh/P4cvfP5njnlJvU8Yo6FqYBAMD3uK3cbNiwQWFhYe56O5zH5zsOm+PQatzcEACA/+XyYanbbrutzLRhGDp06JC+/vrrSt/EDxX3372le24aR9ewOAkAAL7J5XITFRVVZjooKEjNmzfXU089pRtvvNFtwXCuTZlH9dOx0su+R/RqanEaAAB8k0vlxuFwaNiwYWrTpo1q1+YKHW9b+0OOOb6tPU8ABwCgPC6dtBEcHKwbb7yxUk//xqXbfaT0arS+V8UpKOj89xoCAKAqc/mM1NatW2vPnj2eyIKLWPrtIUlSYsNa1gYBAMCHuVxupkyZojFjxujTTz/VoUOHlJeXV+YFz3h+eYY5bsTJxAAAnFeFz7l56qmn9Ne//lU333yzJOnWW28t8xgGwzBks9nkcDjcnxLmVVKSdGOrWAuTAADg2ypcbiZPnqwHHnhAX3zxhSfz4Dxq2kv/qB7v2+KCz/YCAKCqq3C5MQxDktSzZ0+PhcH5fZFxRJJ0Wc1Qi5MAAODbXDrnhj0G1rs8wm51BAAAfJpL97lp1qzZRQvO0aNHLykQznU4/5Q5bhYbYWESAAB8n0vlZvLkyefcoRiet2JbtjmuG8nzuwAAuBCXys0f/vAHxcTEeCoLzuPjLQfMMTfvAwDgwip8zg3n21inxFl6MndSS4olAAAXU+Fyc+ZqKXhfaHDpH9MtbetanAQAAN9X4cNSTqfTkzlQAaHBwVZHAADA57n8+AUAAABfRrkBAAABhXIDAAACCuXGD2zZf9zqCAAA+A3KjY/LyMpXcUnpydxNYmpYnAYAAN9HufFx2XlnH73QIi7SwiQAAPgHyo2faFWXYgMAQEVQbnwct04EAMA1lBsft/KXh2Y6uUM0AAAVQrnxYYZhaPOPxyRJ+adKLE4DAIB/oNz4sM0/HtO2Q3mSpP6J9SxOAwCAf6Dc+LBlW7PM8R2d4i1MAgCA/6Dc+LB9RwslSdE17WoUzT1uAACoCMqND9uTU1puBnVpaHESAAD8B+XGR50oLtHeX8rNDa1iLU4DAID/oNz4qOy8Ip25+vuqetzADwCAiqLc+Ki0Xy4BlySbzWZhEgAA/Avlxked6TOX1Qi1NggAAH6GcuPjrqofZXUEAAD8CuUGAAAEFMoNAAAIKJQbH1Vc4rQ6AgAAfoly46M++6700QunKTkAALiEcuOjLqtZepVUaDX+iAAAcAXfnD5qcdoBSVLXJpdZnAQAAP9CufFB/9l5xBzXqxVuYRIAAPwP5cbHGIahIfM3mtO3tqtnYRoAAPwP5cbHjPngW3P8556NLUwCAIB/otz4kIKiEv0z7SdJUlR4iMbd1NLiRAAA+B/KjQ/ZuPdnc7wyuaeFSQAA8F+UGx/y1Z6jkqQ6NUJ1eYTd4jQAAPgnyo0Psf9yT5v6XCEFAECl+US5mTNnjhISEhQWFqYuXbpo48aN51123rx56tGjh2rXrq3atWsrKSnpgsv7C8Mw9NLnuyRJbRvwJHAAACrL8nKzaNEiJScnKyUlRWlpaWrXrp369Omjw4cPl7v86tWrddddd+mLL77Qhg0bFB8frxtvvFEHDhzwcnL3OnX67GMWOl5R28IkAAD4N5thGIaVAbp06aKrr75as2fPliQ5nU7Fx8dr9OjRGjt27EXXdzgcql27tmbPnq0hQ4ZcdPm8vDxFRUUpNzdXkZGRl5zfXRKf+reOnzgtSdrxdF+FhQRbnAgAAN/hyve3pXtuiouLtXnzZiUlJZn
zgoKClJSUpA0bNlToPU6cOKHTp0+rTp065f68qKhIeXl5ZV6+ZvuhPLPYSKLYAABwCSwtNzk5OXI4HIqNjS0zPzY2VllZWRV6j8cff1z16tUrU5B+bdq0aYqKijJf8fHxl5zb3V5Zvdscb5l4g4VJAADwf5afc3Mppk+froULF+qjjz5SWFhYucuMGzdOubm55mv//v1eTnlxNeyle2o6N6qj2jVCLU4DAIB/q2blh0dHRys4OFjZ2dll5mdnZysuLu6C6z7//POaPn26Vq5cqbZt2553ObvdLrvdd+8Zc7LYofc2lhau3zTmCeAAAFwqS/fchIaGqmPHjlq1apU5z+l0atWqVeratet513v22Wf19NNPKzU1VZ06dfJGVI9wOA21nJRqTndoWMu6MAAABAhL99xIUnJysoYOHapOnTqpc+fOmjVrlgoLCzVs2DBJ0pAhQ1S/fn1NmzZNkvS3v/1NkyZN0rvvvquEhATz3JyaNWuqZs2alv0elbHt4NmTm5Naxui65jEWpgEAIDBYXm4GDhyoI0eOaNKkScrKylJiYqJSU1PNk4z37dunoKCzO5heeeUVFRcX6/bbby/zPikpKXryySe9Gf2Sbcw8ao5fG3q1hUkAAAgclpcbSRo1apRGjRpV7s9Wr15dZjozM9PzgbwkJNgmicctAADgTn59tVSgSIyvZXUEAAACBuUGAAAEFMqNhZ5fnmF1BAAAAg7lxkIOZ+ljvSLDfeLUJwAAAgLlxiJOp6HCYock6f5rm1icBgCAwEG5scjOw/nmOCo8xMIkAAAEFsqNRU6XGOa4Ds+TAgDAbSg3FqsXVf4DPwEAQOVQbixiyLj4QgAAwGWUG4tM+XS7JMlhUHIAAHAnyo0F8k+dNp8rVVzitDgNAACBhXJjgY+2HDDHn//1OuuCAAAQgCg3FtiZffYy8NpcKQUAgFtRbizw7++zJUm/a1/f4iQAAAQeyo0FDucXSZIaRdewOAkAAIGHcuNlRSUOc9y9abSFSQAACEyUGy/79ZXfzWJrWhcEAIAARbkBAAABhXLjZfuPnrA6AgAAAY1y42ULN+03xzXt1SxMAgBAYKLceNmq7aWXgTeOriGbzWZxGgAAAg/lxoscTkOZP5celnrguiYWpwEAIDBRbrzo9rlfmuMeV3IZOAAAnkC58ZL/7vlZW/YdN6frRoVbFwYAgABGufGSga9+ZY5Xj7nOuiAAAAQ4yo0XLNt6yBw/3reFEnjsAgAAHkO58YIR76SZ43uvSbAuCAAAVQDlxou6N71M9mrBVscAACCgUW68oEZoaaGZfOtVFicBACDwUW68KCSYzQ0AgKfxbQsAAAIK5cbDThSXqLDYYXUMAACqDMqNh+3MLjDHsZFhFiYBAKBqoNx4WN7J05Kk6Jp2hYVwpRQAAJ5GufGwr388Jqn08BQAAPA8yo2Hnbk7ccM61S1OAgBA1UC58bC4X86zaR4XYXESAACqBsqNh506XXqlVO8WMRYnAQCgaqDceNiW/cclSTabzdogAABUEZQbD3MahiSpXYMoi5MAAFA1UG68pHpoNasjAABQJVBuAABAQKHcAACAgEK58SDDMPTLKTcAAMBLKDce1GjcMnNsD2FTAwDgDXzjesh3B3LLTEeGhViUBACAqoVy4yFZuafM8d5pN1uYBACAqoVy4yGffntQktQuvhY38AMAwIsoNx6y+0ihJOkKHpgJAIBXUW48JDuv9LDUwKvjLU4CAEDVQrnxgJyCIh3OL5IktawbaXEaAACqFsqNBwx7Y5M5rlMj1MIkAABUPZQbN/s686i2/s9l4AAAwHsoN242c8VOc7x+bG8LkwAAUDVRbtzI4TT0deYxSdLYm1qofq1wixMBAFD1UG7c6Ks9P6vY4ZTNJt3TLcHqOAAAVEk+UW7mzJmjhIQEhYWFqUuXLtq4ceMFl//ggw/UokULhYWFqU2bNlq2bNkFl/eWMR98I0myVwtSWEiwxWkAAKiaLC83ixY
tUnJyslJSUpSWlqZ27dqpT58+Onz4cLnLf/nll7rrrrt03333acuWLRowYIAGDBig7777zsvJy8opKNKhXx65cO2Vl1uaBQCAqsxmGIZhZYAuXbro6quv1uzZsyVJTqdT8fHxGj16tMaOHXvO8gMHDlRhYaE+/fRTc95vfvMbJSYmau7cuRf9vLy8PEVFRSk3N1eRke67B03avmO67eUvJUmbJyTpspp2t703AABVnSvf35buuSkuLtbmzZuVlJRkzgsKClJSUpI2bNhQ7jobNmwos7wk9enT57zLFxUVKS8vr8zLkxrWqU6xAQDAQpaWm5ycHDkcDsXGxpaZHxsbq6ysrHLXycrKcmn5adOmKSoqynzFx3vmcQg2lZ5rE1rN8iN9AABUaQH/TTxu3Djl5uaar/3793vkc9o3rK2MKTdpZXJPj7w/AAComGpWfnh0dLSCg4OVnZ1dZn52drbi4uLKXScuLs6l5e12u+x2DhMBAFBVWLrnJjQ0VB07dtSqVavMeU6nU6tWrVLXrl3LXadr165llpekFStWnHd5AABQtVi650aSkpOTNXToUHXq1EmdO3fWrFmzVFhYqGHDhkmShgwZovr162vatGmSpIcfflg9e/bUjBkzdMstt2jhwoX6+uuv9eqrr1r5awAAAB9hebkZOHCgjhw5okmTJikrK0uJiYlKTU01Txret2+fgoLO7mDq1q2b3n33XU2YMEHjx4/XlVdeqY8//litW7e26lcAAAA+xPL73Hibp+5zAwAAPMdv7nMDAADgbpQbAAAQUCg3AAAgoFBuAABAQKHcAACAgEK5AQAAAYVyAwAAAgrlBgAABBTKDQAACCiWP37B287ckDkvL8/iJAAAoKLOfG9X5MEKVa7c5OfnS5Li4+MtTgIAAFyVn5+vqKioCy5T5Z4t5XQ6dfDgQUVERMhms7n1vfPy8hQfH6/9+/fz3CoPYjt7B9vZO9jO3sO29g5PbWfDMJSfn6969eqVeaB2earcnpugoCA1aNDAo58RGRnJPxwvYDt7B9vZO9jO3sO29g5PbOeL7bE5gxOKAQBAQKHcAACAgEK5cSO73a6UlBTZ7XarowQ0trN3sJ29g+3sPWxr7/CF7VzlTigGAACBjT03AAAgoFBuAABAQKHcAACAgEK5AQAAAYVy46I5c+YoISFBYWFh6tKlizZu3HjB5T/44AO1aNFCYWFhatOmjZYtW+alpP7Nle08b9489ejRQ7Vr11bt2rWVlJR00T8XlHL17/MZCxculM1m04ABAzwbMEC4up2PHz+ukSNHqm7durLb7WrWrBn/7agAV7fzrFmz1Lx5c4WHhys+Pl6PPPKITp065aW0/uk///mP+vXrp3r16slms+njjz++6DqrV69Whw4dZLfb1bRpUy1YsMDjOWWgwhYuXGiEhoYa8+fPN77//ntj+PDhRq1atYzs7Oxyl1+/fr0RHBxsPPvss8a2bduMCRMmGCEhIcbWrVu9nNy/uLqd7777bmPOnDnGli1bjO3btxv33HOPERUVZfz0009eTu5fXN3OZ+zdu9eoX7++0aNHD6N///7eCevHXN3ORUVFRqdOnYybb77ZWLdunbF3715j9erVRnp6upeT+xdXt/M777xj2O1245133jH27t1rLF++3Khbt67xyCOPeDm5f1m2bJnxxBNPGIsXLzYkGR999NEFl9+zZ49RvXp1Izk52di2bZvx0ksvGcHBwUZqaqpHc1JuXNC5c2dj5MiR5rTD4TDq1atnTJs2rdzl77zzTuOWW24pM69Lly7Gn//8Z4/m9Heubuf/VVJSYkRERBhvvvmmpyIGhMps55KSEqNbt27Ga6+9ZgwdOpRyUwGubudXXnnFaNy4sVFcXOytiAHB1e08cuRIo3fv3mXmJScnG927d/dozkBSkXLz2GOPGVdddVWZeQMHDjT69OnjwWSGwWGpCiouLtbmzZuVlJRkzgsKClJSUpI2bNhQ7jobNmwos7w
k9enT57zLo3Lb+X+dOHFCp0+fVp06dTwV0+9Vdjs/9dRTiomJ0X333eeNmH6vMtt5yZIl6tq1q0aOHKnY2Fi1bt1aU6dOlcPh8FZsv1OZ7dytWzdt3rzZPHS1Z88eLVu2TDfffLNXMlcVVn0PVrkHZ1ZWTk6OHA6HYmNjy8yPjY3Vjh07yl0nKyur3OWzsrI8ltPfVWY7/6/HH39c9erVO+cfFM6qzHZet26dXn/9daWnp3shYWCozHbes2ePPv/8cw0aNEjLli3Trl27NGLECJ0+fVopKSneiO13KrOd7777buXk5Oiaa66RYRgqKSnRAw88oPHjx3sjcpVxvu/BvLw8nTx5UuHh4R75XPbcIKBMnz5dCxcu1EcffaSwsDCr4wSM/Px8DR48WPPmzVN0dLTVcQKa0+lUTEyMXn31VXXs2FEDBw7UE088oblz51odLaCsXr1aU6dO1csvv6y0tDQtXrxYS5cu1dNPP211NLgBe24qKDo6WsHBwcrOzi4zPzs7W3FxceWuExcX59LyqNx2PuP555/X9OnTtXLlSrVt29aTMf2eq9t59+7dyszMVL9+/cx5TqdTklStWjVlZGSoSZMmng3thyrz97lu3boKCQlRcHCwOa9ly5bKyspScXGxQkNDPZrZH1VmO0+cOFGDBw/Wn/70J0lSmzZtVFhYqPvvv19PPPGEgoL4f393ON/3YGRkpMf22kjsuamw0NBQdezYUatWrTLnOZ1OrVq1Sl27di13na5du5ZZXpJWrFhx3uVRue0sSc8++6yefvpppaamqlOnTt6I6tdc3c4tWrTQ1q1blZ6ebr5uvfVW9erVS+np6YqPj/dmfL9Rmb/P3bt3165du8zyKEk7d+5U3bp1KTbnUZntfOLEiXMKzJlCafDIRbex7HvQo6crB5iFCxcadrvdWLBggbFt2zbj/vvvN2rVqmVkZWUZhmEYgwcPNsaOHWsuv379eqNatWrG888/b2zfvt1ISUnhUvAKcHU7T58+3QgNDTU+/PBD49ChQ+YrPz/fql/BL7i6nf8XV0tVjKvbed++fUZERIQxatQoIyMjw/j000+NmJgYY8qUKVb9Cn7B1e2ckpJiREREGO+9956xZ88e49///rfRpEkT484777TqV/AL+fn5xpYtW4wtW7YYkoyZM2caW7ZsMX788UfDMAxj7NixxuDBg83lz1wK/uijjxrbt2835syZw6Xgvuill14yGjZsaISGhhqdO3c2vvrqK/NnPXv2NIYOHVpm+ffff99o1qyZERoaalx11VXG0qVLvZzYP7myna+44gpD0jmvlJQU7wf3M67+ff41yk3Fubqdv/zyS6NLly6G3W43GjdubDzzzDNGSUmJl1P7H1e28+nTp40nn3zSaNKkiREWFmbEx8cbI0aMMI4dO+b94H7kiy++KPe/t2e27dChQ42ePXues05iYqIRGhpqNG7c2HjjjTc8ntNmGOx/AwAAgYNzbgAAQECh3AAAgIBCuQEAAAGFcgMAAAIK5QYAAAQUyg0AAAgolBsAABBQKDcAACCgUG4AlLFgwQLVqlXL6hiVZrPZ9PHHH19wmXvuuUcDBgzwSh4A3ke5AQLQPffcI5vNds5r165dVkfTggULzDxBQUFq0KCBhg0bpsOHD7vl/Q8dOqSbbrpJkpSZmSmbzab09PQyy7z44otasGCBWz7vfJ588knz9wwODlZ8fLzuv/9+HT161KX3oYgBrqtmdQAAntG3b1+98cYbZeZdfvnlFqUpKzIyUhkZGXI6nfrmm280bNgwHTx4UMuXL7/k946Li7voMlFRUZf8ORVx1VVXaeXKlXI4HNq+fbvuvfde5ebmatGiRV75fKCqYs8NEKDsdrvi4uLKvIKDgzVz5ky1adNGNWrUUHx8vEaMGKGCgoLzvs8333yjXr16KSIiQpGRkerYsaO+/vpr8+fr1q1Tjx49FB4ervj4eD300EMqLCy8YDabzaa4uDjVq1dPN91
0kx566CGtXLlSJ0+elNPp1FNPPaUGDRrIbrcrMTFRqamp5rrFxcUaNWqU6tatq7CwMF1xxRWaNm1amfc+c1iqUaNGkqT27dvLZrPpuuuuk1R2b8irr76qevXqyel0lsnYv39/3Xvvveb0J598og4dOigsLEyNGzfW5MmTVVJScsHfs1q1aoqLi1P9+vWVlJSkO+64QytWrDB/7nA4dN9996lRo0YKDw9X8+bN9eKLL5o/f/LJJ/Xmm2/qk08+MfcCrV69WpK0f/9+3XnnnapVq5bq1Kmj/v37KzMz84J5gKqCcgNUMUFBQfr73/+u77//Xm+++aY+//xzPfbYY+ddftCgQWrQoIE2bdqkzZs3a+zYsQoJCZEk7d69W3379tXvf/97ffvtt1q0aJHWrVunUaNGuZQpPDxcTqdTJSUlevHFFzVjxgw9//zz+vbbb9WnTx/deuut+uGHHyRJf//737VkyRK9//77ysjI0DvvvKOEhIRy33fjxo2SpJUrV+rQoUNavHjxOcvccccd+vnnn/XFF1+Y844eParU1FQNGjRIkrR27VoNGTJEDz/8sLZt26Z//OMfWrBggZ555pkK/46ZmZlavny5QkNDzXlOp1MNGjTQBx98oG3btmnSpEkaP3683n//fUnSmDFjdOedd6pv3746dOiQDh06pG7duun06dPq06ePIiIitHbtWq1fv141a9ZU3759VVxcXOFMQMDy+HPHAXjd0KFDjeDgYKNGjRrm6/bbby932Q8++MC47LLLzOk33njDiIqKMqcjIiKMBQsWlLvufffdZ9x///1l5q1du9YICgoyTp48We46//v+O3fuNJo1a2Z06tTJMAzDqFevnvHMM8+UWefqq682RowYYRiGYYwePdro3bu34XQ6y31/ScZHH31kGIZh7N2715BkbNmypcwyQ4cONfr3729O9+/f37j33nvN6X/84x9GvXr1DIfDYRiGYVx//fXG1KlTy7zH22+/bdStW7fcDIZhGCkpKUZQUJBRo0YNIywszJBkSDJmzpx53nUMwzBGjhxp/P73vz9v1jOf3bx58zLboKioyAgPDzeWL19+wfcHqgLOuQECVK9evfTKK6+Y0zVq1JBUuhdj2rRp2rFjh/Ly8lRSUqJTp07pxIkTql69+jnvk5ycrD/96U96++23zUMrTZo0kVR6yOrbb7/VO++8Yy5vGIacTqf27t2rli1blpstNzdXNWvWlNPp1KlTp3TNNdfotddeU15eng4ePKju3buXWb579+765ptvJJUeUrrhhhvUvHlz9e3bV7/97W914403XtK2GjRokIYPH66XX35Zdrtd77zzjv7whz8oKCjI/D3Xr19fZk+Nw+G44HaTpObNm2vJkiU6deqU/u///k/p6ekaPXp0mWXmzJmj+fPna9++fTp58qSKi4uVmJh4wbzffPONdu3apYiIiDLzT506pd27d1diCwCBhXIDBKgaNWqoadOmZeZlZmbqt7/9rR588EE988wzqlOnjtatW6f77rtPxcXF5X5JP/nkk7r77ru1dOlSffbZZ0pJSdHChQv1u9/9TgUFBfrzn/+shx566Jz1GjZseN5sERERSktLU1BQkOrWravw8HBJUl5e3kV/rw4dOmjv3r367LPPtHLlSt15551KSkrShx9+eNF1z6dfv34yDENLly7V1VdfrbVr1+qFF14wf15QUKDJkyfrtttuO2fdsLCw875vaGio+Wcwffp03XLLLZo8ebKefvppSdLChQs1ZswYzZgxQ127dlVERISee+45/fe//71g3oKCAnXs2LFMqTzDV04aB6xEuQGqkM2bN8vpdGrGjBnmXokz53dcSLNmzdSsWTM98sgjuuuuu/TGG2/od7/7nTp06KBt27adU6IuJigoqNx1IiMjVa9ePa1fv149e/Y0569fv16dO3cus9zAgQM1cOBA3X777erbt6+OHj2qOnXqlHm/M+e3OByOC+YJCwvTbbfdpnf
eeUe7du1S8+bN1aFDB/PnHTp0UEZGhsu/5/+aMGGCevfurQcffND8Pbt166YRI0aYy/zvnpfQ0NBz8nfo0EGLFi1STEyMIiMjLykTEIg4oRioQpo2barTp0/rpZde0p49e/T2229r7ty5513+5MmTGjVqlFavXq0ff/xR69ev16ZNm8zDTY8//ri+/PJLjRo1Sunp6frhhx/0ySefuHxC8a89+uij+tvf/qZFixYpIyNDY8eOVXp6uh5++GFJ0syZM/Xee+9px44d2rlzpz744APFxcWVe+PBmJgYhYeHKzU1VdnZ2crNzT3v5w4aNEhLly7V/PnzzROJz5g0aZLeeustTZ48Wd9//722b9+uhQsXasKECS79bl27dlXbtm01depUSdKVV16pr7/+WsuXL9fOnTs1ceJEbdq0qcw6CQkJ+vbbb5WRkaGcnBydPn1agwYNUnR0tPr376+1a9dq7969Wr16tR566CH99NNPLmUCApLVJ/0AcL/yTkI9Y+bMmUbdunWN8PBwo0+fPsZbb71lSDKOHTtmGEbZE36LioqMP/zhD0Z8fLwRGhpq1KtXzxg1alSZk4U3btxo3HDDDUbNmjWNGjVqGG3btj3nhOBf+98Tiv+Xw+EwnnzySaN+/fpGSEiI0a5dO+Ozzz4zf/7qq68aiYmJRo0aNYzIyEjj+uuvN9LS0syf61cnFBuGYcybN8+Ij483goKCjJ49e553+zgcDqNu3bqGJGP37t3n5EpNTTW6detmhIeHG5GRkUbnzp2NV1999by/R0pKitGuXbtz5r/33nuG3W439u3bZ5w6dcq45557jKioKKNWrVrGgw8+aIwdO7bMeocPHza3ryTjiy++MAzDMA4dOmQMGTLEiI6ONux2u9G4cWNj+PDhRm5u7nkzAVWFzTAMw9p6BQAA4D4clgIAAAGFcgMAAAIK5QYAAAQUyg0AAAgolBsAABBQKDcAACCgUG4AAEBAodwAAICAQrkBAAABhXIDAAACCuUGAAAElP8HK0zmVyWlGLkAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Матрица ошибок:\n", + "[[1326 400]\n", + " [ 50 1760]]\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Смещение: 0.8811650848575816\n", + "Дисперсия: 0.008658656436943876\n" + ] + } + ], + "source": [ + "# Конвейер\n", + "pipeline_grad = Pipeline([\n", + " ('preprocessing', preprocessing),\n", + " ('classifier', GradientBoostingClassifier())\n", + "])\n", + "\n", + "# Определение сетки гиперпараметров\n", + "param_grid = {\n", + " 'classifier__n_estimators': [100, 200, 300],\n", + " #Скорость обучения\n", + " 'classifier__learning_rate': [0.1, 0.2],\n", + " #Максимальная глубина дерева\n", + " 'classifier__max_depth': [3, 5, 7],\n", + " 'classifier__subsample': [0.1, 0.5, 1.0],\n", + "}\n", + "\n", + "# Создание объекта GridSearchCV\n", + "grid_search = GridSearchCV(pipeline_grad, param_grid, cv=2, scoring='roc_auc', n_jobs=-1)\n", + "\n", + "# Обучение модели с перебором гиперпараметров\n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "print(\"Лучшие гиперпараметры: \", grid_search.best_params_)\n", + "\n", + "# Лучшая модель\n", + "best_model = grid_search.best_estimator_\n", + "\n", + "# Использование и оценка лучшей модели\n", + "y_pred_proba = best_model.predict_proba(X_test)[:, 1]\n", + "print(f'ROC у метода градиентного спуска = {roc_auc_score(y_test, y_pred_proba)}')\n", + "\n", + "y_pred = best_model.predict(X_test)\n", + "print(f'Точность = {accuracy_score(y_test, y_pred)}')\n", + "\n", + "fpr, tpr, _ = metrics.roc_curve(y_test, y_pred_proba)\n", + "\n", + "# построение ROC кривой\n", + "plt.plot(fpr, tpr)\n", + "plt.ylabel('True Positive Rate')\n", + "plt.xlabel('False Positive Rate')\n", + "plt.show()\n", + "\n", + "# Построение матрицы ошибок\n", + "conf_matrix = confusion_matrix(y_test, y_pred)\n", + "\n", + "# Визуализация матрицы ошибок\n", + "plt.figure(figsize=(8, 6))\n", + "sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues', \n", + " xticklabels=['Предсказанный \"безопасный\"', 'Предсказанный 
\"опасный\"'], \n", + " yticklabels=['Действительно \"безопасный\"', 'Действительно \"опасный\"'])\n", + "plt.title('Confusion Matrix')\n", + "plt.ylabel('Actual')\n", + "plt.xlabel('Predicted')\n", + "plt.show()\n", + "\n", + "# Оценка дисперсии и смещения\n", + "cv_results = grid_search.cv_results_\n", + "mean_test_score = cv_results['mean_test_score']\n", + "std_test_score = cv_results['std_test_score']\n", + "\n", + "print(f\"Смещение: {mean_test_score.mean()}\")\n", + "print(f\"Дисперсия: {std_test_score.mean()}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Вывод**:\n", + "\n", + "Все модели классификации показали хорошие результаты, но лучший показатель точности у случайного леса. При этом все рассмотренные модели немного не дотянули до показателя точности в 90%. Дополнительая настройка гиперпараметров могла бы приблизить значение оценки ещё ближе к 90% " + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aimenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}