diff --git a/lab_3/lab3.ipynb b/lab_3/lab3.ipynb new file mode 100644 index 0000000..9ac7ebf --- /dev/null +++ b/lab_3/lab3.ipynb @@ -0,0 +1,1030 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Данные по инсультам\n", + "\n", + "Выведем информацию о столбцах датасета:" + ] + }, + { + "cell_type": "code", + "execution_count": 441, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'gender', 'age', 'hypertension', 'heart_disease', 'ever_married',\n", + " 'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',\n", + " 'smoking_status', 'stroke'],\n", + " dtype='object')\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idgenderagehypertensionheart_diseaseever_marriedwork_typeResidence_typeavg_glucose_levelbmismoking_statusstroke
09046Male67.001YesPrivateUrban228.6936.6formerly smoked1
151676Female61.000YesSelf-employedRural202.21NaNnever smoked1
231112Male80.001YesPrivateRural105.9232.5never smoked1
360182Female49.000YesPrivateUrban171.2334.4smokes1
41665Female79.010YesSelf-employedRural174.1224.0never smoked1
\n", + "
" + ], + "text/plain": [ + " id gender age hypertension heart_disease ever_married \\\n", + "0 9046 Male 67.0 0 1 Yes \n", + "1 51676 Female 61.0 0 0 Yes \n", + "2 31112 Male 80.0 0 1 Yes \n", + "3 60182 Female 49.0 0 0 Yes \n", + "4 1665 Female 79.0 1 0 Yes \n", + "\n", + " work_type Residence_type avg_glucose_level bmi smoking_status \\\n", + "0 Private Urban 228.69 36.6 formerly smoked \n", + "1 Self-employed Rural 202.21 NaN never smoked \n", + "2 Private Rural 105.92 32.5 never smoked \n", + "3 Private Urban 171.23 34.4 smokes \n", + "4 Self-employed Rural 174.12 24.0 never smoked \n", + "\n", + " stroke \n", + "0 1 \n", + "1 1 \n", + "2 1 \n", + "3 1 \n", + "4 1 " + ] + }, + "execution_count": 441, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "from sklearn.model_selection import train_test_split\n", + "from imblearn.over_sampling import RandomOverSampler\n", + "from sklearn.preprocessing import StandardScaler\n", + "import featuretools as ft\n", + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.model_selection import cross_val_score\n", + "import time\n", + "from sklearn.metrics import root_mean_squared_error, r2_score, mean_absolute_error\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "\n", + "df = pd.read_csv(\"..//..//static//csv//healthcare-dataset-stroke-data.csv\")\n", + "\n", + "print(df.columns)\n", + "df.head()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Определим бизнес цели и цели технического проекта.\n", + "\n", + "1. Улучшение диагностики и профилактики инсульта.\n", + " * Бизнес-цель: повышение точности прогнозирования риска инсульта среди пациентов для более раннего лечебного вмешательства. 
Определение основных факторов риска для более целенаправленного подхода в медицинском обслуживании.\n", + " * Цель технического проекта: разработка статистической модели, которая решает задачу классификации и предсказывает возможность возникновения инсульта у пациентов на основе имеющихся данных (возраст, гипертония, заболевания сердца и пр.), с целью выявления групп риска. Внедрение этой модели в систему поддержки принятия медицинских решений для врачей.\n", + "2. Снижение расходов на лечение инсультов.\n", + " * Бизнес-цель: снижение затрат на лечение инсульта путем более эффективного распределения медицинских ресурсов и направленных профилактических мер.\n", + " * Цель технического проекта: создание системы оценки индивидуального риска инсульта для пациентов, что позволит медучреждениям проводить профилактические меры среди целевых групп, сокращая расходы на лечение.\n", + "\n", + "### И теперь проверим датасет на пустые значения:" + ] + }, + { + "cell_type": "code", + "execution_count": 442, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id 0\n", + "gender 0\n", + "age 0\n", + "hypertension 0\n", + "heart_disease 0\n", + "ever_married 0\n", + "work_type 0\n", + "Residence_type 0\n", + "avg_glucose_level 0\n", + "bmi 201\n", + "smoking_status 0\n", + "stroke 0\n", + "dtype: int64\n", + "\n", + "id False\n", + "gender False\n", + "age False\n", + "hypertension False\n", + "heart_disease False\n", + "ever_married False\n", + "work_type False\n", + "Residence_type False\n", + "avg_glucose_level False\n", + "bmi True\n", + "smoking_status False\n", + "stroke False\n", + "dtype: bool\n", + "\n", + "bmi процент пустых значений: %3.93\n" + ] + } + ], + "source": [ + "# Количество пустых значений признаков\n", + "print(df.isnull().sum())\n", + "\n", + "print()\n", + "\n", + "# Есть ли пустые значения признаков\n", + "print(df.isnull().any())\n", + "\n", + "print()\n", + "\n", + "# Процент пустых значений признаков\n", 
+ "for i in df.columns:\n", + " null_rate = df[i].isnull().sum() / len(df) * 100\n", + " if null_rate > 0:\n", + " print(f\"{i} процент пустых значений: %{null_rate:.2f}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "В столбце bmi можно заметить пустые значения. Заменим их на медиану:" + ] + }, + { + "cell_type": "code", + "execution_count": 443, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "Количество пустых значений в каждом столбце после замены:\n", + "id 0\n", + "gender 0\n", + "age 0\n", + "hypertension 0\n", + "heart_disease 0\n", + "ever_married 0\n", + "work_type 0\n", + "Residence_type 0\n", + "avg_glucose_level 0\n", + "bmi 0\n", + "smoking_status 0\n", + "stroke 0\n", + "dtype: int64\n" + ] + } + ], + "source": [ + "# Замена значений\n", + "df[\"bmi\"] = df[\"bmi\"].fillna(df[\"bmi\"].median())\n", + "\n", + "# Проверка на пропущенные значения после замены\n", + "missing_values_after_drop = df.isnull().sum()\n", + "\n", + "# Вывод результатов после замены\n", + "print(\"\\nКоличество пустых значений в каждом столбце после замены:\")\n", + "print(missing_values_after_drop)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Можно перейти к созданию выборок" + ] + }, + { + "cell_type": "code", + "execution_count": 444, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Размер обучающей выборки: (2503, 11)\n", + "Размер контрольной выборки: (1074, 11)\n", + "Размер тестовой выборки: (1533, 11)\n" + ] + } + ], + "source": [ + "# Разделение данных на признаки (X) и целевую переменную (y)\n", + "# В данном случае мы хотим предсказать 'stroke'\n", + "X = df.drop(columns=['stroke'])\n", + "y = df['stroke']\n", + "\n", + "# Разбиение данных на обучающую и тестовую выборки\n", + "# Сначала разделим на обучающую и тестовую\n", + "X_train, X_test, y_train, y_test = train_test_split(X, y, 
test_size=0.3)\n", + "\n", + "# Затем разделим обучающую выборку на обучающую и контрольную\n", + "X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3)\n", + "\n", + "# Проверка размеров выборок\n", + "print(\"Размер обучающей выборки:\", X_train.shape)\n", + "print(\"Размер контрольной выборки:\", X_val.shape)\n", + "print(\"Размер тестовой выборки:\", X_test.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Оценим сбалансированность выборок:" + ] + }, + { + "cell_type": "code", + "execution_count": 445, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Распределение классов в обучающей выборке:\n", + "stroke\n", + "0 0.955653\n", + "1 0.044347\n", + "Name: proportion, dtype: float64\n", + "\n", + "Распределение классов в контрольной выборке:\n", + "stroke\n", + "0 0.954376\n", + "1 0.045624\n", + "Name: proportion, dtype: float64\n", + "\n", + "Распределение классов в тестовой выборке:\n", + "stroke\n", + "0 0.941944\n", + "1 0.058056\n", + "Name: proportion, dtype: float64\n" + ] + }, + { + "data": { + "image/png": 
"iVBORw0KGgoAAAANSUhEUgAABboAAAHyCAYAAAAtJXgGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABpfElEQVR4nO3dd3RU1d7G8ScJ6SEgpFJM6KGDoQiItECA0PQKUq4EVEABFfCigkoAS0QUQUApChbwiqDgVZQqKGIEAVGQIiVID4QWagKZ/f7ByrwMM4FQZHLw+1kra2X27HPO78xkZp95cmYfD2OMEQAAAAAAAAAAFuXp7gIAAAAAAAAAALgRBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAACug81mU3p6unbu3OnuUgAAAP7xCLoBAAAAII8OHjyoAQMGKCoqSj4+PgoNDVWlSpWUkZHh7tIAAAD+0Qq4uwAAAICb7YMPPlDPnj3tt319fXXnnXeqRYsWevHFFxUeHu7G6gBY1fbt29WkSROdP39eTz75pO666y4VKFBA/v7+CgwMdHd5AAAA/2gE3QAA4LY1cuRIlSpVSufOndOPP/6od999V9988402btyogIAAd5cHwGL69OkjHx8f/fzzzypevLi7ywEAAMAlCLoBAMBtq1WrVqpVq5Yk6dFHH1XRokU1ZswYffnll+rSpYubqwNgJWvXrtV3332nRYsWEXIDAADkQ8zRDQAA/jGaNm0qSUpNTZUkHT16VP/5z39UtWpVBQUFKTg4WK1atdJvv/3mtOy5c+c0fPhwlS9fXn5+foqMjNT999+vHTt2SJJ27dolDw+PXH8aN25sX9fy5cvl4eGhWbNmaejQoYqIiFBgYKDatWunPXv2OG171apVatmypQoVKqSAgAA1atRIK1eudLmPjRs3drn94cOHO/WdMWOGYmNj5e/vryJFiqhz584ut3+lfbuUzWbT2LFjVblyZfn5+Sk8PFx9+vTRsWPHHPpFR0erTZs2Ttvp37+/0zpd1T569Ginx1SSMjMzlZSUpLJly8rX11clS5bUM888o8zMTJeP1aUuf9xCQkKUkJCgjRs35mnZKlWqaO3atapfv778/f1VqlQpTZo0yaFfVlaWhg0bptjYWBUqVEiBgYFq2LChli1b5tBv69atatq0qSIiIuz78dhjj+no0aNO2+7Ro8dVn+8ePXooOjraYbk9e/bI399fHh4e2rVrl6T/f54/+OADh77Dhw93+bz079/fqZ42bdo4bCtnnW+88UYuj57z+qdPny4PDw9NmzbNod+rr74qDw8PffPNN7muS7r495XzOHh6eioiIkIPPvigdu/efUN1/fzzz/Lz89OOHTtUuXJl+fr6KiIiQn369HH53MyePdv++goJCdG///1v7du3z6FPjx49FBQUpJ07dyo+Pl6BgYEqVqyYRo4cKWOMU72XPjcnT55UbGysSpUqpQMHDtjb33jjDdWvX19FixaVv7+/YmNjNWfOHIft3uhjDAAAkB9xRjcAAPjHyAmlixYtKknauXOn5s2bp44dO6pUqVJKS0vT5MmT1ahRI23atEnFihWTJGVnZ6tNmzZaunSpOnfurKeeekonT57U4sWLtXHjRpUpU8a+jS5duqh169YO2x0yZIjLel555RV5eHjo2Wef1aFDhzR27FjFxcVp/fr18vf3lyR99913atWqlWJjY5WUlCRPT09Nnz5dTZs21YoVK1SnTh2n9ZYoUULJycmSpFOnTunxxx93ue0XX3xRnTp10qOPPqrDhw9r/Pjxuvfee/Xrr7+qcOHCTsv07t1bDRs2lCR98cUXmjt3rsP9ffr0sc+P/uSTTyo1NVUTJkzQr7/+qpUrV8rb29vl43Atjh8/bt+3S9lsNrVr104//vijevfurYoVK2r
Dhg1666239Oeff2revHlXXXdMTIyef/55GWO0Y8cOjRkzRq1bt3YISHNz7NgxtW7dWp06dVKXLl302Wef6fHHH5ePj48efvhhSVJGRobee+89denSRb169dLJkyf1/vvvKz4+XqtXr1aNGjUkSadPn1aJEiXUtm1bBQcHa+PGjZo4caL27dunr776ymnbISEheuutt+y3H3rooavWO2zYMJ07d+6q/dyhZ8+e+uKLLzRo0CA1b95cJUuW1IYNGzRixAg98sgjTq8vVxo2bKjevXvLZrNp48aNGjt2rPbv368VK1Zcd11HjhzRuXPn9Pjjj6tp06Z67LHHtGPHDk2cOFGrVq3SqlWr5OvrK+n/rxNQu3ZtJScnKy0tTePGjdPKlSudXl/Z2dlq2bKl7r77br3++utasGCBkpKSdOHCBY0cOdJlLefPn9e//vUv7d69WytXrlRkZKT9vnHjxqldu3bq1q2bsrKy9Omnn6pjx476+uuvlZCQcNMeYwAAgHzHAAAA3GamT59uJJklS5aYw4cPmz179phPP/3UFC1a1Pj7+5u9e/caY4w5d+6cyc7Odlg2NTXV+Pr6mpEjR9rbpk2bZiSZMWPGOG3LZrPZl5NkRo8e7dSncuXKplGjRvbby5YtM5JM8eLFTUZGhr39s88+M5LMuHHj7OsuV66ciY+Pt2/HGGPOnDljSpUqZZo3b+60rfr165sqVarYbx8+fNhIMklJSfa2Xbt2GS8vL/PKK684LLthwwZToEABp/Zt27YZSebDDz+0tyUlJZlLDyVXrFhhJJmZM2c6LLtgwQKn9qioKJOQkOBUe79+/czlh6eX1/7MM8+YsLAwExsb6/CYfvzxx8bT09OsWLHCYflJkyYZSWblypVO27tUo0aNHNZnjDFDhw41ksyhQ4euuqwk8+abb9rbMjMzTY0aNUxYWJjJysoyxhhz4cIFk5mZ6bDssWPHTHh4uHn44YevuI2+ffuaoKAgp/Zu3bqZUqVKObRd/pglJiaaqKgo++2NGzcaT09P06pVKyPJpKamGmOM+euvv4wkM23aNIf1Xf5c52yjX79+TvUkJCQ4bOtKr4srrf/AgQOmSJEipnnz5iYzM9PUrFnT3HnnnebEiRO5ridHVFSUSUxMdGjr2rWrCQgIuKG6cm43a9bMXLhwwd6e834zfvx4Y4wxWVlZJiwszFSpUsWcPXvW3u/rr782ksywYcPsbYmJiUaSeeKJJ+xtNpvNJCQkGB8fH3P48GGHeqdPn25sNpvp1q2bCQgIMKtWrXKq+8yZMw63s7KyTJUqVUzTpk0d2m/kMQYAAMiPmLoEAADctuLi4hQaGqqSJUuqc+fOCgoK0ty5c+3z6/r6+srT8+LhUHZ2to4cOaKgoCBVqFBB69ats6/n888/V0hIiJ544gmnbVw+pcO16N69uwoWLGi//cADDygyMtI+bcD69eu1bds2de3aVUeOHFF6errS09N1+vRpNWvWTD/88INsNpvDOs+dOyc/P78rbveLL76QzWZTp06d7OtMT09XRESEypUr5zSVRlZWliTZz1Z1Zfbs2SpUqJCaN2/usM7Y2FgFBQU5rfP8+fMO/dLT0696hvG+ffs0fvx4vfjiiwoKCnLafsWKFRUTE+Owzpzpai7fvis5NR0+fFgpKSmaO3euqlWrppCQkKsuW6BAAfXp08d+28fHR3369NGhQ4e0du1aSZKXl5d8fHwkXTwD/ejRo7pw4YJq1arl8PeW48SJE0pLS9PSpUs1f/583XvvvU59srKyrvi8uDJkyBDddddd6tixo0N7aGioJGnv3r15Ws+5c+ecnsPz58+77HvmzBmlp6fr2LFjDlNy5CYiIkITJ07U4sWL1bBhQ61fv17Tpk1TcHBwnmrLzMxUenq6Dh06pMWLF+u7775Ts2bNbrguSRo0aJC8vLzstx966CGFh4dr/vz5kqQ1a9bo0KFD6tu3r8NrMSEhQTExMfZ+l7p0GpicaWG
ysrK0ZMkSp76DBw/WzJkz9dlnn7n8RkfOt0Gki980OHHihBo2bOj0N3ajjzEAAEB+w9QlAADgtjVx4kSVL19eBQoUUHh4uCpUqGAPtqWLYeO4ceP0zjvvKDU1VdnZ2fb7cqY3kS5OeVKhQgUVKHBzD53KlSvncNvDw0Nly5a1z5m8bds2SVJiYmKu6zhx4oTuuOMO++309HSn9V5u27ZtMsbk2u/yKUaOHz8uSU7h8uXrPHHihMLCwlzef+jQIYfbixYtsgereZWUlKRixYqpT58+TnMOb9u2TZs3b851nZdv35WffvrJYfly5cpp3rx5efpnRrFixRQYGOjQVr58eUkX51e+++67JUkffvih3nzzTW3ZssUhFC5VqpTTOuPj47Vq1SpJUsuWLTVr1iynPsePH7/i83K5H3/8UV999ZWWLl3qNCWLv7+/atasqSlTpiguLs7+93HmzBmX63r//ff1/vvvO7VHRUU5tSUlJSkpKUmS5Ofnp6ZNm2rs2LFX/Fvt3LmzZsyYofnz56t3794ug+rcfPrpp/r000/tt2vXrq333nvvhurK+TuIiYlxaPfy8lK5cuXsr9u//vpLklShQgWndcTExOjHH390aPP09FTp0qUd2i7927nU5MmT9fPPP0uS09z3Ob7++mu9/PLLWr9+vcP89K7+jm/kMQYAAMhvCLoBAMBtq06dOqpVq1au97/66qt68cUX9fDDD+ull15SkSJF5OnpqQEDBjidKe0OOTWMHj3aPn/z5S4NObOysnTgwAE1b978quv18PDQt99+63Bmqqt1StLBgwclXTwD9ErrDAsL08yZM13ef3kAXbduXb388ssObRMmTNCXX37pcvnNmzfrgw8+0IwZM1zO9W2z2VS1alWNGTPG5fIlS5bMtfYc1apV05tvvilJOnz4sN5++201btxY69atu+K+59WMGTPUo0cPdejQQYMHD1ZYWJi8vLyUnJxsnz/+UuPHj1d6ero2bdqk5ORkPfbYY5oxY4ZDn4MHD7oMlnPz7LPPKj4+Xk2bNnW66KQkTZo0Se3bt1f9+vWvuq727ds7XZDyhRdesP+9XKp3797q2LGjsrOztXnzZg0fPlwdOnTQH3/8kev6jxw5ojVr1kiSNm3aJJvN5vCPqitp0aKFBg8eLOniGeqjRo1SkyZNtGbNGocznq+lrkuXc5eff/5Zr7zyin755RcNHDhQLVu2dPjGwYoVK9SuXTvde++9eueddxQZGSlvb29Nnz5dn3zyidP6buQxBgAAyG8IugEAwD/WnDlz1KRJE6ezUo8fP+4QHpUpU0arVq3S+fPnb8oFFXPknLGdwxij7du3q1q1avbtSlJwcLDi4uKuur7ffvtN58+fv2K4n7NeY4xKlSplP3P0SjZt2iQPDw+XZ6heus4lS5aoQYMGeQoEQ0JCnPbpSheMHDJkiGrUqKEHH3ww1+3/9ttvatas2XVPJ3PHHXc41NS4cWMVK1ZM06dPz/WCojn279+v06dPO5zV/eeff0qSoqOjJV38eytdurS++OILhxpzzii+XO3atSVJrVq1UlhYmLp3767nn39eFStWlHRxqpXt27erZcuWedq/efPmKSUlxeU0KTnq1KmjnTt36vfff9fJkyclSR999JE+/vhjp74lSpRweg7Hjh3rMuguV66cvW98fLzOnDmj559//ooX+uzXr59Onjyp5ORkDRkyRGPHjtWgQYPytK+RkZEOtVWoUEH169fXvHnz1KVLl+uqK+es+61btzqcgW2z2bRt2zbVrFlT0v+f0b5161b71Dk5tm7d6vSPCZvNpp07dzq8Fi//28nx8MMPa+jQodq/f78qVaqkgQMHOjw3n3/+ufz8/LRw4UKHKW2mT5/u8nG6kccYAAAgv+Hf9QAA4B/Ly8vLaV7e2bNna9++fQ5t//rXv5Senq4JEyY4rSOv8/q68tFHH9nDROliEHrgwAG1atVKkhQbG6syZcrojTfe0KlTp5y
WP3z4sFPtXl5eatOmzRW3e//998vLy0sjRoxwqt8YoyNHjthvX7hwQZ9//rnq1KlzxSkyOnXqpOzsbL300ktO9124cME+/cn1SElJ0ZdffqnXXnst1xC7U6dO2rdvn6ZOnep039mzZ3X69Olr3u7Zs2clyWH6h9xcuHBBkydPtt/OysrS5MmTFRoaqtjYWEmynz1/6WO+atUqpaSkXHX96enpTrV8+eWXOnv2rFOY6kp2draGDh2qrl275vrtgBz+/v6qW7eu4uLiFBcX5zStxs2Q820FV98okC6+FmbNmqXXXntNzz33nDp37qwXXnjBHgBfq7w+l1eqq1mzZvL19dXbb7/t8I2PmTNnKi0tzf66q1WrlsLCwjRp0iSH7X377bfavHmzEhISnNZ96XuLMUYTJkyQt7e301QiDRs2lHRxqpxRo0ZpxowZWrRokf1+Ly8veXh4OEzDtGvXLpf/RLrZjzEAAIC7cUY3AAD4x2rTpo1Gjhypnj17qn79+tqwYYNmzpzpFOx1795dH330kQYNGqTVq1erYcOGOn36tJYsWaK+ffuqffv217X9IkWK6J577lHPnj2VlpamsWPHqmzZsurVq5eki3P3vvfee2rVqpUqV66snj17qnjx4tq3b5+WLVum4OBgffXVVzp9+rQmTpyot99+W+XLl9fy5cvt28gJyH///XelpKSoXr16KlOmjF5++WUNGTJEu3btUocOHVSwYEGlpqZq7ty56t27t/7zn/9oyZIlevHFF/X777/rq6++uuK+NGrUSH369FFycrLWr1+vFi1ayNvbW9u2bdPs2bM1btw4PfDAA9f1OC1atEjNmze/4lntDz30kD777DM99thjWrZsmRo0aKDs7Gxt2bJFn332mRYuXHjVM93T0tLsU4Okp6dr8uTJKlCgwFX/cSD9f/C4a9culS9fXrNmzdL69es1ZcoU+7cA2rRpoy+++EL33XefEhISlJqaqkmTJqlSpUoO/8gYOXKk9u3bpypVqsjX11fr1q3T9OnTVa1aNVWrVk1nzpxRUlKS3nnnHdWvX18tWrS4an179+6Vj4+P/UKnt9rWrVu1YMEC2Ww2bdq0SaNHj1bt2rXtF4a91KFDh/T444+rSZMm9qlRJkyYoGXLlqlHjx768ccfrzq9xs6dO+3P5b59+zRhwgQFBwc7BcfXUleRIkX0wgsv6MUXX1R8fLzat2+vnTt3asKECapevboeffRRSRfnuB81apR69uypRo0aqUuXLkpLS9O4ceMUHR2tgQMHOqzXz89PCxYsUGJiourWratvv/1W8+fP19ChQ684j33v3r31ySef6LHHHtPGjRsVEBCghIQEjRkzRi1btlTXrl116NAhTZw4UWXLltXvv/9+Ux9jAACAfMcAAADcZqZPn24kmV9++eWK/c6dO2eefvppExkZafz9/U2DBg1MSkqKadSokWnUqJFD3zNnzpjnn3/elCpVynh7e5uIiAjzwAMPmB07dhhjjElNTTWSzOjRo522U7lyZYf1LVu2zEgy//3vf82QIUNMWFiY8ff3NwkJCeavv/5yWv7XX381999/vylatKjx9fU1UVFRplOnTmbp0qUO277aT2JiosN6P//8c3PPPfeYwMBAExgYaGJiYky/fv3M1q1bjTHGPPHEE+bee+81CxYscKopKSnJuDqUnDJliomNjTX+/v6mYMGCpmrVquaZZ54x+/fvt/eJiooyCQkJTsv269fPaZ2SjIeHh1m7dq1Du6vnKCsry4waNcpUrlzZ+Pr6mjvuuMPExsaaESNGmBMnTjht7/L1XfpYFS5c2DRo0MB88803V1wuZ9nKlSubNWvWmHr16hk/Pz8TFRVlJkyY4NDPZrOZV1991URFRRlfX19Ts2ZN8/XXX5vExEQTFRVl7zdnzhxTu3ZtExwcbPz9/U3ZsmXN008/bQ4fPmyMMWbv3r2mZMmSZsCAAS73S5JJSkqy305MTDSSzFNPPeXQL+d
1kpqaesX9c/VcSzL9+vVz6puQkOCwL5f/bXp6epoSJUqYxMREs3fvXpfrv//++03BggXNrl27HNb95ZdfGklm1KhRV6w3KirKYZshISGmRYsWJiUl5YbqyjFx4kQTExNjvL29TXh4uOnTp485cuSIU79Zs2aZmjVrGl9fX1OkSBHTrVs3+7pzJCYmmsDAQLNjxw7TokULExAQYMLDw01SUpLJzs52qnf69OkOy2/dutX4+fmZgQMH2tvef/99U65cOePr62tiYmLM9OnTb/pjDAAAkB95GHMD37cFAADANVu+fLmaNGmi2bNnX/dZzpfatWuXSpUqpdTUVKc5fXMMHz5cu3btcnkBQtyYxo0bKz09XRs3bnR3KbCYHj16aM6cOS6nJgIAAMC14ftoAAAAAAAAAABLY45uAAAAiwsKClK3bt2ueLHIatWqqVixYrewKgAAAAC4dQi6AQAALC4kJMR+4b3c3H///beoGgAAAAC49ZijGwAAAAAAAABgaczRDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A1IOn36tPbs2aNjx465uxTcRDyvAABYlzFGR48e1bZt29xdCgAAtyWbzab09HTt3LnT3aUANwVBN/6xZs+erWbNmqlgwYIKCgrSnXfeqddff93dZeEG8bwCAGBdJ0+e1AsvvKAKFSrIx8dHRYsWVfny5bV161Z3lwYAwG3h4MGDGjBggKKiouTj46PQ0FBVqlRJGRkZ7i4NuGEF3F0AcDP88ccfSk5O1rJly5Senq6iRYuqSZMmGjp0qCpXruzU/7nnntOoUaPUvn17TZ06VSEhIfLw8FD58uXdUD1uFp5XAHC/Dz74QD179tQvv/yiWrVqOdw3depU9e7dW+3bt9fnn38uLy8vN1WJ/OjIkSNq1KiRdu/erSeeeEINGjSQj4+PvL29FR0d7e7yAACX8fDwyFO/ZcuWqXHjxn9vMciT7du3q0mTJjp//ryefPJJ3XXXXSpQoID8/f0VGBjo7vKAG0bQDcv74osv1KVLFxUpUkSPPPKISpUqpV27dun999/XnDlz9Omnn+q+++6z9//+++81atQoJScn67nnnnNj5biZeF4BIH+bO3euHn/8cTVs2FCffvopITecDB48WAcOHFBKSorLExUAAPnLxx9/7HD7o48+0uLFi53aK1aseCvLwhX06dNHPj4++vnnn1W8eHF3lwPcdB7GGOPuIoDrtWPHDlWrVk133nmnfvjhB4WGhtrvS09PV8OGDbVnzx79/vvvKl26tCSpbdu2Onr0qFauXOmusvE34HkFgPzB1Rndy5cvV8uWLVW+fHmtWLFChQoVcnOVyG8OHTqkyMhITZo0Sb169XJ3OQCA69C/f39NnDhRxEz509q1a1WrVi0tWrRIzZs3d3c5wN+CObphaaNHj9aZM2c0ZcoUh5BbkkJCQjR58mSdPn3aYY7mn3/+WVWqVFHnzp1VpEgR+fv7q3bt2po3b569z6lTpxQYGKinnnrKaZt79+6Vl5eXkpOTJUk9evRw+XVaDw8PDR8+3H77r7/+Ut++fVWhQgX5+/uraNGi6tixo3bt2uWw3PLly+Xh4aHly5fb23755Rc1b95cBQsWVGBgoBo3bqwVK1Y4LPfBBx/Iw8NDa9assbelp6c71SFJbdq0cap5xYoV6tixo+688075+vqqZMmSGjhwoM6ePeu0b3PmzFGtWrVUsGBBeXh42H/eeOMNp76uasz5CQgIUNWqVfXee+859OvRo4eCgoKuuK7L9ysvz2uOQ4cO6ZFHHlF4eLj8/PxUvXp1ffjhhw59du3aZd+nt956S1FRUfL391ejRo20ceNGp3ovfzxnzJghT09Pvfbaa/a233//XT169FDp0qXl5+eniIgIPfzwwzpy5MgV9xU
ArGz9+vVq3769IiMjtXDhQpch9+zZsxUbGyt/f3+FhITo3//+t/bt2+fQJ7exYc6cOQ7jZuPGjR3GGlc/OTw8PNS/f3/NnDlTFSpUkJ+fn2JjY/XDDz84befXX39Vq1atFBwcrKCgIDVr1kw///yzy33OrYYPPvjAoU+VKlWu+vjl1Hg5V2P5G2+8ofr166to0aLy9/dXbGys5syZ47TsqVOn9PTTT6t06dLy9vZ2qDE9Pf2K9Vy+byEhIUpISHAaG3OrO0fOMUHOcdAvv/wim82mrKws1apVS35+fipatKi6dOmi3bt3Oy3/3XffqWHDhgoMDFThwoXVvn17bd682aHP8OHD5eHhoS1btqhTp04KDg5W0aJF9dRTT+ncuXNO9V56XHHhwgW1bt1aRYoU0aZNm+zt06dPV9OmTRUWFiZfX19VqlRJ77777hUfMwDARZmZmUpKSlLZsmXtnzmfeeYZZWZmOvWdMWOG6tSpo4CAAN1xxx269957tWjRIklSdHT0Fcf5S8fH06dP6+mnn1bJkiXl6+urChUq6I033nAK4y9d3svLS8WLF1fv3r11/Phxe5+srCwNGzZMsbGxKlSokAIDA9WwYUMtW7bMqf6cz5x33nmnvLy87Ou+2ufcy/fP09NTERERevDBBx3Gw0s/r+YmZxzM8fPPP8vPz087duxQ5cqV5evrq4iICPXp00dHjx51Wv5ajs927typ+Ph4BQYGqlixYho5cqTDY5xT76XHQidPnlRsbKxKlSqlAwcO2NvzejwDuMLUJbC0r776StHR0WrYsKHL+++9915FR0dr/vz59rYjR45oypQpCgoK0pNPPqnQ0FDNmDFD999/v2bOnKkuXbooKChI9913n2bNmqUxY8Y4fL36v//9r4wx6tat2zXV+ssvv+inn35S586dVaJECe3atUvvvvuuGjdurE2bNikgIMDlctu3b1fjxo0VEBCgwYMHKyAgQFOnTlVcXJwWL16se++995rqyM3s2bN15swZPf744ypatKhWr16t8ePHa+/evZo9e7a9X0pKijp16qTq1avrtddeU6FChZSenq6BAwfmeVtvvfWWQkJClJGRoWnTpqlXr16Kjo5WXFzcddefl+dVks6ePavGjRtr+/bt6t+/v0qVKqXZs2erR48eOn78uNM/Nz766COdPHlS/fr107lz5zRu3Dg1bdpUGzZsUHh4uMtaFi1apIcfflj9+/d3mEZl8eLF2rlzp3r27KmIiAj98ccfmjJliv744w/9/PPPeZ7jDgCsYseOHWrZsqV8fX21cOFCRUZGOvXJOQO8du3aSk5OVlpamsaNG6eVK1fq119/VeHCha9pm88//7weffRRSbKPT7179871WOH777/XrFmz9OSTT8rX11fvvPOOWrZsqdWrV9uD6D/++EMNGzZUcHCwnnnmGXl7e2vy5Mlq3Lixvv/+e9WtW9dpvTExMXr++ecd6vi7jRs3Tu3atVO3bt2UlZWlTz/9VB07dtTXX3+thIQEe7/Bgwdr0qRJeuSRR9SgQQN5e3vriy++0Ny5c/O0nZx9M8Zox44dGjNmjFq3bu0ykM6rnH/69u/fX7GxsXrttdd0+PBhvf322/rxxx/166+/KiQkRJK0ZMkStWrVSqVLl9bw4cN19uxZjR8/Xg0aNNC6deuc/gHQqVMnRUdHKzk5WT///LPefvttHTt2TB999FGu9Tz66KNavny5Fi9erEqVKtnb3333XVWuXFnt2rVTgQIF9NVXX6lv376y2Wzq16/fde8/ANzubDab2rVrpx9//FG9e/dWxYoVtWHDBr311lv6888/HU5QGjFihIYPH6769etr5MiR8vHx0apVq/Tdd9+pRYsWGjt2rE6dOiVJ2rx5s1599VUNHTrUPkVKTphsjFG7du20bNkyPfLII6pRo4YWLlyowYMHa9++fXrrrbccarzvvvt0//3368KFC0pJSdGUKVN09uxZ+1QsGRkZeu+999S
lSxf16tVLJ0+e1Pvvv6/4+HitXr1aNWrUsK8rMTFRS5Ys0RNPPKHq1avLy8tLU6ZM0bp16/L0eDVs2FC9e/eWzWbTxo0bNXbsWO3fv9/phLdrceTIEZ07d06PP/64mjZtqscee0w7duzQxIkTtWrVKq1atUq+vr6Sru34LDs7Wy1bttTdd9+t119/XQsWLFBSUpIuXLigkSNHuqzl/Pnz+te//qXdu3dr5cqVDseIeT2eAVwygEUdP37cSDLt27e/Yr927doZSSYjI8MYY4wkI8ksX77c3ufMmTOmYsWKJiIiwmRlZRljjFm4cKGRZL799luH9VWrVs00atTIfrtnz57mzjvvdNquJJOUlOSwjculpKQYSeajjz6yty1btsxIMsuWLTPGGPOvf/3LeHl5mY0bN9r7pKenm6JFi5rY2Fh72/Tp040k88svv9jbDh8+7FSHMcYkJCSYqKgohzZX9SUnJxsPDw/z119/2duGDBliJJkDBw7Y21JTU40kM3r0aKd1XCqnxtTUVHvbn3/+aSSZ119/3d6WmJhoAgMDr7iuy/crr8/r2LFjjSQzY8YMe7+srCxTr149ExQUZP87ydknf39/s3fvXnvfVatWGUlm4MCBDvXmPJ5r1qwxQUFBpmPHjiY7O9uhZleP8X//+18jyfzwww9X3F8AsIqc9/qvv/7alClTxkgyLVq0cNk3KyvLhIWFmSpVqpizZ8/a27/++msjyQwbNszeltvYMHv2bIdx81I57+XTp093uf2csWPNmjX2tr/++sv4+fmZ++67z97WoUMH4+PjY3bs2GFv279/vylYsKC59957ndbboEED06RJkyvW0ahRI1O5cmWXdV1eY79+/Zza8zKWZ2VlmSpVqpimTZs6tEdGRpr4+HiHtqSkJCPJHD58+Ir1NGrUyOE4yBhjhg4daiSZQ4cOXbXuHJcfE+TcrlSpksN+5BwXPf300/a2GjVqmLCwMHPkyBF722+//WY8PT1N9+7dnfapXbt2Dtvu27evkWR+++03h3pzjiuGDBlivLy8zLx585zqdjWWx8fHm9KlS+e6rwDwT9GvXz+TW8z08ccfG09PT7NixQqH9kmTJhlJZuXKlcYYY7Zt22Y8PT3Nfffd5/R5ymazOa338s/Pl5o3b56RZF5++WWH9gceeMB4eHiY7du329tcfW6uX7++qVSpkv32hQsXTGZmpkOfY8eOmfDwcPPwww/b286ePWs8PT1Nnz59HPrm5XOuMcZERUWZxMREh7auXbuagIAA++28fAbPGQcvv92sWTNz4cIFe3vOGDx+/HhjzLUfn0kyTzzxhL3NZrOZhIQE4+PjYz+uuPRYyGazmW7dupmAgACzatUqp7rzejwDuMLUJbCskydPSpIKFix4xX4592dkZNjbateurUaNGtlv+/v7q2/fvjp48KD9P6xxcXEqVqyYZs6cae+3ceNG/f777/r3v/9tbwsLC9OhQ4eUlZV1xTr8/f3tv58/f15HjhxR2bJlVbhwYZf/1T1x4oQOHTqkxYsXKz4+3uGiTEWLFlWPHj20du1apaWlXXG7eXVpfadPn1Z6errq168vY4x+/fVX+30nT56Up6fnNZ9hd6ljx44pPT1dO3fu1FtvvSUvLy+H5yNHenq60tPTnb5enJu8PK/ffPONIiIi7Gd4S5K3t7eefPJJnTp1St9//73DOjt06OBwkY46deqobt26+uabb5y2v3PnTiUkJKhGjRr6+OOP5enp+BZ76WN87tw5paen6+6775akPP9nHwCsokePHtqzZ4+6du2qRYsWOXw7KMeaNWt06NAh9e3bV35+fvb2hIQExcTEOHwjK0fO2JDzk3M8cL3q1aun2NhY++0777xT7du318KFC5Wdna3s7GwtWrRIHTp0sF/vQ5IiIyPVtWtX/fjjjw7HGNLFrzbnnBF1JdnZ2fb9uNJxRM6YcenP+fPnnfpdOs4cO3ZMJ06cUMOGDZ3GmJMnT6po0aJ
XrS8358+fV3p6ug4fPqyUlBTNnTtX1apVs59xfXndR44ckc1my9O6+/Xr57AfjRs3VmxsrP1v4cCBA1q/fr169OihIkWK2PtVq1ZNzZs3dzk+X36m9RNPPCFJLvtOmDBBycnJevvtt9W+fXun+y+t7cSJE0pPT1ejRo20c+dOnThxIk/7CAD/RLNnz1bFihUVExPjMJ41bdpUkuzTf8ybN082m03Dhg1z+jx1rd+A/eabb+Tl5aUnn3zSof3pp5+WMUbffvutQ/uZM2eUnp6ugwcP6vPPP9dvv/2mZs2a2e/38vKSj4+PpItnqB89elQXLlxQrVq1HMba06dPy2az3dBYm5mZqfT0dHsm8N133znUcnnNx44dy/Pc6IMGDXL41vpDDz2k8PBw+1h7Pcdnl05XljN9WVZWlpYsWeLUd/DgwZo5c6Y+++wz1alTx+n+vB7PAK4QdMOycgLsq33AdRWIx8TEOPXL+ZpTzlyRnp6e6tatm+bNm6czZ85IkmbOnCk/Pz917NjRvlz9+vV17tw5vfDCC9q7d699wL7c2bNnNWzYMPvcYCEhIQoNDdXx48ddfjDq0KGDwsPDlZGRoQoVKly13hu1e/du+4fGoKAghYaG2kPjS+urV6+ebDabnnrqKe3YscM+qF6Lu+66S6GhoSpTpoymTZumCRMmOA1wp0+fVmhoqEJDQ+Xv768777xT48aNu+J68/K8/vXXXypXrpzTQVNOv7/++suhvVy5ck7rLF++vNPjfvr0acXHxystLU1Hjx51eRB29OhRPfXUUwoPD5e/v79CQ0NVqlQpSeLDMYDbztGjRzVjxgx9+OGHqlGjhp566imn97qc91xX41xMTIzTe/KlY0POz8MPP3xDdeb2Pn/mzBkdPnxYhw8f1pkzZ3Idi202m/bs2ePQfvz48TzNwbllyxaHsa5ChQr65JNPnPq9//77TvudM0/ppb7++mvdfffd8vPzU5EiRRQaGqp3333X6XGvV6+e5s6dqzlz5ujAgQNKT0+3H+vkxU8//aTQ0FCFhYWpfv36unDhgmbPnu009uXUHRISIn9/f917770O1xK5VM6yuY3ll47jkuu/mYoVKyo9PV2nT592aL/8OS5Tpow8PT2dxvJvv/3WPoWZq7lKJWnlypWKi4uzzw0eGhqqoUOHSmIsB4Ar2bZtm/744w+n8ax8+fKSLs5pLV2c9szT09Nh2qjr9ddff6lYsWJOJ8fl9tlv9OjRCg0NVWRkpB544AE1bNhQo0aNcujz4Ycfqlq1avZrSYSGhmr+/PkOY0DRokVVrlw5vffee1q0aJEOHTqk9PR0l3OR5+bTTz9VaGiowsPD1aJFC5UsWdLp2laSlJSUpNDQUBUpUkQBAQFKSEjQtm3bXK4zt7HWy8tL5cqVy9NY6+r4zNPT0+FkAEn25/XysXby5Ml68803JSnXHCGvxzOAK8zRDcsqVKiQIiMj9fvvv1+x3++//67ixYsrODhYkuN/B6+me/fuGj16tObNm6cuXbrok08+UZs2bRwupNWuXTs9/PDDGj16tEaPHp3rup544glNnz5dAwYMUL169VSoUCF5eHioc+fOLs9yeuONN1SuXDmXZxPdbNnZ2WrevLmOHj2qZ599VjExMQoMDNS+ffvUo0cPh/o6d+6sdevWafz48ZoyZcp1bW/GjBkKDw/XuXPn9N1336lfv37y8/NTjx497H38/Pz01VdfSbr4z4pp06ZpwIABioyMVKdOnZzWeS3P698hPT1dgYGB+uqrr9ShQwclJycrKSnJoU+nTp30008/afDgwapRo4aCgoJks9nUsmXLPJ/pBgBWMXr0aPs/hqdMmaK7775bQ4YM0TvvvHPd67x0bMixYsWKXOd/dJeDBw8qPj7+qv2io6M1depUSRfnzXz77bf10EMPqXTp0vZv/EhS+/btnS7s+MILL+jgwYP22ytWrFC7du1077336p133lFkZKS8vb01ffp0p/B8ypQ
p6tKli8M/7q9FtWrV7B9Sc+bRbty4sdatW6eIiAinuo0xSk1N1ciRI9WmTRuXH8Bv5Tie2xmBq1evVq9evRQYGKiXX35ZHTt2dPiQv2PHDjVr1kwxMTEaM2aMSpYsKR8fH33zzTd66623GMsB4ApsNpuqVq2qMWPGuLy/ZMmSt7giZw899JC6d+8um82mnTt36qWXXlKbNm20ZMkSeXh4aMaMGerRo4c6dOigwYMHKywsTF5eXkpOTtaOHTsc1jVr1ix169bN6XggMDAwT7W0aNFCgwcPliTt3btXo0aNUpMmTbRmzRqHMbN3797q2LGjsrOztXnzZg0fPlwdOnTQH3/84bROd39mli5eEPOVV17RL7/8ooEDB6ply5YO3wi7luMZwBWCblhamzZtNHXqVP3444+65557nO5fsWKFdu3apT59+tjbSpUqpa1btzr13bJliyQ5XMCoSpUqqlmzpmbOnKkSJUpo9+7dGj9+vNOy77//voYNG6YdO3bYP+Q0b97coc+cOXOUmJho/2AoXfxK76VXcb5UbGysGjVqpKCgoDzXe702bNigP//8Ux9++KG6d+9ub1+8eLFTX09PT73xxhvasGGDUlNT9c477ygtLc1hOperadCggb3uNm3a6I8//lBycrJD0O3l5eVwccqEhAQVKVJECxYscBl05/V5jYqK0u+//y6bzeZwVndOv6ioKIflXX0Y//PPP50e94CAAC1YsEAxMTEaOHCgXn31VXXq1Ml+tsCxY8e0dOlSjRgxQsOGDbvi+gHgdnDpxZJr166tfv36aeLEierevbs9xM15z926dav9q8s5tm7d6vSefPnYICnXcTSvcnufDwgIUGhoqKSL7/G5jTGenp4OH8737t2rkydP2t//ryQwMNBhfxo2bKjixYtr0aJFDkF3iRIlnPZ77NixDkH3559/Lj8/Py1cuNBh2pTp06c7bTc6OlozZsxQ1apV9fDDD6tDhw766KOP7Bfbupo77rjDoZ7GjRurWLFimj59uoYMGZJr3UFBQerWrZvDlGg5cr7h5OpvYcuWLQ7jeE6/y23ZskUhISFOIcK2bdvs65cuXujbZrM5jeXNmzfXu+++q3PnzmnevHnq3bu3li9fbg/Gv/rqK2VmZup///uf7rzzTvtyOV+3BwDkrkyZMvapQK40BUmZMmVks9m0adMmh4s7Xo+oqCgtWbJEJ0+edDirO7fPfqVLl3YYtwoVKqSuXbvq559/Vr169TRnzhyVLl1aX3zxhcM+XH6CkyTVrFlTU6dOVcOGDTVy5EjdfffdGj16tFauXJmn2iMjIx1qqVChgurXr28/CS9HuXLl7P3i4+N15swZPf/88y4vEH3pWHvpGdg2m03btm1TzZo1HR6XvB6f5fxjIOcsbunisZTknFc8/PDDGjp0qPbv369KlSpp4MCBDscf13I8A7jC1CWwtMGDB8vf3199+vTRkSNHHO47evSoHnvsMQUEBNj/EypJrVu31urVq/XTTz/Z286dO6d3331XERERDvN0Shf/q7to0SKNHTtWRYsWVatWrVzWEhUVpaZNmyouLs7pw6h08cP55XNmjR8/XtnZ2bnun4eHh1q0aKGFCxdq8+bNDvv24YcfqlatWgoPD891+bzKmZ/r0vqMMblOFTJ+/Hh99913mjlzpuLi4tSgQYMb2v7Zs2ev+jWunNounUvsUnl9Xlu3bq2DBw9q1qxZ9n4XLlzQ+PHjFRQU5DRX+Lx587Rv3z777dWrV2vVqlVOfwehoaH2r4CNHDlSJUqUUK9evZzqvvxvYOzYsVfcbwC4XbzyyiuKjIxU7969deHCBUlSrVq1FBYWpkmTJjmMA99++602b96shISEv72ulJQUhzkf9+zZoy+//FItWrSQl5eXvLy81KJFC3355ZcOX79NS0vTJ598onvuucf+rTHp4leNJTl9MMyLnH+W5zbWXYmXl5c8PDwcjit27dqlefPmOfW9cOGCunXrpsq
VK+utt95SXFyc01eOr8XZs2cl6apj+ZX2r2bNmoqIiHD6W1ixYoXWrFmjNm3aSLr4wb9GjRr68MMPHf7JsXHjRi1atEitW7d2WvfEiRMdbuectHD5WF6/fn15eXkpMDBQkyZN0g8//GA/4/7Sui8dy0+cOMGHbwDIg06dOmnfvn0O76s5zp49a592qkOHDvL09NTIkSOdvimT1zmoc7Ru3VrZ2dmaMGGCQ/tbb70lDw+PXD/bX1qX9P/jm6txYNWqVUpJSXFaNiMjQw899JDatWunF154QXFxcYqMjLym+q9US26uNNY2a9ZMvr6+evvttx0e25kzZyotLc0+1l7P8dmlj7ExRhMmTJC3t7fTvOINGzaUJBUrVkyjRo3SjBkzHKZju5bjGcAVzuiGpZUrV04ffvihunXrpqpVq+qRRx5RqVKltGvXLr3//vtKT0/Xf//7X5UpU8a+zDPPPKOZM2eqVatWevLJJxUSEqIZM2Zo06ZNmjlzpgoUcHxZdO3aVc8884zmzp2rxx9/XN7e3tdVa5s2bfTxxx+rUKFCqlSpklJSUrRkyZKrXqDipZde0sKFC9WoUSM98cQTCggI0NSpU3X8+HHNmTPHqX9KSop9jvCci2Nt375dCxYssPc5fPiwzp49qwULFqhly5aKiYlRmTJl9J///Ef79u1TcHCwPv/8c5dzZv3xxx965plnNHz4cNWuXfu6Hot58+YpJCTEPnXJihUrNGDAAIc+2dnZ9ppPnjyp6dOn6/Tp0+rQoYPLdeb1ee3du7cmT55sv5hndHS05syZo5UrV2rs2LFO87eVLVtW99xzjx5//HFlZmba/+HxzDPP5Lp//v7+mjJliuLi4vTuu++qb9++Cg4O1r333qvXX39d58+ft5+xl5qael2PIQBYTcGCBTV+/Hjdf//9evPNN/Xss8/K29tbo0aNUs+ePdWoUSN16dJFaWlpGjdunKKjozVw4MC/va4qVaooPj5eTz75pHx9fe1Tq4wYMcLe5+WXX9bixYt1zz33qG/fvipQoIAmT56szMxMvf7665IuBt9JSUl677331LlzZ5dzTV/u1KlT9rHu6NGjevvtt+Xt7X1dAX9CQoLGjBmjli1bqmvXrjp06JAmTpyosmXLOk3zNmLECG3YsEG//vrrdR3XpKWlacaMGZIuTt01efJkFShQwP4BOcfu3bu1YMEC+9Qlr7zyiqKiolSzZk2nM+kLFCig119/Xd27d1fDhg3VrVs3+7QoJUqU0LPPPmvvO3r0aLVq1Ur16tXTI488orNnz2r8+PEqVKiQhg8f7lRvamqq2rVrp5YtWyolJUUzZsxQ165dVb169Vz3MT4+Xv/+97/1zDPPqG3btoqMjFSLFi3k4+Ojtm3bqk+fPjp16pSmTp2qsLAwHThw4JofRwD4J3nooYf02Wef6bHHHtOyZcvUoEEDZWdna8uWLfrss8+0cOFC1apVS2XLltXzzz+vl156SQ0bNtT9998vX19f/fLLLypWrJiSk5PzvM22bduqSZMmev7557Vr1y5Vr15dixYt0pdffqkBAwY45ATSxWlPZ8yYIWOMduzYYR+DatWqJeniZ/ovvvhC9913nxISEpSamqpJkyapUqVKOnXqlMO6+vXrp7Nnz7qcVzsvdu7caR9r9+3bpwkTJig4ONgpON66dasWLFhgPwt+9OjRql27tooXL+60ziJFiuiFF17Qiy++qPj4eLVv3147d+7UhAkTVL16dT366KOSdM3HZ35+flqwYIESExNVt25dffvtt5o/f76GDh1q/3acK71799Ynn3yixx57TBs3brTPMZ7X4xnAJQPcBn7//XfTpUsXExkZaby9vU1ERITp0qWL2bBhg8v+O3bsMA888IApVKiQ8fPzM7Vr1zbz5s3Ldf2tW7c2ksxPP/2U55okmaSkJPvtY8eOmZ49e5qQkBATFBRk4uPjzZYtW0xUVJRJTEy091u2bJmRZJYtW2ZvW7t2rWn
RooUJCgoyAQEB5t577zXff/+9w/amT59uJF3zT45NmzaZuLg4ExQUZEJCQkyvXr3Mb7/9ZiSZ6dOnG2OMOXfunKlWrZq55557zIULF+zLpqamGklm9OjRV3xMLq/Rx8fHlC1b1gwbNsycO3fO3i8xMdGhX1BQkLnrrrvMxx9/nOvja0zen9e0tDT7c+Hj42OqVq1q30dX+/Tmm2+akiVLGl9fX9OwYUPz22+/OfRNTEw0UVFRTtvp2bOnCQ4ONnv37jXGGLN3715z3333mcKFC5tChQqZjh07mv3797vcFwCwqpz3+l9++cXl/e3btzcBAQFm586d9rZZs2aZmjVrGl9fX1OkSBHTrVs3+3tnjsTERBMYGOi0vtmzZzuNmzly3ssvf4/PIcn069fPzJgxw5QrV874+vqamjVrulzXunXrTHx8vH0sbtKkicNxwcqVK03ZsmXN8OHDTWZm5lXraNSokcNYV7hwYdOgQQPz7bffuqzxcgkJCU5jz/vvv2/fj5iYGDN9+nSTlJTkMN6vWLHCeHl5mcmTJzssm9Pv8OHDLh+rq9X9zTffONWd8+Ph4WEiIiLM/fffbzZv3myM+f+/k9TUVIflPvvsM4e/hS5dupi//vrLqY4lS5aYBg0aGH9/fxMcHGzatm1rNm3a5HKfNm3aZB544AFTsGBBc8cdd5j+/fubs2fPOtV7+Vicnp5uQkNDzX333Wdv+9///meqVatm/Pz8THR0tBk1apSZNm2ay30BgH+afv36OYw5l8vKyjKjRo0ylStXNr6+vuaOO+4wsbGxZsSIEebEiRMOfadNm2YfD+644w7TqFEjs3jxYqd1uvr8fKmTJ0+agQMHmmLFihlvb29Trlw5M3r0aGOz2Rz6XW3cMsYYm81mXn31VRMVFWU/Zvj666+dPg/+97//NR4eHmbBggUO28jtWOZyUVFRDvWEhISYFi1amJSUFHufnGOLnB9PT09TokQJk5iYaD+GuvwYIMfEiRNNTEyM8fb2NuHh4aZPnz7myJEjTv2u5fhsx44dpkWLFiYgIMCEh4ebpKQkk52d7VTv5cdkW7duNX5+fmbgwIH2trwczwC58TDmGr/7AfwD3XfffdqwYYO2b9/u7lJuml27dqlUqVLX/PWvf4qcx2f06NH6z3/+4+5yAAB/Aw8PD/Xr18/pK824PQwfPlwjRozQ4cOHHS50BQAAbo4ePXpozpw5Tme0A+7CHN3AVRw4cEDz58/XQw895O5SAAAAAAAAALjAHN1ALlJTU7Vy5Uq999578vb2Vp8+fdxd0k3l7++v+Ph4d5cBAAAAAAAA3DDO6AZy8f333+uhhx5SamqqPvzwQ0VERLi7pJsqPDzc4QKVAAAAAAAAgFUxRzcAAAAAAAAAwNLcekb3Dz/8oLZt26pYsWLy8PDQvHnzrrrM8uXLddddd8nX11dly5bVBx988LfXCQAAAAAAAADIv9wadJ8+fVrVq1fXxIkT89Q/NTVVCQkJatKkidavX68BAwbo0Ucf1cKFC//mSgEAAAAAAAAA+VW+mbrEw8NDc+fOVYcOHXLt8+yzz2r+/PnauHGjva1z5846fvx4nucattls2r9/vwoWLCgPD48bLRsAACfGGJ08eVLFihWTpyeXw7hejNkAgL8T4/XNwXgNAPg7Xct4XeAW1XRTpKSkKC4uzqEtPj5eAwYMyHWZzMxMZWZm2m/v27dPlSpV+rtKBADAbs+ePSpRooS7y7AMxmwAgDswXl8bxmsAgDvkZby2VNB98OBBhYeHO7SFh4crIyNDZ8+elb+/v9MyycnJGjFihFP7nj17FBwc/LfVCgD458rIyFDJkiVVsGBBd5diKYzZAIBbifH6+jBeAwBupWsZry0VdF+PIUOGaNCgQfbbOQ9OcHAwgzAA4G/F13evDWM2AMAdGK+vDeM1AMAd8jJeWyrojoiIUFpamkNbWlqagoODXZ7NLUm+vr7y9fW9FeUBAIAbwJgNAED+x3g
NAMivLHXFjXr16mnp0qUObYsXL1a9evXcVBEAAAAAAAAAwN3cGnSfOnVK69ev1/r16yVJqampWr9+vXbv3i3p4leiunfvbu//2GOPaefOnXrmmWe0ZcsWvfPOO/rss880cOBAd5QPAAAAAAAAAMgH3Bp0r1mzRjVr1lTNmjUlSYMGDVLNmjU1bNgwSdKBAwfsobcklSpVSvPnz9fixYtVvXp1vfnmm3rvvfcUHx/vlvoBAAAAAAAAAO7n1jm6GzduLGNMrvd/8MEHLpf59ddf/8aqAAAAAAAAAABWYqk5ugEAAAAAAAAAuBxBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIKuLuA20Xs4I/cXQJgt3Z0d3eXAAAAAAAAANwyBN0AAOAfg39MIz/hH9MAAADAzcPUJQAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSmKMbAAAAAADkG1xTA/kN19UArIEzugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNLcH3RMnTlR0dLT8/PxUt25drV69+or9x44dqwoVKsjf318lS5bUwIEDde7cuVtULQAAAAAAAAAgv3Fr0D1r1iwNGjRISUlJWrdunapXr674+HgdOnTIZf9PPvlEzz33nJKSkrR582a9//77mjVrloYOHXqLKwcAAAAAAAAA5BduDbrHjBmjXr16qWfPnqpUqZImTZqkgIAATZs2zWX/n376SQ0aNFDXrl0VHR2tFi1aqEuXLlc9CxwAAAAAAAAAcPtyW9CdlZWltWvXKi4u7v+L8fRUXFycUlJSXC5Tv359rV271h5s79y5U998841at26d63YyMzOVkZHh8AMAAPIfxmwAAPI/xmsAQH7ltqA7PT1d2dnZCg8Pd2gPDw/XwYMHXS7TtWtXjRw5Uvfcc4+8vb1VpkwZNW7c+IpTlyQnJ6tQoUL2n5IlS97U/QAAADcHYzYAAPkf4zUAIL9y+8Uor8Xy5cv16quv6p133tG6dev0xRdfaP78+XrppZdyXWbIkCE6ceKE/WfPnj23sGIAAJBXjNkAAOR/jNcAgPyqgLs2HBISIi8vL6WlpTm0p6WlKSIiwuUyL774oh566CE9+uijkqSqVavq9OnT6t27t55//nl5ejrn9r6+vvL19b35OwAAAG4qxmwAAPI
/xmsAQH7ltjO6fXx8FBsbq6VLl9rbbDabli5dqnr16rlc5syZM05htpeXlyTJGPP3FQsAAAAAAAAAyLfcdka3JA0aNEiJiYmqVauW6tSpo7Fjx+r06dPq2bOnJKl79+4qXry4kpOTJUlt27bVmDFjVLNmTdWtW1fbt2/Xiy++qLZt29oDbwAAAAAAAADAP4tbg+4HH3xQhw8f1rBhw3Tw4EHVqFFDCxYssF+gcvfu3Q5ncL/wwgvy8PDQCy+8oH379ik0NFRt27bVK6+84q5dAAAAAAAAAAC4mVuDbknq37+/+vfv7/K+5cuXO9wuUKCAkpKSlJSUdAsqAwAAAAAAAABYgdvm6AYAAAAAAAAA4GYg6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAAS3N70D1x4kRFR0fLz89PdevW1erVq6/Y//jx4+rXr58iIyPl6+ur8uXL65tvvrlF1QIAAAAAAAAA8psC7tz4rFmzNGjQIE2aNEl169bV2LFjFR8fr61btyosLMypf1ZWlpo3b66wsDDNmTNHxYsX119//aXChQvf+uIBAAAAAAAAAPmCW4PuMWPGqFevXurZs6ckadKkSZo/f76mTZum5557zqn/tGnTdPToUf3000/y9vaWJEVHR9/KkgEAAAAAAAAA+Yzbpi7JysrS2rVrFRcX9//FeHoqLi5OKSkpLpf53//+p3r16qlfv34KDw9XlSpV9Oqrryo7O/tWlQ0AAAAAAAAAyGfcdkZ3enq6srOzFR4e7tAeHh6uLVu2uFxm586d+u6779StWzd988032r59u/r27avz588rKSnJ5TKZmZnKzMy0387IyLh5OwEAAG4axmwAAPI/xmsAQH7l9otRXgubzaawsDBNmTJFsbGxevDBB/X8889r0qRJuS6TnJysQoUK2X9Klix5CysGAAB5xZgNAED+x3gNAMiv3BZ0h4SEyMvLS2lpaQ7taWlpioiIcLlMZGSkypcvLy8vL3tbxYoVdfDgQWVlZblcZsiQITpx4oT9Z8+ePTdvJwAAwE3DmA0AQP7HeA0AyK/cFnT7+Pg
oNjZWS5cutbfZbDYtXbpU9erVc7lMgwYNtH37dtlsNnvbn3/+qcjISPn4+LhcxtfXV8HBwQ4/AAAg/2HMBgAg/2O8BgDkV26dumTQoEGaOnWqPvzwQ23evFmPP/64Tp8+rZ49e0qSunfvriFDhtj7P/744zp69Kieeuop/fnnn5o/f75effVV9evXz127AAAAAAAAAABwM7ddjFKSHnzwQR0+fFjDhg3TwYMHVaNGDS1YsMB+gcrdu3fL0/P/s/iSJUtq4cKFGjhwoKpVq6bixYvrqaee0rPPPuuuXQAAAAAAAAAAuJlbg25J6t+/v/r37+/yvuXLlzu11atXTz///PPfXBUAAAAAAAAAwCrcOnUJAAAAAAAAAAA3iqAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClFbjeBd9+++0r3v/kk09e76oBAAAAAAAAAMiz6w66BwwYoBIlSsjLy0uStGfPHkVGRqpAgQLy8PAg6AYAAAAAAAAA3BLXHXRL0po1axQWFiZJKliwoL7//nuVLl36phQGAAAAAAAAAEBeXPcc3V5eXsrOzrbfzs7OVkpKyk0pCgAAAAAAAACAvLruoLtEiRJaunSpJOmnn36SzWbToEGDNHToUBljblqBAAAAAAAAAABcyXUH3X369FGPHj0UExOjpk2bqlevXlqzZo2WLFmi5s2b38waAQAAAAAAAADI1XXP0f3cc8/prrvu0m+//aZSpUrpX//6lzw8PLRixQo99dRTN7NGAAAAAAAAAABydUMXo2zRooVatGjh0Obr66tJkybdUFEAAAAAAAAAAOTVdQfdGRkZV7w/ODj4elcNAAAAAAAAAECeXXfQXbhwYXl4eDi1G2Pk4eGh7OzsGyoMAAAAAAAAAIC8uKGpS+bMmaMiRYrcrFoAAAAAAAAAALhmNxR0N2jQQGFhYTerFgAAAAAAAAAArtkNBd2bNm3SkSNHFBgYqIiICPn4+NysugAAAAAAAAAAyBPPG1m4WbNmqly5skqVKqXAwEBVrVpVb7311s2qDQAAAAAAAACAq7ruM7pTU1NljNH58+eVkZGh/fv3a/Xq1XrxxRd14cIFDR48+GbWCQAAAAAAAACAS9cddEdFRTncjo2NVdu2bVW+fHmNHDmSoBsAAAAAAAAAcEvc0BzdrnTu3FmVK1e+2asFAAAAAAAAAMClGw66165dq82bN0uSKlWqpLvuukt33XXXDRcGAAAAAAAAAEBeXHfQfejQIXXu3FnLly9X4cKFJUnHjx9XkyZN9Omnnyo0NPRm1QgAAAAAAAAAQK48r3fBJ554QidPntQff/yho0eP6ujRo9q4caMyMjL05JNP3swaAQAAAAAAAADI1XWf0b1gwQItWbJEFStWtLdVqlRJEydOVIsWLW5KcQAAAAAAAAAAXM11n9Fts9nk7e3t1O7t7S2bzXZDRQEAAAAAAAAAkFfXHXQ3bdpUTz31lPbv329v27dvnwYOHKhmzZrdlOIAAAAAAAAAALia6w66J0yYoIyMDEVHR6tMmTIqU6aMSpUqpYyMDI0fP/5m1ggAAAAAAAAAQK6ue47ukiVLat26dVqyZIm2bNkiSapYsaKaNm2qvXv3avfu3fLy8lLx4sVvWrEAAAAAAAAAAFzuuoNuSfLw8FDz5s3VvHlze9uhQ4dUqlQpGWMUERHhMLUJAAAAAAAAAAA32zUH3UWKFLni/cYYSeKClAAAAAAAAACAW+Kag+7jx49r7NixKlSoUK73Dxo06IYLAwAAAAAAAAAgL65r6pLOnTsrLCzM5X1paWkE3QAAAAA
AAACAW8bT3QUAAAAAAAAAAHAjruuM7pSUFBUpUkS+vr4qWLCgIiMjVbhw4ZtcGgAAAAAAAAAAV3ddQfd9991n/93Dw0OSFBoaqvr16ys+Pv7mVAYAAAAAAAAAQB5cc9B97NgxSdKFCxeUmZmpo0ePat++fdq0aZOWLl2qvn373vQiAQAAAAAAAADIzTXP0V2oUCEVKlRIRYsWVbFixVSlShXFx8dr4MCB+vrrrzVlyhQZY9S0aVM98MADf0fNAAAAAAAAAADYXdfUJVfSrVs3FShwcbX+/v43e/UAAAAAAAAAADi46UG3n5+fEhMTb/ZqAQAAAAAAAABw6ZqnLgEAAAAAAAAAID8h6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAICl5Yuge+LEiYqOjpafn5/q1q2r1atX52m5Tz/9VB4eHurQocPfWyAAAAAAAAAAIN9ye9A9a9YsDRo0SElJSVq3bp2qV6+u+Ph4HTp06IrL7dq1S//5z3/UsGHDW1QpAAAAAAAAACA/cnvQPWbMGPXq1Us9e/ZUpUqVNGnSJAUEBGjatGm5LpOdna1u3bppxIgRKl269C2sFgAAAAAAAACQ37g16M7KytLatWsVFxdnb/P09FRcXJxSUlJyXW7kyJEKCwvTI488ctVtZGZmKiMjw+EHAADkP4zZAADkf4zXAID8yq1Bd3p6urKzsxUeHu7QHh4eroMHD7pc5scff9T777+vqVOn5mkbycnJKlSokP2nZMmSN1w3AAC4+RizAQDI/xivAQD5ldunLrkWJ0+e1EMPPaSpU6cqJCQkT8sMGTJEJ06csP/s2bPnb64SAABcD8ZsAADyP8ZrAEB+VcCdGw8JCZGXl5fS0tIc2tPS0hQREeHUf8eOHdq1a5fatm1rb7PZbJKkAgUKaOvWrSpTpozDMr6+vvL19f0bqgcAADcTYzYAAPkf4zUAIL9y6xndPj4+io2N1dKlS+1tNptNS5cuVb169Zz6x8TEaMOGDVq/fr39p127dmrSpInWr1/PV6YAAAAAAAAA4B/IrWd0S9KgQYOUmJioWrVqqU6dOho7dqxOnz6tnj17SpK6d++u4sWLKzk5WX5+fqpSpYrD8oULF5Ykp3YAAAAAAAAAwD+D24PuBx98UIcPH9awYcN08OBB1ahRQwsWLLBfoHL37t3y9LTUVOIAAAAAAAAAgFvI7UG3JPXv31/9+/d3ed/y5cuvuOwHH3xw8wsCAAAAAAAAAFgGp0oDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA
0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgafki6J44caKio6Pl5+enunXravXq1bn2nTp1qho2bKg77rhDd9xxh+Li4q7YHwAAAAAAAABwe3N70D1r1iwNGjRISUlJWrdunapXr674+HgdOnTIZf/ly5erS5cuWrZsmVJSUlSyZEm1aNFC+/btu8WVAwAAAAAAAADyA7cH3WPGjFGvXr3Us2dPVapUSZMmTVJAQICmTZvmsv/MmTPVt29f1ahRQzExMXrvvfdks9m0dOnSW1w5AAAAAAAAACA/KODOjWdlZWnt2rUaMmSIvc3T01NxcXFKSUnJ0zrOnDmj8+fPq0iRIi7vz8zMVGZmpv12RkbGjRUNAAD+FozZAADkf4zXAID8yq1ndKenpys7O1vh4eEO7eHh4Tp48GCe1vHss8+qWLFiiouLc3l/cnKyChUqZP8pWbLkDdcNAABuPsZsAADyP8ZrAEB+5fapS27Ea6+9pk8//VRz586Vn5+fyz5DhgzRiRMn7D979uy5xVUCAIC8YMwGACD/Y7wGAORXbp26JCQkRF5eXkpLS3NoT0tLU0RExBWXfeONN/Taa69pyZIlqlatWq79fH195evre1PqBQAAfx/GbAAA8j/GawBAfuXWM7p9fHwUGxvrcCHJnAtL1qtXL9flXn/9db300ktasGCBatWqdStKBQAAAAAAAADkU249o1uSBg0apMTERNWqVUt16tTR2LFjdfr0afXs2VOS1L17dxUvXlzJycmSpFGjRmnYsGH65JNPFB0dbZ/LOygoSEFBQW7bDwAAAAAAAACAe7g96H7wwQd1+PBhDRs2TAcPHlSNGjW0YMEC+wUqd+/eLU/P/z/x/N1331VWVpYeeOABh/UkJSVp+PDht7J0AAAAAAAAAEA+4PagW5L69++v/v37u7xv+fLlDrd37dr19xcEAAAAAAAAALAMt87RDQAAAAAAAADAjSLoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJZG0A0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAA
AlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACytgLsLAAAAAAAAAHD9Ygd/5O4SALu1o7u7Zbuc0Q0AAAAAAAAAsDSCbgAAAAAAAACApRF0AwAAAAAAAAAsjaAbAAAAAAAAAGBpXIwSAAAAgEtc2Ar5ibsubAUAAKyBoBuAW/DBGfkJH5wBAAAAALA2pi4BAAAAAAAAAFgaQTcAAAAAAAAAwNIIugEAAAAAAAAAlkbQDQAAAAAAAACwNIJuAAAAAAAAAIClEXQDAAAAAAAAACyNoBsAAAAAAAAAYGkE3QAAAAAAAAAASyPoBgAAAAAAAABYGkE3AAAAAAAAAMDSCLoBAAAAAAAAAJaWL4LuiRMnKjo6Wn5+fqpbt65Wr159xf6zZ89WTEyM/Pz8VLVqVX3zzTe3qFIAAAAAAAAAQH7j9qB71qxZGjRokJKSkrRu3TpVr15d8fHxOnTokMv+P/30k7p06aJHHnlEv/76qzp06KAOHTpo48aNt7hyAAAAAAAAAEB+4Page8yYMerVq5d69uypSpUqadKkSQoICNC0adNc9h83bpxatmypwYMHq2LFinrppZd01113acKECbe4cgAAAAAAAABAfuDWoDsrK0tr165VXFycvc3T01NxcXFKSUlxuUxKSopDf0mKj4/PtT8AAAAAAAAA4PZWwJ0bT09PV3Z2tsLDwx3aw8PDtWXLFpfLHDx40GX/gwcPuuyfmZmpzMxM++0TJ05IkjIyMm6kdCfZmWdv6vqAG3Gz/77/DrxmkJ/c7NdMzvqMMTd1vbe7WzFm896D/ITxGrg2jNf5A+M1/okYs4FrczNfM9cyXrs16L4VkpOTNWLECKf2kiVLuqEa4NYoNP4xd5cAWMrf9Zo5efKkChUq9Les+3bEmI1/GsZr4NowXucPjNf4J2LMBq7N3/Gayct47dagOyQkRF5eXkpLS3NoT0tLU0REhMtlIiIirqn/kCFDNGjQIPttm82mo0ePqmjRovLw8LjBPcDNlJGRoZIlS2rPnj0KDg52dzlAvsdrJv8yxujkyZMqVqyYu0uxFMZsa+C9B7g2vGbyL8br68N4bR28/wDXhtdM/nQt47Vbg24fHx/FxsZq6dKl6tChg6SLg+TSpUvVv39/l8vUq1dPS5cu1YABA+xtixcvVr169Vz29/X1la+vr0Nb4cKFb0b5+JsEBwfzhgJcA14z+RNnhl07xmxr4b0HuDa8ZvInxutrx3htPbz/ANeG10z+k9fx2u1TlwwaNEiJiYmqVauW6tSpo7Fjx+r06dPq2bOnJKl79+4qXry4kpOTJUlPPfWUGjVqpDfffFMJCQn69NNPtWbNGk2ZMsWduwEAAAAAAAAAcBO3B90PPvigDh8+rGHDhungwYOqUaOGFixYYL/g5O7du+Xp6WnvX79+fX3yySd64YUXNHToUJUrV07z5s1TlSpV3LULAAAAAAAAAAA3cnvQLUn9+/fPdaqS5cuXO7V17NhRHTt2/Jurwq3m6+urpKQkp6/BAXCN1wwAd+C9B7g2vGYAuAvvP8C14TVjfR7GGOPuIgAAAAAAAAAAuF6eV+8CAAAAAAAAAED+RdANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gG/nCxIkTFR0dLT8/P9WtW1erV692d0lAvvXDDz+obdu2KlasmDw8PDRv3jx3lwTgH4QxG8gbxmsA7sR4DeQdY/btg6Abbjdr1iwNGjRISUlJWrdunapXr674+HgdOnTI3aUB+dLp06dVvXp1TZw40d2lAPiHYcwG8o7xGoC7MF4D14Yx+/bhYYwx7i4C/2x169ZV7dq1NWHCBEmSzWZTyZIl9cQTT+i5555zc3VA/ubh4aG5c+eqQ4cO7i4FwD8AYzZwfRivAdxKjNfA9WPMtjbO6IZbZWVlae3atYqLi7O3eXp6Ki4uTikpKW6sDAAAXIoxGwCA/I/xGsA/GUE33Co9PV3Z2dk
KDw93aA8PD9fBgwfdVBUAALgcYzYAAPkf4zWAfzKCbgAAAAAAAACApRF0w61CQkLk5eWltLQ0h/a0tDRFRES4qSoAAHA5xmwAAPI/xmsA/2QE3XArHx8fxcbGaunSpfY2m82mpUuXql69em6sDAAAXIoxGwCA/I/xGsA/WQF3FwAMGjRIiYmJqlWrlurUqaOxY8fq9OnT6tmzp7tLA/KlU6dOafv27fbbqampWr9+vYoUKaI777zTjZUBuN0xZgN5x3gNwF0Yr4Frw5h9+/Awxhh3FwFMmDBBo0eP1sGDB1WjRg29/fbbqlu3rrvLAvKl5cuXq0mTJk7tiYmJ+uCDD259QQD+URizgbxhvAbgTozXQN4xZt8+CLoBAAAAAAAAAJbGHN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gG8B127Vrlzw8PLR+/Xp3lwIAAHLBeA0AQP7HeA3cOIJu4B+mR48e6tChg7vLAAAAV8B4DQBA/sd4DeQvBN0AXDp//ry7SwAAAFfBeA0AQP7HeA3cGgTdwG1qzpw5qlq1qvz9/VW0aFHFxcVp8ODB+vDDD/Xll1/Kw8NDHh4eWr58uf0rUrNmzVKjRo3k5+enmTNnymazaeTIkSpRooR8fX1Vo0YNLViwINdtZmdn6+GHH1ZMTIx2794tSfryyy911113yc/PT6VLl9aIESN04cKFW/UwAACQrzFeAwCQ/zFeA9ZQwN0FALj5Dhw4oC5duuj111/Xfffdp5MnT2rFihXq3r27du/erYyMDE2fPl2SVKRIEe3fv1+S9Nxzz+nNN99UzZo15efnp3HjxunNN9/U5MmTVbNmTU2bNk3t2rXTH3/8oXLlyjlsMzMzU126dNGuXbu0YsUKhYaG2rf59ttvq2HDhtqxY4d69+4tSUpKSrq1DwoAAPkM4zUAAPkf4zVgIQbAbWft2rVGktm1a5fTfYmJiaZ9+/YObampqUaSGTt2rEN7sWLFzCuvvOLQVrt2bdO3b1+H5VasWGGaNWtm7rnnHnP8+HF732bNmplXX33VYfmPP/7YREZG3sjuAQBwW2C8BgAg/2O8BqyDM7qB21D16tXVrFkzVa1aVfHx8WrRooUeeOAB3XHHHVdcrlatWvbfMzIytH//fjVo0MChT4MGDfTbb785tHXp0kUlSpTQd999J39/f3v7b7/9ppUrV+qVV16xt2VnZ+vcuXM6c+aMAgICbmQ3AQCwNMZrAADyP8ZrwDqYoxu4DXl5eWnx4sX69ttvValSJY0fP14VKlRQamrqFZcLDAy8ru21bt1av//+u1JSUhzaT506pREjRmj9+vX2nw0bNmjbtm3y8/O7rm0BAHC7YLwGACD/Y7wGrIMzuoHblIeHhxo0aKAGDRpo2LBhioqK0ty5c+Xj46Ps7OyrLh8cHKxixYpp5cqVatSokb195cqVqlOnjkPfxx9/XFWqVFG7du00f/58e/+77rpLW7duVdmyZW/uzgEAcJtgvAYAIP9jvAasgaAbuA2tWrVKS5cuVYsWLRQWFqZVq1bp8OHDqlixos6dO6eFCxdq69atKlq0qAoVKpTregYPHqykpCSVKVNGNWrU0PTp07V+/XrNnDnTqe8TTzyh7OxstWnTRt9++63uueceDRs2TG3atNGdd96pBx54QJ6envrtt9+0ceNGvfzyy3/nQwAAQL7HeA0AQP7HeA1YB0E3cBsKDg7WDz/8oLFjxyojI0NRUVF688031apVK9WqVUvLly9XrVq1dOrUKS1btkzR0dEu1/Pkk0/qxIkTevrpp3Xo0CFVqlRJ//vf/5yuCJ1jwIABstlsat26tRYsWKD4+Hh9/fXXGjlypEaNGiVvb2/FxMTo0Ucf/Rv3HgAAa2C8BgAg/2O8BqzDwxhj3F0EAAAAAAAAAADXi4tRAgAAAAA
AAAAsjaAbAAAAAAAAAGBpBN0AAAAAAAAAAEsj6AYAAAAAAAAAWBpBNwAAAAAAAADA0gi6AQAAAAAAAACWRtANAAAAAAAAALA0gm4AAAAAAAAAgKURdAMAAAAAAAAALI2gGwAAAAAAAABgaQTdAAAAAAAAAABLI+gGAAAAAAAAAFja/wF3BtK2Vsfe/AAAAABJRU5ErkJggg==", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Функция для анализа сбалансированности\n", + "def analyze_balance(y_train, y_val, y_test, y_name):\n", + " # Распределение классов\n", + " print(\"Распределение классов в обучающей выборке:\")\n", + " print(y_train.value_counts(normalize=True))\n", + " \n", + " print(\"\\nРаспределение классов в контрольной выборке:\")\n", + " print(y_val.value_counts(normalize=True))\n", + " \n", + " print(\"\\nРаспределение классов в тестовой выборке:\")\n", + " print(y_test.value_counts(normalize=True))\n", + "\n", + " # Создание фигуры и осей для трех столбчатых диаграмм\n", + " fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)\n", + " fig.suptitle('Распределение в различных выборках')\n", + "\n", + " # Обучающая выборка\n", + " sns.barplot(x=y_train.value_counts().index, y=y_train.value_counts(normalize=True), ax=axes[0])\n", + " axes[0].set_title('Обучающая выборка')\n", + " axes[0].set_xlabel(y_name)\n", + " axes[0].set_ylabel('Доля')\n", + "\n", + " # Контрольная выборка\n", + " sns.barplot(x=y_val.value_counts().index, y=y_val.value_counts(normalize=True), ax=axes[1])\n", + " axes[1].set_title('Контрольная выборка')\n", + " axes[1].set_xlabel(y_name)\n", + "\n", + " # Тестовая выборка\n", + " sns.barplot(x=y_test.value_counts().index, y=y_test.value_counts(normalize=True), ax=axes[2])\n", + " axes[2].set_title('Тестовая выборка')\n", + " axes[2].set_xlabel(y_name)\n", + "\n", + " plt.show()\n", + "\n", + "analyze_balance(y_train, y_val, y_test, 'stroke')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Легко заметить, что выборки несбалансированны. Необходимо сбалансировать обучающую и контрольную выборки, чтобы получить лучшие результаты при обучении модели. 
Для балансировки применим RandomOverSampler (балансируем только обучающую выборку: контрольная, как и тестовая, должна сохранять исходное распределение классов, иначе оценка качества будет искажена):" + ] + }, + { + "cell_type": "code", + "execution_count": 446, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Распределение классов в обучающей выборке:\n", + "stroke\n", + "0 0.5\n", + "1 0.5\n", + "Name: proportion, dtype: float64\n", + "\n", + "Распределение классов в контрольной выборке:\n", + "stroke\n", + "0 0.5\n", + "1 0.5\n", + "Name: proportion, dtype: float64\n", + "\n", + "Распределение классов в тестовой выборке:\n", + "stroke\n", + "0 0.941944\n", + "1 0.058056\n", + "Name: proportion, dtype: float64\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "ros = RandomOverSampler(random_state=42)\n", + "\n", + "# Oversample ONLY the training split: duplicating minority rows in the validation\n", + "# split would make it unrepresentative of real data and inflate the validation metrics.\n", + "X_train_resampled, y_train_resampled = ros.fit_resample(X_train, y_train)\n", + "\n", + "# The validation split is passed through unchanged. The *_resampled names are kept\n", + "# only because the following cells refer to them.\n", + "X_val_resampled = X_val.reset_index(drop=True)\n", + "y_val_resampled = y_val.reset_index(drop=True)\n", + "\n", + "# Check the class balance after oversampling\n", + "analyze_balance(y_train_resampled, y_val_resampled, y_test, 'stroke')" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обучающая выборка сбалансирована; контрольная и тестовая сохраняют исходное распределение классов.\n", + "\n", + "### Перейдем к конструированию признаков\n", + "\n", + "Для начала применим унитарное кодирование категориальных признаков (one-hot encoding), переведя их в бинарные вектора:" + ] + }, + { + "cell_type": "code", + "execution_count": 447, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id age hypertension heart_disease avg_glucose_level bmi \\\n", + "0 16605 57.0 0 0 106.24 32.3 \n", + "1 12015 14.0 0 0 99.87 25.2 \n", + "2 26474 44.0 0 0 97.16 33.1 \n", + "3 31143 22.0 0 0 107.52 41.6 \n", + "4 2447 63.0 0 0 85.04 29.7 \n", + "\n", + " gender_Male gender_Other ever_married_Yes work_type_Never_worked \\\n", + "0 True False True False \n", + "1 True False False False \n", + "2 False False True False \n", + "3 False False False False \n", + "4 False False True False \n", + "\n", + " work_type_Private work_type_Self-employed work_type_children \\\n", + "0 True False False \n", + "1 False False True \n", + "2 False False False \n", + "3 True False False \n", + "4 True False False \n", + "\n", + " Residence_type_Urban smoking_status_formerly smoked \\\n", + "0 True False \n", + "1 True False \n", + "2 True False \n", + "3 False False \n", + "4 True True \n", + "\n", + " smoking_status_never smoked smoking_status_smokes \n", + "0 True False \n", + "1 False False \n", + "2 False False \n", + "3 False False \n", + "4 
False False \n" + ] + } + ], + "source": [ + "# Categorical columns to one-hot encode\n", + "categorical_features = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']\n", + "\n", + "# Category levels are learned from the training split only, so every split is encoded\n", + "# into exactly the same dummy columns even when a rare level is missing from one of them.\n", + "train_categories = {\n", + "    col: sorted(X_train_resampled[col].dropna().unique())\n", + "    for col in categorical_features\n", + "}\n", + "\n", + "def one_hot_encode(frame, categories):\n", + "    \"\"\"One-hot encode the `categories` columns of `frame` using the given fixed levels.\n", + "\n", + "    A value that is not among the fixed levels is encoded as all-zero dummies.\n", + "    The input frame is not modified.\n", + "    \"\"\"\n", + "    typed = frame.copy()\n", + "    for col, levels in categories.items():\n", + "        typed[col] = pd.Categorical(typed[col], categories=levels)\n", + "    return pd.get_dummies(typed, columns=list(categories), drop_first=True)\n", + "\n", + "X_train_encoded = one_hot_encode(X_train_resampled, train_categories)\n", + "X_val_encoded = one_hot_encode(X_val_resampled, train_categories)\n", + "X_test_encoded = one_hot_encode(X_test, train_categories)\n", + "\n", + "print(X_train_encoded.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Далее к числовым признакам, а именно к колонке age, применим дискретизацию (позволяет преобразовать данные из числового представления в категориальное):" + ] + }, + { + "cell_type": "code", + "execution_count": 448, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id hypertension heart_disease avg_glucose_level bmi gender_Male \\\n", + "0 16605 0 0 106.24 32.3 True \n", + "1 12015 0 0 99.87 25.2 True \n", + "2 26474 0 0 97.16 33.1 False \n", + "3 31143 0 0 107.52 41.6 False \n", + "4 2447 0 0 85.04 29.7 False \n", + "\n", + " gender_Other ever_married_Yes work_type_Never_worked work_type_Private \\\n", + "0 False True False True \n", + "1 False False False False \n", + "2 False True False False \n", + "3 False False False True \n", + "4 False True False True \n", + "\n", + " work_type_Self-employed work_type_children Residence_type_Urban \\\n", + "0 False False True \n", + "1 False True True \n", + "2 False False True \n", + "3 False False False \n", + "4 False False True \n", + "\n", + " smoking_status_formerly smoked smoking_status_never smoked \\\n", + "0 False True \n", + "1 False False \n", + "2 False False \n", + 
"3 False False \n", + "4 True False \n", + "\n", + " smoking_status_smokes age_bin \n", + "0 False old \n", + "1 False young \n", + "2 False middle-aged \n", + "3 False young \n", + "4 False old \n" + ] + } + ], + "source": [ + "# Numeric columns to discretize\n", + "numerical_features = ['age']\n", + "\n", + "def discretize_features(df, features, bins, labels):\n", + "    \"\"\"Replace every column listed in `features` with a binned `<feature>_bin` column.\n", + "\n", + "    The bins and labels are passed straight to pd.cut. The input frame is not\n", + "    modified; a new frame is returned.\n", + "    \"\"\"\n", + "    result = df.copy()\n", + "    for feature in features:\n", + "        result[f'{feature}_bin'] = pd.cut(result[feature], bins=bins, labels=labels)\n", + "    return result.drop(columns=list(features))\n", + "\n", + "# Bin edges and the labels of the resulting intervals\n", + "age_bins = [0, 25, 55, 100]\n", + "age_labels = [\"young\", \"middle-aged\", \"old\"]\n", + "\n", + "# Discretize the training, validation and test splits\n", + "X_train_encoded = discretize_features(X_train_encoded, numerical_features, bins=age_bins, labels=age_labels)\n", + "X_val_encoded = discretize_features(X_val_encoded, numerical_features, bins=age_bins, labels=age_labels)\n", + "X_test_encoded = discretize_features(X_test_encoded, numerical_features, bins=age_bins, labels=age_labels)\n", + "\n", + "print(X_train_encoded.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Применим ручной синтез признаков. Это создание новых признаков на основе существующих, учитывая экспертные знания и логику предметной области. К примеру, в этом случае можно создать признак, в котором вычисляется, насколько уровень глюкозы отклоняется от среднего для возрастной группы пациента. Такой признак может быть полезен для выделения пациентов с нетипичными данными." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 449, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id hypertension heart_disease avg_glucose_level bmi gender_Male \\\n", + "0 16605 0 0 106.24 32.3 True \n", + "1 12015 0 0 99.87 25.2 True \n", + "2 26474 0 0 97.16 33.1 False \n", + "3 31143 0 0 107.52 41.6 False \n", + "4 2447 0 0 85.04 29.7 False \n", + "\n", + " gender_Other ever_married_Yes work_type_Never_worked work_type_Private \\\n", + "0 False True False True \n", + "1 False False False False \n", + "2 False True False False \n", + "3 False False False True \n", + "4 False True False True \n", + "\n", + " work_type_Self-employed work_type_children Residence_type_Urban \\\n", + "0 False False True \n", + "1 False True True \n", + "2 False False True \n", + "3 False False False \n", + "4 False False True \n", + "\n", + " smoking_status_formerly smoked smoking_status_never smoked \\\n", + "0 False True \n", + "1 False False \n", + "2 False False \n", + "3 False False \n", + "4 True False \n", + "\n", + " smoking_status_smokes age_bin glucose_age_deviation \n", + "0 False old -27.642870 \n", + "1 False young 6.088032 \n", + "2 False middle-aged -6.217053 \n", + "3 False young 13.738032 \n", + "4 False old -48.842870 \n" + ] + } + ], + "source": [ + "# Mean glucose level per age group, learned on the training split only\n", + "age_glucose_mean = X_train_encoded.groupby('age_bin', observed=False)['avg_glucose_level'].mean()\n", + "\n", + "def add_glucose_age_deviation(frame, group_means):\n", + "    \"\"\"Return a copy of `frame` with avg_glucose_level minus the mean of its age group.\n", + "\n", + "    `group_means` is a Series of mean glucose levels indexed by age_bin label.\n", + "    Rows whose age_bin has no entry in `group_means` get NaN.\n", + "    \"\"\"\n", + "    # Map through plain labels so the result is an ordinary float column\n", + "    expected = frame['age_bin'].astype(object).map(group_means.to_dict()).astype(float)\n", + "    return frame.assign(glucose_age_deviation=frame['avg_glucose_level'] - expected)\n", + "\n", + "# The training means are reused for validation and test: the feature then has the same\n", + "# meaning in every split and no statistics of the evaluation data leak into it.\n", + "X_train_encoded = add_glucose_age_deviation(X_train_encoded, age_glucose_mean)\n", + "X_val_encoded = add_glucose_age_deviation(X_val_encoded, age_glucose_mean)\n", + "X_test_encoded = add_glucose_age_deviation(X_test_encoded, 
age_glucose_mean)\n", + "\n", + "print(X_train_encoded.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Теперь используем масштабирование признаков, что позволяет привести все числовые признаки к одинаковым или очень похожим диапазонам значений либо распределениям. По результатам многочисленных исследований масштабирование признаков позволяет получить более качественную модель за счет снижения доминирования одних признаков над другими." + ] + }, + { + "cell_type": "code", + "execution_count": 450, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id hypertension heart_disease avg_glucose_level bmi \\\n", + "0 16605 0 0 -0.244097 0.426328 \n", + "1 12015 0 0 -0.360110 -0.596170 \n", + "2 26474 0 0 -0.409465 0.541539 \n", + "3 31143 0 0 -0.220785 1.765656 \n", + "4 2447 0 0 -0.630199 0.051892 \n", + "\n", + " gender_Male gender_Other ever_married_Yes work_type_Never_worked \\\n", + "0 True False True False \n", + "1 True False False False \n", + "2 False False True False \n", + "3 False False False False \n", + "4 False False True False \n", + "\n", + " work_type_Private work_type_Self-employed work_type_children \\\n", + "0 True False False \n", + "1 False False True \n", + "2 False False False \n", + "3 True False False \n", + "4 True False False \n", + "\n", + " Residence_type_Urban smoking_status_formerly smoked \\\n", + "0 True False \n", + "1 True False \n", + "2 True False \n", + "3 False False \n", + "4 True True \n", + "\n", + " smoking_status_never smoked smoking_status_smokes age_bin \\\n", + "0 True False old \n", + "1 False False young \n", + "2 False False middle-aged \n", + "3 False False young \n", + "4 False False old \n", + "\n", + " glucose_age_deviation \n", + "0 -0.528807 \n", + "1 0.116464 \n", + "2 -0.118932 \n", + "3 0.262808 \n", + "4 -0.934362 \n" + ] + } + ], + "source": [ + "# Scale the numeric features (the scaler is fitted on the training split only)\n", + "numerical_features = 
['avg_glucose_level', 'bmi', 'glucose_age_deviation']\n", + "\n", + "scaler = StandardScaler()\n", + "X_train_encoded[numerical_features] = scaler.fit_transform(X_train_encoded[numerical_features])\n", + "X_val_encoded[numerical_features] = scaler.transform(X_val_encoded[numerical_features])\n", + "X_test_encoded[numerical_features] = scaler.transform(X_test_encoded[numerical_features])\n", + "\n", + "print(X_train_encoded.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "И также попробуем сконструировать признаки, используя фреймворк Featuretools:" + ] + }, + { + "cell_type": "code", + "execution_count": 451, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " id hypertension heart_disease avg_glucose_level bmi \\\n", + "index \n", + "0 16605 0 0 -0.244097 0.426328 \n", + "1 12015 0 0 -0.360110 -0.596170 \n", + "2 26474 0 0 -0.409465 0.541539 \n", + "3 31143 0 0 -0.220785 1.765656 \n", + "4 2447 0 0 -0.630199 0.051892 \n", + "\n", + " gender_Male gender_Other ever_married_Yes work_type_Never_worked \\\n", + "index \n", + "0 True False True False \n", + "1 True False False False \n", + "2 False False True False \n", + "3 False False False False \n", + "4 False False True False \n", + "\n", + " work_type_Private work_type_Self-employed work_type_children \\\n", + "index \n", + "0 True False False \n", + "1 False False True \n", + "2 False False False \n", + "3 True False False \n", + "4 True False False \n", + "\n", + " Residence_type_Urban smoking_status_formerly smoked \\\n", + "index \n", + "0 True False \n", + "1 True False \n", + "2 True False \n", + "3 False False \n", + "4 True True \n", + "\n", + " smoking_status_never smoked smoking_status_smokes age_bin \\\n", + "index \n", + "0 True False old \n", + "1 False False young \n", + "2 False False middle-aged \n", + "3 False False young \n", + "4 False False old \n", + "\n", + " glucose_age_deviation \n", + "index \n", + "0 
-0.528807 \n", + "1 0.116464 \n", + "2 -0.118932 \n", + "3 0.262808 \n", + "4 -0.934362 \n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\Ilya\\Desktop\\AIM\\aimenv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", + " pd.to_datetime(\n" + ] + } + ], + "source": [ + "data = X_train_encoded.copy()  # work on a copy of the preprocessed training data\n", + "\n", + "es = ft.EntitySet(id=\"patients\")\n", + "\n", + "es = es.add_dataframe(dataframe_name=\"strokes_data\", dataframe=data, index=\"index\", make_index=True)\n", + "\n", + "# With a single dataframe and no explicit primitives, DFS returned the input columns\n", + "# unchanged. Ask for pairwise products of the numeric columns so new features are\n", + "# actually built, and keep the patient identifier out of them.\n", + "feature_matrix, feature_defs = ft.dfs(\n", + "    entityset=es,\n", + "    target_dataframe_name=\"strokes_data\",\n", + "    trans_primitives=[\"multiply_numeric\"],\n", + "    ignore_columns={\"strokes_data\": [\"id\"]},\n", + "    max_depth=1\n", + ")\n", + "\n", + "print(feature_matrix.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Оценим качество набора признаков.\n", + "\n", + "Представим основные оценки качества наборов признаков: \n", + "\n", + "* Предсказательная способность Метрики: RMSE, MAE, R²\n", + "\n", + " Методы: Обучение модели на обучающей выборке и оценка на контрольной и тестовой выборках.\n", + "\n", + "* Скорость вычисления \n", + "\n", + " Методы: Измерение времени выполнения генерации признаков и обучения модели.\n", + "\n", + "* Надежность \n", + "\n", + " Методы: Кросс-валидация, анализ чувствительности модели к изменениям в данных.\n", + "\n", + "* Корреляция \n", + "\n", + " Методы: Анализ корреляционной матрицы признаков, удаление мультиколлинеарных признаков.\n", + "\n", + "* Цельность \n", + "\n", + " Методы: Проверка логической связи между признаками и целевой переменной, интерпретация результатов модели." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 452, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Время обучения модели: 0.01 секунд\n", + "RMSE на контрольной выборке: 0.41\n" + ] + } + ], + "source": [ + "# One-hot encode the non-numeric columns that are still left (age_bin after discretization)\n", + "X_train_encoded = pd.get_dummies(X_train_encoded, drop_first=True)\n", + "X_val_encoded = pd.get_dummies(X_val_encoded, drop_first=True)\n", + "X_test_encoded = pd.get_dummies(X_test_encoded, drop_first=True)\n", + "\n", + "# Align validation and test to the training columns (names and order)\n", + "all_columns = X_train_encoded.columns\n", + "X_val_encoded = X_val_encoded.reindex(columns=all_columns, fill_value=0)\n", + "X_test_encoded = X_test_encoded.reindex(columns=all_columns, fill_value=0)\n", + "\n", + "# NOTE(review): stroke is a binary label, so a classifier scored with precision/recall/ROC-AUC\n", + "# would suit the task better than a regressor; the regressor is kept to match the text.\n", + "# NOTE(review): the patient `id` column is still among the features and carries no real signal.\n", + "model = LinearRegression()\n", + "\n", + "# perf_counter is a monotonic high-resolution clock, suited to timing short intervals\n", + "start_time = time.perf_counter()\n", + "model.fit(X_train_encoded, y_train_resampled)\n", + "train_time = time.perf_counter() - start_time\n", + "\n", + "# Validation error: root_mean_squared_error returns RMSE (not MSE), hence the name and label\n", + "predictions = model.predict(X_val_encoded)\n", + "rmse_val = root_mean_squared_error(y_val_resampled, predictions)\n", + "\n", + "print(f'Время обучения модели: {train_time:.2f} секунд')\n", + "print(f'RMSE на контрольной выборке: {rmse_val:.2f}')" + ] + }, + { + "cell_type": "code", + "execution_count": 453, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\n", + "RMSE: 0.24109840514907446\n", + "R²: -0.06295721700021817\n", + "MAE: 0.10402478799739073 \n", + "\n", + "Кросс-валидация RMSE: 0.1197518340742331 \n", + "\n", + "Train RMSE: 0.037396456827854585\n", + "Train R²: 0.9944060200668896\n", + "Train MAE: 0.010727424749163881\n", + "\n" + ] + } + ], + "source": [ + "# Model choice\n", + "model = RandomForestRegressor(random_state=42)\n", + "\n", + "# Fit on the (oversampled) training split\n", + "model.fit(X_train_encoded, 
y_train_resampled)\n", + "\n", + "# Predict and score on the untouched test split\n", + "y_pred = model.predict(X_test_encoded)\n", + "\n", + "rmse = root_mean_squared_error(y_test, y_pred)\n", + "r2 = r2_score(y_test, y_pred)\n", + "mae = mean_absolute_error(y_test, y_pred)\n", + "\n", + "print()\n", + "print(f\"RMSE: {rmse}\")\n", + "print(f\"R²: {r2}\")\n", + "print(f\"MAE: {mae} \\n\")\n", + "\n", + "# Cross-validation.\n", + "# NOTE(review): the folds are cut from already oversampled data, so copies of the same\n", + "# minority row can land in both the fitting and the held-out part of a fold; this score\n", + "# is therefore optimistic compared with the test score above.\n", + "scores = cross_val_score(model, X_train_encoded, y_train_resampled, cv=5, scoring='neg_mean_squared_error')\n", + "rmse_cv = (-scores.mean())**0.5\n", + "print(f\"Кросс-валидация RMSE: {rmse_cv} \\n\")\n", + "\n", + "# Overfitting check: score the model on the data it was fitted on\n", + "y_train_pred = model.predict(X_train_encoded)\n", + "\n", + "rmse_train = root_mean_squared_error(y_train_resampled, y_train_pred)\n", + "r2_train = r2_score(y_train_resampled, y_train_pred)\n", + "mae_train = mean_absolute_error(y_train_resampled, y_train_pred)\n", + "\n", + "print(f\"Train RMSE: {rmse_train}\")\n", + "print(f\"Train R²: {r2_train}\")\n", + "print(f\"Train MAE: {mae_train}\")\n", + "print()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Можно заметить, что модель хорошо подстроилась под тренировочные данные (Низкий Train RMSE и высокое значение Train R²). Однако высокий RMSE и отрицательный R² на тестовом наборе свидетельствуют о том, что модель не обобщила зависимости и плохо предсказывает новые данные, поэтому можно сделать вывод о том, что получившийся набор признаков, к сожалению, далек от идеала. 
" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "aimenv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/lab_3/requirements.txt b/lab_3/requirements.txt new file mode 100644 index 0000000..035855b Binary files /dev/null and b/lab_3/requirements.txt differ