AIM-PIbd-31-Ievlewa-M-D/lab2/lab2.ipynb

2988 lines
864 KiB
Plaintext
Raw Permalink Normal View History

2024-10-11 18:41:52 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 1. Датасет: Диабет у индейцев Пима\n",
"https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database\n",
"##### О наборе данных: \n",
"Этот набор данных был получен из Национального института диабета, болезней органов пищеварения и почек. Цель набора данных - диагностически предсказать, есть ли у пациента сахарный диабет, на основе определенных диагностических измерений, включенных в набор данных. На выбор этих образцов из более обширной базы данных было наложено несколько ограничений. В частности, все пациенты были женщинами в возрасте не менее 21 года, родом из племени пима.\n",
"##### Таким образом:\n",
"* Объект наблюдения - женщины племени пима, возрастом от 21 года.\n",
"* Атрибуты: Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age, Outcome.\n",
"* Проблемная область: Предсказание диабета у пациента на основе измерений."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 347,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество колонок: 9\n",
"Колонки: Pregnancies, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age, Outcome\n",
"\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 768 entries, 0 to 767\n",
"Data columns (total 9 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 Pregnancies 768 non-null int64 \n",
" 1 Glucose 768 non-null int64 \n",
" 2 BloodPressure 768 non-null int64 \n",
" 3 SkinThickness 768 non-null int64 \n",
" 4 Insulin 768 non-null int64 \n",
" 5 BMI 768 non-null float64\n",
" 6 DiabetesPedigreeFunction 768 non-null float64\n",
" 7 Age 768 non-null int64 \n",
" 8 Outcome 768 non-null int64 \n",
"dtypes: float64(2), int64(7)\n",
"memory usage: 54.1 KB\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pregnancies</th>\n",
" <th>Glucose</th>\n",
" <th>BloodPressure</th>\n",
" <th>SkinThickness</th>\n",
" <th>Insulin</th>\n",
" <th>BMI</th>\n",
" <th>DiabetesPedigreeFunction</th>\n",
" <th>Age</th>\n",
" <th>Outcome</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>6</td>\n",
" <td>148</td>\n",
" <td>72</td>\n",
" <td>35</td>\n",
" <td>0</td>\n",
" <td>33.6</td>\n",
" <td>0.627</td>\n",
" <td>50</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>1</td>\n",
" <td>85</td>\n",
" <td>66</td>\n",
" <td>29</td>\n",
" <td>0</td>\n",
" <td>26.6</td>\n",
" <td>0.351</td>\n",
" <td>31</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>8</td>\n",
" <td>183</td>\n",
" <td>64</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>23.3</td>\n",
" <td>0.672</td>\n",
" <td>32</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1</td>\n",
" <td>89</td>\n",
" <td>66</td>\n",
" <td>23</td>\n",
" <td>94</td>\n",
" <td>28.1</td>\n",
" <td>0.167</td>\n",
" <td>21</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>137</td>\n",
" <td>40</td>\n",
" <td>35</td>\n",
" <td>168</td>\n",
" <td>43.1</td>\n",
" <td>2.288</td>\n",
" <td>33</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"0 6 148 72 35 0 33.6 \n",
"1 1 85 66 29 0 26.6 \n",
"2 8 183 64 0 0 23.3 \n",
"3 1 89 66 23 94 28.1 \n",
"4 0 137 40 35 168 43.1 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"0 0.627 50 1 \n",
"1 0.351 31 0 \n",
"2 0.672 32 1 \n",
"3 0.167 21 0 \n",
"4 2.288 33 1 "
]
},
2024-10-12 12:38:44 +04:00
"execution_count": 347,
2024-10-11 18:41:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\".//static//csv//diabetes.csv\", sep=\",\")\n",
"print('Количество колонок: ' + str(df.columns.size)) \n",
"print('Колонки: ' + ', '.join(df.columns)+'\\n')\n",
"\n",
"df.info()\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Получение сведений о пропущенных данных\n",
"Типы пропущенных данных:\n",
"\n",
"- None - представление пустых данных в Python\n",
"- NaN - представление пустых данных в Pandas\n",
"- '' - пустая строка"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 348,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Pregnancies 0\n",
"Glucose 0\n",
"BloodPressure 0\n",
"SkinThickness 0\n",
"Insulin 0\n",
"BMI 0\n",
"DiabetesPedigreeFunction 0\n",
"Age 0\n",
"Outcome 0\n",
"dtype: int64\n",
"\n",
"Pregnancies False\n",
"Glucose False\n",
"BloodPressure False\n",
"SkinThickness False\n",
"Insulin False\n",
"BMI False\n",
"DiabetesPedigreeFunction False\n",
"Age False\n",
"Outcome False\n",
"dtype: bool\n",
"\n",
"Pregnancies процент пустых значений: %0.00\n",
"Glucose процент пустых значений: %0.00\n",
"BloodPressure процент пустых значений: %0.00\n",
"SkinThickness процент пустых значений: %0.00\n",
"Insulin процент пустых значений: %0.00\n",
"BMI процент пустых значений: %0.00\n",
"DiabetesPedigreeFunction процент пустых значений: %0.00\n",
"Age процент пустых значений: %0.00\n",
"Outcome процент пустых значений: %0.00\n"
]
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" print(f\"{i} процент пустых значений: %{null_rate:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Проверим выбросы и устраним их:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 349,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Колонка Pregnancies:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 4\n",
" Минимальное значение: 0.0\n",
" Максимальное значение: 13.5\n",
" 1-й квартиль (Q1): 1.0\n",
" 3-й квартиль (Q3): 6.0\n",
"\n",
"Колонка Glucose:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 5\n",
" Минимальное значение: 37.125\n",
" Максимальное значение: 199.0\n",
" 1-й квартиль (Q1): 99.0\n",
" 3-й квартиль (Q3): 140.25\n",
"\n",
"Колонка BloodPressure:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 45\n",
" Минимальное значение: 35.0\n",
" Максимальное значение: 107.0\n",
" 1-й квартиль (Q1): 62.0\n",
" 3-й квартиль (Q3): 80.0\n",
"\n",
"Колонка SkinThickness:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 1\n",
" Минимальное значение: 0.0\n",
" Максимальное значение: 80.0\n",
" 1-й квартиль (Q1): 0.0\n",
" 3-й квартиль (Q3): 32.0\n",
"\n",
"Колонка Insulin:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 34\n",
" Минимальное значение: 0.0\n",
" Максимальное значение: 318.125\n",
" 1-й квартиль (Q1): 0.0\n",
" 3-й квартиль (Q3): 127.25\n",
"\n",
"Колонка BMI:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 19\n",
" Минимальное значение: 13.35\n",
" Максимальное значение: 50.550000000000004\n",
" 1-й квартиль (Q1): 27.3\n",
" 3-й квартиль (Q3): 36.6\n",
"\n",
"Колонка DiabetesPedigreeFunction:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 29\n",
" Минимальное значение: 0.078\n",
" Максимальное значение: 1.2\n",
" 1-й квартиль (Q1): 0.24375\n",
" 3-й квартиль (Q3): 0.62625\n",
"\n",
"Колонка Age:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 9\n",
" Минимальное значение: 21.0\n",
" Максимальное значение: 66.5\n",
" 1-й квартиль (Q1): 24.0\n",
" 3-й квартиль (Q3): 41.0\n",
"\n"
]
}
],
"source": [
"numeric_columns = ['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin', 'BMI', 'DiabetesPedigreeFunction', 'Age']\n",
"for column in numeric_columns:\n",
" if pd.api.types.is_numeric_dtype(df[column]): # Проверяем, является ли колонка числовой\n",
" q1 = df[column].quantile(0.25) # Находим 1-й квартиль (Q1)\n",
" q3 = df[column].quantile(0.75) # Находим 3-й квартиль (Q3)\n",
" iqr = q3 - q1 # Вычисляем межквартильный размах (IQR)\n",
"\n",
" # Определяем границы для выбросов\n",
" lower_bound = q1 - 1.5 * iqr # Нижняя граница\n",
" upper_bound = q3 + 1.5 * iqr # Верхняя граница\n",
"\n",
" # Подсчитываем количество выбросов\n",
" outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]\n",
" outlier_count = outliers.shape[0]\n",
"\n",
" # Устраняем выбросы: заменяем значения ниже нижней границы на саму нижнюю границу, а выше верхней — на верхнюю\n",
" df[column] = df[column].apply(lambda x: lower_bound if x < lower_bound else upper_bound if x > upper_bound else x)\n",
"\n",
" print(f\"Колонка {column}:\")\n",
" print(f\" Есть выбросы: {'Да' if outlier_count > 0 else 'Нет'}\")\n",
" print(f\" Количество выбросов: {outlier_count}\")\n",
" print(f\" Минимальное значение: {df[column].min()}\")\n",
" print(f\" Максимальное значение: {df[column].max()}\")\n",
" print(f\" 1-й квартиль (Q1): {q1}\")\n",
" print(f\" 3-й квартиль (Q3): {q3}\\n\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Постараемся выявить зависимости Outcome от остальных колонок:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 350,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAIjCAYAAADCyya0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHSElEQVR4nO3deVhUZf8/8PeAzLAjyCIKAgouKGpuhBuaKGpqmo+l4uOaW4qaWkqPhhgu1VNpaW6Ztqq55JOpuaWouaaIuKYIrigCAgKCLPfvD3/Mt3EGnHMYmpl8v66L6+Lc58yZz8yZM++5z6oQQggQERHJYGHsAoiIyHwxRIiISDaGCBERycYQISIi2RgiREQkG0OEiIhkY4gQEZFsDBEiIpKNIUJERLIxRIhIw5w5c6BQKIxdBpkJg4VIUlISxo4di7p168La2hqOjo5o164dFi9ejEePHhnqaYgk8fX1hUKhUP+5u7ujQ4cO+Omnn4xdGhnR+fPnMWTIENSuXRsqlQq1atVCREQEzp8/X6n5zp8/H1u3bjVMkWZCYYhrZ23fvh0DBgyASqXC0KFD0aRJEzx+/BiHDx/G5s2bMXz4cKxcudIQ9RJJ4uvrC2dnZ0ybNg0AcOfOHaxYsQLXrl3DsmXLMG7cOCNXaHqKi4tRXFwMa2trY5dSJbZs2YJBgwbBxcUFo0aNgp+fH1JSUrB69WpkZGRg/fr16Nevn6x529vb41//+hfWrl1r2KJNmaika9euCXt7e9GwYUNx584drfFXrlwRixYtquzTEMni4+MjXn75ZY221NRUYWdnJ+rXr1/u44qKikRhYWFVl0d/s6tXrwpbW1vRsGFDkZaWpjHu/v37omHDhsLOzk4kJSXJmr+dnZ0YNmyYASo1H5UOkXHjxgkA4vfff9fvCQExYcIE8d1334n69esLlUolWrRoIeLi4rSmvXXrlhgxYoRwd3cXSqVSBAYGitWrV+ucb3R0tACg9RcaGqoxXWhoqGjcuLHW4z/66CMBQCQnJ2u079ixQ7Rv317Y2toKe3t70bNnT3Hu3Dmtx1+8eFH0799fODs7C5VKJVq2bCn+97//PfP9uHTpkujcubPw8PAQSqVSeHl5ibFjx4qMjAz1NPv37xcAxMaNG7Ue//SHNiMjQ0ybNk00adJE2NnZCQcHB9G9e3dx5swZne/X03x8fLRWggcPHojJkycLLy8voVQqRb169cTChQtFSUmJeprk5GQBQHz00Uda82zcuLHGcih7Pfv379eYrmfPngKAiI6O1miX8jnQ9XqeDhEhhGjVqpWwsrLSqv3TTz8VdevWFRYWFiI+Pl4Iof+yTUhIEB07dhTW1taidu3a4v333xdfffWV1ueqrKZDhw6J1q1bC5VKJfz8/MTXX3+tMT99l2XZ+7lhwwYRGxsrateuLVQqlXjppZfElStXtOo8duyY6NGjh6hevbqwtbUVQUFBGj/0yvtsfPvtt6JFixbC2tpaODs7i9dff13cuHFDY5o///xTvPrqq8LDw0OoVCpRu3Zt8frrr4usrCyt+f1VaGiozvW37O/p9XLp0qUiMDBQKJVK4enpKd58803x4MGDCp9DCCHGjh0rAIiDBw/qHB8XFycAiLFjx6rbhg0bJnx8fLSmffp90lX3X9elW7duiZEjRwpPT0+hVCqFr6+vGDdunMaPlaSkJPGvf/1LODs7CxsbGxEcHCx++eUXjef96/KeM2eOqFWrlrC3txf9+/cXWVlZoqCgQEyePFm4ubkJOzs7MXz4cFFQUKBVvz7LUx/VKtuT2bZtG+rWrYu2bdvq/Zi4uDhs2LABkyZNgkqlwhdffIHu3bvjxIkTaNKkCQDg3r17ePHFF6FQKDBx4kS4ublh586dGDVqFHJycjBlyhSd8162bBns7e0BAFFRUZV6bd9++y2GDRuG8PBwfPDBB8jPz8eyZcvQvn17xMfHw9fXF8CT7avt2rVD7dq1MXPmTN
jZ2eHHH39E3759sXnz5gq7xnl5efDy8kLv3r3h6OiIc+fOYenSpbh9+za2bdsmueZr165h69atGDBgAPz8/HDv3j2sWLECoaGhuHDhAmrVqiVpfvn5+QgNDcXt27cxduxY1KlTB0eOHEFUVBRSU1OxaNEiyTXqcvDgQezYsUOrXe7noCJFRUW4efMmatSoodG+Zs0aFBQUYMyYMVCpVHBxcdF72d6+fRudO3eGQqFAVFQU7Ozs8OWXX0KlUums4erVq/jXv/6FUaNGYdiwYfjqq68wfPhwtGzZEo0bNwYgfVkuXLgQFhYWmD59OrKzs/Hhhx8iIiICx48fV0+zZ88e9OrVC56enpg8eTJq1qyJixcv4pdffsHkyZPLfc/mzZuH2bNn47XXXsMbb7yB+/fv4/PPP0fHjh0RHx+P6tWr4/HjxwgPD0dhYSEiIyNRs2ZN3L59G7/88guysrLg5ORU4XLx8vLCggULNNp27NiBdevWabTNmTMHMTExCAsLw/jx43H58mUsW7YMJ0+exO+//w4rK6tyn2Pbtm3w9fVFhw4ddI7v2LEjfH19sX379gpr1eXbb7/FG2+8gTZt2mDMmDEAgHr16gF4shm1TZs2yMrKwpgxY9CwYUPcvn0bmzZtQn5+PpRKJe7du4e2bdsiPz8fkyZNQo0aNfD111+jT58+2LRpk9b3yIIFC2BjY4OZM2fi6tWr+Pzzz2FlZQULCws8ePAAc+bMwbFjx7B27Vr4+fnhvffeUz9Wn+WpN8mx8xfZ2dkCgHjllVf0fgz+f0L/8ccf6rbr168La2tr0a9fP3XbqFGjhKenp0hPT9d4/MCBA4WTk5PIz8/XaH/33XcFAI3pn/4FLIT+PZGHDx+K6tWri9GjR2tMd/fuXeHk5KTR3qVLFxEUFKSR9qWlpaJt27YiICDgGe+ItjfffFPY29urh6X0RAoKCjR6CEI8+aWtUqnE3Llz1W0xMTECgCgtLdWY9umeyPvvvy/s7OzEn3/+qTHdzJkzhaWlpfqXS2V7IsHBwaJHjx5aPRGpn4On+fj4iG7duon79++L+/fvi4SEBDFw4EABQERGRmrU7ujoqLWJQ99lGxkZKRQKhbr3IsSTnoSLi4vOngie+jWclpYmVCqVmDZtmrpN32VZ9n42atRI41ft4sWLBQCRmJgohBCiuLhY+Pn5CR8fH61f7X/9HDz9CzslJUVYWlqKefPmaTwmMTFRVKtWTd0eHx9f7uf0WfRdL9PS0oRSqRTdunXTeG+WLFkiAIivvvqq3OfIysrS6/uqT58+AoDIyckRQujfExGi/M1ZQ4cOFRYWFuLkyZNa48re+ylTpggA4tChQ+pxDx8+FH5+fsLX11f9esuWd5MmTcTjx4/V0w4aNEgoFArRo0cPjfmHhIRo1K/v8tRXpY7OysnJAQA4ODhIelxISAhatmypHq5Tpw5eeeUV7Nq1CyUlJRBCYPPmzejduzeEEEhPT1f/hYeHIzs7G6dPn9aYZ0FBAQDotTOwpKREY57p6enIz8/XmGbPnj3IysrCoEGDNKaztLREcHAw9u/fDwDIzMzEb7/9htdeew0PHz5UT5eRkYHw8HBcuXIFt2/ffmZN2dnZuHfvHvbt24ft27ejY8eOWtP8df5lf09TqVSwsLBQv86MjAzY29ujQYMGGu+Zu7s7AODWrVsV1rVx40Z06NABzs7OGs8bFhaGkpISHDx4UGP6/Px8rRpLSkoqfI4tW7bg5MmTWLhwoUa7nM+BLrt374abmxvc3NzQrFkzbNy4Ef/+97/xwQcfaEzXv39/uLm5qYelLNtff/0VISEhaN68ufrxLi4uiIiI0FlTYGCgxq9hNzc3NGjQANeuXVO36bssy4wYMQJKpVI9XDb/snnGx8cjOTkZU6ZM0fqlWdEhvVu2bEFpaSlee+01jWVQs2ZNBAQEqNeFsp7Grl27tNYnQ9m7dy8eP36MKVOmqN8bABg9ejQcHR0r7EE8fPgQwL
O/r8rGl32/VVZpaSm2bt2K3r17o1WrVlrjy977HTt2oE2bNmjfvr16nL29PcaMGYOUlBRcuHBB43FDhw7V6HUFBwd
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAZEAAAIjCAYAAADCyya0AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABQjklEQVR4nO3dd1gUV/828HvB3aVIEZWmqKgoosb+KKKxoYi9RGOCj/VRE7tYSVTEaNSYqFGJxsSYmNhijTE2xK5I7A272AULAgKKlPP+4cv8XBeQGRZZ4P5cF9fFnDk7+91lZ2/OVJUQQoCIiEgBk/wugIiICi6GCBERKcYQISIixRgiRESkGEOEiIgUY4gQEZFiDBEiIlKMIUJERIoxRIiISDGGCJERU6lUmDZtWn6XQZSlQhkiN27cwJAhQ1CxYkWYmZnB2toaXl5e+P777/HixYv8Lo+KsPT0dKxcuRKtW7dGqVKloFarYW9vjzZt2mDZsmVITk7O7xKLhKdPn2L8+PGoWrUqzMzMYGdnBx8fH2zbti1Xy129ejUWLFhgmCILiGL5XYCh/fPPP+jRowe0Wi369OmDGjVq4NWrVzh8+DDGjx+PixcvYtmyZfldJhVBL168QNeuXbFr1y40btwY48aNg4ODA2JiYnDgwAEMHToU4eHhWL58eX6XWqhduXIFrVq1wuPHj9G/f3/Ur18fsbGxWLVqFTp27Ihx48Zh7ty5ipa9evVqXLhwAaNHjzZs0cZMFCI3b94UxYsXF+7u7uLBgwd6869duyYWLFiQD5URCTFkyBABIMvP4NWrV0VwcLBOGwARGBj4HqorGl69eiVq1KghLCwsxLFjx3Tmpaamio8//lgAEGvXrlW0/Pbt24vy5csboNKCo1CFyGeffSYAiCNHjuSoPwAxbNgw8ccff4gqVaoIrVYr6tatKw4cOKDX9969e6J///7C3t5eaDQa4eHhIZYvX57pcgMDAwUAvZ9mzZrp9GvWrJmoXr263uPnzp0rAIjIyEid9u3bt4smTZoICwsLUbx4cdGuXTtx4cIFvcdfunRJdO/eXZQoUUJotVpRr1498ddff73z/bh8+bJo0aKFcHBwEBqNRpQtW1YMGTJEPH36VOqzb98+AUCsX79e7/GWlpaib9++0vTTp0/F2LFjRY0aNYSlpaWwsrISbdu2FWfOnMn0/Xpb+fLldZYnhBDPnj0To0aNEmXLlhUajUZUqlRJzJ49W6SlpUl9IiMjBQAxd+5cvWVWr15d5++Q8Xr27dun069du3aZfoHL+Ry86c6dO8LU1FS0bdv2nX3f9HYNffv2zfRLKqv38PfffxcNGjQQ5ubmwtbWVjRt2lTs2rVLp09wcLDw8PAQGo1GODk5iaFDh4pnz57p9Ll69aro1q2bcHBwEFqtVpQpU0Z8/PHHIjY2Vu/56tatK8zMzESJEiXExx9/LO7cufPO15nVOpPxs2LFCp3+oaGh0rpgY2MjOnXqJCIiIt75PGvWrBEAxPTp0zOdHxsbK2xtbYW7u7vUtmLFikzXx7c/O82aNdOr+82/1YsXL0RgYKBwc3MTWq1WODo6iq5du4rr169LfRISEoS/v7/0+a5SpYqYO3euSE9P13nujO+uP//8U1SrVk2YmZmJRo0aiXPnzgkhhFi6dKmoVKmS0Gq1olmzZnq1CyHEsWPHhI+Pj7C2thbm5ubiww8/FIcPH37ne/i2QrU56++//0bFihXRuHHjHD/mwIEDWLduHUaOHAmtVosffvgBbdu2xb///osaNWoAAKKjo9GoUSOoVCoMHz4cpUuXxo4dOzBw4EDEx8dnOXRdsmQJihcvDgAICAjI1Wv7/fff0bdvX/j4+GDOnDlISkrCkiVL0KRJE5w+fRoVKlQAAFy8eBFeXl4oU6YMJk2aBEtLS/z555/o0qULNm7ciK5du2b5HImJiShbtiw6duwIa2trXLhwAcHBwbh//z7+/vtv2TXfvH
kTW7ZsQY8ePeDq6oro6Gj8+OOPaNasGSIiIuDs7CxreUlJSWjWrBnu37+PIUOGoFy5cjh69CgCAgLw8OFDg22LPnjwILZv367XrvRzAAA7duxAWloaevfubZAacyIoKAjTpk1D48aNMX36dGg0GoSHh2Pv3r1o06YNAGDatGkICgqCt7c3Pv/8c1y5cgVLlizB8ePHceTIEajVarx69Qo+Pj5ITk7GiBEj4OjoiPv372Pbtm2IjY2FjY0NAGDmzJmYMmUKevbsif/97394/PgxFi1ahA8//BCnT5+Gra3tO2t+c50BgMjISEydOlWnz549e+Dr64uKFSti2rRpePHiBRYtWgQvLy+cOnVKWhcyk/E57tOnT6bzbWxs0LlzZ/z222+4fv06Kleu/M6aM3z55ZeIi4vDvXv3MH/+fACQXktaWho6dOiA0NBQ9OrVC6NGjcLz588REhKCCxcuoFKlShBCoFOnTti3bx8GDhyI2rVrY9euXRg/fjzu378vLTPDoUOHsHXrVgwbNgwAMGvWLHTo0AETJkzADz/8gKFDh+LZs2f45ptvMGDAAOzdu1d67N69e+Hr64t69eohMDAQJiYmWLFiBVq2bIlDhw7hP//5T45fd6EZicTFxQkAonPnzjl+DP7/fwsnTpyQ2m7fvi3MzMxE165dpbaBAwcKJycn8eTJE53H9+rVS9jY2IikpCSd9i+++EIA0On/9n/AQuR8JPL8+XNha2srBg0apNMvKipK2NjY6LS3atVK1KxZU7x8+VJqS09PF40bNxZubm7veEf0DR06VBQvXlyaljMSefnypc4IQYjXowStVqvzn2BQUJAAoPff1tsjka+++kpYWlqKq1ev6vSbNGmSMDU1lf7jze1IpGHDhsLX11dvFCD3c/CmMWPGCAB6o7Dk5GTx+PFj6eftZb9dQ05HIteuXRMmJiaia9euen+DjPf50aNHQqPRiDZt2uj0Wbx4sQAgfvnlFyGEEKdPn87yb57h1q1bwtTUVMycOVOn/fz586JYsWJ67VnV//jxY53248eP641EateuLezt7XVGyGfPnhUmJiaiT58+2T5P7dq1hY2NTbZ95s2bJwCIrVu3CiFyPhIRIuvNWb/88osAIObNm6c3L+PvsWXLFgFAzJgxQ2f+Rx99JFQqlc6IBYDQarU6Nf34448CgHB0dBTx8fFSe0BAgE796enpws3NTfj4+Oisc0lJScLV1VW0bt062/fnbYXm6Kz4+HgAgJWVlazHeXp6ol69etJ0uXLl0LlzZ+zatQtpaWkQQmDjxo3o2LEjhBB48uSJ9OPj44O4uDicOnVKZ5kvX74EAJiZmb3z+dPS0nSW+eTJEyQlJen0CQkJQWxsLD755BOdfqampmjYsCH27dsHAIiJicHevXvRs2dPPH/+XOr39OlT+Pj44Nq1a7h///47a4qLi0N0dDRCQ0Pxzz//4MMPP9Tr8+byM37eptVqYWJiIr3Op0+fonjx4qhatarOe2Zvbw8AuHfvXrZ1rV+/Hk2bNkWJEiV0ntfb2xtpaWk4ePCgTv+kpCS9GtPS0rJ9jk2bNuH48eOYPXu2TruSz8GbMj6fb/6XDQDbt29H6dKlpZ/y5ctnW19ObdmyBenp6Zg6dar0N8igUqkAvP6P/tWrVxg9erROn0GDBsHa2hr//PMPAEgjjV27dul9NjNs2rQJ6enp6Nmzp8574+joCDc3N+kzmlsPHz7EmTNn0K9fP9jZ2UntH3zwAVq3bp3pCPJNz58/f+d3RMb8jL+ZIWzcuBGlSpXCiBEj9OZl/D22b98OU1NTjBw5Umf+2LFjIYTAjh07dNpbtWqlM+pq2LAhAKB79+46rzGj/ebNmwCAM2fO4Nq1a/j000/x9OlT6W+VmJiIVq1a4eDBg0hPT8/xays0m7Osra0BvP6QyOHm5qbXVqVKFSQlJeHx48cwMTFBbGwsli1bluVRXY8ePdKZfvLkCdRqNSwsLN75/JcvX0
bp0qWz7XPt2jUAQMuWLTOdn/Har1+/DiEEpkyZgilTpmRZa5kyZbJ9Ph8fH4SHhwMA2rZti3Xr1un1GTBgQLbLAF4
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaIAAAIjCAYAAABBDx+PAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABUgElEQVR4nO3dZ1gUZ/8+/HPRZelYKSpNsXfRKJZgQYixh2hi8C+WGAtiQU3kvqOINZpiidgSojGJicH209g1ig2NIfYWC4oNLAgISN3reeHD3K4LyMDi6HJ+joND55rZme/uzuy5M3PtjEoIIUBERKQQE6ULICKiso1BREREimIQERGRohhERESkKAYREREpikFERESKYhAREZGiGERERKQoBhERESmKQUT0/5s+fTpUKpUiy+7YsSM6duyoyLKJlGawILp27RpGjBiBmjVrwszMDDY2NmjXrh0WLVqEp0+fGmoxRLK4urpCpVJJf2ZmZqhduzYmT56MxMREpcsr0I0bN3TqLleuHJydndG3b1+cOnVK6fIIQFxcHEaOHAlXV1doNBrY2dmhT58+OHLkSInmu3TpUqxevdowRb4hyhtiJtu2bUO/fv2g0WgwaNAgNGrUCFlZWTh8+DAmT56M8+fPY+XKlYZYFJFszZo1w8SJEwEAGRkZiImJwcKFCxEVFYW//vpL4eoKN2DAALz77rvIzc3FxYsXsWzZMuzYsQPHjh1Ds2bNlC6vzDpy5AjeffddAMDHH3+MBg0aID4+HqtXr0aHDh2waNEiBAUFFWveS5cuRZUqVTB48GADVvyaEyV0/fp1YWVlJerVqyfu3r2rN/7KlSti4cKFJV0MUbG4uLiI7t2767VPmjRJABD//vuv1BYaGioMsEkUi5eXl/Dy8pKGY2NjBQDx5Zdf6ky3ZcsWAUB88sknBc4rNTW1tMosNW9SzYmJicLBwUHY29uLq1ev6oxLT08XHTp0ECYmJuLIkSPFmn/Dhg111oWyoMSH5ubPn4/U1FRERETA0dFRb7y7uzvGjRsnDatUKowZMwa//PIL6tatCzMzM3h4eODgwYN6j71z5w6GDh0Ke3t7aDQaNGzYED/88EO+deQd33/x78Xj7h07dkSjRo30Hv/VV19BpVLhxo0bOu07duxAhw4dYGlpCWtra3Tv3h3nz5/Xe/ylS5fw/vvvo1KlSjAzM0PLli2xZcuWfGt93uXLl9G5c2c4ODhAo9HAyckJI0eO1DlsdODAAahUKqxfv17v8VZWVjrfnBITEzFp0iQ0btwYVlZWsLGxQbdu3XD69Ol8X68Xubq66n0TS0pKwvjx4+Hk5ASNRgN3d3fMmzcPWq1WmibvUNJXX32lN89GjRrpvA95z+fAgQM603Xv3h0qlQrTp0/XaZezHhSVg4MDAKB8+cIPCuTk5GDmzJmoVasWNBoNXF1d8Z///AeZmZl60y5duhQNGzaERqNBtWrVEBgYiKSkJL3pVq5ciVq1asHc3BxvvfUWDh06VOS6O3fuDACIjY0FAKxevRoqlQpRUVEYPXo07OzsUKNGDWn6oqy/8fHxGDJkCGrUqAGNRgNHR0f07t1bZ1v4+++/4evriypVqsDc3Bxubm4YOnSoNL6g9zRvvXj+UNPgwYNhZWWFa9eu4d1334W1tTX8/f0BAFqtFgsXLkTDhg1hZmYGe3t7jBgxAo8fP37pazN48OB8PwPy/l6sLTIyEh4eHjA3N0eVKlUwcOBA3Llz56XLWbFiBeLj4/Hll1+iVq1aOuPMzc3x448/QqVSYcaMGVJ7Qdtb3vuX91q7urri/PnziIqKyvczLCkpCRMmTJAOB9aoUQODBg3Cw4cPpWnu37+PYcOGwd7eHmZmZmjatCl+/PFHneU+v72Gh4ejZs2asLCwgI+PD27dugUhBGbOnIkaNWrA3NwcvXv3zvdQdlE/H1+mxIfmtm7dipo1a6Jt27ZFfkxUVBTWrVuHsWPHQqPRYO
nSpXjnnXfw119/SSGRkJCANm3aSMFVtWpV7NixA8OGDUNKSgrGjx+f77yXLVsGKysrAEBISEiJnttPP/2EgIAA+Pr6Yt68eUhPT8eyZcvQvn17nDx5Eq6urgCA8+fPo127dqhevTqmTJkCS0tL/P777+jTpw82bNiAvn37FriMtLQ01KhRAz179oSNjQ3OnTuH8PBw3LlzB1u3bpVd8/Xr17F582b069cPbm5uSEhIwIoVK+Dl5YULFy6gWrVqsuaXnp4OLy8v3LlzByNGjICzszOOHj2KkJAQ3Lt3DwsXLpRdY34OHjyI7du367UXdz14XnZ2trShZmRk4OTJk/jmm2/w9ttvw83NrdDHfvzxx/jxxx/x/vvvY+LEiTh+/Djmzp2LixcvYtOmTdJ006dPR1hYGLy9vTFq1ChcvnwZy5Ytw4kTJ3DkyBGo1WoAQEREBEaMGIG2bdti/PjxuH79Onr16oVKlSrBycnppc/l2rVrAIDKlSvrtI8ePRpVq1bFtGnTkJaWBqDo66+fnx/Onz+PoKAguLq64v79+9izZw/i4uKkYR8fH1StWhVTpkxBhQoVcOPGDWzcuPGl9RYkJycHvr6+aN++Pb766itYWFgAAEaMGIHVq1djyJAhGDt2LGJjY7FkyRKcPHlS53UsiEajwffff6/TduLECSxevFinLW8ZrVq1wty5c5GQkIBFixbhyJEjOHnyJCpUqFDgMrZu3QozMzP0798/3/Fubm5o3749/vzzTzx9+hTm5uZFeEWeWbhwIYKCgmBlZYX//ve/AAB7e3sAQGpqKjp06ICLFy9i6NChaNGiBR4+fIgtW7bg9u3bqFKlCp4+fYqOHTvi6tWrGDNmDNzc3BAZGYnBgwcjKSlJZ6cAAH755RdkZWUhKCgIiYmJmD9/Pvr374/OnTvjwIED+Oyzz3D16lV8++23mDRpks4XwKKuX0VSkt2p5ORkAUD07t27yI8BIACIv//+W2q7efOmMDMzE3379pXahg0bJhwdHcXDhw91Hv/hhx8KW1tbkZ6ertP+n//8RwDQmT6/XVwvLy/RsGFDvbq+/PJLAUDExsYKIYR48uSJqFChghg+fLjOdPHx8cLW1lanvUuXLqJx48YiIyNDatNqtaJt27aidu3aL3lF9I0ePVpYWVlJw/v37xcARGRkpN60lpaWIiAgQBrOyMgQubm5OtPExsYKjUYjZsyYIbWFhYUJAEKr1epM6+LiojO/mTNnCktLS51DWEIIMWXKFFGuXDkRFxcnLQP5HEoSQv99yHs++/fvl9pat24tunXrJgCI0NBQqV3uevAiFxcXaZ17/q9du3Z683zx0NypU6cEAPHxxx/rTJd3WO/PP/8UQghx//59YWpqKnx8fHRe+yVLlggA4ocffhBCCJGVlSXs7OxEs2bNRGZmpjTdypUrBYB8D82FhYWJBw8eiPj4eHHgwAHRvHlzAUBs2LBBCCHEqlWrBADRvn17kZOTIz2+qOvv48ePC3zf8mzatEkAECdOnChwmvze0+efx6pVq6S2gIAAAUBMmTJFZ9pDhw4JAOKXX37Rad+5c2e+7S8KCAgQlpaWeu2RkZE6teW9D40aNRJPnz6Vpvvjjz8EADFt2rRCl1OhQgXRtGnTQqcZO3asACDOnDkjhCj4sG/e+5f3uSNEwYfmpk2bJgCIjRs36o3L244XLlwoAIiff/5ZGpeVlSU8PT2FlZWVSElJEUL8732pWrWqSEpKkqYNCQkRAETTpk1Fdna21D5gwABhamoqfcbJ+XwsihIdmktJSQEAWFtby3qcp6cnPDw8pGFnZ2f07t0bu3btQm5uLoQQ2LBhA3r27AkhBB4+fCj9+fr6Ijk5Gf/884/OPDMyMgAAZmZmL11+bm6uzjwfPnyI9PR0nWn27NmDpKQkDBgwQGe6cuXKoXXr1ti/fz+AZ4fC/vzzT/Tv3x9PnjyRpnv06BF8fX1x5cqVIu3uJycnIyEhAfv27cO2bd
vw9ttv603z/Pzz/l6k0WhgYmIiPc9Hjx7BysoKdevW1XnN7OzsAAC3b98utK7IyEh06NABFStW1Fmut7c3cnNz9Q6
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaIAAAIjCAYAAABBDx+PAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABLK0lEQVR4nO3dd1QU1/8+8Gel96LUiIBgxZZootjAiKJRI4oaSyKW2CUa9WM0iUGsUaNRYzexJRbs0dhFxIYmscYaRLCDBQEBQYH7+yM/9uu6C+7A4uDyvM7hHObO7Ox7y+yzd+bujEIIIUBERCSTcnIXQEREZRuDiIiIZMUgIiIiWTGIiIhIVgwiIiKSFYOIiIhkxSAiIiJZMYiIiEhWDCIiIpIVg4jo/5s4cSIUCgUePXpU6HJ9+vSBh4dHidayatUqKBQK/P33369d1t/fH/7+/pLWn5CQAIVCgR9++KGIFRLpjk6DKC4uDoMGDULlypVhamoKa2trNGnSBPPmzcOzZ890eVdEWnv+/DnmzZuHd999F9bW1rC1tYWPjw8GDhyIq1evvpEa8j/4tflLSEh4IzVR8R0/fhydOnWCk5MTTExM4OHhgUGDBuHWrVtFXmdmZiYmTpyIw4cP667QUs5QVyvatWsXunbtChMTE/Tu3Ru1atXC8+fPcezYMfzvf//DpUuXsGzZMl3dHZHWgoODsWfPHvTo0QMDBgzAixcvcPXqVfzxxx9o3LgxqlevLml9y5cvR15enqTbODg44Ndff1Vpmz17Nu7cuYMff/xRbVkp9u/fL2l50o2ffvoJI0aMQOXKlREaGgoXFxdcuXIFP//8MyIiIrB79240btxY8nozMzMRHh4OAJJ7um8rnQRRfHw8unfvDnd3dxw6dAguLi7KecOGDcP169exa9cuXdwVkSR//fUX/vjjD0ydOhVff/21yrwFCxYgJSVF8jqNjIwk38bCwgKffvqpStuGDRvw5MkTtXapjI2Ni3V7ku748eMYOXIkmjZtir1798Lc3Fw5b8iQIWjSpAm6dOmCS5cuwc7OTsZK3w462TU3c+ZMpKen45dfflEJoXze3t4YMWKEclqhUGD48OFYu3YtqlWrBlNTU9SvXx9HjhxRu+3du3fRr18/ZdfXx8cHK1as0FhH/j7+V/9e/Vbh7++PWrVqqd3+hx9+0LhrZM+ePWjWrBksLCxgZWWFdu3a4dKlS2q3v3r1Krp06QJ7e3uYmpqiQYMG2LFjh8ZaX3bt2jV8+OGHcHZ2homJCdzc3DB48GAkJycrlzl8+DAUCgU2b96sdntLS0v06dNHOZ2cnIwxY8agdu3asLS0hLW1Ndq2bYvz589rfL5e5eHhobI+AEhJScHIkSPh5uYGExMTeHt7Y8aMGSo9g8KOO9SqVUvldch/PK/ufmjXrh0UCgUmTpyo0i7lffCyuLg4AECTJk3U5hkYGKB8+fKF3v7mzZvw9vZGrVq1kJSUBED9GNHLj3vZsmXw8vKCiYkJ3n//ffz111+vrbEw2dnZGDVqFBwcHGBhYYFOnTrh4cOHKstoOkaUlZWFiRMnomrVqjA1NYWLiws6d+6sfD40EUJg4MCBMDY2xtatWwH837Gq48ePv7YOQLttJTExEX379kXFihVhYmICFxcXdOzYUWW7+/vvvxEYGIgKFSrAzMwMnp6e6Nev32ufLw8Pj0J3e74sJycHkydPVr5eHh4e+Prrr5Gdnf3a+5k8eTIUCgVWr16tEkIA4OXlhZkzZ+L+/ftYunSpsr2gY3kvv58SEhKUPeLw8HBl3S9vD1evXkW3bt3g4OAAMzMzVKtWDd98843KOs+ePYu2bdvC2toalpaWaNmyJU6ePKmyTP5re+zYMXzxxRdwcHCAra0tBg0ahOfPnyMlJQW9e/eGnZ0d7OzsMHbsWLx6sYa8vDzMnTsXPj4+MDU1hZOTEwYNGoQnT5689jl8mU56RDt37kTlypUldUOjo6
MRERGBL774AiYmJli0aBHatGmDP//8UxkSSUlJaNSokTK4HBwcsGfPHvTv3x9paWkYOXKkxnUvXrwYlpaWAIDx48cX67H9+uuvCAkJQWBgIGbMmIHMzEwsXrwYTZs2xdmzZ5VvoEuXLqFJkyZ45513MG7cOFhYWGDjxo0ICgrCli1b0KlTpwLvIyMjAxUrVkSHDh1gbW2NixcvYuHChbh79y527twpueYbN25g+/bt6Nq1Kzw9PZGUlISlS5fCz88Ply9fhqurq6T1ZWZmws/PD3fv3sWgQYNQqVIlnDhxAuPHj8f9+/cxd+5cyTVqcuTIEezevVutvajvAwBwd3cHAKxduxZNmjSBoaH2b/m4uDh8+OGHsLe3x4EDB1ChQoVCl1+3bh2ePn2KQYMGQaFQYObMmejcuTNu3LhRpF4UAISGhsLOzg5hYWFISEjA3LlzMXz4cERERBR4m9zcXLRv3x6RkZHo3r07RowYgadPn+LAgQO4ePEivLy8NN6mX79+iIiIwLZt29CuXTvJdWi7rQQHB+PSpUsIDQ2Fh4cHHjx4gAMHDuDWrVvK6datW8PBwQHjxo2Dra0tEhISlOH4OvXq1cPo0aNV2tasWYMDBw6otH3++edYvXo1unTpgtGjR+PUqVOYPn06rly5gm3bthW4/szMTERGRqJZs2bw9PTUuMwnn3yCgQMH4o8//sC4ceO0qhv4b7fs4sWLMWTIEHTq1AmdO3cGANSpUwcAcOHCBTRr1gxGRkYYOHAgPDw8EBcXh507d2Lq1KkA/vssatasGaytrTF27FgYGRlh6dKl8Pf3R3R0NBo2bKhyn6GhoXB2dkZ4eDhOnjyJZcuWwdbWFidOnEClSpUwbdo07N69G7NmzUKtWrXQu3dv5W0HDRqEVatWoW/fvvjiiy8QHx+PBQsW4OzZszh+/Lj273tRTKmpqQKA6Nixo9a3ASAAiL///lvZdvPmTWFqaio6deqkbOvfv79wcXERjx49Url99+7dhY2NjcjMzFRp//rrrwUAleV9fHyEn5+fynJ+fn7Cx8dHra5Zs2YJACI+Pl4IIcTTp0+Fra2tGDBggMpyiYmJwsbGRqW9ZcuWonbt2iIrK0vZlpeXJxo3biyqVKnymmdE3dChQ4WlpaVyOioqSgAQmzZtUlvWwsJChISEKKezsrJEbm6uyjLx8fHCxMRETJo0SdkWHh4uAIi8vDyVZd3d3VXWN3nyZGFhYSH+/fdfleXGjRsnDAwMxK1bt5T3AUDMmjVLrcZXX4f8xxMVFaVsa9iwoWjbtq0AIMLCwpTtUt8HL8vLyxN+fn4CgHBychI9evQQCxcuFDdv3lRbNiwsTAAQDx8+FFeuXBGurq7i/fffF8nJySrLhYSECHd3d+V0/uMuX768yrK///67ACB27typsbZ27dqprOdlK1euFABEQECAyuvz5ZdfCgMDA5GSkqJs8/PzU3luV6xYIQCIOXPmaHw+Xq551qxZ4sWLF+KTTz4RZmZmYt++fUWqQ9tt5cmTJwW+R/Jt27ZNABB//fVXgcsUxN3dXbRr106tfdiwYeLlj7tz584JAOLzzz9XWW7MmDECgDh06FCB95F/2xEjRhRaS506dYS9vb1y+tXXKd+r76eHDx+qbQP5mjdvLqysrNTevy+/NkFBQcLY2FjExcUp2+7duyesrKxE8+bNlW35r21gYKDK7X19fYVCoRCDBw9WtuXk5IiKFSuq1H/06FEBQKxdu1allr1792psL0yxd82lpaUBAKysrCTdztfXF/Xr11dOV6pUCR07dsS+ffuQm5sLIQS2bNmCDh06QAiBR48eKf8CAwORmpqKM2fOqKwzKysLAGBqavra+8/NzVVZ56NHj5CZmamyzIEDB5CSkoIePXqoLGdgYICGDRsiKioKwH+7wg4dOoRu3brh6dOnyuUeP36MwMBAxMbG4u7du6+tKTU1FUlJSYiMjMSuXbvQvHlztWVeXn/+36tMTExQrl
w55eN8/PgxLC0tUa1aNZXnzNHREQBw586dQuvatGkTmjVrBjs7O5X7DQgIQG5urtou1czMTLUac3NzC72PrVu34q+
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAIjCAYAAAAOUhxfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABCGElEQVR4nO3dd1gU5/428HtBliYgFpoiIFawxBbEEhuKxoZyNB71iDUWxIaNJEow9kRjVGKLMTFHjd2Yo0Y9qKgRy7F3EXsBCyICgsA+7x++7M91F2RglxXm/lwX17XzzOzMd5edvfeZqhBCCBARkWyZGLsAIiIyLgYBEZHMMQiIiGSOQUBEJHMMAiIimWMQEBHJHIOAiEjmGARERDLHICAikjkGAZEMHDx4EAqFAgcPHlS3DRgwAO7u7kariT4cJSYI4uLiMGzYMFSpUgUWFhawtbVFs2bN8MMPP+DVq1fGLo9kyt3dHZ07dzZ2GSVSamoqvvnmG9StWxdWVlaws7NDixYtsGbNGhTmyjm7du3C119/rb9Ci4FSxi5AH3bu3ImePXvC3Nwc/fv3R+3atfH69WscOXIEEydOxKVLl7BixQpjl0n0QVm5ciVUKpWxyyiQhIQEtG3bFleuXEHv3r0xatQopKenY8uWLQgKCsKuXbuwdu1amJqaSp73rl27EBkZKaswKPZBcOvWLfTu3Rtubm7Yv38/nJ2d1eOCg4Nx48YN7Ny504gVEn2YzMzMjF1CgQUFBeHKlSvYtm0bunbtqm4fPXo0Jk6ciO+++w7169fH5MmTjVhlMSKKueHDhwsA4u+//87X9ABEcHCw+Pe//y2qV68uzM3NRYMGDUR0dLTWtPfv3xcDBw4UDg4OQqlUCi8vL7Fq1Sqd8w0PDxcAtP5atmypMV3Lli2Ft7e31vO//fZbAUDcunVLo33Xrl2iefPmwsrKSpQuXVp8+umn4uLFi1rPv3LliggMDBT29vbC3NxcNGzYUPzxxx/vfT+uXr0qWrduLRwdHYVSqRSVKlUSw4YNE8+ePVNPc+DAAQFAbNq0Sev51tbWIigoSD387NkzERoaKmrXri2sra2FjY2N6NChgzh79qzO9+tdbm5uGvMTQojnz5+LMWPGiEqVKgmlUik8PT3FnDlzRHZ2tnqaW7duCQDi22+/1Zqnt7e3xv8h5/UcOHBAY7pPP/1UABDh4eEa7VI+B7peT6dOnXTWuXz5clGlShWhVCpFo0aNxIkTJzSe++jRIzFgwABRsWJFoVQqhZOTk+jatavGZ0RXvTnLfft91PWag4KChJubW4Fq02X16tU614Gcv3frPH36tOjQoYOwsbER1tbWok2bNiImJua9y4mJiREAxKBBg3SOz8zMFNWqVRP29vYiLS0t19f/9mtevXq1+j3RVXuO7OxssXDhQlG7dm1hbm4uypcvL/z9/cXJkyc1lj99+nT1++fm5ibCwsJEenq6xrJzPhsHDhwQDRs2FBYWFqJ27drqGrds2aJeToMGDcTp06e1XmtB1/t3FfsewZ9//okqVaqgadOm+X5OdHQ0NmzYgNGjR8Pc3Bw//vgjOnTogBMnTqB27doA3nQ9mzRpAoVCgVGjRqFChQrYvXs3Bg8ejOTkZIwdO1bnvJcuXYrSpUsDAMLCwgr12n777TcEBQXB398fc+fORVpaGpYuXYrmzZvjzJkz6h19ly5dQrNmzVCxYkVMmTIF1tbW2LhxIwICArBlyxZ0794912WkpqaiUqVK6NKlC2xtbXHx4kVERkbiwYMH+PPPPyXXfPPmTWzfvh09e/aEh4cHEhISsHz5crRs2RKXL1+Gi4uLpPmlpaWhZcuWePDgAYYNG4bKlSvj6NGjCAsLw6NHj7Bw4ULJNepy6NAh7Nq1S6u9oJ+D91m3bh1evnyJYcOGQaFQYN68eejRowdu3ryp/qUeGBiIS5cuISQkBO7u7nj8+DH27duHu3fvGnQnb35qy8v06d
Ph4eGhHk5JScGIESM0prl06RJatGgBW1tbTJo0CWZmZli+fDlatWqF6Oho+Pj45Dr/nM9l//79dY4vVaoU+vTpg4iICPz999/w8/PLz8sGAAwbNgwPHz7Evn378Ntvv2mNHzx4MH755Rd07NgRQ4YMQVZWFg4fPoxjx46hUaNGAIAhQ4bg119/xT/+8Q+Ehobi+PHjmD17troH87YbN26gT58+GDZsGPr164fvvvsOXbp0wbJly/DFF19g5MiRAIDZs2ejV69euHbtGkxMTNTvYUHXey2So+MD8uLFCwFAdOvWLd/Pwf9P+P/973/qtjt37ggLCwvRvXt3ddvgwYOFs7OzePr0qcbze/fuLezs7NS/NHJ88cUXAoDG9O/+EhUi/z2Cly9fijJlyoihQ4dqTBcfHy/s7Ow02tu2bSvq1Kmj8YtDpVKJpk2bimrVqr3nHdE2cuRIUbp0afWwlB5Benq6xi91Id786jI3NxfTp09Xt0VERAgAQqVSaUz77i/Zb775RlhbW4vr169rTDdlyhRhamoq7t69q14GCtEj8PHxER07dtT65Sr1c/Cu3HoE5cqVE4mJier2P/74QwAQf/75pxDiTS8ot9fztnfrfXu5Be0RvK+23OT0CN7+dSyEEE+ePNGqMyAgQCiVShEXF6due/jwobCxsRGffPJJnssJCAgQAMTz589znWbr1q0CgFi0aJEQIv89AiGECA4O1tlb3b9/vwAgRo8erTUu53N89uxZAUAMGTJEY/yECRMEALF//351m5ubmwAgjh49qm7bs2ePACAsLS3FnTt31O3Lly/Xql+f632xPmooOTkZAGBjYyPpeb6+vmjYsKF6uHLlyujWrRv27NmD7OxsCCGwZcsWdOnSBUIIPH36VP3n7++PFy9e4PTp0xrzTE9PBwBYWFi8d/nZ2dka83z69CnS0tI0ptm3bx+SkpLwz3/+U2M6U1NT+Pj44MCBAwCAxMRE7N+/H7169cLLly/V0z179gz+/v6IjY3FgwcP3lvTixcvkJCQgKioKOzcuROffPKJ1jRvzz/n713m5ubqXyzZ2dl49uwZSpcujRo1ami8Zw4ODgCA+/fv51nXpk2b0KJFC9jb22ss18/PD9nZ2Th06JDG9GlpaVo1Zmdn57mMrVu34uTJk5gzZ45Ge0E+B/n12Wefwd7eXj3cokULAG96VABgaWkJpVKJgwcP4vnz5wVaRkG9r7bCys7Oxt69exEQEIAqVaqo252dndGnTx8cOXJEvW7r8vLlSwB5r/c54/Kaj1RbtmyBQqFAeHi41jiFQgEA6l7l+PHjNcaHhoYCgNb+Si8vL/j6+qqHc3pCbdq0QeXKlbXac/4H+lrvcxTrTUO2trYA/u+DkV/VqlXTaqtevTrS0tLw5MkTmJiYICkpCStWrMj1aKPHjx9rDD99+hRmZmawsrJ67/KvXr2KChUq5DlNbGwsgDcfCF1yXvuNGzcghMDUqVMxderUXGutWLFinsvz9/fH8ePHAQAdOnTAhg0btKYZNGhQnvMAAJVKhR9++AE//vgjbt26pfElXK5cOfVjX19fKBQKhIWFYcaMGerNae8exRIbG4vz58/n+n69+38IDw/XuaI6OjrqfH52dja++OIL9O3bF3Xr1tUY9+TJE8mfg/x6eyUHoP7izfnSNzc3x9y5cxEaGgpHR0c0adIEnTt3Rv/+/eHk5FSgZeqrtsJ68uQJ0tLSUKNGDa1xtWrVgkqlwr179+Dt7a3z+Tlf8i9fvkSZMmV0TpOfsJAqLi4OLi4uKFu2bK7T3LlzByYmJqhatapGu5OTE8qUKYM7d+5otL/7XtvZ2QEAXF1ddbbn/A/0td7nKPZB4OLigosXL+p1vjlfRv369UNQUJDOad790rh9+zYqV66s/mWQF3d3d6xcuVKjbdOmTRpfNjk1/PbbbzpX/FKlSmlMN2HCBPj7++tc3rsfSl0WL16Mp0+f4vLly5g9ezaGDx+Of//73x
rTTJs2Tf3rMEeXLl00hmfNmoWpU6di0KBB+Oabb1C2bFmYmJhg7NixGl/y9erVQ3h4OCIiIrB27dpc61KpVGjXrh0
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAIjCAYAAAAJPAAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABF3ElEQVR4nO3deVxU1f8/8NegMKwDIrIJAoorLpWaoiaoKJKaGh8rl6+4ZFlIrqlUhphG2oYVkZZpi5i5fix3TVATrUxBJU1wV8AFAQHZz+8Pf8zHcQDnDoMD19fz8eAh99xz77xnvPfFnTN37lUIIQSIiEh2TIxdABER1Q4GPBGRTDHgiYhkigFPRCRTDHgiIpliwBMRyRQDnohIphjwREQyxYAnIpIpBjwRkUw9tgGflpaGV199Fc2bN4e5uTlUKhV69uyJpUuX4u7du8Yujx5Tnp6eUCgU6h9zc3O0bNkSb775JrKysjT6zp8/HwqFAiYmJrh8+bLWunJzc2FhYQGFQoEpU6ao2y9cuACFQoGPPvqo1p+PFEII/PDDD+jduzfs7OxgaWmJDh06YMGCBcjPz9d7vSkpKZg/fz4uXLhguGLriYbGLsAYtm7dihEjRkCpVGLs2LFo3749iouLcfDgQbz55ps4deoUli9fbuwy6TH1xBNPYObMmQCAwsJCHD16FNHR0UhISMAff/yh1V+pVGLNmjWYPXu2RvvGjRsfSb2GUFZWhlGjRuHnn3/GM888g/nz58PS0hIHDhxAZGQk1q1bhz179sDJyUnyulNSUhAZGQl/f394enoavvi6TDxmzp07J6ytrUWbNm3EtWvXtOafPXtWREdHG6EyIiE8PDzEoEGDtNpnzZolAIh///1X3RYRESEAiOeff1488cQTWsv0799fBAcHCwAiNDRU3X7+/HkBQHz44Ye18yT08P777wsAYtasWVrztmzZIkxMTMTAgQP1Wve6desEALFv374aVln/PHZDNEuWLEFeXh5WrFgBFxcXrfne3t6YOnWqerri7e3q1avRunVrmJubo3Pnzti/f7/WslevXsWECRPg5OQEpVIJHx8ffPvtt5XWUfH2+sEff39/jX7+/v5o37691vIfffQRFAqF1tvO7du345lnnoGVlRVsbGwwaNAgnDp1Smv506dP4z//+Q/s7e1hbm6OLl26YMuWLZXWer8zZ86gb9++cHZ2hlKphLu7OyZPnqwxfBAfHw+FQoH169drLW9tbY1x48app7OysjBr1ix06NAB1tbWUKlUCAoKQlJSUqWv14M8PT011gcA2dnZmDZtGtzd3aFUKuHt7Y3FixejvLxc3ae6YYr27dtr/D9UPJ/4+HiNfoMGDYJCocD8+fM12qVsB7pydnYGADRsqP2me9SoUTh+/DhOnz6tbsvIyMBvv/2GUaNG6f2YFa9RVT8PbqvXr1/HxIkT4eTkBHNzc3Tq1AnffffdQx/n7t27+PDDD9GqVStERUVpzR8yZAhCQkKwY8cOHD58WN1e2WsPaG4Tq1atwogRIwAAffr0Udd+///l9u3b4efnBxsbG6hUKnTt2hVxcXEa61y3bh06d+4MCwsLODg4YMyYMbh69apGn3HjxsHa2hqXLl3C4MGDYW1tjaZNmyImJgYAcOLECfTt2xdWVlbw8PDQegxAt21XisduiOaXX35B8+bN0aNHD52XSUhIwNq1a/HGG29AqVTiyy+/xMCBA/HHH3+owzczMxPdu3dX/0Fo0qQJtm/fjokTJyI3NxfTpk2rdN2xsbGwtrYGAISHh9fouf3www8ICQlBYGAgFi9ejIKCAsTGxqJXr144duyY+u3pqVOn0LNnTzRt2hRz586FlZUVfv75ZwwbNgwbNmzA8OHDq3yM/Px8uLm5YciQIVCpVDh58iRiYmJw9epV/PLLL5JrPnfuHDZv3owRI0bAy8sLmZmZWLZsGfz8/JCSkgJXV1dJ6ysoKICfnx+uXr2KV1
99Fc2aNcOhQ4cQHh6O9PR0REdHS66xMvv378e2bdu02vXdDu5XUlKCmzdvArg3RHPs2DF88skn6N27N7y8vLT69+7dG25uboiLi8OCBQsAAGvXroW1tTUGDRpUsycKYOTIkXj22Wc12h7cVu/evQt/f3+kpqZiypQp8PLywrp16zBu3DhkZ2drHDQ96ODBg7h9+zamTp1a6R8wABg7dixWrlyJX3/9Fd27d9e59t69e+ONN97AZ599hrfeegtt27YFAPW/q1atwoQJE+Dj44Pw8HDY2dnh2LFj2LFjh/qP46pVqzB+/Hh07doVUVFRyMzMxNKlS/H777/j2LFjsLOzUz9eWVkZgoKC0Lt3byxZsgSrV6/GlClTYGVlhbfffhujR4/G888/j6+++gpjx46Fr6+v+v+0VrZdY7+FeJRycnIEADF06FCdlwEgAIi//vpL3Xbx4kVhbm4uhg8frm6bOHGicHFxETdv3tRY/qWXXhK2traioKBAo/2tt94SADT6+/j4CD8/P41+fn5+wsfHR6uuDz/8UAAQ58+fF0IIcefOHWFnZycmTZqk0S8jI0PY2tpqtPfr10906NBBFBYWqtvKy8tFjx49RMuWLR/yimh7/fXXhbW1tXp63759AoBYt26dVl8rKysREhKini4sLBRlZWUafc6fPy+USqVYsGCBui0yMlIAEOXl5Rp9PTw8NNb33nvvCSsrK42hDCGEmDt3rmjQoIG4dOmS+jFQxTDFg/8PFc/n/rf43bp1E0FBQQKAiIiIULdL3Q4e5OHhod7m7v/p2bOn1jorhmhu3LghZs2aJby9vdXzunbtKsaPHy+EEHoP0Uh5jaKjowUA8eOPP6rbiouLha+vr7C2tha5ublVPk7Fsps2baqyT1ZWlno4qsKDr32FB7eJqoZosrOzhY2NjejWrZu4e/euxryK7ay4uFg4OjqK9u3ba/T59ddfBQDx7rvvqttCQkIEAPH++++r227fvi0sLCyEQqEQP/30k7r99OnTWvXruu1K8VgN0eTm5gIAbGxsJC3n6+uLzp07q6ebNWuGoUOHYufOnSgrK4MQAhs2bMCQIUMghMDNmzfVP4GBgcjJycHff/+tsc7CwkIAgLm5+UMfv6ysTGOdN2/eREFBgUaf3bt3Izs7GyNHjtTo16BBA3Tr1g379u0DcG9I5LfffsMLL7yAO3fuqPvdunULgYGBOHv2rNZbz8rk5OQgMzMTe/fuxdatW9G7d2+tPvevv+LnQUqlEiYmJurneevWLVhbW6N169Yar5mjoyMA4MqVK9XWtW7dOjzzzDNo1KiRxuMGBASgrKxMa2itoKBAq8aysrJqH2Pjxo34888/8cEHH2i067MdVKZbt27YvXs3du/ejV9//RWLFi3CqVOn8Nxzz1V5hteoUaOQmpqKP//8U/1vTYZnpNq2bRucnZ0xcuRIdZupqSneeOMN5OXlISEhocpl79y5A6D6/bJiXsU+bAi7d+/GnTt3MHfuXK39sGI48K+//sL169fx+uuva/QZNGgQ2rRpg61bt2qt9+WXX1b/bmdnh9atW8PKygovvPCCur1169aws7PDuXPn1G1St11dPFZDNCqVCsD/NihdtWzZUqutVatWKCgowI0bN2BiYoLs7GwsX768yrNvrl+/rjF98+ZNmJqawtLS8qGPf/r0aTRp0qTaPmfPngUA9O3bt9L5Fc89NTUVQgjMmzcP8+bNq7LWpk2bVvt4gYGBOHLkCABg4MCBWLt2rVafCRMmVLsOACgvL8fSpUvx5Zdf4vz58xrh2rhxY/Xvvr6+UCgUCA8Px8KFC9XDWg+OTZ49exbJyclVvl4P/j9EREQgIiJCq19VZ2uUlZXhrbfewujRo9GxY0eNeTdu3JC8HVTGwcEBAQEB6ulBgwahdevW+M9//oNvvvkGYWFhWss8+eSTaNOmDeLi4mBnZwdnZ+cqt4XacPHiRbRs2VL9x7pCxVDIxYsXq1y2Iryr2y
91+SMgVVpaGgBU+hlXhYq6W7durTWvTZs2OHjwoEabubm51rZna2sLNzc3rc+QbG1tcfv2bfW01G1XF49dwLu6uuL
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAf8AAAIjCAYAAAAN5RJ3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABfKUlEQVR4nO3deXhM598G8HsS2XciCREJsYbYgoilqJDYqVpaJNReeyjSRcSuqkItQdFWtdRSXagttVbQ2onYiS0JEokkJJJ53j+8mZ9jsk0sZ5y5P9eV68o8Z5nvzJwz9zxnVQkhBIiIiMhgGMldABEREb1ZDH8iIiIDw/AnIiIyMAx/IiIiA8PwJyIiMjAMfyIiIgPD8CciIjIwDH8iIiIDw/AnIiIyMAx/MmgqlQpTpkzRebrvvvsOKpUK//3336svSgFy35/r169r2lq0aIEWLVrIVpOS5fV+ExWkWOF/5coVDBkyBBUrVoS5uTlsbW3RpEkTLFiwAI8fP37VNRIViYeHB1QqFVQqFYyMjGBvbw9vb28MHjwYR44ckbu8Qm3btq1YP0R0lfse5b5PZcuWRZs2bbB3797X/txvg379+kneo+f/tm/fLmttM2fOxJYtW2StQW5//vknAgMDUapUKZibm6NKlSoYP348Hjx4UOx53rlzB1OmTMHJkydfXaF6roSuE2zduhXdu3eHmZkZgoKCULNmTWRlZeHgwYP45JNPcO7cOSxfvvx11EpUqDp16mDcuHEAgEePHuH8+fPYsGEDVqxYgbFjx+Lrr7+WjP/48WOUKKHzavBabNu2DYsXL34jPwBat26NoKAgCCFw7do1LFmyBO+++y62bt2Ktm3bvpbn3Llz52uZ7+tgZmaGb7/9Vqu9du3aMlTzPzNnzsT777+PLl26SNr79u2LXr16wczMTJ7C3pDx48dj3rx5qF27NiZOnIiSJUvi+PHjWLRoEdatW4eoqChUrVpV5/neuXMH4eHh8PDwQJ06dV594XpIp2+9a9euoVevXnB3d8fff/+NMmXKaIYNHz4cly9fxtatW195kURF5erqij59+kja5syZgw8//BDz589H5cqVMWzYMM0wc3PzN12iXqhSpYrkferatStq1aqFiIiI1xb+pqamr3R+GRkZsLS0fKXzzFWiRAmt5UifGRsbw9jYWO4yXquff/4Z8+bNQ8+ePbF27VrJ6+3Xrx9atmyJ7t274/jx43rzg16vCR0MHTpUABD//PNPkcYHIIYPHy5+/PFHUaVKFWFmZibq1asn9u3bpzXurVu3RP/+/YWTk5MwNTUVXl5eYuXKlXnONywsTADQ+mvevLlkvObNm4saNWpoTT937lwBQFy7dk3Svm3bNtG0aVNhaWkprK2tRbt27cTZs2e1pj9//rzo1q2bcHBwEGZmZsLHx0f89ttvhb4fsbGxomXLlsLZ2VmYmpqKcuXKiSFDhogHDx5oxtmzZ48AIDZs2KA1vZWVlQgODtY8fvDggRg3bpyoWbOmsLKyEjY2NiIwMFCcPHkyz/frRe7u7pL5CSFEcnKyGD16tChXrpwwNTUVnp6eYvbs2SInJ0czzrVr1wQAMXfuXK151qhRQ/I55L6ePXv2SMZr166dACDCwsIk7bosB3m9nvbt2+c57NGjR6JkyZLC1dVVqNVqTfuLNVy/fl0MGzZMVKlSRZibm4uSJUuK999/X2tZWb16tQAg9u3bJwYPHixKliwpbGxsRN++fUVSUpLW8xe2bAUHB+e5TOfKyckR8+fPF15eXsLMzEw4OTmJwYMHaz3Xv//+K9q0aSNKlSolzM3NhYeHh+jfv79knNz18kWOjo6icuXKmsdFXc7Pnj0rWrZsKczNzYWrq6uYNm2aWLlypdY61rx5c6119Pr166Jjx47C0tJSlC5dWowZM0Zs375da5nJXZf/++8/0axZM2FhYSFGjx4thBDiyZMnYvLkycLT01OzXn3yySfiyZMnWr
WuWbNG1KtXT5ibmwsHBwfRs2dPERcXJxknODhYWFlZaU2bK79lOne9WL16tda8bt26JTp37iysrKyEo6OjGDdunMjOzpZMn5OTIyIiIkTNmjWFmZmZcHR0FAEBAeLff/8VQog8l4/c9Td3eXxxOV28eLHw8vISpqamokyZMuLjjz8WycnJknFy39tz586JFi1aCAsLC1G2bFkxZ86cfN+D5+VVV+6fu7u7ZNy0tDQREhKi+X6pUqWKmDt3rmSdzE/VqlWFg4ODSElJyXN4eHi4ACB+/vlnTVte33G5rzl3Wcz9PF/8e/5zPHz4sGjbtq2wt7cXlpaWwtvbW0REREjmGRUVpVnH7ezsRKdOnURMTIxknNzv4gsXLojevXsLW1tb4ejoKD7//HOhVqtFXFyc6NSpk7CxsRHOzs7iq6++0qpdl+W9IDr9PPrjjz9QsWJFNG7cuMjT7Nu3D+vXr8eoUaNgZmaGJUuWIDAwEEePHkXNmjUBAAkJCWjUqBFUKhVGjBiB0qVL46+//sKAAQOQmpqKMWPG5DnvpUuXwtraGgAQGhqqy0vRsmbNGgQHByMgIABz5sxBRkYGli5diqZNm+LEiRPw8PAAAJw7dw5NmjSBq6srJk2aBCsrK/zyyy/o0qULNm3ahK5du+b7HOnp6ShXrhw6duwIW1tbnD17FosXL8bt27fxxx9/6Fzz1atXsWXLFnTv3h0VKlRAQkICli1bhubNmyMmJgZly5bVaX4ZGRlo3rw5bt++jSFDhqB8+fI4dOgQQkNDcffuXUREROhcY17279+Pbdu2abUXdzkoCmtra3Tt2hUrV65ETEwMatSoked4//77Lw4dOoRevXqhXLlyuH79OpYuXYoWLVogJiZGq6c5YsQI2NvbY8qUKbhw4QKWLl2KGzduYO/evVCpVACKtmwNGTIEd+7cwa5du7BmzRqtuoYMGYLvvvsO/fv3x6hRo3Dt2jUsWrQIJ06cwD///AMTExMkJiaiTZs2KF26NCZNmgR7e3tcv34dmzdvLvT9SU5ORnJyMipVqgSg6Mt5fHw8WrZsiezsbM14y5cvh4WFRaHPmZ6ejnfffRd3797F6NGj4eLigp9++gl79uzJc/wHDx6gbdu26NWrF/r06QNnZ2eo1Wp06tQJBw8exODBg1G9enWcOXMG8+fPx8WLFyX7x2fMmIEvvvgCPXr0wMCBA3Hv3j188803eOedd3DixAnY29tLnu/+/fuSxyYmJrCzsyv0db0oJycHAQEB8PX1xVdffYXdu3dj3rx58PT0lGyFGjBgAL777ju0bdsWAwcORHZ2Ng4cOIDDhw+jfv36WLNmDQYOHIiGDRti8ODBAABPT898n3fKlCkIDw+Hv78/hg0bplk+//33X80ykys5ORmBgYF477330KNHD2zcuBETJ06Et7d3kbYE5e5Get68efOQnJyseSyEQKdOnbBnzx4MGDAAderUwY4dO/DJJ5/g9u3bmD9/fr7zv3TpEi5cuIB+/frB1tY2z3GCgoIQFhaGP//8E7169Sq05lzVq1fH1KlTMXnyZAwePBjNmjUDAE3O7dq1Cx06dECZMmU0y+n58+fx559/YvTo0QCA3bt3o23btqhYsSKmTJmCx48f45tvvkGTJk1w/PhxTX7k6tmzJ6pXr47Zs2dj69atmD59OkqWLIlly5bh3XffxZw5c7B27VqMHz8eDRo0wDvvvAMAOi3vhSrqr4SUlBQBQHTu3LnIvyzw/7+g/vvvP03bjRs3hLm5uejataumbcCAAaJMmTLi/v37kul79eol7OzsREZGhqT9008/FQAk47/Y4xSi6D3/R48eCXt7ezFo0CDJePHx8cLOzk7S3qpVK+Ht7S35laVWq0Xjxo0lvaai+vjjj4W1tbXmsS49/ydPnkh65EI8632YmZmJqVOnatpyfxG/+Ov6xV/F06ZNE1ZWVuLixYuS8SZNmiSMjY01PaSX7fn7+vqKtm3bavW6dV0OXl
RQz18IIebPny8ASHqvL9aQ13NER0cLAOKHH37QtOX2tHx8fERWVpam/csvv5Q8hy7L1vDhw/PcQnPgwAEBQKxdu1b
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAIjCAYAAAAJPAAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHyElEQVR4nO3deVxU9f4/8NeIwyAwoIhsAoLihluJXkMtXBDE3bhaLldcrksh7ml0NcI0tW6FJbmUaeWOW3bV3MXdykQ0lwQX3EAFAQFBls/vD3/M13EG5AyDA8fX8/Hg8fB8zuec855xzms+c+bMOQohhAAREclONVMXQEREFYMBT0QkUwx4IiKZYsATEckUA56ISKYY8EREMsWAJyKSKQY8EZFMMeCJiGSKAU9EJFMvZcAnJiZi7NixqF+/PiwsLGBjY4MOHTpg4cKFePTokanLo5dcYWEhXFxcoFAosHPnTlOX80IlJSVh3Lhx8PDwgEqlgoODA/r164ejR4+Wa73ffPMNVq5caZwiqxDFy3Ytmu3bt2PAgAFQqVQYNmwYmjdvjsePH+PIkSPYtGkThg8fjmXLlpm6THqJ7dmzBwEBAfDw8ECHDh2watUqU5f0Qhw9ehQ9evQAAPz73/+Gt7c3kpOTsXLlSiQmJmLhwoUICwszaN3NmzeHvb09Dh48aMSKK7+XKuCvXr2Kli1bwtXVFfv374ezs7PW/ISEBGzfvh0TJ040UYVEQEhICM6dO4eQkBB88MEHSElJgZWVlanLqlAPHjyAt7c3hBA4evQoGjRooJn36NEjBAYG4ujRozh8+DDat28vef0va8BDvETGjRsnAIijR4+WqT8AERoaKlatWiUaNWokVCqVaN26tYiNjdXpe/PmTTFixAjh4OAgzM3Nhbe3t1i+fLne9UZERAgAOn9+fn5a/fz8/ESzZs10lv/ss88EAHH16lWt9h07doiOHTsKS0tLYW1tLXr06CHOnTuns/yFCxdEcHCwqFWrllCpVMLHx0f8/PPPz30+Ll68KDp37iwcHR2Fubm5cHV1FWPHjhWpqamaPgcOHBAARExMjM7yVlZWIiQkRDOdmpoqpk6dKpo3by6srKyEWq0W3bt3F3FxcXqfr2fVq1dPa31CCPHgwQMxceJE4erqKszNzUWDBg3E/PnzRWFhoabP1atXBQDx2Wef6ayzWbNmWv8PxY/nwIEDWv169OghAIiIiAitdimvA31ycnKEWq0Wn376qbhz546oVq2aWL16td6+GzZsEE2bNhUqlUo0a9ZMbN68WYSEhIh69epp9SssLBRffvml8Pb2FiqVSjg4OIgxY8aItLS059YTEhKi97Va/Pfs87JhwwbRunVrYWFhIWrXri2GDBkibt68+dztzJs3TwAQP/74o975V65cEWZmZiIwMFDTVtLrYsWKFVr7R7169Urd1x48eCAmTZok6tWrJ8zNzUXdunXFv/71L3Hv3j1Nn5SUFDFy5Ejh4OAgVCqVaNmypVi5cqXWdp9+XS1atEh4enqKGjVqiG7duomkpCRRVFQkZs+eLerWrSssLCxEnz59tPadYmXdj8ui+ot5G6kcfvnlF9SvX1/SCCA2Nhbr16/HhAkToFKp8M0336B79+747bff0Lx5cwBASkoKXnvtNSgUCowfPx516tTBzp07MWrUKGRmZmLSpEl617148WJYW1sDAMLDw8v12H766SeEhIQgMDAQCxYsQE5ODhYvXoyOHTvi9OnT8PDwAAD89ddf6NChA+rWrYv3338fVlZW2LBhA/r164dNmzahf//+JW4jOzsbrq6u6N27N2xsbHDu3DlER0fj1q1b+OWXXyTXfOXKFWzduhUDBgyAp6cnUlJSsHTpUvj5+eH8+fNwcXGRtL6cnBz4+fnh1q1bGDt2LNzd3XHs2DGEh4fjzp07iIqKklyjPocOHcKOHTt02g19HTxt27ZtyMrKwttvvw0nJyd06tQJq1evxu
DBg7X6bd++HW+99RZatGiBefPm4cGDBxg1ahTq1q2rs86xY8di5cqVGDFiBCZMmICrV69i0aJFOH36NI4ePQqlUllqTSqVCt99951W2++//46vvvpKq614G23btsW8efOQkpKChQsX4ujRozh9+jRq1qxZ4jZ++eUXWFhYYODAgXrne3p6omPHjti/fz8ePXqEGjVqlFrz06KiohAWFgZra2v85z//AQA4OjoCALKysvD666/jwoULGDlyJFq3bo379+9j27ZtuHnzJuzt7fHo0SN06tQJCQkJGD9+PDw9PRETE4Phw4cjPT1d5xP/6tWr8fjxY4SFhSEtLQ2ffvopBg4ciC5duuDgwYOYMWMGEhIS8PXXX2PatGn4/vvvNcuWdT8uM4PeFqqgjIwMAUD07du3zMvg/7/b//HHH5q269evCwsLC9G/f39N26hRo4Szs7O4f/++1vJvv/22sLW1FTk5OVrtH3zwgQCg1f/ZkaMQZR/BP3z4UNSsWVOMHj1aq19ycrKwtbXVau/atato0aKFyM3N1bQVFRWJ9u3bi4YNGz7nGdH17rvvCmtra820lBF8bm6u1shaiCejIJVKJWbPnq1pi4yMFABEUVGRVt9nR/Aff/yxsLKyEn///bdWv/fff1+YmZmJpKQkzTZQjhF8u3btRFBQkM4IXurrQJ9evXqJDh06aKaXLVsmqlevLu7evavVr0WLFsLV1VU8fPhQ03bw4EEBQGsEf/jwYQFA51PAr7/+qrf9WSEhIcLKykqnPSYmRut5efz4sXBwcBDNmzcXjx490vT73//+JwCIDz/8sNTt1KxZU7Rq1arUPhMmTBAARHx8vBCi7CN4IfTvX0II8eGHHwoAYvPmzTrzil9vUVFRAoBYtWqVZt7jx4+Fr6+vsLa2FpmZmUKI/3td1alTR6Snp2v6hoeHCwCiVatWIj8/X9M+aNAgYW5urtkXpezHZfXSnEWTmZkJAFCr1ZKW8/X1hY+Pj2ba3d0dffv2xa5du1BYWAghBDZt2oTevXtDCIH79+9r/gIDA5GRkYE///xTa525ubkAAAsLi+duv7CwUGud9+/fR05OjlafPXv2ID09HYMGDdLqZ2Zmhnbt2uHAgQMAgLS0NOzfvx8DBw7Ew4cPNf1SU1MRGBiIy5cv49atW8+tKSMjAykpKdi3bx+2b9+ON954Q6fP0+sv/nuWSqVCtWrVNI8zNTUV1tbWaNy4sdZz5uDgAAC4efNmqXXFxMTg9ddfR61atbS26+/vj8LCQhw6dEirf05Ojk6NhYWFpW5j8+bN+P333zF//nytdkNeB89KTU3Frl27MGjQIE1bcHAwFAoFNmzYoGm7ffs2zp49i2HDhmk+AQKAn58fWrRoofOc2Nraolu3blo1+fj4wNraWvPaKK8//vgDd+/exbvvvqv1uu7ZsyeaNGmC7du3l7r8w4cPn7tvFs8v3peNYdOmTWjVqpXeT64KhQIAsGPHDjg5OWn9vyiVSkyYMAFZWVmIjY3VWm7AgAGwtbXVTLdr1w4AMHToUFSvXl2r/fHjx5p9rqz7sRQvzSEaGxsbAE9eSFI0bNhQp61Ro0bIycnBvXv3UK1aNaSnp2PZsmUlnn1z9+5dren79+9DqVTC0tLyudu/ePEi6tSpU2qfy5cvAwC6dOmid37xY09ISIAQArNmzcKsWbNKrFXfx/ynBQYG4uTJkwCA7t27Y/369Tp9Ro4cWeo6AKCoqAgLFy7EN998g6tXr2qFa+3atTX/9vX1hUKhQHh4OObMmaMJtaKiIq31Xb58GfHx8SU+X8/+P0RERCAiIkKnX/HH92cVFhbigw8+wJAhQ9CyZUuteffu3ZP8OnjW+vXrkZ+fj1dffRUJCQma9nbt2mH16tUIDQ0FAFy/fh0A4OXlpbMOLy8vrTeSy5cvIyMjQ/MmKbWmsiquqXHjxjrzmjRpgiNHjpS6vFqtfu6+WTxf6iCtNImJiQgODi61z/Xr19GwYU
PNYKRY06ZNNfOf5u7urjVdHPZubm562x88eACg7PuxFC9VwLu4uODcuXNGXW9xyAwdOhQhISF6+zwbBteuXYO7u7t
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# One figure per column: value counts for binary (0/1) or non-numeric columns,\n",
"# mean of the column grouped by Outcome for every other numeric column.\n",
"for column in numeric_columns:\n",
"    plt.figure(figsize=(4, 6))  # figure size\n",
"    column_values = df[column]\n",
"    is_numeric_column = pd.api.types.is_numeric_dtype(column_values)\n",
"    # Numeric column with exactly two distinct values, both taken from {0, 1}\n",
"    is_binary_column = (\n",
"        is_numeric_column\n",
"        and column_values.nunique() == 2\n",
"        and set(column_values.unique()).issubset({0, 1})\n",
"    )\n",
"\n",
"    if is_numeric_column and not is_binary_column:\n",
"        # Mean of the column within each Outcome class\n",
"        grouped_data = df.groupby('Outcome')[column].mean()\n",
"        plt.bar(grouped_data.index, grouped_data.values, alpha=0.5, width=0.4)\n",
"        plt.title(f'Среднее значение {column} по Outcome')\n",
"        plt.xlabel('Outcome (0 = нет, 1 = да)')\n",
"        plt.ylabel(f'Среднее значение {column}')\n",
"        plt.xticks([0, 1])  # tick labels on the X axis\n",
"        plt.grid(axis='y')\n",
"    else:\n",
"        # Binary or non-numeric column: bar chart of how often each value occurs\n",
"        counts = column_values.value_counts()\n",
"        counts.plot(kind='bar', width=0.4)\n",
"        plt.title(f'Количество значений для {column}')\n",
"        plt.xlabel(column)\n",
"        plt.ylabel('Количество повторений')\n",
"\n",
"    plt.show()  # render the figure"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Разобьем наш набор на выборки относительно параметра Outcome:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 351,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [],
"source": [
"# Функция для создания выборок\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def split_stratified_into_train_val_test(\n",
"    df_input,\n",
"    stratify_colname=\"y\",\n",
"    frac_train=0.6,\n",
"    frac_val=0.15,\n",
"    frac_test=0.25,\n",
"    random_state=None,\n",
"):\n",
"    \"\"\"Split a dataframe into train, validation and test parts, stratified by one column.\n",
"\n",
"    Args:\n",
"        df_input: source dataframe; all of its columns (including the\n",
"            stratification column) are kept in the returned parts.\n",
"        stratify_colname: name of the column used for stratification.\n",
"        frac_train: fraction of rows for the training part.\n",
"        frac_val: fraction of rows for the validation part.\n",
"        frac_test: fraction of rows for the test part.\n",
"        random_state: seed forwarded to both train_test_split calls.\n",
"\n",
"    Returns:\n",
"        Tuple (df_train, df_val, df_test).\n",
"\n",
"    Raises:\n",
"        ValueError: if the fractions do not sum to 1.0 (within floating-point\n",
"            tolerance) or stratify_colname is not a column of df_input.\n",
"    \"\"\"\n",
"    import math  # local import keeps this cell self-contained\n",
"\n",
"    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not exactly 1.0 in floating point\n",
"    if not math.isclose(frac_train + frac_val + frac_test, 1.0):\n",
"        raise ValueError(\n",
"            \"fractions %f, %f, %f do not add up to 1.0\"\n",
"            % (frac_train, frac_val, frac_test)\n",
"        )\n",
"\n",
"    if stratify_colname not in df_input.columns:\n",
"        raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
"    X = df_input  # contains all columns\n",
"    y = df_input[\n",
"        [stratify_colname]\n",
"    ]  # single-column dataframe used for stratification\n",
"\n",
"    # First split: training part versus a temporary remainder\n",
"    df_train, df_temp, y_train, y_temp = train_test_split(\n",
"        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
"    )\n",
"\n",
"    # Second split: the remainder is divided into validation and test parts\n",
"    relative_frac_test = frac_test / (frac_val + frac_test)\n",
"    df_val, df_test, y_val, y_test = train_test_split(\n",
"        df_temp,\n",
"        y_temp,\n",
"        stratify=y_temp,\n",
"        test_size=relative_frac_test,\n",
"        random_state=random_state,\n",
"    )\n",
"    # The three parts together must contain exactly as many rows as the input\n",
"    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
"    return df_train, df_val, df_test"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 352,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Outcome\n",
"0 500\n",
"1 268\n",
"Name: count, dtype: int64\n",
"\n",
"Обучающая выборка: (460, 9)\n",
"Outcome\n",
"0 299\n",
"1 161\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAf8AAADECAYAAACROyhkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4AklEQVR4nO3dd1wT9/8H8FcSAgTC3ooICqKCE0cdgAMHYl2t1GodqNXWttba1tZf3bb122Ld1lktKtY9WuvEusUtKFUUFBBR2SAEQiD5/P7ApISEKXhA3s/HIw/l8rnPvfO5y+d9d5+7C48xxkAIIYQQncHnOgBCCCGEvFmU/AkhhBAdQ8mfEEII0TGU/AkhhBAdQ8mfEEII0TGU/AkhhBAdQ8mfEEII0TGU/AkhhBAdQ8mfEEJeg0QiQWJiIjIzM7kOhdSwnJwcxMfHQyKRcB1KjaPkTwghVbR371707dsXJiYmEIvFcHJyws8//8x1WPVCbm4uVqxYofo7KysLa9eu5S6gEhhj2LhxI9566y0YGRnB1NQULi4u2LFjB9eh1bgqJf/ff/8dPB5P9TI0NESLFi3w6aefIjk5ubZiJERnXLp0CcOHD4ednR0MDAzg7OyMqVOn4smTJ9WuMy8vDwsWLMDZs2drLlAd9u233yIwMBAmJibYtGkTTp06hbCwMEybNo3r0OoFkUiEOXPmIDQ0FImJiViwYAH++usvrsMCAIwePRofffQRWrVqhe3bt6vW7YgRI7gOreaxKti6dSsDwBYtWsS2b9/ONm3axMaPH8/4fD5zcXFhEomkKtURQkpYtWoV4/F4rHnz5mzx4sVs8+bN7Msvv2RmZmbMzMyMXbp0qVr1pqamMgBs/vz5NRuwDjp79iwDwJYsWcJ1KPXa0qVLGZ/PZwCYqakpu3DhAtchsZCQEMbj8VhoaCjXobwR1Ur+169fV5s+c+ZMBoDt3LmzRoMjRFdcvHiR8fl85u3trbETHRsby+zs7JiDgwPLyMioct2U/GvO4MGDWffu3bkOo0FITExkly9fZpmZmVyHwhhjzNPTk40ePZrrMN6YGhnz79OnDwAgLi4OAJCRkYGvvvoKbdq0gVgshqmpKfz9/REZGakxr1QqxYIFC9CiRQsYGhrCwcEBI0aMwKNHjwAA8fHxakMNpV+9evVS1XX27FnweDzs3r0b//d//wd7e3sYGxtjyJAhSExM1Fj21atXMXDgQJiZmcHIyAi+vr64dOmS1s/Yq1cvrctfsGCBRtkdO3bAy8sLIpEIlpaWGDVqlNbll/fZSlIoFFixYgU8PDxgaGgIOzs7TJ06VeMCI2dnZwwePFhjOZ9++qlGndpiDw4O1mhTACgoKMD8+fPh6uoKAwMDNGnSBLNmzUJBQYHWtiqpV69eGvX98MMP4PP52LlzZ7XaY+nSpejevTusrKwgEong5eWFffv2aV3+jh070KVLFxgZGcHCwgI+Pj44efKkWpljx47B19cXJiYmMDU1RefOnTVi27t3r2qdWltb44MPPkBSUpJamQkTJqjFbGFhgV69euHChQsVttPixYvB4/EQEhICIyMjtfeaN2+On3/+Gc+fP8eGDRtU07W1rTIOZ2dnAMVtamNjAwBYuHCh1u02OjoagYGBsLGxgUgkgru7O7777ju1Om/fvg1/f3+YmppCLBajb9++uHLliloZ5bDgxYsXMX36dNjY2MDc3BxTp06FTCZDVlYWxo0bBwsLC1hYWGDWrFlgpX5UtLLbujav0/4A8M8//8Db2xvGxsYwNzfH0KFDcf/+fbUyV65cgaenJ0aNGgVLS0uIRCJ07twZhw4dUpXJzc2FsbExPv/8c41lPH36FAKBAEuWLFHFrFxXJZVeRwkJCZg2bRrc3d0hEolgZWWFkSNHIj4+Xm0+ZR9Ycojn+vXr6NevH0xMTGBsbKy1TZTr7saNG6ppaWlpWvuJwYMHa425Mv3pggULVN9nR0dHdOvWDX
p6erC3t9eIWxvl/MqXiYkJunTpotb+QPF3w9PTs8x6lH3N77//DqD4os2oqCg0adIEAQEBMDU1LbOtAODx48cYOXIkLC0tYWRkhLfeegt///23Wpmq5KOq9JNVyVvl0avyHFooE7WVlRWA4oY5dOgQRo4cCRcXFyQnJ2PDhg3w9fXFvXv30KhRIwCAXC7H4MGDcfr0aYwaNQqff/45cnJycOrUKURFRaF58+aqZbz//vsYNGiQ2nJnz56tNZ4ffvgBPB4P33zzDVJSUrBixQr4+fkhIiICIpEIQPEX3d/fH15eXpg/fz74fD62bt2KPn364MKFC+jSpYtGvY6OjqovbW5uLj7++GOty547dy4CAwMxefJkpKamYvXq1fDx8cHt27dhbm6uMc+UKVPg7e0NADhw4AAOHjyo9v7UqVPx+++/IygoCNOnT0dcXBzWrFmD27dv49KlSxAKhVrboSqysrJUn60khUKBIUOG4OLFi5gyZQpatWqFu3fvYvny5Xj48KHGl64iW7duxZw5c/DLL79g9OjRWstU1B4rV67EkCFDMGbMGMhkMuzatQsjR47EkSNHEBAQoCq3cOFCLFiwAN27d8eiRYugr6+Pq1ev4p9//kH//v0BFHd6EydOhIeHB2bPng1zc3Pcvn0bx48fV8WnbPvOnTtjyZIlSE5OxsqVK3Hp0iWNdWptbY3ly5cDKO7oV65ciUGDBiExMVHrugeKx+RPnz4Nb29vuLi4aC3z3nvvYcqUKThy5Ai+/fbbihv6FRsbG6xbtw4ff/wxhg8frhq7bNu2LQDgzp078Pb2hlAoxJQpU+Ds7IxHjx7hr7/+wg8//AAA+Pfff+Ht7Q1TU1PMmjULQqEQGzZsQK9evXDu3Dl07dpVbZmfffYZ7O3tsXDhQly5cgUbN26Eubk5Ll++DCcnJ/z44484evQogoOD4enpiXHjxqnmfd1tvTrtDwBhYWHw9/dHs2bNsGDBAuTn52P16tXo0aMHbt26pUp26enp2LhxI8RisWoHZ8eOHRgxYgRCQ0Px/vvvQywWY/jw4di9ezeWLVsGgUCgWs4ff/wBxhjGjBlTuRX4yvXr13H58mWMGjUKjo6OiI+Px7p169CrVy/cu3dPY4dRKTY2Fr169YKRkRG+/vprGBkZYdOmTfDz88OpU6fg4+NTpTjKUp3+VOmXX36p8jVj27dvB1C8g/Lrr79i5MiRiIqKgru7e7XiT09PBwD89NNPsLe3x9dffw1DQ0OtbZWcnIzu3bsjLy8P06dPh5WVFUJCQjBkyBDs27cPw4cPV6u7MvmotLL6yddpZw1VOU2gPO0fFhbGUlNTWWJiItu1axezsrJiIpGIPX36lDHGmFQqZXK5XG3euLg4ZmBgwBYtWqSatmXLFgaALVu2TGNZCoVCNR8AFhwcrFHGw8OD+fr6qv4+c+YMA8AaN27MXr58qZq+Z88eBoCtXLlSVbebmxsbMGCAajmMMZaXl8dcXFxYv379NJbVvXt35unpqfpb26nU+Ph4JhAI2A8//KA27927d5menp7G9JiYGAaAhYSEqKbNnz+flVwtFy5cYAA0xqGOHz+uMb1p06YsICBAI/ZPPvmElV7VpWOfNWsWs7W1ZV5eXmptun37dsbn8zXG5NavX88AVDgO7evrq6rv77//Znp6euzLL7/UWrYy7cFY8XoqSSaTMU9PT9anTx+1uvh8Phs+fLjGtqhc51lZWczExIR17dqV5efnay0jk8mYra0t8/T0VCtz5MgRBoDNmzdPNW38+PGsadOmavVs3LiRAWDXrl3T+pkZYywiIoIBYJ9//nmZZRhjrG3btszS0lL1d8m2Lal0HOWd9vfx8WEmJiYsISFBbXrJ78WwYcOYvr4+e/TokWras2fPmImJCfPx8VFNU/YPpb9X3bp1Yzwej3300UeqaUVFRczR0VEt/qps69pUt/0ZY6x9+/bM1taWpaenq6ZFRkYyPp/Pxo0bp5oGgAFgZ8
+eVU3Ly8tjrVq1Yvb29kwmkzHGGDtx4gQDwI4dO6a2nLZt26p95qCgIObk5KQRT+n1VXqbZ4yx8PBwBoBt27ZNNU3
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Контрольная выборка: (154, 9)\n",
"Outcome\n",
2024-10-12 12:38:44 +04:00
"0 101\n",
"1 53\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgsAAADECAYAAAARfmKGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3n0lEQVR4nO3dd3wT9f8H8FeSjnTSSVug0Fpo2asCMspepcoSUFHZtoiCCMpPUGQooqIIAiKoIDJkCnwFLBsFZEOByiqlZRTogg66k3x+f9TEhqRpWlqu4/V8PHjw6OVzn3vf5e7yzmdcZEIIASIiIqJCyKUOgIiIiMo3JgtERERkEpMFIiIiMonJAhEREZnEZIGIiIhMYrJAREREJjFZICIiIpOYLBAREZFJTBaIiIhKgUajQVJSEm7cuCF1KKWOyQIREZVrR44cwaFDh3R/Hzp0CEePHpUuoALu37+PiRMnok6dOrCysoK7uzsaNmyItLQ0qUMrVcVKFn7++WfIZDLdP6VSCX9/f7z99tuIj48vqxiJqoyjR49iwIAB8PDwgLW1NXx8fBAWFoZbt26VuM7MzEzMnDlT72ZLVJHcvn0b48aNw8WLF3Hx4kWMGzcOt2/fljosXL9+Ha1atcL69esRFhaGHTt2YO/evdi/fz/s7OykDq9UWZRkpdmzZ8PX1xfZ2dk4cuQIli5dil27diEyMhK2tralHSNRlbBo0SK88847eOaZZzB+/Hh4eXnh8uXL+PHHH7Fhwwbs2rUL7dq1K3a9mZmZmDVrFgCgc+fOpRw1UdkbOHAgFixYgKZNmwIA2rZti4EDB0ocFRAWFgYrKyscP34cNWvWlDqcMlWiZCE4OBjPPvssAGDMmDFwdXXF/PnzsX37drzyyiulGiBRVXD06FFMnDgRHTp0QHh4uF7S/eabb6J9+/YYNGgQ/vnnHzg7O0sYKdHTZ21tjb///huRkZEAgMaNG0OhUEga05kzZ3DgwAHs2bOn0icKQCmNWejatSsAICYmBgDw4MEDvPfee2jSpAns7e3h6OiI4OBgnD9/3mDd7OxszJw5E/7+/lAqlfDy8sLAgQMRHR0NAIiNjdXr+nj8X8FvSocOHYJMJsOGDRswbdo0eHp6ws7ODn379jXaZHXixAn07t0b1apVg62tLTp16lRoP1jnzp2Nbn/mzJkGZdesWYPAwEDY2NjAxcUFL7/8stHtm9q3gjQaDRYsWIBGjRpBqVTCw8MDYWFhePjwoV45Hx8fPP/88wbbefvttw3qNBb7vHnzDI4pAOTk5GDGjBmoW7curK2t4e3tjSlTpiAnJ8fosSqoc+fOBvXNmTMHcrkc69atK9Hx+Oqrr9CuXTu4urrCxsYGgYGB2Lx5s9Htr1mzBq1bt4atrS2cnZ3RsWNH7NmzR6/MH3/8gU6dOsHBwQGOjo5o1aqVQWybNm3Svadubm547bXXEBcXp1dmxIgRejE7Ozujc+fOOHz4cJHH6ZNPPoFMJsOqVasMWuf8/Pzw5Zdf4t69e1i2bJluubFjq43Dx8cHQP4xdXd3BwDMmjXL6Hl75coVDBkyBO7u7rCxsUFAQAA+/PBDvTrPnTuH4OBgODo6wt7eHt26dcPx48f1ymi7KY8cOYIJEybA3d0dTk5OCAsLQ25uLlJSUjBs2DA4OzvD2dkZU6ZMweM/emvuuW7Mkxz/gsdMa82aNZDL5fj888/1lh84cABBQUGws7ODk5MT+vXrh8uXL+uVmTlzJmQyGZKSkvSWnz59GjKZDD///LPRmI39i42NBfDf9b1nzx40b94cSqUSDRs2xG+//WawPzdu3MDgwYPh4uICW1tbPPfcc9i5c6dZx83YOTJixAjY29sXeRyLcw9SqVT45JNP4Ofnp+tymzZtmsF9xcfHByNGjIBCoUCzZs3QrFkz/Pbbb5DJZAbvWWExafdJLpfD09MTL730kl7Xnvbe89VXXxVaj/Y91Tp+/DiUSiWio6
PRqFEjWFtbw9PTE2FhYXjw4IHB+ubeQ+zt7XHjxg306tULdnZ2qFGjBmbPnq13rWjj1Z5HAJCeno7AwED4+vri3r17uuVPck0VVKKWhcdpP9hdXV0B5J+o27Ztw+DBg+Hr64v4+HgsW7YMnTp1wqVLl1CjRg0AgFqtxvPPP4/9+/fj5ZdfxjvvvIP09HTs3bsXkZGR8PPz023jlVdeQZ8+ffS2O3XqVKPxzJkzBzKZDP/3f/+HhIQELFiwAN27d0dERARsbGwA5F/wwcHBCAwMxIwZMyCXy7Fy5Up07doVhw8fRuvWrQ3qrVWrFubOnQsAePToEd58802j254+fTqGDBmCMWPGIDExEYsWLULHjh1x7tw5ODk5GawTGhqKoKAgAMBvv/2GrVu36r0eFhaGn3/+GSNHjsSECRMQExODxYsX49y5czh69CgsLS2NHofiSElJ0e1bQRqNBn379sWRI0cQGhqKBg0a4OLFi/jmm29w7do1bNu2rVjbWblyJT766CN8/fXXGDp0qNEyRR2PhQsXom/fvnj11VeRm5uL9evXY/DgwdixYwdCQkJ05WbNmoWZM2eiXbt2mD17NqysrHDixAkcOHAAPXv2BJD/ATdq1Cg0atQIU6dOhZOTE86dO4fw8HBdfNpj36pVK8ydOxfx8fFYuHAhjh49avCeurm54ZtvvgEA3LlzBwsXLkSfPn1w+/Zto+89kN9NsH//fgQFBcHX19domZdeegmhoaHYsWMHPvjgg6IP9L/c3d2xdOlSvPnmmxgwYICu6VbbnHvhwgUEBQXB0tISoaGh8PHxQXR0NH7//XfMmTMHAPDPP/8gKCgIjo6OmDJlCiwtLbFs2TJ07twZf/75J9q0aaO3zfHjx8PT0xOzZs3C8ePHsXz5cjg5OeHvv/9G7dq18dlnn2HXrl2YN28eGjdujGHDhunWfdJzvSTH35g9e/Zg1KhRePvtt/WO9759+xAcHIxnnnkGM2fORFZWFhYtWoT27dvj7NmzZn14FRQWFobu3bvr/n799df13icAumQPAKKiovDSSy9h7NixGD58OFauXInBgwcjPDwcPXr0AADEx8ejXbt2yMzMxIQJE+Dq6opVq1ahb9++2Lx5MwYMGGAQR8Hjpo2jrI0ZMwarVq3CoEGDMHnyZJw4cQJz587F5cuXDa75glQqlUEyW5SgoCCEhoZCo9EgMjISCxYswN27d81KJAuTnJyM7OxsvPnmm+jatSvGjh2L6OhoLFmyBCdOnMCJEydgbW0NoHj3ELVajd69e+O5557Dl19+ifDwcMyYMQMqlQqzZ882GkteXh5efPFF3Lp1C0ePHoWXl5futVL7/BDFsHLlSgFA7Nu3TyQmJorbt2+L9evXC1dXV2FjYyPu3LkjhBAiOztbqNVqvXVjYmKEtbW1mD17tm7ZihUrBAAxf/58g21pNBrdegDEvHnzDMo0atRIdOrUSff3wYMHBQBRs2ZNkZaWplu+ceNGAUAsXLhQV3e9evVEr169dNsRQojMzEzh6+srevToYbCtdu3aicaNG+v+TkxMFADEjBkzdMtiY2OFQqEQc+bM0Vv34sWLwsLCwmB5VFSUACBWrVqlWzZjxgxR8G05fPiwACDWrl2rt254eLjB8jp16oiQkBCD2N966y3x+Fv9eOxTpkwR1atXF4GBgXrHdPXq1UIul4vDhw/rrf/9998LAOLo0aMG2yuoU6dOuvp27twpLCwsxOTJk42WNed4CJH/PhWUm5srGjduLLp27apXl1wuFwMGDDA4F7XveUpKinBwcBBt2rQRWVlZRsvk5uaK6tWri8aNG+uV2bFjhwAgPv74Y92y4cOHizp16ujVs3z5cgFAnDx50ug+CyFERESEACDeeeedQssIIUTTpk2Fi4uL7u+Cx7agx+Mwdq5qdezYUTg4OIibN2/qLS94XfTv319YWVmJ6Oho3bK7d+8KBwcH0bFjR90y7f3h8euqbdu2QiaTibFjx+
qWqVQqUatWLb34i3OuG1PS4//4uqdPnxb29vZi8ODBBudO8+bNRfXq1UVycrJu2fnz54VcLhfDhg3TLdOet4mJiXr
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Тестовая выборка: (154, 9)\n",
"Outcome\n",
2024-10-12 12:38:44 +04:00
"0 100\n",
"1 54\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAekAAADECAYAAAC7i9nLAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA1QklEQVR4nO3dd1xT1/sH8E8SEvbeKAKKiHug8lNkuEWsq9a6t1Kr1tZ+v7S2tY5qtdpq3drWPeqs2lonVr/uDbiQooIiKks2hITk/P6gSbkkICBwQ3jerxev1puTc597cnOfnHPPvVfAGGMghBBCiM4R8h0AIYQQQrSjJE0IIYToKErShBBCiI6iJE0IIYToKErShBBCiI6iJE0IIYToKErShBBCiI6iJE0IIYToKErShBBCajXGGF6/fo3Y2Fi+Q6lylKQJIYRode/ePRw+fFj978jISPz555/8BVRMdnY2vvrqKzRp0gQSiQS2trbw8vJCTEwM36FVqQol6a1bt0IgEKj/jIyM4OXlhenTpyMpKam6YiSkzrh06RIGDRoER0dHGBoawt3dHaGhoXj27Fml68zLy8O8efNw7ty5qguU1AnZ2dkIDQ3F1atXERsbi5kzZ+Lu3bt8h4W0tDR06tQJq1atwpAhQ3DkyBGcPn0a586dg7u7O9/hVSmDyrxpwYIF8PDwgFQqxcWLF7F+/XocO3YM9+7dg4mJSVXHSEidsHr1asycORMNGzbEjBkz4OzsjOjoaPzyyy/Yu3cvjh07hs6dO1e43ry8PMyfPx8AEBQUVMVRE33WqVMn9R8AeHl5YfLkyTxHBfz3v//Fy5cvceXKFTRv3pzvcKpVpZJ0cHAw2rdvDwCYNGkSbG1tsXz5chw5cgTDhw+v0gAJqQsuXbqEjz/+GF26dMGJEyc4P3anTp0KPz8/DBkyBPfv34e1tTWPkZK65vDhw3jw4AHy8/PRsmVLSCQSXuNJTk7Gtm3bsGHDBr1P0EAVnZPu1q0bACAuLg4A8Pr1a/znP/9By5YtYWZmBgsLCwQHByMqKkrjvVKpFPPmzYOXlxeMjIzg7OyMwYMH4/HjxwCA+Ph4zhB7yb/iPYNz585BIBBg7969+OKLL+Dk5ARTU1P0798fCQkJGuu+du0a+vTpA0tLS5iYmCAwMBCXLl3Suo1BQUFa1z9v3jyNsjt37oSPjw+MjY1hY2ODYcOGaV1/WdtWnFKpxI8//ojmzZvDyMgIjo6OCA0NRXp6Oqecu7s7+vXrp7Ge6dOna9SpLfZly5ZptCkAFBQUYO7cufD09IShoSFcXV0RFhaGgoICrW1VXFBQkEZ9ixYtglAoxO7duyvVHt9//z06d+4MW1tbGBsbw8fHBwcOHNC6/p07d6Jjx44wMTGBtbU1AgICcOrUKU6Z48ePIzAwEObm5rCwsECHDh00Ytu/f7/6M7Wzs8OoUaOQmJjIKTNu3DhOzNbW1ggKCsKFCxfe2E7ffPMNBAIBtm3bpjEa1ahRIyxduhQvX77Exo0b1cu1ta0qDtWQX3x8POzt7QEA8+fP17rfPnz4EEOHDoW9vT2MjY3RpEkTfPnll5w6IyIiEBwcDAsLC5iZmaF79+64evUqp4zqdNjFixfx0Ucfwd7eHlZWVggNDYVMJkNGRgbGjBkDa2trWFtbIywsDCUfwlfefV2byrZ/yfdp+4uPj1eXP378OPz9/WFqagpzc3OEhITg/v37GvWW1a7z5s174zqLn56o6v1v3bp1aN68OQwNDeHi4oJp06YhIyODU6b4/tWsWTP4+PggKipK63dSm5LHTDs7O4SEhODevXuccgKBANOnTy+1HtV+pfoMbty4AaVSCZlMhvbt28PIyAi2trYYPny41tNCf/31l/rzsrKywoABAxAdHc0po/o8VJ+ZhYUFbG1tMXPmTEilUo14i39/CgsL0bdvX9jY2ODBgwecsuXNBWWpVE+6JFVCtbW1BQA8efIEhw8fxn
vvvQcPDw8kJSVh48aNCAwMxIMHD+Di4gIAUCgU6NevH86cOYNhw4Zh5syZyM7OxunTp3Hv3j00atRIvY7hw4ejb9++nPXOnj1bazyLFi2CQCDAZ599huTkZPz444/o0aMHIiMjYWxsDKDogwsODoaPjw/mzp0LoVCILVu2oFu3brhw4QI6duyoUW/9+vWxePFiAEBOTg6mTp2qdd1z5szB0KFDMWnSJKSkpGD16tUICAhAREQErKysNN4zZcoU+Pv7AwB+++03HDp0iPN6aGgotm7divHjx+Ojjz5CXFwc1qxZg4iICFy6dAlisVhrO1RERkaGetuKUyqV6N+/Py5evIgpU6agadOmuHv3LlasWIG///6bM6mkPLZs2YKvvvoKP/zwA0aMGKG1zJvaY+XKlejfvz9GjhwJmUyGPXv24L333sPRo0cREhKiLjd//nzMmzcPnTt3xoIFCyCRSHDt2jX89ddf6NWrF4CiA8CECRPQvHlzzJ49G1ZWVoiIiMCJEyfU8anavkOHDli8eDGSkpKwcuVKXLp0SeMztbOzw4oVKwAAz58/x8qVK9G3b18kJCRo/eyBouHoM2fOwN/fHx4eHlrLvP/++5gyZQqOHj2Kzz///M0N/Q97e3usX78eU6dOxaBBgzB48GAAQKtWrQAAd+7cgb+/P8RiMaZMmQJ3d3c8fvwYf/zxBxYtWgQAuH//Pvz9/WFhYYGwsDCIxWJs3LgRQUFB+N///gdfX1/OOmfMmAEnJyfMnz8fV69exU8//QQrKytcvnwZDRo0wLfffotjx45h2bJlaNGiBcaMGaN+79vu65Vp/9DQUPTo0UP979GjR3PaStWOALBjxw6MHTsWvXv3xnfffYe8vDysX78eXbp0QUREhPrH0ZvadfDgwfD09FTX/8knn6Bp06aYMmWKelnTpk0BVP3+N2/ePMyfPx89evTA1KlTERMTg/Xr1+PGjRtvbOPPPvuszPYvydvbG19++SUYY3j8+DGWL1+Ovn37vtUci7S0NABFnQ8fHx8sWbIEKSkpWLVqFS5evIiIiAjY2dkBAMLDwxEcHIyGDRti3rx5yM/Px+rVq+Hn54fbt29rnL8eOnQo3N3dsXjxYly9ehWrVq1Ceno6tm/fXmo8kyZNwrlz53D69Gk0a9ZMvbwyuUArVgFbtmxhAFh4eDhLSUlhCQkJbM+ePczW1pYZGxuz58+fM8YYk0qlTKFQcN4bFxfHDA0N2YIFC9TLNm/ezACw5cuXa6xLqVSq3weALVu2TKNM8+bNWWBgoPrfZ8+eZQBYvXr1WFZWlnr5vn37GAC2cuVKdd2NGzdmvXv3Vq+HMcby8vKYh4cH69mzp8a6OnfuzFq0aKH+d0pKCgPA5s6dq14WHx/PRCIRW7RoEee9d+/eZQYGBhrLY2NjGQC2bds29bK5c+ey4h/LhQsXGAC2a9cuzntPnDihsdzNzY2FhIRoxD5t2jRW8qMuGXtYWBhzcHBgPj4+nDbdsWMHEwqF7MKFC5z3b9iwgQFgly5d0lhfcYGBger6/vzzT2ZgYMA+/fRTrWXL0x6MFX1OxclkMtaiRQvWrVs3Tl1CoZANGjRIY19UfeYZGRnM3Nyc+fr6svz8fK1lZDIZc3BwYC1atOCUOXr0KAPAvv76a/WysWPHMjc3N049P/30EwPArl+/rnWbGWMsMjKSAWAzZ84stQxjjLVq1YrZ2Nio/128bYsrGYe2fVUlICCAmZubs6dPn3KWF/9eDBw4kEkkEvb48WP1shcvXjBzc3MWEBCgXqY6PpT8XnXq1IkJBAL2wQcfqJcVFhay+vXrc+KvyL6uTWXbv6TS2io7O5tZWVmxyZMnc5a/evWKWVpacpaXp12Lc3NzY2PHjtVYXtX7X3JyMpNIJKxXr16c78WaNWsYALZ582b1spL717FjxxgA1qdPH43vpDba9s8vvviCAWDJycnqZQDYtGnTSq1HtV/FxcVx/t2sWTPOsUB1/C9+fG
nTpg1zcHBgaWlp6mVRUVFMKBSyMWPGqJepjjP9+/fnrPvDDz9kAFhUVBQnXtX+MXv2bCYSidjhw4c576toLihLpYa
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class distribution of the target in the full dataset\n",
"print(df.Outcome.value_counts())\n",
"print()\n",
"\n",
"data = df.copy()\n",
"\n",
"# Fixed seed so the split (and every result derived from it) is the same on each run\n",
"SPLIT_RANDOM_STATE = 42\n",
"\n",
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
"    data,\n",
"    stratify_colname=\"Outcome\",\n",
"    frac_train=0.60,\n",
"    frac_val=0.20,\n",
"    frac_test=0.20,\n",
"    random_state=SPLIT_RANDOM_STATE,\n",
")\n",
"\n",
"# (printed label, split, pie-chart title) for each of the three splits\n",
"split_reports = [\n",
"    (\"Обучающая выборка: \", df_train, 'Распределение классов Outcome в обучающей выборке'),\n",
"    (\"Контрольная выборка: \", df_val, 'Распределение классов Outcome в контрольной выборке'),\n",
"    (\"Тестовая выборка: \", df_test, 'Распределение классов Outcome в тестовой выборке'),\n",
"]\n",
"\n",
"# Same report for every split: shape, class counts and a pie chart of the class shares\n",
"for split_label, split_frame, split_title in split_reports:\n",
"    print(split_label, split_frame.shape)\n",
"    print(split_frame.Outcome.value_counts())\n",
"    counts = split_frame['Outcome'].value_counts()\n",
"    plt.figure(figsize=(2, 2))  # figure size\n",
"    plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"    plt.title(split_title)\n",
"    plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Сбалансируем распределение:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Балансировка данных оверсемплингом. Это метод, увеличивающий число наблюдений в миноритарном классе для достижения более равномерного распределения классов."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 353,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка: (460, 9)\n",
"Outcome\n",
"0 299\n",
"1 161\n",
"Name: count, dtype: int64\n",
2024-10-12 12:38:44 +04:00
"Обучающая выборка после oversampling: (587, 9)\n",
2024-10-11 18:41:52 +04:00
"Outcome\n",
"0 299\n",
2024-10-12 12:38:44 +04:00
"1 288\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAqIAAADECAYAAABEM0OdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+VElEQVR4nO3dd1hT1xsH8G8S9lKQ6QRBcKBScdSBqFVbxVq1llZt66zaapdWq7Z1VK21WkcdVdu6te5qq9Zt68K96wIFByoyBJkBkvP7g18iIQECBi/C9/M8PDy5uffc957c3Lw559wTmRBCgIiIiIjoOZNLHQARERERlU9MRImIiIhIEkxEiYiIiEgSTESJiIiISBJMRImIiIhIEkxEiYiIiEgSTESJiIiISBJMRImIiIhIEmZSB0BERESGZWZmIiEhAWq1GpUrV5Y6HCKTY4soERGVO6tXr0ZUVJT28fLlyxEdHS1dQLmcPn0avXv3hrOzMywtLeHh4YE333xT6rCISkSREtHly5dDJpNp/6ysrODr64vhw4cjJiampGIkKjeOHj2K7t27w83NDZaWlvD09MSQIUNw586dYpeZlpaGiRMn4p9//jFdoEQvuMOHD2P06NGIiorC7t27MWzYMMjl0rfNbNu2Da1atcKVK1cwdepU7N27F3v37sXixYulDo2KaPTo0ZDJZHj77bcNPh8VFaWTU5mbm8PZ2RktWrTAuHHjCr3uF1Y+AMTGxuLTTz9F7dq1YW1tDVdXVzRt2hRffvklUlJSkJiYCA8PD7Rs2RKGfvH9+PHjkMvlGDVqFABg4sSJkMlkcHNzQ1pamt76np6e6NKlS4Fx6xFFsGzZMgFAfPvtt2LVqlXil19+EX379hVyuVx4eXmJ1NTUohRHRLn89NNPQiaTCW9vbzF58mTx66+/ipEjR4oKFSqIChUqiKNHjxar3NjYWAFATJgwwbQBE73Arl69Ktzc3AQAAUCMGDFC6pBEfHy8cHZ2Fl27dhVKpVLqcOgZqNVqUbVqVeHp6Smsra3FkydP9NaJjIwUAESvXr3EqlWrxIoVK8ScOXNEnz59hLW1tbCxsRG///57scuPj48X1atXFxUrVhQjRowQS5YsEdOmTRO9evUS9vb2IjIyUgghxLp16wQAsXjxYp3ts7KyRMOGDYWnp6c2v5swYYL2PTNz5ky9fdaoUUOEhIQUqa6KlYieOnVKZ/mIESMEALF27doi7ZyIchw5ckTI5XIRFBSk94UuIiJCuLm5CQ8PD5GQkFDkspmIEhmWkpIijh8/LiIiIqQORQghxMyZM4WdnV2x3udUuhw4cEAAEAcOHBDm5uZi+fLleutoEtEZM2boPRcVFSV8fX2FhYWFOH/+fLHK/+GHHwQAg40YSUlJIj09Xfu4U6dOwtHRUTx8+FC7bObMmQKA2Llzp3aZJhENCAgQbm5uIi0tTafc4iSiJumHaNeuHQAgMjISAJCQkIAvvvgC9evXh52dHRwcHNCpUydcuHBBb9uMjAxMnDgRvr6+sLKygoeHB3r06IGbN28C0G+6zvvXpk0bbVn//PMPZDIZ1q9fj3HjxsHd3R22trbo2rUr7t69q7fvEydO4LXXXkOFChVgY2OD4OBgHD161OAxtmnTxuD+J06cqLfu6tWrERgYCGtrazg5OeGdd94xuP+Cji03tVqNOXPmoF69erCysoKbmxuGDBmCx48f66yXX5P48OHD9co0FPuMGTP06hQAlEolJkyYAB8fH1haWqJatWoYPXo0lEqlwbrKrU2bNnrlTZ06FXK5HGvXri1WfcycORMtWrRApUqVYG1tjcDAQGzatMng/levXo2mTZvCxsYGjo6OaN26Nfbs2aOzzt9//43g4GDY29vDwcEBTZo00Ytt48aN2tfU2dkZ7777rt54sn79+unE7OjoiDZt2uDw4cOF1tPkyZMhk8mwYsUK2NjY6Dzn7e2NH374AQ8ePNDpnjNUt5
o4PD09AeTUqYuLCwBg0qRJBs/ba9euITQ0FC4uLrC2toafnx+++uornTLPnTuHTp06wcHBAXZ2dnjllVdw/PhxnXU0Q3eOHDmCTz75BC4uLqhYsSKGDBmCzMxMJCYm4v3334ejoyMcHR0xevRova4gY891Q4pb/3m3M/SnGUuoeY/t2bMHAQEBsLKyQt26dbFlyxa9chMTE/HZZ5+hWrVqsLS0hI+PD6ZPnw61Wq1dR3POz5w5U297f39/g9e3vEMsQkJC9F5TTfeZ5rV1cHBApUqV8OmnnyIjI0Nn++zsbEyePBne3t7a4SDjxo3Te397enpq60Mul8Pd3R1vv/22XvdhamoqRo4cqT1uPz8/zJw5U+e1zu9Ycp+7Ra0fAHj06BEGDhwINzc3WFlZoWHDhlixYoXOOpoyly9fDltbWzRr1gze3t4YNmwYZDIZ+vXrp7cvQ9vn7k719PTEqFGjkJmZqV1P8344ffp0vmXlfQ8fP34cAQEB+O6777T1V6tWLXz//fc65w1QtNfNmHNWE2/ucbP//fcfHB0d0aVLF2RnZ2uXG3Nu5yf3eVTYtd7YYwSMu46b+jO3IGvWrEHdunXRtm1btG/fHmvWrDF6WwCoUaMGli9fjszMTPzwww/FKv/mzZtQKBR4+eWX9Z5zcHCAlZWV9vHChQuhVCoxYsQIAMDdu3cxceJEvP322+jUqZPe9uPHj0dMTAx+/vnnIh2XISa5a16TNFaqVAkAcOvWLWzduhVvvfUWvLy8EBMTg8WLFyM4OBhXrlzR3vmnUqnQpUsX7N+/H++88w4+/fRTJCcnY+/evbh8+TK8vb21++jVqxc6d+6ss9+xY8cajGfq1KmQyWT48ssv8ejRI8yZMwft27fH+fPnYW1tDQA4cOAAOnXqhMDAQEyYMAFyuRzLli1Du3btcPjwYTRt2lSv3KpVq2LatGkAgJSUFHz44YcG9/3NN98gNDQUgwYNQmxsLObNm4fWrVvj3LlzqFixot42gwcPRlBQEABgy5Yt+OOPP3SeHzJkCJYvX47+/fvjk08+QWRkJObPn49z587h6NGjMDc3N1gPRZGYmKg9ttzUajW6du2KI0eOYPDgwahTpw4uXbqE2bNn48aNG9i6dWuR9rNs2TJ8/fXX+PHHH9G7d2+D6xRWH3PnzkXXrl3Rp08fZGZmYt26dXjrrbewfft2hISEaNebNGkSJk6ciBYtWuDbb7+FhYUFTpw4gQMHDqBjx44Aci6+AwYMQL169TB27FhUrFgR586dw65du7Txaeq+SZMmmDZtGmJiYjB37lwcPXpU7zV1dnbG7NmzAQD37t3D3Llz0blzZ9y9e9fgaw/kjOHcv38/goKC4OXlZXCdt99+G4MHD8b27dsxZsyYwiv6/1xcXPDzzz/jww8/RPfu3dGjRw8AQIMGDQAAFy9eRFBQEMzNzTF48GB4enri5s2b+OuvvzB16lQAOR9GQUFBcHBwwOjRo2Fubo7FixejTZs2+Pfff9GsWTOdfX788cdwd3fHpEmTcPz4cSxZsgQVK1bEsWPHUL16dXz33XfYuXMnZsyYAX9/f7z//vvabZ/1XC9O/Q8ZMgTt27fXPn7vvfd06kpTjxrh4eF4++23MXToUPTt2xfLli3DW2+9hV27dqFDhw4Acl7T4OBgREdHY8iQIahevTqOHTuGsWPH4sGDB5gzZ06Bx2GsQ4cOYefOnfk+HxoaCk9PT0ybNg3Hjx/HTz/9hMePH2PlypXadQYNGoQVK1agZ8+eGDlyJE6cOIFp06bh6tWreu+9oKAgDB48GGq1GpcvX8acOXNw//59bbIvhEDXrl1x8OBBDBw4EAEBAdi9ezdGjRqF6Oho7WtTEtLT09GmTRtERERg+PDh8PLywsaNG9GvXz8kJibi008/zXfbiIgI/PLLL0Xan+Y6pVQqsXv3bsycORNWVlaYPHlysY8hPj4eR44cwZEjRzBgwAAEBgZi//79GDt2LKKior
Bo0SLtukV53Yw5Z/O6e/cuXnvtNdSuXRsbNmyAmVlOumCKczsgIAAjR47UWbZy5Urs3btXZ5mxx2jMdTy3kv7MVSq
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Pregnancies</th>\n",
" <th>Glucose</th>\n",
" <th>BloodPressure</th>\n",
" <th>SkinThickness</th>\n",
" <th>Insulin</th>\n",
" <th>BMI</th>\n",
" <th>DiabetesPedigreeFunction</th>\n",
" <th>Age</th>\n",
" <th>Outcome</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
2024-10-12 12:38:44 +04:00
" <td>1.000000</td>\n",
" <td>73.000000</td>\n",
" <td>50.000000</td>\n",
" <td>10.000000</td>\n",
" <td>0.000000</td>\n",
" <td>23.000000</td>\n",
" <td>0.248000</td>\n",
" <td>21.000000</td>\n",
" <td>0</td>\n",
2024-10-11 18:41:52 +04:00
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
2024-10-12 12:38:44 +04:00
" <td>1.000000</td>\n",
" <td>84.000000</td>\n",
" <td>64.000000</td>\n",
" <td>23.000000</td>\n",
" <td>115.000000</td>\n",
" <td>36.900000</td>\n",
" <td>0.471000</td>\n",
" <td>28.000000</td>\n",
2024-10-11 18:41:52 +04:00
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
2024-10-12 12:38:44 +04:00
" <td>8.000000</td>\n",
" <td>133.000000</td>\n",
" <td>72.000000</td>\n",
2024-10-11 18:41:52 +04:00
" <td>0.000000</td>\n",
2024-10-12 12:38:44 +04:00
" <td>0.000000</td>\n",
" <td>32.900000</td>\n",
" <td>0.270000</td>\n",
" <td>39.000000</td>\n",
2024-10-11 18:46:40 +04:00
" <td>1</td>\n",
2024-10-11 18:41:52 +04:00
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
2024-10-12 12:38:44 +04:00
" <td>3.000000</td>\n",
" <td>106.000000</td>\n",
" <td>72.000000</td>\n",
2024-10-11 18:41:52 +04:00
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
2024-10-12 12:38:44 +04:00
" <td>25.800000</td>\n",
" <td>0.207000</td>\n",
" <td>27.000000</td>\n",
2024-10-11 18:46:40 +04:00
" <td>0</td>\n",
2024-10-11 18:41:52 +04:00
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
2024-10-12 12:38:44 +04:00
" <td>5.000000</td>\n",
" <td>88.000000</td>\n",
" <td>78.000000</td>\n",
" <td>30.000000</td>\n",
2024-10-11 18:46:40 +04:00
" <td>0.000000</td>\n",
2024-10-12 12:38:44 +04:00
" <td>27.600000</td>\n",
2024-10-11 18:46:40 +04:00
" <td>0.258000</td>\n",
2024-10-12 12:38:44 +04:00
" <td>37.000000</td>\n",
2024-10-11 18:41:52 +04:00
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
2024-10-12 12:38:44 +04:00
" <th>582</th>\n",
" <td>2.629250</td>\n",
" <td>113.651700</td>\n",
" <td>62.696600</td>\n",
" <td>36.303400</td>\n",
" <td>0.000000</td>\n",
" <td>33.632585</td>\n",
" <td>0.445897</td>\n",
" <td>27.191151</td>\n",
2024-10-11 18:41:52 +04:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
2024-10-12 12:38:44 +04:00
" <th>583</th>\n",
" <td>6.450045</td>\n",
" <td>106.641614</td>\n",
" <td>60.366637</td>\n",
" <td>25.008251</td>\n",
" <td>0.000000</td>\n",
" <td>27.150780</td>\n",
" <td>0.318640</td>\n",
" <td>28.266727</td>\n",
2024-10-11 18:41:52 +04:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
2024-10-12 12:38:44 +04:00
" <th>584</th>\n",
" <td>8.753291</td>\n",
" <td>118.116773</td>\n",
" <td>35.000000</td>\n",
" <td>0.000000</td>\n",
2024-10-11 18:46:40 +04:00
" <td>0.000000</td>\n",
2024-10-12 12:38:44 +04:00
" <td>23.728855</td>\n",
" <td>0.212378</td>\n",
" <td>34.986837</td>\n",
2024-10-11 18:41:52 +04:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
2024-10-12 12:38:44 +04:00
" <th>585</th>\n",
" <td>8.411659</td>\n",
" <td>160.786384</td>\n",
" <td>72.609702</td>\n",
" <td>40.652426</td>\n",
" <td>278.176681</td>\n",
" <td>41.212817</td>\n",
" <td>0.891405</td>\n",
" <td>32.780595</td>\n",
2024-10-11 18:41:52 +04:00
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
2024-10-12 12:38:44 +04:00
" <th>586</th>\n",
" <td>4.948382</td>\n",
" <td>130.526964</td>\n",
" <td>82.314330</td>\n",
" <td>36.788906</td>\n",
" <td>110.000000</td>\n",
" <td>36.689522</td>\n",
" <td>0.771453</td>\n",
" <td>38.895223</td>\n",
2024-10-11 18:41:52 +04:00
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
2024-10-12 12:38:44 +04:00
"<p>587 rows × 9 columns</p>\n",
2024-10-11 18:41:52 +04:00
"</div>"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
2024-10-12 12:38:44 +04:00
"0 1.000000 73.000000 50.000000 10.000000 0.000000 \n",
"1 1.000000 84.000000 64.000000 23.000000 115.000000 \n",
"2 8.000000 133.000000 72.000000 0.000000 0.000000 \n",
"3 3.000000 106.000000 72.000000 0.000000 0.000000 \n",
"4 5.000000 88.000000 78.000000 30.000000 0.000000 \n",
2024-10-11 18:41:52 +04:00
".. ... ... ... ... ... \n",
2024-10-12 12:38:44 +04:00
"582 2.629250 113.651700 62.696600 36.303400 0.000000 \n",
"583 6.450045 106.641614 60.366637 25.008251 0.000000 \n",
"584 8.753291 118.116773 35.000000 0.000000 0.000000 \n",
"585 8.411659 160.786384 72.609702 40.652426 278.176681 \n",
"586 4.948382 130.526964 82.314330 36.788906 110.000000 \n",
2024-10-11 18:41:52 +04:00
"\n",
" BMI DiabetesPedigreeFunction Age Outcome \n",
2024-10-12 12:38:44 +04:00
"0 23.000000 0.248000 21.000000 0 \n",
"1 36.900000 0.471000 28.000000 0 \n",
"2 32.900000 0.270000 39.000000 1 \n",
"3 25.800000 0.207000 27.000000 0 \n",
"4 27.600000 0.258000 37.000000 0 \n",
2024-10-11 18:41:52 +04:00
".. ... ... ... ... \n",
2024-10-12 12:38:44 +04:00
"582 33.632585 0.445897 27.191151 1 \n",
"583 27.150780 0.318640 28.266727 1 \n",
"584 23.728855 0.212378 34.986837 1 \n",
"585 41.212817 0.891405 32.780595 1 \n",
"586 36.689522 0.771453 38.895223 1 \n",
2024-10-11 18:41:52 +04:00
"\n",
2024-10-12 12:38:44 +04:00
"[587 rows x 9 columns]"
2024-10-11 18:41:52 +04:00
]
},
2024-10-12 12:38:44 +04:00
"execution_count": 353,
2024-10-11 18:41:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"\n",
"# Fixed seed so that a fresh Restart & Run All reproduces the same synthetic samples.\n",
"ada = ADASYN(random_state=42)\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.Outcome.value_counts())\n",
"\n",
"# Resample on the features only. Passing the whole frame as X would make the target\n",
"# itself a feature of ADASYN's nearest-neighbour search; the undersampling cell of this\n",
"# notebook already separates X and y the same way.\n",
"X_resampled, y_resampled = ada.fit_resample(\n",
"    df_train.drop(columns=[\"Outcome\"]), df_train[\"Outcome\"]\n",
")\n",
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
"df_train_adasyn[\"Outcome\"] = y_resampled  # re-attach the target as the last column\n",
"\n",
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
"print(df_train_adasyn.Outcome.value_counts())\n",
"\n",
"# Pie chart of the class balance after oversampling.\n",
"counts = df_train_adasyn['Outcome'].value_counts()\n",
"plt.figure(figsize=(2, 2))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов Outcome в тренировочной выборке после ADASYN')\n",
"plt.show()\n",
"\n",
"df_train_adasyn"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Балансировка данных андерсемплингом. Этот метод помогает сбалансировать выборку, уменьшая количество экземпляров класса большинства, чтобы привести его в соответствие с классом меньшинства."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 354,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка после undersampling: (322, 9)\n",
"Outcome\n",
"0 161\n",
"1 161\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtwAAADECAYAAACss/a2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+PklEQVR4nO3dd3gUVdsG8Ht30xtpkEJLTCD0RIPwUkIAIUCCCIg0C1VAiiAIin5IFxUUkCKK0oRXqsIrIh2khSIdKQZMEAKkQUIKu0l2z/dH3DWT3YRNWYYk9++6csHOTnnmzNmZZ8+cOasQQggQEREREZFFKOUOgIiIiIioImPCTURERERkQUy4iYiIiIgsiAk3EREREZEFMeEmIiIiIrIgJtxERERERBbEhJuIiIiIyIKYcBMRERERWZCV3AEQERGRadnZ2bh//z50Oh18fX3lDoeISogt3EREVOmsXbsWcXFxhterVq1CfHy8fAHl8/vvv6N///7w9PSEra0tfHx88PLLL8sdFj2l/Pz8MHDgQLnDeCpMmzYNCoVCMu1pKZ9iJdyrVq2CQqEw/NnZ2aFu3boYPXo0EhISLBUjUaVx9OhR9OjRA15eXrC1tYWfnx+GDx+Ov//+u8TrzMrKwrRp03Dw4MGyC5SonDt8+DAmTZqEuLg47Nq1C6NGjYJSKX8b1LZt29C6dWtcvnwZs2fPxp49e7Bnzx58/fXXcodGZhg4cCCcnJwKfd/JyempSP7oyStRl5IZM2bA398farUaR44cwVdffYUdO3bg0qVLcHBwKOsYiSqFRYsWYezYsXjmmWcwZswY+Pj44MqVK/j222+xYcMG7NixAy1btiz2erOysjB9+nQAQNu2bcs4aqLy6Z133kHbtm3h7+8PABg/fjx8fHxkjen+/fsYOnQoOnXqhE2bNsHGxkbWeIgqgmvXrj0VX6ZLlHB36dIFTZs2BQAMHToUHh4e+OKLL7Bt2zb069evTAMkqgyOHj2KcePGoXXr1ti5c6fki+tbb72FVq1aoVevXvjjjz/g5uYmY6REFUO9evVw48YNXLp0CZ6enggICJA7JKxcuRJqtRqrVq1isk2yyM3NhU6nq1D1z9bWVu4QAJRRH+727dsDAGJjYwHkfUt/99130bhxYzg5OcHFxQVdunTB+fPnjZZVq9WYNm0a6tatCzs7O/j4+KBnz564ceMGACAuLk7SjaXgX/4Wu4MHD0KhUGDDhg344IMP4O3tDUdHR3Tr1g23bt0y2vaJEyfQuXNnVKlSBQ4ODggPD8fRo0dN7mPbtm1Nbn/atGlG865duxahoaGwt7eHu7s7+vbta3L7Re1bfjqdDgsWLEDDhg1hZ2cHLy8vDB8+HA8ePJDM5+fnh65duxptZ/To0UbrNBX73LlzjcoUADQaDaZOnYrAwEDY2tqiZs2amDRpEjQajcmyyq9t27ZG65s9ezaUSiX++9//lqg85s2bh5YtW8LDwwP29vYIDQ3F5s2bTW5/7dq1aNasGRwcHODm5oY2bdpg9+7dknl+/fVXhIeHw9nZGS4uLnj++eeNYtu0aZPhmHp6euK1114z6u85cOBAScxubm5o27YtDh8+/NhymjlzJhQKBVavXm10lyggIACfffYZ7t69K7mtbKps9XH4+fkByCvTqlWrAgCmT59ust5evXoVvXv3RtWqVWFvb4+goCB8+OGHknWePXsWXbp0gYuLC5ycnPDCCy/g+PHjknn0Xc6OHDmCt99+G1WrVoWrqyuGDx+O7OxspKam4o033oCbmxvc3NwwadIkCCEk6zC3rptS0vIvuJypP31fX/1nbPfu3QgJCYGdnR0aNGiAH3/80Wi9qampGDduHGrWrAlbW1sEBgbi008/hU6nM8yjr/Pz5s0zWr5Ro0Ymz28FuwZFRUUZHVN9P0b9sXVxcYGHhwfGjh0LtVotWT43NxczZ85EQECAoRvTBx98YPT59vPzM5SHUqmEt7
c3+vTpY9TdKTMzExMmTDDsd1BQEObNmyc51oXtS/66W9zyAYDExEQMGTIEXl5esLOzQ3BwMFavXi2ZR7/OVatWwdHREc2bN0dAQABGjRoFhULx2Nv9Bc9T1tbW8PPzw8SJE5GdnW2YT/95+P333wtdV8HP8PHjxxESEoKPP/7YUH516tTBJ598Iqk3QPGOmzl1Vh9v/n7t+i/4Xbt2RW5urmG6OXW7MPnr0ePO9ebuI2Deebysr7llQV/uR48exfjx41G1alU4OjqiR48eSEpKkswrhMCsWbNQo0YNODg4oF27dvjjjz9Mrre4558FCxYYyvny5csA8u66NmzY0HD9bNq0qaRMb968iZEjRyIoKAj29vbw8PDAK6+8IqlD+fexpNeG/HHOnz8ftWvXhr29PcLDw3Hp0qXHlnHBPtzFKXOdTodp06bB19fXUOaXL18uUb/wMhmlRJ8ce3h4AAD++usvbN26Fa+88gr8/f2RkJCAr7/+GuHh4bh8+bLhSWutVouuXbti37596Nu3L8aOHYv09HTs2bMHly5dkrQ49OvXD5GRkZLtTp482WQ8s2fPhkKhwHvvvYfExEQsWLAAHTp0wLlz52Bvbw8A2L9/P7p06YLQ0FBMnToVSqUSK1euRPv27XH48GE0a9bMaL01atTAnDlzAAAZGRl46623TG57ypQp6N27N4YOHYqkpCQsWrQIbdq0wdmzZ+Hq6mq0zLBhwxAWFgYA+PHHH/HTTz9J3h8+fDhWrVqFQYMG4e2330ZsbCwWL16Ms2fP4ujRo7C2tjZZDsWRmppq2Lf8dDodunXrhiNHjmDYsGGoX78+Ll68iPnz5+PPP//E1q1bi7WdlStX4v/+7//w+eefo3///ibneVx5LFy4EN26dcOrr76K7OxsrF+/Hq+88gq2b9+OqKgow3zTp0/HtGnT0LJlS8yYMQM2NjY4ceIE9u/fj4iICAB5H7zBgwejYcOGmDx5MlxdXXH27Fns3LnTEJ++7J9//nnMmTMHCQkJWLhwIY4ePWp0TD09PTF//nwAwO3bt7Fw4UJERkbi1q1bJo89kNflY9++fQgLCzPc3i6oT58+GDZsGLZv347333//8QX9j6pVq+Krr77CW2+9hR49eqBnz54AgCZNmgAALly4gLCwMFhbW2PYsGHw8/PDjRs38PPPP2P27NkA8i66YWFhcHFxwaRJk2BtbY2vv/4abdu2xW+//YbmzZtLtjlmzBh4e3tj+vTpOH78OL755hu4urri2LFjqFWrFj7++GPs2LEDc+fORaNGjfDGG28Yli1tXS9J+Q8fPhwdOnQwvH799dclZaUvR72YmBj06dMHI0aMwIABA7By5Uq88sor2LlzJzp27Agg75iGh4cjPj4ew4cPR61atXDs2DFMnjwZd+/exYIFC4rcD3MdOnQIO3bsKPT93r17w8/PD3PmzMHx48fx5Zdf4sGDB1izZo1hnqFDh2L16tXo1asXJkyYgBMnTmDOnDm4cuWK0WcvLCwMw4YNg06nw6VLl7BgwQLcuXPH8KVGCIFu3brhwIEDGDJkCEJCQrBr1y5MnDgR8fHxhmNjCY8ePULbtm1x/fp1jB49Gv7+/ti0aRMGDhyI1NRUjB07ttBlr1+/juXLlxdre/rzlEajwa5duzBv3jzY2dlh5syZJd6HlJQUHDlyBEeOHMHgwYMRGhqKffv2YfLkyYiLi8OyZcsM8xbnuJlTZwu6desWOnfujHr16mHjxo2wsspLF8qiboeEhGDChAmSaWvWrMGePXsk08zdR3PO4/k9DdfcgsaMGQM3NzdMnToVcXFxWLBgAUaPHo0NGzYY5vnoo48wa9YsREZGIjIyEmfOnEFERITkix5Q/GOkv7MybNgw2Nrawt3dHcuXL8fbb7+NXr16Gb6oX7hwASdOnDCU6alTp3Ds2DH07dsXNWrUQFxcHL766iu0bdsWly9fNmo8Ks21AcirI+np6Rg1ahTUajUWLlyI9u
3b4+LFi/Dy8rJImU+ePBmfffYZXnzxRXTq1Annz59Hp06djBouzCKKYeXKlQKA2Lt3r0hKShK3bt0S69evFx4eHsL
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"# Which majority-class rows are discarded is random; pin the seed for reproducibility.\n",
"rus = RandomUnderSampler(random_state=42)\n",
"\n",
"# Undersample on the features only; the target is passed separately.\n",
"X_resampled, y_resampled = rus.fit_resample(df_train.drop(columns=['Outcome']), df_train['Outcome'])\n",
"\n",
"# Rebuild a single frame and re-attach the target column.\n",
"df_train_undersampled = pd.DataFrame(X_resampled)\n",
"df_train_undersampled['Outcome'] = y_resampled\n",
"\n",
"# Report the size and class balance of the resampled training set.\n",
"print(\"Обучающая выборка после undersampling: \", df_train_undersampled.shape)\n",
"print(df_train_undersampled['Outcome'].value_counts())\n",
"\n",
"# Pie chart of the class balance after undersampling.\n",
"counts = df_train_undersampled['Outcome'].value_counts()\n",
"plt.figure(figsize=(2, 2))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов Outcome в тренировочной выборке после Undersampling')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 2. Датасет: Данные по инсультам\n",
"https://www.kaggle.com/datasets/fedesoriano/stroke-prediction-dataset\n",
"##### О наборе данных: \n",
"По данным Всемирной организации здравоохранения (ВОЗ), инсульт является второй по значимости причиной смертности во всем мире, на его долю приходится примерно 11% от общего числа смертей.\n",
"Этот набор данных используется для прогнозирования вероятности инсульта у пациента на основе входных параметров, таких как пол, возраст, различные заболевания и статус курильщика. Каждая строка в данных содержит соответствующую информацию о пациенте.\n",
"\n",
"Атрибуты:\n",
"1) id: уникальный идентификатор\n",
"2) gender: \"Male\", \"Female\" или \"Other\"\n",
"3) age: возраст пациента\n",
"4) hypertension: 0, если у пациента нет артериальной гипертензии, 1, если у пациента есть артериальная гипертензия\n",
"5) heart_disease: 0, если у пациента нет сердечных заболеваний, 1, если у пациента есть сердечные заболевания\n",
"6) ever_married: \"No\" или \"Yes\"\n",
"7) work_type: \"children\", \"Govt_job\", \"Never_worked\", \"Private\" или \"Self-employed\"\n",
"8) Residence_type: \"Rural\" или \"Urban\"\n",
"9) avg_glucose_level: средний уровень глюкозы в крови\n",
"10) bmi: индекс массы тела\n",
"11) smoking_status: \"formerly smoked\", \"never smoked\", \"smokes\" или \"Unknown\" (\"Unknown\" означает, что информация о курении для пациента недоступна)\n",
"12) stroke: 1, если у пациента был инсульт, или 0, если нет.\n",
"##### Таким образом:\n",
"* Объект наблюдения - Реальные пациенты.\n",
"* Атрибуты: id, gender, age, hypertension, heart_disease, ever_married, work_type, Residence_type, avg_glucose_level, bmi, smoking_status, stroke.\n",
"* Проблемная область: Прогнозирование вероятности инсульта у пациента."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 355,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество колонок: 12\n",
"Колонки: id, gender, age, hypertension, heart_disease, ever_married, work_type, Residence_type, avg_glucose_level, bmi, smoking_status, stroke\n",
"\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 5110 entries, 0 to 5109\n",
"Data columns (total 12 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 id 5110 non-null int64 \n",
" 1 gender 5110 non-null object \n",
" 2 age 5110 non-null float64\n",
" 3 hypertension 5110 non-null int64 \n",
" 4 heart_disease 5110 non-null int64 \n",
" 5 ever_married 5110 non-null object \n",
" 6 work_type 5110 non-null object \n",
" 7 Residence_type 5110 non-null object \n",
" 8 avg_glucose_level 5110 non-null float64\n",
" 9 bmi 4909 non-null float64\n",
" 10 smoking_status 5110 non-null object \n",
" 11 stroke 5110 non-null int64 \n",
"dtypes: float64(3), int64(4), object(5)\n",
"memory usage: 479.2+ KB\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>ever_married</th>\n",
" <th>work_type</th>\n",
" <th>Residence_type</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>smoking_status</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>9046</td>\n",
" <td>Male</td>\n",
" <td>67.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>228.69</td>\n",
" <td>36.6</td>\n",
" <td>formerly smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>51676</td>\n",
" <td>Female</td>\n",
" <td>61.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>202.21</td>\n",
" <td>NaN</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>31112</td>\n",
" <td>Male</td>\n",
" <td>80.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>105.92</td>\n",
" <td>32.5</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>60182</td>\n",
" <td>Female</td>\n",
" <td>49.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>171.23</td>\n",
" <td>34.4</td>\n",
" <td>smokes</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1665</td>\n",
" <td>Female</td>\n",
" <td>79.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>174.12</td>\n",
" <td>24.0</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id gender age hypertension heart_disease ever_married \\\n",
"0 9046 Male 67.0 0 1 Yes \n",
"1 51676 Female 61.0 0 0 Yes \n",
"2 31112 Male 80.0 0 1 Yes \n",
"3 60182 Female 49.0 0 0 Yes \n",
"4 1665 Female 79.0 1 0 Yes \n",
"\n",
" work_type Residence_type avg_glucose_level bmi smoking_status \\\n",
"0 Private Urban 228.69 36.6 formerly smoked \n",
"1 Self-employed Rural 202.21 NaN never smoked \n",
"2 Private Rural 105.92 32.5 never smoked \n",
"3 Private Urban 171.23 34.4 smokes \n",
"4 Self-employed Rural 174.12 24.0 never smoked \n",
"\n",
" stroke \n",
"0 1 \n",
"1 1 \n",
"2 1 \n",
"3 1 \n",
"4 1 "
]
},
2024-10-12 12:38:44 +04:00
"execution_count": 355,
2024-10-11 18:41:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"# Load the comma-separated stroke dataset from the relative path below.\n",
"df = pd.read_csv(\".//static//csv//stroke.csv\", sep=\",\")\n",
"\n",
"# Short summary: column count, then the column names on one line.\n",
"column_names = df.columns\n",
"print('Количество колонок: ' + str(column_names.size))\n",
"print('Колонки: ' + ', '.join(column_names) + '\\n')\n",
"\n",
"df.info()  # dtypes and non-null counts per column\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Получение сведений о пропущенных данных\n",
"Типы пропущенных данных:\n",
"\n",
"- None - представление пустых данных в Python\n",
"- NaN - представление пустых данных в Pandas\n",
"- '' - пустая строка"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 356,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0\n",
"gender 0\n",
"age 0\n",
"hypertension 0\n",
"heart_disease 0\n",
"ever_married 0\n",
"work_type 0\n",
"Residence_type 0\n",
"avg_glucose_level 0\n",
"bmi 201\n",
"smoking_status 0\n",
"stroke 0\n",
"dtype: int64\n",
"\n",
"id False\n",
"gender False\n",
"age False\n",
"hypertension False\n",
"heart_disease False\n",
"ever_married False\n",
"work_type False\n",
"Residence_type False\n",
"avg_glucose_level False\n",
"bmi True\n",
"smoking_status False\n",
"stroke False\n",
"dtype: bool\n",
"\n",
"id процент пустых значений: %0.00\n",
"gender процент пустых значений: %0.00\n",
"age процент пустых значений: %0.00\n",
"hypertension процент пустых значений: %0.00\n",
"heart_disease процент пустых значений: %0.00\n",
"ever_married процент пустых значений: %0.00\n",
"work_type процент пустых значений: %0.00\n",
"Residence_type процент пустых значений: %0.00\n",
"avg_glucose_level процент пустых значений: %0.00\n",
"bmi процент пустых значений: %3.93\n",
"smoking_status процент пустых значений: %0.00\n",
"stroke процент пустых значений: %0.00\n"
]
}
],
"source": [
"# Boolean mask of missing cells, computed once and reused by every report below.\n",
"null_mask = df.isnull()\n",
"\n",
"# Number of missing values per column\n",
"print(null_mask.sum())\n",
"print()\n",
"\n",
"# Whether each column contains at least one missing value\n",
"print(null_mask.any())\n",
"print()\n",
"\n",
"# Share of missing values per column, in percent of all rows\n",
"for i, null_count in null_mask.sum().items():\n",
"    null_rate = null_count / len(df) * 100\n",
"    print(f\"{i} процент пустых значений: %{null_rate:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Пропущенные данные существуют. Необходимо заполнить пропуски медианными значениями.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Заполнение пропущенных данных:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 357,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(5110, 12)\n",
"id False\n",
"gender False\n",
"age False\n",
"hypertension False\n",
"heart_disease False\n",
"ever_married False\n",
"work_type False\n",
"Residence_type False\n",
"avg_glucose_level False\n",
"bmi False\n",
"smoking_status False\n",
"stroke False\n",
"dtype: bool\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>ever_married</th>\n",
" <th>work_type</th>\n",
" <th>Residence_type</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>smoking_status</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5105</th>\n",
" <td>18234</td>\n",
" <td>Female</td>\n",
" <td>80.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>83.75</td>\n",
" <td>27.7</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5106</th>\n",
" <td>44873</td>\n",
" <td>Female</td>\n",
" <td>81.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Urban</td>\n",
" <td>125.20</td>\n",
" <td>40.0</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5107</th>\n",
" <td>19723</td>\n",
" <td>Female</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>82.99</td>\n",
" <td>30.6</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5108</th>\n",
" <td>37544</td>\n",
" <td>Male</td>\n",
" <td>51.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>166.29</td>\n",
" <td>25.6</td>\n",
" <td>formerly smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5109</th>\n",
" <td>44679</td>\n",
" <td>Female</td>\n",
" <td>44.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Govt_job</td>\n",
" <td>Urban</td>\n",
" <td>85.28</td>\n",
" <td>26.2</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id gender age hypertension heart_disease ever_married \\\n",
"5105 18234 Female 80.0 1 0 Yes \n",
"5106 44873 Female 81.0 0 0 Yes \n",
"5107 19723 Female 35.0 0 0 Yes \n",
"5108 37544 Male 51.0 0 0 Yes \n",
"5109 44679 Female 44.0 0 0 Yes \n",
"\n",
" work_type Residence_type avg_glucose_level bmi smoking_status \\\n",
"5105 Private Urban 83.75 27.7 never smoked \n",
"5106 Self-employed Urban 125.20 40.0 never smoked \n",
"5107 Self-employed Rural 82.99 30.6 never smoked \n",
"5108 Private Rural 166.29 25.6 formerly smoked \n",
"5109 Govt_job Urban 85.28 26.2 Unknown \n",
"\n",
" stroke \n",
"5105 0 \n",
"5106 0 \n",
"5107 0 \n",
"5108 0 \n",
"5109 0 "
]
},
2024-10-12 12:38:44 +04:00
"execution_count": 357,
2024-10-11 18:41:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Demonstration only: a copy in which every gap is replaced by 0 (df itself is untouched here).\n",
"fillna_df = df.fillna(0)\n",
"\n",
"print(fillna_df.shape)\n",
"\n",
"print(fillna_df.isnull().any())\n",
"\n",
"# Median of the observed BMI values only: Series.median() skips NaN by default.\n",
"# It must be taken BEFORE any placeholder is written into the column, otherwise the\n",
"# placeholder zeros take part in the computation and drag the median down.\n",
"median_bmi = df[\"bmi\"].median()\n",
"\n",
"# Fill the gaps directly with that median; no 0 placeholder is used, so a genuine 0\n",
"# in the data would not be silently overwritten.\n",
"df[\"bmi\"] = df[\"bmi\"].fillna(median_bmi)\n",
"\n",
"df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Удалим наблюдения с пропусками:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 358,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(5110, 12)\n",
"id False\n",
"gender False\n",
"age False\n",
"hypertension False\n",
"heart_disease False\n",
"ever_married False\n",
"work_type False\n",
"Residence_type False\n",
"avg_glucose_level False\n",
"bmi False\n",
"smoking_status False\n",
"stroke False\n",
"dtype: bool\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>ever_married</th>\n",
" <th>work_type</th>\n",
" <th>Residence_type</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>smoking_status</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>5105</th>\n",
" <td>18234</td>\n",
" <td>Female</td>\n",
" <td>80.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>83.75</td>\n",
" <td>27.7</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5106</th>\n",
" <td>44873</td>\n",
" <td>Female</td>\n",
" <td>81.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Urban</td>\n",
" <td>125.20</td>\n",
" <td>40.0</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5107</th>\n",
" <td>19723</td>\n",
" <td>Female</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>82.99</td>\n",
" <td>30.6</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5108</th>\n",
" <td>37544</td>\n",
" <td>Male</td>\n",
" <td>51.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>166.29</td>\n",
" <td>25.6</td>\n",
" <td>formerly smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5109</th>\n",
" <td>44679</td>\n",
" <td>Female</td>\n",
" <td>44.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Govt_job</td>\n",
" <td>Urban</td>\n",
" <td>85.28</td>\n",
" <td>26.2</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id gender age hypertension heart_disease ever_married \\\n",
"5105 18234 Female 80.0 1 0 Yes \n",
"5106 44873 Female 81.0 0 0 Yes \n",
"5107 19723 Female 35.0 0 0 Yes \n",
"5108 37544 Male 51.0 0 0 Yes \n",
"5109 44679 Female 44.0 0 0 Yes \n",
"\n",
" work_type Residence_type avg_glucose_level bmi smoking_status \\\n",
"5105 Private Urban 83.75 27.7 never smoked \n",
"5106 Self-employed Urban 125.20 40.0 never smoked \n",
"5107 Self-employed Rural 82.99 30.6 never smoked \n",
"5108 Private Rural 166.29 25.6 formerly smoked \n",
"5109 Govt_job Urban 85.28 26.2 Unknown \n",
"\n",
" stroke \n",
"5105 0 \n",
"5106 0 \n",
"5107 0 \n",
"5108 0 \n",
"5109 0 "
]
},
2024-10-12 12:38:44 +04:00
"execution_count": 358,
2024-10-11 18:41:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Alternative strategy: drop every row that still contains a missing value.\n",
"# NOTE(review): bmi was already imputed in df by the previous cell, so this is expected to\n",
"# drop nothing (the stored output shows the shape unchanged at (5110, 12)).\n",
"dropna_df = df.dropna()\n",
"\n",
"print(dropna_df.shape)\n",
"\n",
"# Verify the frame produced by dropna() itself, not the earlier fillna_df copy.\n",
"print(dropna_df.isnull().any())\n",
"df.tail()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
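"##### Проверим выбросы и ограничим их границами межквартильного размаха (значения ниже Q1 − 1.5·IQR и выше Q3 + 1.5·IQR заменяются на соответствующую границу):"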
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 359,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Колонка age:\n",
" Есть выбросы: Нет\n",
" Количество выбросов: 0\n",
" Минимальное значение: 0.08\n",
" Максимальное значение: 82.0\n",
" 1-й квартиль (Q1): 25.0\n",
" 3-й квартиль (Q3): 61.0\n",
"\n",
"Колонка avg_glucose_level:\n",
2024-10-11 18:46:40 +04:00
" Есть выбросы: Да\n",
" Количество выбросов: 627\n",
2024-10-11 18:41:52 +04:00
" Минимальное значение: 55.12\n",
2024-10-11 18:46:40 +04:00
" Максимальное значение: 271.74\n",
2024-10-11 18:41:52 +04:00
" 1-й квартиль (Q1): 77.245\n",
" 3-й квартиль (Q3): 114.09\n",
"\n",
"Колонка bmi:\n",
2024-10-11 18:46:40 +04:00
" Есть выбросы: Да\n",
" Количество выбросов: 126\n",
" Минимальное значение: 10.3\n",
" Максимальное значение: 97.6\n",
2024-10-11 18:41:52 +04:00
" 1-й квартиль (Q1): 23.8\n",
" 3-й квартиль (Q3): 32.8\n",
"\n"
]
}
],
"source": [
"# Columns checked for outliers with the 1.5 * IQR rule.\n",
"numeric_columns = ['age', 'avg_glucose_level', 'bmi']\n",
"for column in numeric_columns:\n",
"    if pd.api.types.is_numeric_dtype(df[column]):  # only numeric columns can be processed\n",
"        q1 = df[column].quantile(0.25)  # first quartile (Q1)\n",
"        q3 = df[column].quantile(0.75)  # third quartile (Q3)\n",
"        iqr = q3 - q1  # interquartile range (IQR)\n",
"\n",
"        # Fences beyond which a value is treated as an outlier\n",
"        lower_bound = q1 - 1.5 * iqr\n",
"        upper_bound = q3 + 1.5 * iqr\n",
"\n",
"        # Count the outliers from a boolean mask (strictly outside the fences)\n",
"        is_outlier = (df[column] < lower_bound) | (df[column] > upper_bound)\n",
"        outlier_count = int(is_outlier.sum())\n",
"\n",
"        print(f\"Колонка {column}:\")\n",
"        print(f\"  Есть выбросы: {'Да' if outlier_count > 0 else 'Нет'}\")\n",
"        print(f\"  Количество выбросов: {outlier_count}\")\n",
"        print(f\"  Минимальное значение: {df[column].min()}\")\n",
"        print(f\"  Максимальное значение: {df[column].max()}\")\n",
"        print(f\"  1-й квартиль (Q1): {q1}\")\n",
"        print(f\"  3-й квартиль (Q3): {q3}\\n\")\n",
"\n",
"        # Clamp the outliers to the fences: values below the lower fence become the lower\n",
"        # fence, values above the upper fence become the upper fence. Vectorized clip()\n",
"        # replaces the per-element apply(lambda ...) and leaves NaN untouched, as before.\n",
"        df[column] = df[column].clip(lower=lower_bound, upper=upper_bound)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Постараемся выявить зависимости Stroke от остальных колонок:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Разобьем наш набор на выборки относительно параметра Stroke:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 360,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAIjCAYAAAAJPAAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABGQklEQVR4nO3de1gUZf8/8PcCy3JcBOWoHD2fLTRFNEVR1DyT5aGvh3zMCk1FM6kUMfNQmVqhphk9PWX5Jcv0UUtDxBOaoeBZ0VDyAIoGCMhyun9/9GO/rgvILIsL4/t1XVzXzj33zHx2ZN4O987OKIQQAkREJDtmpi6AiIhqBwOeiEimGPBERDLFgCcikikGPBGRTDHgiYhkigFPRCRTDHgiIpliwBMRyRQDnojIAAqFAtOmTTN1GVViwFfD5cuXMXXqVPj5+cHKygpqtRqBgYFYvXo17t+/b+ryiJ44Bw8exMCBA9G4cWNYWVnBy8sLQ4YMwaZNm7R9CgoKsHDhQuzbt890hZqYhakLqOt27NiBUaNGQaVSYfz48WjXrh2Kiopw8OBBvPnmmzhz5gzWr19v6jKJnhixsbF48cUX0alTJ8yYMQOOjo5IS0vD/v37sWHDBowdOxbAPwEfFRUFAOjdu7cJKzYdBnwV0tLSMHr0aHh7e2Pv3r1wd3fXzgsLC8OlS5ewY8cOE1ZI9ORZuHAh2rRpgyNHjsDS0lJn3q1btwxeb35+PmxtbWtaXt0iqFKvvvqqACAOHTpUrf4ARFhYmPjmm29EixYthEqlEk8//bRISEjQ63vt2jUxadIk4eLiIiwtLUWbNm3Exo0bK1xvZGSkAKD306tXL51+vXr1Em3bttVb/sMPPxQARFpamk77zp07RY8ePYSNjY2ws7MTgwYNEqdPn9Zb/ty5cyI0NFQ4OjoKlUol/P39xc8///zI/XH+/HkRFBQkXF1dhaWlpWjSpImYOnWquHPnjrZPfHy8ACBiY2P1lre1tRUTJkzQTt+5c0fMnj1btGvXTtja2gp7e3sxYMAAkZycXOH+epi3t7fO+oQQ4u+//xYzZswQTZo0EZaWlqJp06Zi2bJlorS0VNsnLS1NABAffvih3jrbtm2r8+9Q/n7i4+N1+g0aNEgAEJGRkTrtUn4PHvbll1+KoKAg4ezsLCwtLUXr1q3FmjVr9PqVlpaKyMhI4e7uLqytrUXv3r3FmTNnDN4flfH29q7w97T850HFxcVi0aJFws/PT1haWgpvb28REREhCgsLH7kdlUolJk6cWGWf8n+zh3/K9/+ECROEra2tuHTpkhg4cKCws7MTw4YNE0IIkZeXJ8LDw7X7oEWLFuLDDz8UZWVlOtsoP94f9N577wmFQiE++eQTbVt1j7PawDP4Kmzfvh1+fn7o3r17tZdJSEjA5s2b8cYbb0ClUmHNmjUYMGAAfv/9d7Rr1w4AkJmZiW7dumk/pHF2dsauXbswefJk5ObmYubMmRWue+3atbCzswMARERE1Oi9/ec//8GECRMQEhKC5cuXo6CgAGvXrkWPHj1w4sQJ+Pj4AADOnDmDwMBANG7cGPPmzYOtrS3+93//F8OHD8eWLVswYsSISreRn5+PJk2aYMiQIVCr1Th9+jSio6Nx/fp1bN++XXLNf/75J7Zu3YpRo0bB19cXmZmZ+Pzzz9GrVy+cPXsWHh4ektZXUFCAXr164fr165g6dSq8vLxw+PBhRERE4ObNm1i1apXkGiuyf/9+7Ny5U6/d0N+DcmvXrkXbtm0xdOhQWFhYYPv27Xj99ddRVlaGsLAwbb+IiAh88MEHGDJkCEJCQpCSkoKQkBAUFhbqrM8Y+6NTp06YPXu2TtvXX3+NPXv26LT961//wr///W88//zzmD17No4ePYqlS5fi3Llz+Omnn6rchre3N+Li4nDt2jU0adKkwj7Ozs5Yu3YtXnvtNYwYMQIjR44EAHTo0EHbp6SkBCEhIejRowc++ugj2N
jYQAiBoUOHIj4+HpMnT0anTp3w66+/4s0338T169excuXKSut69913sWTJEnz++eeYMmUKgOofZ7Xmsfw3Ug/l5OQIANr/1asD//8s4Y8//tC2Xb16VVhZWYkRI0Zo2yZPnizc3d1FVlaWzvKjR48WDg4OoqCgQKf97bffFgB0+j985ihE9c/g7927Jxo0aCCmTJmi0y8jI0M4ODjotPft21e0b99e58yqrKxMdO/eXTRv3vwRe0Tf66+/Luzs7LTTUs7gCwsL9c4k09LShEqlEosWLdK2RUVFCQB6Z1wPn7G+9957wtbWVly8eFGn37x584S5ublIT0/XbgM1OIPv2rWrGDhwoN4ZvNTfg4dVND8kJET4+flppzMyMoSFhYUYPny4Tr+FCxcKAAbtj8p4e3uL5557Tq89LCxM5ww+OTlZABD/+te/dPrNmTNHABB79+6tcjsbN24UAISlpaUICgoS8+fPFwcOHND73bh9+3aFfzUJ8c8ZPAAxb948nfatW7cKAGLx4sU67c8//7xQKBTi0qVL2jY8cAY/e/ZsYWZmJr766ivtfCnHWW3hVTSVyM3NBQDY29tLWi4gIAD+/v7aaS8vLwwbNgy//vorSktLIYTAli1bMGTIEAghkJWVpf0JCQlBTk4Ojh8/rrPO8jMtKyurR26/tLRUZ51ZWVkoKCjQ6bNnzx5kZ2djzJgxOv3Mzc3RtWtXxMfHAwDu3r2LvXv34oUXXsC9e/e0/e7cuYOQkBCkpqbi+vXrj6wpJycHmZmZiIuLw44dO/Dss8/q9Xlw/eU/D1OpVDAzM9O+zzt37sDOzg4tW7bU2WcuLi4AgGvXrlVZV2xsLHr27AlHR0ed7QYHB6O0tBT79+/X6V9QUKBXY2lpaZXb+PHHH3Hs2DEsW7ZMp92Q34OHWVtba1/n5OQgKysLvXr1wp9//omcnBwAQFxcHEpKSvD666/rLDt9+vQa7w9Dlf81Ex4ertNefub/qM+1Xn75Zfzyyy/o3bs3Dh48iPfeew89e/ZE8+bNcfjwYUm1vPbaa3q1mZub44033tCrTQiBXbt26bQLITBt2jSsXr0a33zzDSZMmKCdV93jrDZxiKYSarUawD/BI0Xz5s312lq0aIGCggLcvn0bZmZmyM7Oxvr16yu9+ubhD4qysrKgVCphY2PzyO2fP38ezs7OVfZJTU0FAPTp06fC+eXv/dKlSxBCYP78+Zg/f36ltTZu3LjK7YWEhODo0aMAgAEDBmDz5s16fV5++eUq1wEAZWVlWL16NdasWYO0tDSdcG3YsKH2dUBAABQKBSIiIrB48WLtsFZZWZnO+lJTU3Hy5MlK99fD/w6RkZGIjIzU6+fq6lrh8qWlpXj77bcxbtw4naEBALh9+7bk34OHHTp0CJGRkUhMTNT7TzwnJwcODg64evUqAKBZs2Y6852cnODo6KjTJnV/GOrq1aswMzPTq8nNzQ0NGjTQ1lyVkJAQhISEoKCgAElJSdi8eTPWrVuHwYMH4/z589r/5KtiYWGhN8Rz9epVeHh46J3YtW7dWjv/QV9//TXy8vKwdu1ajBkzRmdedY+z2sSAr4RarYaHhwdOnz5t1PWWh8xLL72k87/9gx4OgytXrsDLywsKheKR6/fx8cGGDRt02mJjY3VCpLyG//znP3Bzc9Nbh4WFhU6/OXPmICQkpMLtPXyQVuTTTz9FVlYWzp49i6VLl+LVV1/FN998o9NnwYIF6Nmzp07bkCFDdKaXLFmC+fPn4+WXX8Z7770HJycnmJmZYebMmTrh3bFjR0RGRiIqKgrffvttpXWVlZWhX79+mDt3boXzW7RooTP9yiuvYNSoUTpt5WOtFdm4cSOuXLmCX3/9tcJtA9J+Dx50+fJl9O3bF61atcLHH38MT09PWFpaYufOnVi5cqXef2bVIXV/1FR1fp8fxcbGBj179kTPnj3RqFEjREVFYdeuXZXu0wc9+BehoQIDA5GcnIzPPvsML7zwAp
ycnLTzqnuc1SYGfBUGDx6M9evXIzExEQEBAdVapvx/7QddvHgRNjY22jMje3t7lJaWIjg4+JHrKykpQUpKCgYMGFC
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAbAAAAIjCAYAAABiVE5aAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXs0lEQVR4nO3dd1QU1/8+8GdBWDqISjNIsWOPxoaxIthFjZXEEqMmERtWkihi7EkUo8aamOQTjV2jsURjL4QYVCzYg12wEEBA2nJ/f/hjvq5LG9gVRp/XOZzD3rkz+94y++zM3J1RCSEEiIiIFMaopAsgIiIqCgYYEREpEgOMiIgUiQFGRESKxAAjIiJFYoAREZEiMcCIiEiRGGBERKRIDDAiIlIkBhiRwkyfPh0qlaqky3jlWrdujdatW5d0GQBKVy2lxc2bN6FSqfD111+/svs0WIDduHEDI0aMgKenJ8zMzGBjYwNvb28sWrQIz549M9TdEhG9kXbu3IlWrVrBwcEBFhYW8PT0RJ8+fbB3716pz/379zF9+nScPXu25ArVozKGWOiuXbvQu3dvqNVqDBw4ELVr10ZGRgaOHz+OiRMn4uLFi1i5cqUh7pqI6I3z9ddfY+LEiWjVqhWCg4NhYWGB69ev488//8T69evRoUMHAM8DLDQ0FO7u7qhfv37JFq0Heg+wmJgY9OvXD25ubjh48CCcnZ2laSNHjsT169exa9cufd8tEdEbKSsrC19++SXat2+Pffv26Ux/+PBhkZedmpoKCwuL4pRnUHrfhTh//nwkJyfj+++/1wqvHFWqVMGYMWOk2yqVCoGBgVi7di2qV68OMzMzNGzYEEePHtWZ9969e/jwww/h6OgItVqNWrVq4Ycffsi1jpzjBC//vbzfunXr1qhdu7bO/F9//TVUKhVu3ryp1b5nzx68++67sLS0hLW1NTp37oyLFy/qzH/58mW89957sLe3h5mZGRo1aoQdO3bkWuuLrly5grZt28LJyQlqtRqurq74+OOPER8fL/U5fPgwVCoVNm/erDO/lZUVBg8eLN2Oj4/HhAkTUKdOHVhZWcHGxgYdO3ZEVFRUrs/Xy9zd3bWWBwAJCQkYO3YsXF1doVarUaVKFcybNw/Z2dlSn/z2h9euXVvrdch5PIcPH9bq17lzZ6hUKkyfPl2rXc774GVr1qxB27Zt4eDgALVaDS8vLyxbtkyrT5cuXeDp6Znr/M2aNUOjRo2k28+ePcPo0aNRvnx5WFtbo1u3brh3716udRfkyZMn+OCDD2BjYwM7OzsMGjQIUVFRUKlU+PHHH/OcL+e5zq1PXs/f0KFD4eLiArVaDQ8PD3zyySfIyMiQ+vz777/o3bs37O3tYWFhgaZNm+b6xXPx4sWoVasWLCwsULZsWTRq1Ajr1q3Tub+ivl4FSU9PR0hICKpUqSKtL5MmTUJ6errUp3bt2mjTpo3OvNnZ2ahYsSLee+89rbawsDDUqlULZmZmcHR0xIgRI/Dff/8Vqb7cPoNy/tzd3bX6pqSkYPz48dJ6Vb16dXz99dco6IIhjx8/RlJSEry9vXOd7uDgAOD5evbOO+8AAIYMGSLVkfO+yfksjIyMRMuWLWFhYYHPPvsMwPMQHDp0KBwdHWFmZoZ69erhp59+KvDxCyEwfPhwmJqaYuvWrVL7L7/8goYNG8Lc3Bz29vbo168f7ty5U+DyXqb3LbCdO3fC09MTzZs3L/Q8R44cwYYNGzB69Gio1Wp899136NChA/7++28pXOLi4tC0aVMp8CpUqIA9e/Zg6NChSEpKwtixY3Nd9rJly2BlZQUACA4OLtZj+9///odBgwbBz88P8+bNQ2pqKpYtW4YWLVrgzJkz0hvy4sWL8Pb2RsWKFTFlyhRYWlpi48aN8Pf3x5YtW9CjR4887yMlJQVvvfUWunbtChsbG1y4cAFLly7FvXv3sHPnTtk1//vvv9i+fTt69+4NDw8PxMXFYcWKFW
jVqhWio6Ph4uIia3mpqalo1aoV7t27hxEjRqBSpUo4efIkgoOD8eDBA4SFhcmuMTdHjx7F7t27ddqL+j7IsWzZMtSqVQvdunVDmTJlsHPnTnz66afIzs7GyJEjAQB9+/bFwIEDcerUKWmFB4Bbt27hr7/+wldffSW1DR48GBs3bsQHH3yApk2b4siRI+jcubPsx5udnY2uXbvi77//xieffIIaNWrgt99+w6BBg2QvKz/3799H48aNkZCQgOHDh6NGjRq4d+8eNm/ejNTUVJiamiIuLg7NmzdHamoqRo8ejXLlyuGnn35Ct27dsHnzZun9u2rVKowePRrvvfcexowZg7S0NJw7dw4REREYMGAAgOK/XvnJzs5Gt27dcPz4cQwfPhw1a9bE+fPnsXDhQly9ehXbt28H8Pz1nD59OmJjY+Hk5CTNf/z4cdy/fx/9+vWT2kaMGIEff/wRQ4YMwejRoxETE4MlS5bgzJkzOHHiBExMTGTX2b59ewwcOFCr7ZtvvtEKRSEEunXrhkOHDmHo0KGoX78+/vjjD0ycOBH37t3DwoUL81y+g4MDzM3NsXPnTowaNQr29va59qtZsyZmzJiBadOmYfjw4Xj33XcBQOuz+smTJ+jYsSP69euH999/H46Ojnj27Blat26N69evIzAwEB4eHti0aRMGDx6MhIQErQ2SF2k0Gnz44YfYsGEDtm3bJq0Xs2bNwtSpU9GnTx989NFHePToERYvXoyWLVvizJkzsLOzK9TzmvPE6U1iYqIAILp3717oeQAIAOKff/6R2m7duiXMzMxEjx49pLahQ4cKZ2dn8fjxY635+/XrJ2xtbUVqaqpW+2effSYAaPWvVauWaNWqlVa/Vq1aiVq1aunU9dVXXwkAIiYmRgghxNOnT4WdnZ0YNmyYVr/Y2Fhha2ur1d6uXTtRp04dkZaWJrVlZ2eL5s2bi6pVqxbwjOj69NNPhZWVlXT70KFDAoDYtGmTTl9LS0sxaNAg6XZaWprQaDRafWJiYoRarRYzZsyQ2kJDQwUAkZ2drdXXzc1Na3lffvmlsLS0FFevXtXqN2XKFGFsbCxu374t3QcA8dVXX+nU+PLrkPN4Dh06JLU1adJEdOzYUQAQISEhUrvc98HLcpvu5+cnPD09pduJiYlCrVaL8ePHa/WbP3++UKlU4tatW0IIISIjIwUAMXbsWK1+gwcP1qm7IFu2bBEARFhYmNSm0WhE27ZtBQCxZs0aqT0kJES8uOrmPNcv9snxch0DBw4URkZG4tSpUzp9c177sWPHCgDi2LFj0rSnT58KDw8P4e7uLr2funfvnuu686Livl4vatWqldb75n//+58wMjLSqlMIIZYvXy4AiBMnTgghhLhy5YoAIBYvXqzVL2e9yqnh2LFjAoBYu3atVr+9e/fqtL9cS14AiJEjR+q0d+7cWbi5uUm3t2/fLgCImTNnavV77733hEqlEtevX8/3fqZNmyYACEtLS9GxY0cxa9YsERkZqdPv1KlTeb5XWrVqJQCI5cuXa7WHhYUJAOKXX36R2jIyMkSzZs2ElZWVSEpKEkJor/OZmZmib9++wtzcXPzxxx/SfDdv3hTGxsZi1qxZWvdx/vx5UaZMGZ32guh1F2JSUhIAwNraWtZ8zZo1Q8OGDaXblSpVQvfu3fHHH39Ao9FACIEtW7aga9euEELg8ePH0p+fnx8SExNx+vRprWWmpaUBAMzMzAq8f41Go7XMx48fIzU1VavP/v37kZCQgP79+2v1MzY2RpMmTXDo0CEAz3fZHTx4EH369MHTp0+lfk+ePIGfnx+uXbuGe/fuFVhTYmIi4uLicODAAezatQstW7bU6fPi8nP+XqZWq2FkZCQ9zidPnsDKygrVq1fXes5ydjPcvXs337o2bdqEd999F2XLltW6Xx8fH2g0Gp1dv6mpqTo1ajSafO9j69atOHXqFObOnavVXpT3wcvMzc2l/xMTE/H48WO0atUK//77LxITEw
FA2s26ceNGrd03GzZsQNOmTVGpUiUAkEZ3ffrpp1r3MWrUqHxryM3evXthYmKCYcOGSW1GRkbSVqE+ZGdnY/v27ej
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAXgAAAIjCAYAAAAJPAAPAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABA5UlEQVR4nO3deVQT5/4/8HdACCAERVmVRcR9v2oVcRdFWheULm4/N67aFq2WWiutilivW7XqtdSqbbWLXr1qtYv7ilrR26pAlWqVorgAohQQkLA9vz96yNcYlkwIBsb365yck3nmmZlPYubt8GQyoxBCCBARkeyYmboAIiKqHgx4IiKZYsATEckUA56ISKYY8EREMsWAJyKSKQY8EZFMMeCJiGSKAU9EJFMMeKJaSqFQYPr06dW+nZs3b0KhUGDLli3Vvq3aZOHChVAoFHjw4IGpSykXA14PiYmJmDZtGry9vWFlZQWVSgU/Pz+sXbsWjx8/NnV5RM+dnJwcREREoG3btqhbty4aNGiAjh07YubMmbh3756m3/79+7Fw4ULTFWpidUxdQE23b98+vPLKK1AqlRg/fjzatm2LgoICnDlzBu+++y6uXLmCjRs3mrpMomrj6emJx48fw8LCwtSlAAAKCwvRu3dvXL16FRMmTMCMGTOQk5ODK1euYNu2bRgxYgTc3NwA/B3wUVFRz23IM+ArkJSUhFGjRsHT0xPHjx+Hq6urZl5oaChu3LiBffv2mbBCouqnUChgZWVl6jI09u7di0uXLmHr1q0YM2aM1rz8/HwUFBQYtN6ioiKUlJTA0tLSGGXWCByiqcCKFSuQk5ODL774QivcS/n4+GDmzJma6dIx0a1bt6JFixawsrJC586dcerUKZ1l7969i8mTJ8PZ2RlKpRJt2rTBl19+WWYdpWN9Tz/69u2r1a9v375o27atzvIrV66EQqHAzZs3tdoPHDiAXr16oW7durCzs8NLL72EK1eu6Cx/9epVvPzyy3BwcICVlRW6dOmCH374ocxan3Tt2jX0798fLi4uUCqVcHd3x+uvv46MjAxNn5MnT0KhUGDXrl06y9va2mLixIma6YyMDMyePRvt2rWDra0tVCoVAgMDERcXV+b79TQvLy+t9QFAZmYmZs2aBXd3dyiVSvj4+GD58uUoKSnR9Ckdg165cqXOOtu2bav171D6ek6ePKnV76WXXoJCodA5kpTyOShPZZ+30vfjjz/+wLhx42Bvbw9HR0fMnz8fQgjcvn0bw4cPh0qlgouLC1atWqW1vL5j8Fu2bCnzc1r6ePq1X7p0CYGBgVCpVLC1tcWAAQNw7ty5Sl9vYmIiAMDPz09nXukQKgBMnDgRUVFRAKBVx5OvaeXKlVizZg2aNm0KpVKJhIQEAMDx48c1+0a9evUwfPhw/P7775XWduvWLfj4+KBt27ZIS0sDoN9nrLrwCL4CP/74I7y9vdGjRw+9l4mOjsaOHTvw1ltvQalU4tNPP8XgwYPxv//9TxO+aWlp6N69u+Y/BEdHRxw4cAAhISHIzs7GrFmzylz3+vXrYWtrCwAIDw+v0mv75ptvMGHCBAQEBGD58uXIy8vD+vXr0bNnT1y6dAleXl4AgCtXrsDPzw+NGjXC3LlzUbduXfz3v/9FUFAQdu/ejREjRpS7jdzcXDRu3BhDhw6FSqXC5cuXERUVhbt37+LHH3+UXPOff/6JvXv34pVXXkGTJk2QlpaGDRs2oE+fPkhISND8Wa6vvLw89OnTB3fv3sW0adPg4eGBs2fPIjw8HCkpKVizZo3kGsty6tQp7N+/X6fd0M/Bk/T5vJV67bXX0KpVKyxbtgz79u3D4sWL4eDggA0bNqB///5Yvnw5tm7ditmzZ6Nr167o3bu3Qa930aJFaNKkiWY6JycHb7zxhlafK1euoFevXlCpVJgzZw4sLCywYcMG9O3bF9HR0ejWrVu56/f09AQAfP3115g3b16Z/5kDwLRp03
Dv3j0cOXIE33zzTZl9Nm/ejPz8fEydOhVKpRIODg44evQoAgMD4e3tjYULF+Lx48dYt24d/Pz8cPHiRc2+8bTExET0798fDg4OOHLkCBo2bPjMPmPlElSmrKwsAUAMHz5c72UACADi119/1bTdunVLWFlZiREjRmjaQkJChKurq3jw4IHW8qNGjRL29vYiLy9Pq/39998XALT6t2nTRvTp00erX58+fUSbNm106vroo48EAJGUlCSEEOLRo0eiXr16YsqUKVr9UlNThb29vVb7gAEDRLt27UR+fr6mraSkRPTo0UM0a9askndE15tvvilsbW010ydOnBAAxM6dO3X61q1bV0yYMEEznZ+fL4qLi7X6JCUlCaVSKRYtWqRpi4yMFABESUmJVl9PT0+t9X344Yeibt264o8//tDqN3fuXGFubi6Sk5M12wAgPvroI50an/53KH09J06c0LR169ZNBAYGCgAiIiJC0y71c/A0fT9vERERAoCYOnWqpq2oqEg0btxYKBQKsWzZMk37X3/9JaytrbXep9LXv3nz5grr2bx5swAgfvnlF6329PR0ndceFBQkLC0tRWJioqbt3r17ws7OTvTu3bvC7eTl5YkWLVoIAMLT01NMnDhRfPHFFyItLU2nb2hoqCgr5kpfk0qlEvfv39ea17FjR+Hk5CQePnyoaYuLixNmZmZi/PjxmrbS9zU9PV38/vvvws3NTXTt2lVkZGRo+uj7GasuHKIpR3Z2NgDAzs5O0nK+vr7o3LmzZtrDwwPDhw/HoUOHUFxcDCEEdu/ejaFDh0IIgQcPHmgeAQEByMrKwsWLF7XWmZ+fDwB6jYMWFxdrrfPBgwfIy8vT6nPkyBFkZmZi9OjRWv3Mzc3RrVs3nDhxAsDfQyLHjx/Hq6++ikePHmn6PXz4EAEBAbh+/Tru3r1baU1ZWVlIS0vDsWPHsG/fvjKPDJ9cf+njaUqlEmZmZprX+fDhQ9ja2qJFixZa75mTkxMA4M6dOxXWtXPnTvTq1Qv169fX2q6/vz+Ki4t1hjry8vJ0aiwuLq5wG9999x1++eUXLFu2TKvdkM9BWSr7vD3pn//8p+a5ubk5unTpAiEEQkJCNO316tVDixYt8Oeff1a6bUMVFxfj8OHDCAoKgre3t6bd1dUVY8aMwZkzZzT7X1msra1x/vx5vPvuuwD+HhoKCQmBq6srZsyYAbVarXctwcHBcHR01EynpKQgNjYWEydOhIODg6a9ffv2GDhwYJl/iV2+fBl9+vSBl5cXjh49ivr162vmSf2MGRuHaMpROo736NEjScs1a9ZMp6158+bIy8tDeno6zMzMkJmZiY0bN5Z79s39+/e1ph88eAALCwvY2NhUuv2rV69qfWDLcv36dQBA//79y5xf+tpv3LgBIQTmz5+P+fPnl1tro0aNKtxeQEAAzp8/DwAYPHgwduzYodNn8uTJFa4DAEpKSrB27Vp8+umnSEpK0gqwBg0aaJ77+vpCoVAgPDwcixcv1gxrPT3mef36dcTHx5f7fj397xAREYGIiAidfs7OzmUuX1xcjPfffx9jx45F+/bttealp6dL/hyUpbLPm4uLi6bdw8NDq5+9vT2srKzQsGFDnfaHDx9Wum1DpaenIy8vDy1atNCZ16pVK5SUlOD27dto06ZNueuwt7fHihUrsGLFCty6dQvHjh3DypUr8cknn8De3h6LFy/Wq5Ynh5KAv8fQAZRb26FDh5Cbm4u6detq2ocOHQpnZ2ccOnRI81krJfUzZmwM+HKoVCq4ubnh8uXLRl1vaciMGzcOEyZMKLPP02Fw8+ZNeHh4lDvW+CQvLy9s2rRJq23nzp1aIVJawzfffKMVAKXq1Kmj1W/27NkICAgoc3s+Pj6V1rRu3To8ePAACQkJWLp0KV5//XV8++23Wn0WLFiAXr16abUNHTpUa3rJkiWYP38+Jk+ejA8//BAODg4wMzPDrFmztMK7Q4cOiIiIQGRkJLZu3VpuXS
UlJRg4cCDmzJlT5vzmzZtrTU+dOhWvvPKKVtuUKVPKXf8XX3yBmzdv4tChQ2VuG5D2Oagqc3NzvdqAv//CqC08PT0
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"\n",
"# Draw one chart per column listed in numeric_columns\n",
"for column in numeric_columns:\n",
"    plt.figure(figsize=(4, 6))  # a fresh 4x6 figure for every column\n",
"\n",
"    # A value-count bar chart is drawn for non-numeric columns and for numeric columns\n",
"    # that hold exactly the two values 0 and 1; thanks to short-circuiting the\n",
"    # uniqueness test is evaluated for numeric columns only.\n",
"    show_counts = not pd.api.types.is_numeric_dtype(df[column]) or (\n",
"        df[column].nunique() == 2 and set(df[column].unique()).issubset({0, 1})\n",
"    )\n",
"\n",
"    if show_counts:\n",
"        counts = df[column].value_counts()  # occurrences of each distinct value\n",
"        counts.plot(kind='bar', width=0.4)\n",
"        plt.title(f'Количество значений для {column}')\n",
"        plt.xlabel(column)\n",
"        plt.ylabel('Количество повторений')\n",
"    else:\n",
"        # Mean of the column within each stroke class\n",
"        grouped_data = df.groupby('stroke')[column].mean()\n",
"        plt.bar(grouped_data.index, grouped_data.values, alpha=0.5, width=0.4)\n",
"        plt.title(f'Среднее значение {column} по Stroke')\n",
"        plt.xlabel('stroke (0 = нет, 1 = да)')\n",
"        plt.ylabel(f'Среднее значение {column}')\n",
"        plt.xticks([0, 1])  # ticks at the two class labels\n",
"        plt.grid(axis='y')\n",
"\n",
"    plt.show()"
2024-10-11 18:41:52 +04:00
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 361,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [],
"source": [
"# Функция для создания выборок\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def split_stratified_into_train_val_test(\n",
"    df_input,\n",
"    stratify_colname=\"y\",\n",
"    frac_train=0.6,\n",
"    frac_val=0.15,\n",
"    frac_test=0.25,\n",
"    random_state=None,\n",
"):\n",
"    \"\"\"Split a dataframe into stratified train, validation and test subsets.\n",
"\n",
"    Args:\n",
"        df_input: dataframe to split; every column is kept in each subset.\n",
"        stratify_colname: name of the column to stratify on.\n",
"        frac_train: fraction of rows for the training subset.\n",
"        frac_val: fraction of rows for the validation subset.\n",
"        frac_test: fraction of rows for the test subset.\n",
"        random_state: seed forwarded to both train_test_split calls.\n",
"\n",
"    Returns:\n",
"        Tuple (df_train, df_val, df_test).\n",
"\n",
"    Raises:\n",
"        ValueError: if the fractions do not add up to 1.0 (within floating-point\n",
"            tolerance) or stratify_colname is not a column of df_input.\n",
"    \"\"\"\n",
"    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 are not exactly 1.0\n",
"    # in binary floating point and a strict equality test would reject them.\n",
"    if abs(frac_train + frac_val + frac_test - 1.0) > 1e-9:\n",
"        raise ValueError(\n",
"            \"fractions %f, %f, %f do not add up to 1.0\"\n",
"            % (frac_train, frac_val, frac_test)\n",
"        )\n",
"\n",
"    if stratify_colname not in df_input.columns:\n",
"        raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
"    X = df_input  # Contains all columns.\n",
"    y = df_input[[stratify_colname]]  # Dataframe of just the column on which to stratify.\n",
"\n",
"    # First split: train versus a temporary remainder (validation + test).\n",
"    df_train, df_temp, y_train, y_temp = train_test_split(\n",
"        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
"    )\n",
"\n",
"    # Second split: divide the remainder, with the test share expressed\n",
"    # relative to the remainder rather than to the whole dataframe.\n",
"    relative_frac_test = frac_test / (frac_val + frac_test)\n",
"    df_val, df_test, y_val, y_test = train_test_split(\n",
"        df_temp,\n",
"        y_temp,\n",
"        stratify=y_temp,\n",
"        test_size=relative_frac_test,\n",
"        random_state=random_state,\n",
"    )\n",
"\n",
"    # Sanity check: no row was lost or duplicated by the two splits.\n",
"    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
"    return df_train, df_val, df_test"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 362,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"stroke\n",
"0 4861\n",
"1 249\n",
"Name: count, dtype: int64\n",
"\n",
"Обучающая выборка: (3066, 12)\n",
"stroke\n",
"0 2917\n",
"1 149\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAeYAAADECAYAAABKgIJGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3W0lEQVR4nO3dd1gU1/oH8O/2Xar0LqCAKHYssWLBgjUxRo2JGhMVE2OuJfEX47XGXK+X2CL2RI3Rm1iIGo2xl6hRrNhFqgXpIB2W3T2/P7i7Yd2lCsy6+36eh0d39sw578zOzDvtzPAYYwyEEEIIMQh8rgMghBBCyN8oMRNCCCEGhBIzIYQQYkAoMRNCCCEGhBIzIYQQYkAoMRNCCCEGhBIzIYQQYkAoMRNCCCEGhBIzIcRoqVQqZGRkID4+nutQSB1TKBRIS0vDkydPuA6lzlFiJoSgV69eaNmyJddh1ImUlBTMmDEDnp6eEIvFcHBwQIsWLZCbm8t1aK+Fw4cPIyoqSvP5wIEDuHfvHncBlRMTE4PJkyfDxcUFYrEYTk5O6NKlC4ztAZbCmhTevn07Jk6cqPkskUjQuHFj9O/fH/Pnz4eTk1OdB0gIKbN+/XqYmZnhgw8+4DoUgxUbG4vevXujtLQUn332Gdq3bw+hUAiZTAZzc3Ouw3st3LlzB2FhYfj++++RmZmJqVOn4sCBA1yHhcuXLyMkJAS2trb48ssv0aJFC/B4PFhbW4PH43EdXp2qUWJWW7JkCby9vVFcXIwLFy5gw4YNOHLkCO7evQszM7O6jpEQgrLEbG9vT4m5EqGhoRCLxbh8+TLc3Ny4Due1NGnSJGzduhV+fn4AgBEjRuCNN97gNCa5XI6JEyfCz88Px48fh7W1Nafx1LdaJeaQkBB06NABQNmPaGdnh5UrV+LgwYN499136zRAQkjNFRQUmNwR4vXr13H69GkcP36ckvIrcHBwwN27dzUHWs2bN+c6JBw6dAjR0dF4+PCh0SdloI6uMffp0wcAkJCQAADIysrC559/jlatWsHCwgJWVlYICQnBrVu3dMYtLi7GokWL4OfnB6lUChcXF4wYMQJxcXEAgMTERPB4vAr/evXqpanr7Nmz4PF42L17N7766is4OzvD3Nwcw4YNw9OnT3XajoyMxMCBA2FtbQ0zMzMEBQXh4sWLeqexV69eettftGiRTtmdO3ciMDAQMpkMtra2GDNmjN72K5u28lQqFVavXo2AgABIpVI4OTkhNDQU2dnZWuW8vLwwZMgQnXY+/fRTnTr1xR4WFqYzTwGgpKQECxcuhI+PDyQSCTw8PDBnzhyUlJTonVfl9erVS6e+b775Bnw+H//9739rNT++/fZbdO3aFXZ2dpDJZAgMDMS+ffv0tr9z50506tQJZmZmsLGxQc+ePXH8+HGtMn/88QeCgoJgaWkJKysrdOzYUSe2vXv3an5Te3t7vP/++0hKStIq88EHH2jFbGNjg169euH8+fNVzqeUlBRMnDgR7u7ukEgkcHFxwfDhw5GYmAig7Le9d+8ezp07p7Psb9++HTweD+fOncMnn3wCR0dHuLu7a+pev349AgICIJFI4OrqimnTpuHFixdVxnT8+HGYmZnh3XffhUKhAAA8fPgQI0eOhK2tLaRSKTp06IDffvutyrpe/m1FIhG8vLzwxRdfQC6XVzl+QUEBZs+eDQ8PD0gkEjRr1gzffvut1rXFy5cvQyqVIi4uTjO9zs7OCA0NRVZWlqbcwoULIRKJkJ6ertPOlClT0KhRIxQXF2ti3r59u1aZRYsW6SyT27ZtQ58+feDo6AiJRIIWLVpgw4YNOvW/vD6UlpZi/vz58Pb2hlgsRuPGjTFnzhwUFRVpjadv3da3Xl+7dk1vzNVdh9XbBYlEgsDAQDRv3rzC7YI+5X9jgUAANzc3TJkyRWt5U2+nK1pngbJ1ycvLS/P58uXL8Pb2RkREBJo2bVrpvAKqt8yr76u4fv
06unbtCplMBm9vb2zcuFGrnDres2fPaoY9f/4cXl5e6NChA/Lz8zXDX2VbWV6tjphfpk6idnZ2AID4+HgcOHAA77zzDry9vZGamopNmzYhKCgI9+/fh6urKwBAqVRiyJAhOHXqFMaMGYN//OMfyMvLw4kTJ3D37l00bdpU08a7776LQYMGabU7d+5cvfF888034PF4+L//+z+kpaVh9erVCA4ORlRUFGQyGQDg9OnTCAkJQWBgIBYuXAg+n69Zuc6fP49OnTrp1Ovu7o5ly5YBAPLz8/Hxxx/rbXv+/PkYNWoUJk2ahPT0dKxduxY9e/bEzZs30ahRI51xpkyZgh49egAAfv31V+zfv1/r+9DQUM31/c8++wwJCQkIDw/HzZs3cfHiRYhEIr3zoSZevHihmbbyVCoVhg0bhgsXLmDKlClo3rw57ty5g1WrVuHRo0c1vva0bds2/POf/8SKFSswduxYvWWqmh9r1qzBsGHD8N5770Eul+OXX37BO++8g8OHD2Pw4MGacosXL8aiRYvQtWtXLFmyBGKxGJGRkTh9+jT69+8PoCypffjhhwgICMDcuXPRqFEj3Lx5E0ePHtXEp573HTt2xLJly5Camoo1a9bg4sWLOr+pvb09Vq1aBQB49uwZ1qxZg0GDBuHp06d6f3u1t99+G/fu3cP06dPh5eWFtLQ0nDhxAk+ePIGXlxdWr16N6dOnw8LCAvPmzQMAnXs6PvnkEzg4OGDBggUoKCgAUJZEFi9ejODgYHz88ceIjo7Ghg0bcPXq1UqXncOHD2PkyJEYPXo0tm7dCoFAgHv37qFbt25wc3PDl19+CXNzc+zZswdvvvkmIiIi8NZbb1U4fWrq37akpATHjh3Dt99+C6lUiq+//rrCcRhjGDZsGM6cOYOPPvoIbdu2xbFjx/DFF18gKSlJM78zMzNRXFyMjz/+GH369MHUqVMRFxeHdevWITIyEpGRkZBIJBg3bhyWLFmC3bt349NPP9W0I5fLsW/fPrz99tuQSqVVTkt5GzZsQEBAAIYNGwahUIhDhw7hk08+gUqlwrRp0yocb9q0adiyZQuGDRuGzz//HDdv3kRYWBju3r2L33//vU6unb7KOlzRdqEyb731FkaMGAGFQoFLly5h8+bNKCoqwk8//VTracjMzER8fDy++uorjBgxArNnz8a1a9f0zquaLPPZ2dkYNGgQRo0ahXfffRd79uzBxx9/DLFYjA8//FBvLDk5OQgJCYFIJMKRI0dgYWEBoI63lawGtm3bxgCwkydPsvT0dPb06VP2yy+/MDs7OyaTydizZ88YY4wVFxczpVKpNW5CQgKTSCRsyZIlmmFbt25lANjKlSt12lKpVJrxALCwsDCdMgEBASwoKEjz+cyZMwwAc3NzY7m5uZrhe/bsYQDYmjVrNHX7+vqyAQMGaNphjLHCwkLm7e3N+vXrp9NW165dWcuWLTWf09PTGQC2cOFCzbDExEQmEAjYN998ozXunTt3mFAo1BkeExPDALAff/xRM2zhwoWs/M9y/vx5BoDt2rVLa9yjR4/qDPf09GSDBw/WiX3atGns5Z/65djnzJnDHB0dWWBgoNY8/emnnxifz2fnz5/XGn/jxo0MALt48aJOe+UFBQVp6vv999+ZUChks2fP1lu2OvODsbLfqTy5XM5atmzJ+vTpo1UXn89nb731ls6yqP7NX7x4wSwtLVnnzp1ZUVGR3jJyuZw5Ojqyli1bapU5fPgwA8AWLFigGTZhwgTm6empVc/mzZsZAHblyhW908wYY9nZ2RUu4+W9vLyrqdfL7t27M4VCoRmelpbGxGIx69+/v9Y8CA8PZwDY1q1bNcOCgoJYQEAAY4yxiIgIJhKJ2OTJk7XG69u3L2vVqhUrLi7WDFOpVKxr167M19e30tjV6/G2bdu0hru6urJBgwZVOu6BAwcYALZ06VKt4SNHjmQ8Ho/FxsYyxv5eVvr27as1H9TzZ+3atZphXbp0YZ07d9
aq79dff2UA2JkzZxhjjD1+/FhnPpVvp7yXl0nGGBswYABr0qSJ1rDy68Pt27cZj8djY8aM0SqzaNEiBoAdOnRIM0z
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Контрольная выборка: (1022, 12)\n",
"stroke\n",
"0 972\n",
"1 50\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAfMAAADECAYAAACLB8jqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA3FElEQVR4nO3dd1xTVxsH8F8SQhIIyJ4iIKAooiiideJeWGsdaNWqtI46X0e1Wl9nbX2tWrGuauuq0NZB1dZWxb33nogKDvbeEEjO+wdNakhYClxinu/nw0dzc+45z115cs8994bHGGMghBBCiM7icx0AIYQQQt4OJXNCCCFEx1EyJ4QQQnQcJXNCCCFEx1EyJ4QQQnQcJXNCCCFEx1EyJ4QQQnQcJXNCCCFEx1EyJ4QQohdycnLw8uVLpKWlcR1KlaNkTghBp06d0KRJE67DIDrqm2++gUKhAAAoFAosW7aM44j+tWfPHnTt2hUmJiaQSqWoV68evv32W67DqnKVSubbt28Hj8dT/YnFYjRo0ACTJ09GQkJCdcVICAGwYcMGbN++neswCNGwY8cOrFy5Eq9evcKqVauwY8cOrkMCAMyZMweBgYEwMTHBjz/+iKNHj+LYsWOYOHEi16FVOYM3mWnJkiVwdXVFfn4+zp07h40bN+Lvv//GvXv3YGRkVNUxEkJQnMytrKwwevRorkMhRM2SJUswcuRIfPHFFxCJRAgJCeE6JJw+fRrLly/HsmXLMGfOHK7DqXZvlMx79+6Nli1bAgDGjBkDS0tLfPfddzhw4AA++uijKg2QEFJ5OTk5MDY25joMoieGDBmCzp0748mTJ/Dw8IC1tTXXIWHlypVo27atXiRyoIqumXfp0gUAEBUVBQBITU3F559/Dm9vb0ilUpiamqJ37964ffu2xrz5+flYtGgRGjRoALFYDHt7ewwYMABPnz4FAERHR6t17Zf869Spk6quU6dOgcfjYdeuXfjyyy9hZ2cHY2Nj9OvXDy9fvtRo+/Lly+jVqxfq1KkDIyMj+Pv74/z581qXsVOnTlrbX7RokUbZkJAQ+Pr6QiKRwMLCAkOHDtXaflnL9jqFQoHg4GB4eXlBLBbD1tYW48eP1xjE4eLigr59+2q0M3nyZI06tcW+YsUKjXUKAAUFBVi4cCHc3d0hEong5OSE2bNno6CgQOu6el2nTp006vv666/B5/Pxyy+/vNH6UB6klpaWkEgk8PX1xd69e7W2HxISglatWsHIyAjm5ubo2LEjwsPD1cocOnQI/v7+MDExgampKfz8/DRi27Nnj2qbWllZYcSIEYiJiVErM3r0aLWYzc3N0alTJ5w9e7bc9RQfH4+goCDUrVsXIpEI9vb2+OCDDxAdHQ2geNvev38fp0+f1tj3lZe/Tp8+jYkTJ8LGxgZ169ZV1b1hwwZ4eXlBJBLBwcEBkyZNQnp6erkxhYeHw8jICB999BGKiooAAI8ePcKgQYNgYWEBsViMli1b4o8//ii3rpLbVigUwsXFBbNmzYJMJqvQvK9fYsjKyoKvry9cXV0RFxenmp6Tk4OZM2fCyckJIpEIDRs2xMqVK1HyxyF5PB4mT56s0Vbfvn3h4uKiNWZtf8peEuU2OHPmDMaPHw9LS0uYmppi5MiRWgdbVXSbVOSYUJZZuXJlmetRGeO1a9fUpicnJ2v9PLh58yZ69+4NU1NTSKVSdO3aFZcuXdJaZ3R0NGxsbFTHZdOmTTW2WVkxKf+MjIzg7e2Nn376Sa3c6NGjIZVKy6yr5DJcunQJTZo0wdChQ2FhYQGJRAI/Pz/s379fY97ExER8+umnsLW1hVgsRrNmzTQuFby+nlevXg1nZ2dIJBL4+/vj3r17GvEq9yOlkJAQ8Pl8/O9//1Ob/qbHVElvdGZekjLxWlpaAgCePXuG/fv3Y/DgwXB1dUVCQgI2bdoEf39/PHjwAA4ODgAAuVyOvn374vjx4xg6dCj+85//ICsrC0ePHs
W9e/fg5uamauOjjz5Cnz591NqdO3eu1ni+/vpr8Hg8fPHFF0hMTERwcDC6deuGW7duQSKRAABOnDiB3r17w9fXFwsXLgSfz8e2bdvQpUsXnD17Fq1atdKot27duqqBHdnZ2ZgwYYLWtufPn4/AwECMGTMGSUlJWLt2LTp27IibN2/CzMxMY55x48ahQ4cOAIDff/8d+/btU3t//Pjx2L59O4KCgjB16lRERUVh3bp1uHnzJs6fPw+hUKh1PVRGenq61kErCoUC/fr1w7lz5zBu3Dg0atQId+/exerVq/H48WOtB0ZZtm3bhv/+979YtWoVhg0bprVMeetjzZo16NevH4YPHw6ZTIbffvsNgwcPxsGDBxEQEKAqt3jxYixatAht27bFkiVLYGhoiMuXL+PEiRPo0aMHgOIPk08++QReXl6YO3cuzMzMcPPmTRw+fFgVn3Ld+/n5YdmyZUhISMCaNWtw/vx5jW1qZWWF1atXAwBevXqFNWvWoE+fPnj58qXWba80cOBA3L9/H1OmTIGLiwsSExNx9OhRvHjxAi4uLggODsaUKVMglUoxb948AICtra1aHRMnToS1tTUWLFiAnJwcAMCiRYuwePFidOvWDRMmTEBERAQ2btyIq1evlrnvHDx4EIMGDcKQIUOwdetWCAQC3L9/H+3atYOjoyPmzJkDY2Nj7N69G/3790dYWBg+/PDDUpdPSbltCwoKcOTIEaxcuRJisRhfffVVufMqFRYWYuDAgXjx4gXOnz8Pe3t7AABjDP369cPJkyfx6aefwsfHB0eOHMGsWbMQExOj2i4VZW1tjZ07d6peK/fF16e9/hkFFH9xNjMzw6JFi1Tr+vnz56oTDeDNtkl5x0RVu3//Pjp06ABTU1PMnj0bQqEQmzZtQqdOnXD69Gm0bt261Hl37tyJu3fvVqq91atXw8rKCpmZmdi6dSvGjh0LFxcXdOvW7Y2XISUlBZs3b4ZUKsXUqVNhbW2NkJAQDBgwAKGhoape5Ly8PHTq1AlPnjzB5MmT4erqij179mD06NFIT0/Hf/7zH7V6f/75Z2RlZWHSpEnIz8/HmjVr0KVLF9y9e1fjmFQKDw/HJ598gsmTJ6v1FFTFMaXCKmHbtm0MADt27BhLSkpiL1++ZL/99huztLRkEomEvXr1ijHGWH5+PpPL5WrzRkVFMZFIxJYsWaKatnXrVgaAfffddxptKRQK1XwA2IoVKzTKeHl5MX9/f9XrkydPMgDM0dGRZWZmqqbv3r2bAWBr1qxR1e3h4cF69uypaocxxnJzc5mrqyvr3r27Rltt27ZlTZo0Ub1OSkpiANjChQtV06Kjo5lAIGBff/212rx3795lBgYGGtMjIyMZALZjxw7VtIULF7LXN8vZs2cZABYaGqo27+HDhzWmOzs7s4CAAI3YJ02axEpu6pKxz549m9nY2DBfX1+1dbpz507G5/PZ2bNn1eb/4YcfGAB2/vx5jfZe5+/vr6rvr7/+YgYGBmzmzJlay1ZkfTBWvJ1eJ5PJWJMmTViXLl3U6uLz+ezDDz/U2BeV2zw9PZ2ZmJiw1q1bs7y8PK1lZDIZs7GxYU2aNFErc/DgQQaALViwQDVt1KhRzNnZWa2ezZs3MwDsypUrWpeZMcbS0tJK3cdfV3J/V1Iel+3bt2dFRUWq6YmJiczQ0JD16NFDbR2sW7eOAWBbt25VTfP392deXl6MMcbCwsKYUChkY8eOVZuva9euzNvbm+Xn56umKRQK1rZtW+bh4VFm7MrjeNu2bWrTHRwcWJ8+fSo8r0KhYMOHD2dGRkbs8uXLauX279/PALClS5eqTR80aBDj8XjsyZMnqmkA2KRJkzTaCggI0NiGStr2RSXlNvD19WUymUw1/dtvv2UA2IEDBxhjldsmjFXsmCjrM1JbjFevXlWbru2zrH///szQ0JA9ffpUNS02NpaZmJiwjh07atQZFRXFGCv+7K9Xrx7r3bu31u1dWkzK+Rlj7PHjxw
wA+/bbb1XTRo0axYyNjcusq+QyAGAA2KlTp1TTcnNzWaNGjZidnZ1qOwUHBzMALCQkRFVOJpOxNm3aMKlUqsolyvX
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Тестовая выборка: (1022, 12)\n",
"stroke\n",
"0 972\n",
"1 50\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAdEAAADECAYAAADArxsHAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA200lEQVR4nO3dd1gU1/oH8O82dpfeqwgIIopYgqJiwa6gIUYNMWpi18R2jSa5amIs0WuMxhJbNLFF/RkLscQbG3YJscTeEBAsSG/CUpbdPb8/uLthWUBAlnHh/TwPj+7smXPemZ2dd+fMnBkeY4yBEEIIIdXG5zoAQgghxFBREiWEEEJqiJIoIYQQUkOURAkhhJAaoiRKCCGE1BAlUUIIIaSGKIkSQgghNURJlBBCCKkhIdcBEEIIqd8KCwuRmZkJoVAIe3t7rsOpVXQkSghB9+7d0bJlS67DINW0bt06ZGdna16vXr0aMpmMu4BKiYiIQGhoKCwtLSGVSuHi4oJ//etfXIdV66qVRLdv3w4ej6f5k0gk8Pb2xtSpU5GSkqKvGAkhADZs2IDt27dzHQZ5g/z+++9YsGABnj17ht27d2PevHmQSqVch4UNGzagX79+yMnJwZo1a3Dq1CmcOnUKixYt4jq0Wlej7txFixbBw8MDhYWFuHTpEjZu3Ig//vgDd+/ehbGxcW3HSAhByY7J1tYWo0eP5joU8oaYO3cuQkNDsWbNGvD5fHz//ffg87ntYIyJicHMmTMxceJEbNiwATwej9N49K1GSTQ4OBjt2rUDAIwfPx42NjZYuXIlDh8+jA8++KBWAySEVJ9MJoOJiQnXYRA9CwoKwpMnT/DgwQO4urqiUaNGXIeEH374AY6Ojvjhhx/qfQIFaumcaM+ePQEA8fHxAIDMzEx89tln8PPzg6mpKczNzREcHIxbt27pzFtYWIgFCxbA29sbEokETk5OGDx4MOLi4gAACQkJWl3IZf+6d++uqevcuXPg8XjYu3cv5s6dC0dHR5iYmCA0NBTPnj3Tafvy5cvo378/LCwsYGxsjKCgIERGRpa7jN27dy+3/QULFuiU3bVrF/z9/SGVSmFtbY1hw4aV235ly1aaSqXC6tWr4evrC4lEAgcHB0yaNAlZWVla5dzd3TFw4ECddqZOnapTZ3mxL1++XGedAkBRURHmz58PLy8viMViuLq64osvvkBRUVG566q07t2769S3ZMkS8Pl8/N///V+N1seKFSsQGBgIGxsbSKVS+Pv748CBA+W2v2vXLgQEBMDY2BhWVlbo1q0bTp48qVXm2LFjCAoKgpmZGczNzdG+fXud2Pbv36/5TG1tbTFy5EgkJiZqlRk9erRWzFZWVujevTsuXrz4yvWUnJyMMWPGoFGjRhCLxXBycsI777yDhIQEACWf7b1793D+/HmdbV99muX8+fOYPHky7O3ttXamGzZsgK+vL8RiMZydnTFlyhSt82gVOXnyJIyNjfHBBx9AoVAAAB4+fIihQ4fC2toaEokE7dq1w5EjR15ZV9nPViQSwd3dHZ9//jnkcnmV5yvvr/SReXZ2NmbMmAFXV1eIxWJ4eXlh2bJlUKlUWvWqVCqsWbMGfn5+kEgksLOzQ//+/XHt2jUAeGWbpbfp1NRUjBs3Dg4ODpBIJGjdujV27NhR4+V//Pgx3nvvPVhbW8PY2BgdO3bEf//7X60y6n3duXPnYGlpiU6dOqFRo0YYMGBAhful8uZX/4nFYnh7e2Pp0qUo/WCvBQsWgMfjIT09vcK63N3dtT6Dv/76C/7+/pg8eTIcHBwgFovRsmVL/PTTTzrzymQyzJo1S/N5NWvWDCtWrEDZh4vxeDxMnToVu3fvRrNmzSCRSODv748LFy5olVPHW9rZs2chFovx8ccfa01PTEzE2LFjNTH6+vpi69atla638tTK1bnqhGdjYwOgZCM4dOgQ3nvvPXh4eCAlJQWbNm1CUFAQ7t+/D2dnZwCAUq
nEwIEDcfr0aQwbNgz/+te/kJubi1OnTuHu3bvw9PTUtPHBBx8gJCREq905c+aUG8+SJUvA4/Hw73//G6mpqVi9ejV69+6Nmzdvas4XnDlzBsHBwfD398f8+fPB5/Oxbds29OzZExcvXkRAQIBOvY0aNcLSpUsBAHl5efjkk0/KbXvevHkICwvD+PHjkZaWhrVr16Jbt264ceMGLC0tdeaZOHEiunbtCgD47bffcPDgQa33J02ahO3bt2PMmDGYPn064uPjsW7dOty4cQORkZEQiUTlrofqyM7O1ixbaSqVCqGhobh06RImTpyI5s2b486dO1i1ahUePXqEQ4cOVaudbdu24auvvsL333+P4cOHl1vmVetjzZo1CA0NxYgRIyCXy/Hrr7/ivffew9GjRzFgwABNuYULF2LBggUIDAzEokWLYGRkhMuXL+PMmTPo27cvgJIENHbsWPj6+mLOnDmwtLTEjRs3cPz4cU186nXfvn17LF26FCkpKVizZg0iIyN1PlNbW1usWrUKAPD8+XOsWbMGISEhePbsWbmfvdqQIUNw7949TJs2De7u7khNTcWpU6fw9OlTuLu7Y/Xq1Zg2bRpMTU3x5ZdfAgAcHBy06pg8eTLs7Ozw9ddfay4uWbBgARYuXIjevXvjk08+QXR0NDZu3IirV69Wuu0cPXoUQ4cOxfvvv4+tW7dCIBDg3r176Ny5M1xcXDB79myYmJhg3759GDRoEMLDw/Huu+9WuHxq6s+2qKgIJ06cwIoVKyCRSPDNN9+UW97Ozg47d+7UvFZvD6WnqfcT+fn5CAoKQmJiIiZNmoTGjRvjzz//xJw5c5CUlITVq1dr5hk3bhy2b9+O4OBgjB8/HgqFAhcvXsRff/2Fdu3aadV/8eJFbN68GatWrYKtra3Wui8oKED37t0RGxuLqVOnwsPDA/v378fo0aORnZ2tcyHNq5Y/JSUFgYGByM/Px/Tp02FjY4MdO3YgNDQUBw4cqHQdX7hwAX/88ccrP4PS5s6di+bNm6OgoEBz8GFvb49x48ZVq57SMjIycO3aNQiFQkyZMgWenp44dOgQJk6ciIyMDMyePRsAwBhDaGgozp49i3HjxqFNmzY4ceIEPv/8cyQmJmq+R2rnz5/H3r17MX36dIjFYmzYsAH9+/fHlStXKrwo7tatWxg0aBBCQkKwfv16zfSUlBR07NhRk5zt7Oxw7NgxjBs3Di9fvsSMGTOqvsCsGrZt28YAsIiICJaWlsaePXvGfv31V2ZjY8OkUil7/vw5Y4yxwsJCplQqteaNj49nYrGYLVq0SDNt69atDABbuXKlTlsqlUozHwC2fPlynTK+vr4sKChI8/rs2bMMAHNxcWEvX77UTN+3bx8DwNasWaOpu2nTpqxfv36adhhjLD8/n3l4eLA+ffrotBUYGMhatmypeZ2WlsYAsPnz52umJSQkMIFAwJYsWaI17507d5hQKNSZHhMTwwCwHTt2aKbNnz+flf5YLl68yACw3bt3a817/Phxnelubm5swIABOrFPmTKFlf2oy8b+xRdfMHt7e+bv76+1Tnfu3Mn4fD67ePGi1vw//vgjA8AiIyN12istKChIU99///tfJhQK2axZs8otW5X1wVjJ51SaXC5nLVu2ZD179tSqi8/ns3fffVdnW1R/5tnZ2czMzIx16NCBFRQUlFtGLpcze3t71rJlS60yR48eZQDY119/rZk2atQo5ubmplXP5s2bGQB25cqVcpeZMcaysrIq3MZLK7u9q6m/l126dGEKhUIzPTU1lRkZGbG+fftqrYN169YxAGzr1q2aaUFBQczX15cxxlh4eDgTiURswoQJWvP16tWL+fn5scLCQs00lUrFAgMDWdOmTSuNXf093rZtm9Z0Z2dnFhISUum8pZW3Pah98803zMTEhD169Ehr+uzZs5lAIGBPnz5ljDF25swZBoBNnz5dp47S+wM19fqNj4/XeW/16tUMANu1a5dmmlwuZ506dW
Kmpqaa/VBVl3/GjBkMgNb3LTc3l3l4eDB3d3fN56He1509e1ZTrkOHDiw4OFjnu12e8uYvLCxkfD6fTZ48WTNNvb7
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Class distribution of the target before splitting\n",
"print(df.stroke.value_counts())\n",
"print()\n",
"\n",
"data = df.copy()\n",
"\n",
"# A fixed random_state makes the split, and everything derived from it, reproducible\n",
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
"    data,\n",
"    stratify_colname=\"stroke\",\n",
"    frac_train=0.60,\n",
"    frac_val=0.20,\n",
"    frac_test=0.20,\n",
"    random_state=42,\n",
")\n",
"\n",
"# One report per subset: shape, class counts and a pie chart of the class shares.\n",
"# A single loop replaces three copy-pasted stanzas; counts stays a notebook-level variable.\n",
"for label, subset, title in [\n",
"    (\"Обучающая выборка: \", df_train, 'Распределение классов stroke в обучающей выборке'),\n",
"    (\"Контрольная выборка: \", df_val, 'Распределение классов stroke в контрольной выборке'),\n",
"    (\"Тестовая выборка: \", df_test, 'Распределение классов stroke в тестовой выборке'),\n",
"]:\n",
"    print(label, subset.shape)\n",
"    print(subset.stroke.value_counts())\n",
"    counts = subset['stroke'].value_counts()\n",
"    plt.figure(figsize=(2, 2))  # small figure, the chart has only two slices\n",
"    plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"    plt.title(title)\n",
"    plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Сбалансируем распределение:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"1. Балансировка данных оверсемплингом. Это метод, увеличивающий число наблюдений в меньшинственном классе для достижения более равномерного распределения классов."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 363,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-10-12 12:38:44 +04:00
"Обучающая выборка после oversampling: (5865, 17)\n",
2024-10-11 18:41:52 +04:00
"stroke\n",
2024-10-12 12:38:44 +04:00
"1 2948\n",
2024-10-11 18:41:52 +04:00
"0 2917\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAosAAAH4CAYAAAAitKflAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABa90lEQVR4nO3dd3hTZf/H8U/STQe0QMseMmXLVIaAIlNwM1yACojiQHxU9FHArfgTUBDHo6CAoDhAUVCmgoOhTBmyyl5llA66kvP7ozYS2kDTpj1J+n5dVy5oxsn3JCd3Prnv+5xjMQzDEAAAAJAHq9kFAAAAwHsRFgEAAOASYREAAAAuERYBAADgEmERAAAALhEWAQAA4BJhEQAAAC4RFgEAAOASYREAgEKw2+1KSEjQnj17zC4FKBKERQAOFotFI0aMMLsMQJK0atUqrVixwvH3ihUr9Msvv5hX0HmOHj2qRx99VNWrV1dwcLDKly+vBg0a6OzZs2aXBnicW2Fx+vTpslgsjktoaKjq1q2rESNG6NixY0VVI1AirFq1Sj169FDlypUVGhqqatWqqXfv3vr0008d90lNTdXYsWOdvkABf3XgwAE98MAD2rx5szZv3qwHHnhABw4cMLss7dq1S61atdKcOXM0bNgwLViwQIsXL9bSpUsVHh5udnlww/fffy+LxaJKlSrJbrfneZ8aNWo4co/ValWZMmXUuHFjDR06VKtXry708jMyMjRp0iRdccUVioqKUpkyZdSwYUMNHTpU27dvlyT16NFD0dHReWatxMREVaxYUW3atJHdbteKFSsc9f7xxx+57j9o0CBFRERc6qVxEujWvf/x/PPPq2bNmkpLS9OqVas0depUff/999qyZYtKlSpVkEUCJdrcuXPVr18/NWvWTI888oiio6O1d+9e/fzzz/rggw90++23S8oOi+PGjZMkderUycSKgaJ38803a+LEiWrSpIkk6aqrrtLNN99sclXSsGHDFBwcrN9//12VK1c2uxwUwqxZs1SjRg3Fx8dr2bJl6tKlS573a9asmUaNGiVJSkpK0rZt2zR37lx98MEHGjlypN58880CL/+WW27RwoULNWDAAA0ZMkSZmZnavn27FixYoLZt26p+/fp655131KhRI40cOdKpA0GSnn76aSUkJGjRokWyWp37AMeOHatvv/22IC+NM8MN06ZNMyQZa9eudbr+scceMyQZn376qTuLA/CPBg0aGA0bNjTS09Nz3Xbs2DHH/0+cOGFIMsaMGZOv5SYnJ7tVhyTjwQcfdOsxQFHKysoyNmzYYGzYsMHIysoyuxxj3bp1hiTjxx9/NLsUFFJycrIRHh5uvPXWW8YVV1xhDBo0KM/7Va9e3ejVq1eu61NTU40bb7zRkGS88847BVr+mjVrDEnGSy+9lOu2rKwsIyEhwfH3a6+9ZkgyfvjhB6fHW61W44knnnBct3z5ckOS0axZM0OS8ccffzgtd+DAgUZ4eHie6+qKR+YsXnPNNZKkvXv3SpJOnTqlxx9/XI0bN1ZERISioqLUo0cPbdy4Mddj09LSNHbsWNWtW1ehoaGqWLGibr75Zu3evVuSFB8f7zT0feHl/N6VnK7Xzz77TE8//bQqVKig8PBw9enTJ8+hi9WrV6t79+4qXbq0SpUqpY4dO7qcD9OpU6c8n3/s2LG57jtz5ky1aNFCYWFhiomJUf/+/fN8/out2/nsdrsmTpyohg0bKjQ0VHFxcRo2bJhOnz7tdL8aNWro+uuvz/U8I0aMyLXMvGofP358rtdUktLT0zVmzBjVrl1bISEhqlq1qp544gmlp6fn+Vqdr1OnTrmW99JLL8lqteb6dZTf1+ONN95Q27ZtVbZsWYWFhalFixb64osv8nz+mTNnqnXr1ipVqpSio6N19dVX68cff3S6z8KFC9WxY0dFRkYqKipKrVq1ylXb3LlzHe9puXLldOedd+rQoUNO9xk0aJBTzdHR0e
rUqZNWrlx5yddp9+7datWqlYKDg3PdFhsb63h9ypcvL0kaN25crm0wZ2hh9+7d6tmzpyIjI3XHHXdIklJSUjRq1ChVrVpVISEhqlevnt544w0ZhnHJ2l588UVZrVa9/fbbTq9Zhw4dFB4ersjISPXq1Ut//fXXJZd14VSWUqVKqXHjxvrf//7n1uPyukyfPt3pddizZ4+6deum8PBwVapUSc8//3yu9TXzs+Vue+XpbfCdd95Rw4YNFRISokqVKunBBx/UmTNnLrkuOe9FfHx8gV6frKwsvfDCC6pVq5ZCQkJUo0YNPf3007nakxo1amjQoEEKCAhQ06ZN1bRpU3311VeyWCyqUaNGrue60IVDhxUqVFC/fv20f/9+x31y2pw33njD5XLGjh3rtA6///67QkNDtXv3bsfrV6FCBQ0bNkynTp3K9fj8vm/52WZz6s3Z1qXsXq4WLVqoZs2aOnLkiOP6/G7beblwO7rwcuE0mPysoyRt375dffv2Vfny5RUWFqZ69erpmWeeyXW/89+7iz1vQduhHF9//bXOnTun2267Tf3799dXX32ltLS0fD8+LCxMM2bMUExMjF566aVc7Ut+lp+Tddq1a5dr+QEBASpbtqzj78cee0xNmjTRAw88oLS0NNlsNt1///2qXr26xowZk+vxDz30kKKjo/PMKe4q0DD0hXJWNmel9uzZo3nz5um2225TzZo1dezYMb333nvq2LGjtm7dqkqVKkmSbDabrr/+ei1dulT9+/fXI488oqSkJC1evFhbtmxRrVq1HM8xYMAA9ezZ0+l5R48enWc9L730kiwWi5588kkdP35cEydOVJcuXbRhwwaFhYVJkpYtW6YePXqoRYsWGjNmjKxWq6ZNm6ZrrrlGK1euVOvWrXMtt0qVKnrllVckScnJyRo+fHiez/3ss8+qb9++uu+++3TixAm9/fbbuvrqq7V+/XqVKVMm12OGDh2qDh06SJK++uorff311063Dxs2TNOnT9fgwYP18MMPa+/evZo8ebLWr1+vX375RUFBQXm+Du44c+aMY93OZ7fb1adPH61atUpDhw7V5Zdfrs2bN2vChAn6+++/NW/ePLeeZ9q0afrvf/+r//u//3MMrV7oUq/HpEmT1KdPH91xxx3KyMjQnDlzdNttt2nBggXq1auX437jxo3T2LFj1bZtWz3//PMKDg7W6tWrtWzZMnXt2lVS9hffPffco4YNG2r06NEqU6aM1q9fr0WLFjnqy3ntW7VqpVdeeUXHjh3TpEmT9Msvv+R6T8uVK6cJEyZIkg4ePKhJkyapZ8+eOnDgQJ7vfY7q1atr6dKlOnjwoKpUqZLnfcqXL6+pU6dq+PDhuummmxzDcTlDdFL2F3G3bt3Uvn17vfHGGypVqpQMw1CfPn20fPly3XvvvWrWrJl++OEH/ec//9GhQ4cc9eblv//9r15++WW99957GjJkiCRpxowZGjhwoLp166bXXntNqampmjp1qtq3b6/169fn64t8woQJKleunM6ePauPPvpIQ4YMUY0aNVwOAV199dWaMWOG4++XXnpJkpy+aNq2bev4v81mU/fu3XXllVfq9ddf16JFizRmzBhlZWXp+eefd9zPzM/W+etyqfbK09vg2LFjNW7cOHXp0kXDhw/Xjh07NHXqVK1du9Zj6+3Kfffdp48//li33nqrRo0apdWrV+uVV17Rtm3bcn3Wz5eVlZVnsLiYDh06aOjQobLb7dqyZYsmTpyow4cP5+sHnCsnT55UWlqahg8frmuuuUb333+/du/erSlTpmj16tVavXq1QkJCJLn3vuV3mz1fZmambrnlFu3fv1+//PKLKlas6LitsNt2SEhIrh9xa9eu1VtvveV0XX7XcdOmTerQoYOCgoI0dOhQ1ahRQ7t379a3337r+DyfL+e9k6Rt27bp5ZdfdrrdE+3QrFmz1LlzZ1WoUEH9+/fXU089pW+//Va33XbbJR+bIyIiQjfddJM+/P
BDbd26VQ0bNnRr+dWrV3fct127dgoMdB3LAgMD9f7776tt27Z64YUXFBsbqz///FOLFi3KcwpgVFSURo4cqeeee05
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"categorical_features = ['gender', 'ever_married', 'work_type', 'Residence_type'] # Ваши категориальные признаки\n",
"numeric_features = ['age', 'hypertension', 'heart_disease', 'avg_glucose_level', 'bmi'] # Ваши числовые признаки\n",
"\n",
"# Создание пайплайна для обработки категориальных данных\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('cat', OneHotEncoder(), categorical_features), # OneHotEncoder для категориальных данных\n",
" ('num', 'passthrough', numeric_features) # Оставляем числовые колонки без изменений\n",
" ]\n",
")\n",
"\n",
"# Создание экземпляра ADASYN\n",
"ada = ADASYN()\n",
"\n",
"# Преобразование данных с помощью пайплайна\n",
"X = preprocessor.fit_transform(df_train.drop(columns=['stroke']))\n",
"y = df_train['stroke']\n",
"\n",
"# Применение ADASYN\n",
"X_resampled, y_resampled = ada.fit_resample(X, y)\n",
"\n",
"# Создание нового DataFrame\n",
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
"# Восстанавливаем названия столбцов для DataFrame\n",
"ohe_columns = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)\n",
"new_column_names = list(ohe_columns) + numeric_features\n",
"df_train_adasyn.columns = new_column_names\n",
"\n",
"# Добавление целевой переменной\n",
"df_train_adasyn['stroke'] = y_resampled\n",
"\n",
"# Вывод информации о новой выборке\n",
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
"print(df_train_adasyn['stroke'].value_counts())\n",
"\n",
"# Визуализация\n",
"counts = df_train_adasyn['stroke'].value_counts()\n",
"plt.figure(figsize=(6, 6))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов Stroke в тренировочной выборке после ADASYN')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Балансировка данных андерсемплингом. Этот метод помогает сбалансировать выборку, уменьшая количество экземпляров класса большинства, чтобы привести его в соответствие с классом меньшинства."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 364,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка после undersampling: (298, 12)\n",
"stroke\n",
"0 149\n",
"1 149\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsMAAADECAYAAAB6FizTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA+cUlEQVR4nO3dd3hT1f8H8HeStkk3lNJBGQXKpqwyhAJllwICimxkyfjJUBRF0S9TFBkCykYBFVBZynBQ9pSlLNmrZRTaUko36UjO74+aa9OkpSktN23fr+fpA7m543PPXZ977rknCiGEABERERFRCaSUOwAiIiIiIrkwGSYiIiKiEovJMBERERGVWEyGiYiIiKjEYjJMRERERCUWk2EiIiIiKrGYDBMRERFRicVkmIiIiIhKLCbDRERE+RQXF4ebN28iIyND7lCIKJ+YDBMR2rRpg7p168odBhEA4P79+/j222+lz+Hh4diwYYN8AWWRnp6OuXPnon79+lCr1ShdujSqVauGffv2yR0aWaFvv/0WCoUC4eHhcodiFRQKBaZPny59tpbysSgZNgRt+NNoNKhevTrGjRuHqKiowoqRiAAsW7bMKEEgKq4UCgXGjh2L0NBQhIeHY9KkSThy5IjcYSE1NRUdOnTAlClT0KZNG2zevBl79uzB/v370bx5c7nDo2cIDw+HQqHA/PnzzX4/f/58q0jM6MWzyc9EM2fOROXKlaHVanH06FEsX74cv//+Oy5evAgHB4eCjpGIkJkMu7u7Y+jQoXKHQlSofHx8MHLkSHTu3BkA4O3tjYMHD8obFIA5c+bg5MmTCA0NRZs2beQOh6jIe/3119GvXz+o1WpZ48hXMhwSEoLGjRsDAEaMGIEyZcpgwYIF2L59O/r371+gARKR5ZKTk+Ho6Ch3GET5tmjRIowfPx4xMTGoW7eu7PtzRkYGFi1ahIkTJzIRJtkUt3O7SqWCSqWSO4yCaTPcrl07AEBYWBgAIDY2Fu+99x78/f3h5OQEFxcXhISE4Pz58ybTarVaTJ8+HdWrV4dGo4G3tzdeffVV3Lp1C8B/jzVy+st6Ujp48CAUCgU2btyIjz76CF5eXnB0dET37t1x7949k2WfPHkSnTt3hqurKxwcHBAUFIRjx46ZXcc2bdqYXX7Wti8G69evR0BAAOzt7eHm5oZ+/fqZXX5u65aVXq/HokWLUKdOHWg0Gnh6emL06NF48uSJ0Xi+vr7o1q2byXLGjRtnMk9zsc+bN8+kTIHMR4PTpk2Dn58f1Go1KlSogEmTJiE1NdVsWWXVpk0bk/l9+umnUCqV+OGHH/JVHvPnz0eLFi1QpkwZ2NvbIyAgAFu2bDG7/PXr16Np06ZwcHBA6dKl0bp1a+zevdtonD/++ANBQUFwdnaGi4sLmjRpYhLb5s2bpW3q7u6OQYMGISIiwmicoUOHGsVcunRptGnTJk+PdyMjIzFs2DCUL18earUa3t7e6NGjh/S4ztfXF5cuXcKhQ4dM9n1D86VDhw5hzJgx8PDwQPny5aV5L1u2DHXq1IFarUa5cuUwduxYxMXFPTOm3bt3w8HBAf3795deDrp69Spee+01uLm5QaPRoHHjxtixY8cz55V929ra2sLX1xfvv/8+0tLS8jyduT9DTbmhHA4fPozRo0ejTJkycHFxweDBg02OFSBzu7dq1QqOjo5wdnZG165dcenSJaNxhg4dCicnJ5Npt2zZAoVCYVRbaW5fP336tNl9WKFQYNy4cdiwYQNq1KgBjUaDgIAAHD582GRZZ8+eRUhICFxcXODk5IT27dvjxIkTRuNkb8Lm4OAAf39/fPPNNybz279/v7TepUqVQo8ePXDlyhWjccyti2FbZG2qY0n5AHk/jnx9fQEAVatWRbNmzRAbGwt7e/s8PcLO63GY0/nSwHA9MazDtWvX8OTJEzg7OyMoKAgODg5wdXVFt27dcPHiRZPpLdluedlnfX19TZ4KjRo1Ch
qNxqSc87Jvm5N9P3rW9S4v6whkvmT4zjvvwNfXF2q1GuXLl8fgwYMRExNjNF72bZfTcvN7HsoPw35y9OhRNG3aFBqNBlWqVMH3339vMu6lS5fQrl072Nvbo3z58pg1axb0er3Z+Vpy/rl16xa6dOkCZ2dnDBw4EABw48YN9OrVC15eXtBoNChfvjz69euH+Ph4afq1a9eiXbt28PDwgFqtRu3atbF8+fIc1/HgwYNo3Lgx7O3t4e/vL+1XP//8M/z9/aXz1NmzZ83Gefv2bQQHB8PR0RHlypXDzJkzIYTItXzNtRm2pMwvXLiAoKAgozJfu3atxc1d8lUznJ0hcS1TpgwA4Pbt29i2bRt69+6NypUrIyoqCitXrkRQUBAuX76McuXKAQB0Oh26deuGffv2oV+/fnj77beRmJiIPXv24OLFi6hataq0jP79+6NLly5Gy508ebLZeD799FMoFAp88MEHiI6OxqJFi9ChQwecO3cO9vb2ADIvCCEhIQgICMC0adOgVCqlHefIkSNo2rSpyXzLly+P2bNnAwCSkpLw5ptvml32lClT0KdPH4wYMQKPHj3C4sWL0bp1a5w9exalSpUymWbUqFFo1aoVgMyd7pdffjH6fvTo0fj2228xbNgwvPXWWwgLC8OSJUtw9uxZHDt2DLa2tmbLwRJxcXHSumWl1+vRvXt3HD16FKNGjUKtWrXwzz//YOHChbh+/Tq2bdtm0XLWrl2L//3vf/jiiy8wYMAAs+M8qzy+/PJLdO/eHQMHDkRaWhp++ukn9O7dG7/++iu6du0qjTdjxgxMnz4dLVq0wMyZM2FnZ4eTJ09i//796NSpE4DMA3H48OGoU6cOJk+ejFKlSuHs2bPYtWuXFJ+h7Js0aYLZs2cjKioKX375JY4dO2ayTd3d3bFw4UIAmS8Bffnll+jSpQvu3btndtsb9OrVC5cuXcL48ePh6+uL6Oho7NmzB3fv3oWvr69US+bk5ISPP/4YAODp6Wk0jzFjxqBs2bKYOnUqkpOTAQDTp0/HjBkz0KFDB7z55pu4du0ali9fjtOnT+e67/z666947bXX0LdvX6xZswYqlQqXLl1CYGAgfHx88OGHH8LR0RGbNm1Cz549sXXrVrzyyis5rp+BYdumpqYiNDQU8+fPh0ajwSeffGJ2/LJly2LdunXSZ8P+kHVY1vMEkHnzV6pUKUyfPl1a3zt37kjJDQCsW7cOQ4YMQXBwMObMmYOUlBQsX74cLVu2xNmzZ6Vk7Hl98MEHOX536NAhbNy4EW+99RbUajWWLVuGzp0749SpU9LLjJcuXUKrVq3g4uKCSZMmwdbWFitXrkSbNm1w6NAhNGvWzGieCxcuhLu7OxISErBmzRqMHDkSvr6+6NChAwBg7969CAkJQZUqVTB9+nQ8ffoUixcvRmBgIM6cOVNg622OJcdRdlOnToVWq83zsvJ7HObm8ePHADKvO9WqVcOMGTOg1WqxdOlSBAYG4vTp06hevToAy7dbXvbZ7KZNm4bVq1dj48aNRjcuBbFvG5pCGpi73uV1HZOSktCqVStcuXIFw4cPR6NGjRATE4MdO3bg/v37cHd3N5pv1m0HZD5Gz77c5z0PWermzZt47bXX8MYbb2DIkCFYs2YNhg4dioCAANSpUwdAZoVG27ZtkZGRIcW1atUqKefIypJtlJGRgeDgYLRs2RLz58+Hg4MD0tLSEBwcjNTUVIwfPx5eXl6IiIjAr7/+iri4OLi6ugIAli9fjjp16qB79+6wsbHBzp07MWbMGOj1eowdO9ZkHQcMGIDRo0dj0KBBmD9/Pl5++WWsWLECH330EcaMGQMAmD17Nvr06YNr165BqfyvPlWn06Fz58546aWXMHfuXOzatQvTpk1DRkYGZs6cWShlHhERgbZt20KhUGDy5MlwdHTEN998k78mF8ICa9euFQDE3r17xaNHj8S9e/fETz/9JMqUKSPs7e3F/fv3hRBCaLVaodPpjKYNCw
sTarVazJw5Uxq2Zs0aAUAsWLDAZFl6vV6aDoCYN2+eyTh16tQRQUFB0ucDBw4IAMLHx0ckJCRIwzdt2iQAiC+//FK
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"rus = RandomUnderSampler()# Создание экземпляра RandomUnderSampler\n",
"\n",
"# Применение RandomUnderSampler\n",
"X_resampled, y_resampled = rus.fit_resample(df_train.drop(columns=['stroke']), df_train['stroke'])\n",
"\n",
"# Создание нового DataFrame\n",
"df_train_undersampled = pd.DataFrame(X_resampled)\n",
"df_train_undersampled['stroke'] = y_resampled # Добавление целевой переменной\n",
"\n",
"# Вывод информации о новой выборке\n",
"print(\"Обучающая выборка после undersampling: \", df_train_undersampled.shape)\n",
"print(df_train_undersampled['stroke'].value_counts())\n",
"\n",
"# Визуализация распределения классов\n",
"counts = df_train_undersampled['stroke'].value_counts()\n",
"plt.figure(figsize=(2, 2))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов stroke в тренировочной выборке после Undersampling')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### 3. Датасет: Набор данных для анализа и прогнозирования сердечного приступа\n",
"https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease\n",
"##### О наборе данных: \n",
"По данным CDC, болезни сердца являются основной причиной смерти представителей большинства рас в США (афроамериканцев, американских индейцев и коренных жителей Аляски, а также белых). Около половины всех американцев (47%) имеют по крайней мере 1 из 3 основных факторов риска сердечно-сосудистых заболеваний: высокое кровяное давление, высокий уровень холестерина и курение. Другие ключевые показатели включают сахарный диабет, ожирение (высокий ИМТ), недостаточную физическую активность или чрезмерное употребление алкоголя. Выявление и профилактика факторов, оказывающих наибольшее влияние на сердечно-сосудистые заболевания, очень важны в здравоохранении. В свою очередь, достижения в области вычислительной техники позволяют применять методы машинного обучения для выявления \"закономерностей\" в данных, которые позволяют предсказать состояние пациента.\n",
"\n",
"##### Таким образом:\n",
"* Объект наблюдения - представители большинства рас в США\n",
"* Атрибуты: HeartDisease, BMI, Smoking, AlcoholDrinking, Stroke, PhysicalHealth(как много дней за месяц вы чувствовали себя плохо), MentalHealth(как много дней за месяц вы чувствовали себя ментально плохо), DiffWalking, Sex, AgeCategory, Race, Diabetic, PhysicalActivity, GenHealth, SleepTime, Asthma, KidneyDisease, SkinCancer.\n",
"* Проблемная область: прогнозирование сердечного приступа у человека."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 365,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество колонок: 18\n",
"Колонки: HeartDisease, BMI, Smoking, AlcoholDrinking, Stroke, PhysicalHealth, MentalHealth, DiffWalking, Sex, AgeCategory, Race, Diabetic, PhysicalActivity, GenHealth, SleepTime, Asthma, KidneyDisease, SkinCancer\n",
"\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 319795 entries, 0 to 319794\n",
"Data columns (total 18 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 HeartDisease 319795 non-null object \n",
" 1 BMI 319795 non-null float64\n",
" 2 Smoking 319795 non-null object \n",
" 3 AlcoholDrinking 319795 non-null object \n",
" 4 Stroke 319795 non-null object \n",
" 5 PhysicalHealth 319795 non-null float64\n",
" 6 MentalHealth 319795 non-null float64\n",
" 7 DiffWalking 319795 non-null object \n",
" 8 Sex 319795 non-null object \n",
" 9 AgeCategory 319795 non-null object \n",
" 10 Race 319795 non-null object \n",
" 11 Diabetic 319795 non-null object \n",
" 12 PhysicalActivity 319795 non-null object \n",
" 13 GenHealth 319795 non-null object \n",
" 14 SleepTime 319795 non-null float64\n",
" 15 Asthma 319795 non-null object \n",
" 16 KidneyDisease 319795 non-null object \n",
" 17 SkinCancer 319795 non-null object \n",
"dtypes: float64(4), object(14)\n",
"memory usage: 43.9+ MB\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HeartDisease</th>\n",
" <th>BMI</th>\n",
" <th>Smoking</th>\n",
" <th>AlcoholDrinking</th>\n",
" <th>Stroke</th>\n",
" <th>PhysicalHealth</th>\n",
" <th>MentalHealth</th>\n",
" <th>DiffWalking</th>\n",
" <th>Sex</th>\n",
" <th>AgeCategory</th>\n",
" <th>Race</th>\n",
" <th>Diabetic</th>\n",
" <th>PhysicalActivity</th>\n",
" <th>GenHealth</th>\n",
" <th>SleepTime</th>\n",
" <th>Asthma</th>\n",
" <th>KidneyDisease</th>\n",
" <th>SkinCancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>16.60</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>55-59</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>5.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>No</td>\n",
" <td>20.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>7.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>26.58</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>20.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>8.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>24.21</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>6.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>23.71</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>40-44</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>8.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
"0 No 16.60 Yes No No 3.0 \n",
"1 No 20.34 No No Yes 0.0 \n",
"2 No 26.58 Yes No No 20.0 \n",
"3 No 24.21 No No No 0.0 \n",
"4 No 23.71 No No No 28.0 \n",
"\n",
" MentalHealth DiffWalking Sex AgeCategory Race Diabetic \\\n",
"0 30.0 No Female 55-59 White Yes \n",
"1 0.0 No Female 80 or older White No \n",
"2 30.0 No Male 65-69 White Yes \n",
"3 0.0 No Female 75-79 White No \n",
"4 0.0 Yes Female 40-44 White No \n",
"\n",
" PhysicalActivity GenHealth SleepTime Asthma KidneyDisease SkinCancer \n",
"0 Yes Very good 5.0 Yes No Yes \n",
"1 Yes Very good 7.0 No No No \n",
"2 Yes Fair 8.0 Yes No No \n",
"3 No Good 6.0 No No Yes \n",
"4 Yes Very good 8.0 No No No "
]
},
2024-10-12 12:38:44 +04:00
"execution_count": 365,
2024-10-11 18:41:52 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"df = pd.read_csv(\".//static//csv//heart.csv\", sep=\",\")\n",
"print('Количество колонок: ' + str(df.columns.size)) \n",
"print('Колонки: ' + ', '.join(df.columns)+'\\n')\n",
"\n",
"df.info()\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Получение сведений о пропущенных данных\n",
"Типы пропущенных данных:\n",
"\n",
"- None - представление пустых данных в Python\n",
"- NaN - представление пустых данных в Pandas\n",
"- '' - пустая строка"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 366,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HeartDisease 0\n",
"BMI 0\n",
"Smoking 0\n",
"AlcoholDrinking 0\n",
"Stroke 0\n",
"PhysicalHealth 0\n",
"MentalHealth 0\n",
"DiffWalking 0\n",
"Sex 0\n",
"AgeCategory 0\n",
"Race 0\n",
"Diabetic 0\n",
"PhysicalActivity 0\n",
"GenHealth 0\n",
"SleepTime 0\n",
"Asthma 0\n",
"KidneyDisease 0\n",
"SkinCancer 0\n",
"dtype: int64\n",
"\n",
"HeartDisease False\n",
"BMI False\n",
"Smoking False\n",
"AlcoholDrinking False\n",
"Stroke False\n",
"PhysicalHealth False\n",
"MentalHealth False\n",
"DiffWalking False\n",
"Sex False\n",
"AgeCategory False\n",
"Race False\n",
"Diabetic False\n",
"PhysicalActivity False\n",
"GenHealth False\n",
"SleepTime False\n",
"Asthma False\n",
"KidneyDisease False\n",
"SkinCancer False\n",
"dtype: bool\n",
"\n",
"HeartDisease процент пустых значений: %0.00\n",
"BMI процент пустых значений: %0.00\n",
"Smoking процент пустых значений: %0.00\n",
"AlcoholDrinking процент пустых значений: %0.00\n",
"Stroke процент пустых значений: %0.00\n",
"PhysicalHealth процент пустых значений: %0.00\n",
"MentalHealth процент пустых значений: %0.00\n",
"DiffWalking процент пустых значений: %0.00\n",
"Sex процент пустых значений: %0.00\n",
"AgeCategory процент пустых значений: %0.00\n",
"Race процент пустых значений: %0.00\n",
"Diabetic процент пустых значений: %0.00\n",
"PhysicalActivity процент пустых значений: %0.00\n",
"GenHealth процент пустых значений: %0.00\n",
"SleepTime процент пустых значений: %0.00\n",
"Asthma процент пустых значений: %0.00\n",
"KidneyDisease процент пустых значений: %0.00\n",
"SkinCancer процент пустых значений: %0.00\n"
]
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" print(f\"{i} процент пустых значений: %{null_rate:.2f}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Пропущенные данные отсутствуют.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Проверим выбросы и усредним их:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 367,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Колонка BMI:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 10396\n",
" Минимальное значение: 12.02\n",
" Максимальное значение: 94.85\n",
" 1-й квартиль (Q1): 24.03\n",
" 3-й квартиль (Q3): 31.42\n",
"\n",
"Колонка PhysicalHealth:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 47146\n",
" Минимальное значение: 0.0\n",
" Максимальное значение: 30.0\n",
" 1-й квартиль (Q1): 0.0\n",
" 3-й квартиль (Q3): 2.0\n",
"\n",
"Колонка MentalHealth:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 51576\n",
" Минимальное значение: 0.0\n",
" Максимальное значение: 30.0\n",
" 1-й квартиль (Q1): 0.0\n",
" 3-й квартиль (Q3): 3.0\n",
"\n",
"Колонка SleepTime:\n",
" Есть выбросы: Да\n",
" Количество выбросов: 4543\n",
" Минимальное значение: 1.0\n",
" Максимальное значение: 24.0\n",
" 1-й квартиль (Q1): 6.0\n",
" 3-й квартиль (Q3): 8.0\n",
"\n"
]
}
],
"source": [
"numeric_columns = ['BMI', 'PhysicalHealth', 'MentalHealth', 'AgeCategory', 'SleepTime']\n",
"for column in numeric_columns:\n",
" if pd.api.types.is_numeric_dtype(df[column]): # Проверяем, является ли колонка числовой\n",
" q1 = df[column].quantile(0.25) # Находим 1-й квартиль (Q1)\n",
" q3 = df[column].quantile(0.75) # Находим 3-й квартиль (Q3)\n",
" iqr = q3 - q1 # Вычисляем межквартильный размах (IQR)\n",
"\n",
" # Определяем границы для выбросов\n",
" lower_bound = q1 - 1.5 * iqr # Нижняя граница\n",
" upper_bound = q3 + 1.5 * iqr # Верхняя граница\n",
"\n",
" # Подсчитываем количество выбросов\n",
" outliers = df[(df[column] < lower_bound) | (df[column] > upper_bound)]\n",
" outlier_count = outliers.shape[0]\n",
"\n",
" print(f\"Колонка {column}:\")\n",
" print(f\" Есть выбросы: {'Да' if outlier_count > 0 else 'Нет'}\")\n",
" print(f\" Количество выбросов: {outlier_count}\")\n",
" print(f\" Минимальное значение: {df[column].min()}\")\n",
" print(f\" Максимальное значение: {df[column].max()}\")\n",
" print(f\" 1-й квартиль (Q1): {q1}\")\n",
" print(f\" 3-й квартиль (Q3): {q3}\\n\")\n",
"\n",
" # Устраняем выбросы: заменяем значения ниже нижней границы на саму нижнюю границу, а выше верхней — на верхнюю\n",
" df[column] = df[column].apply(lambda x: lower_bound if x < lower_bound else upper_bound if x > upper_bound else x)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Постараемся выявить зависимости HeartDisease от остальных колонок:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Разобьем наш набор на выборки относительно параметра HeartDisease:"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 368,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYgAAAIjCAYAAAAZcIyWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABJDElEQVR4nO3deVwU9f8H8NeCsIBconIph4p4IF5ohneK4p2GRx4paqaGlqmZVIpYeXR4ZGYeRZfkWWaaqangmSUK3hdhpAEeCAjIIXx+f/hjv677ARlYWMDX8/HYB8xnZ2fes7uzr52Zz86ohBACREREjzEydAFERFQxMSCIiEiKAUFERFIMCCIikmJAEBGRFAOCiIikGBBERCTFgCAiIikGBBERSTEgiIiKITAwEO7u7oYuo1xVuoCIjY3FxIkTUb9+fZiZmcHa2hodOnTA8uXLcf/+fUOXR08pd3d3qFQqzc3MzAwNGzbEm2++ieTkZK1x582bB5VKBSMjI/z7778600pLS4O5uTlUKhWmTJmiab927RpUKhU+/vjjMl+e4uratSuaNWsmvc/Q9WZmZmLevHmIiIjQua/gNSi4WVhYwNXVFf3790dYWBiys7PLv+AKqJqhC1Bi586dGDJkCNRqNUaPHo1mzZohJycHhw8fxptvvolz585hzZo1hi6TnlItW7bEjBkzAABZWVmIiorCsmXLEBkZiT///FNnfLVajR9++AGzZs3Sav/xxx/Lpd6qLjMzE6GhoQAeBpnMqlWrYGlpiezsbNy4cQO7d+/GuHHjsGzZMuzYsQMuLi6acdeuXYv8/PzyKL3CqDQBERcXhxdffBFubm7Yv38/nJycNPcFBQXh6tWr2LlzpwErpKddnTp1MGrUKM3wyy+/DEtLS3z88ce4cuUKGjZsqDV+nz59pAERHh6Ovn37YuvWreVSd1WTn5+PnJycYo07ePBg1KpVSzM8d+5crF+/HqNHj8aQIUPwxx9/aO4zMTHRe60VXaXZxfThhx8iPT0dX375pVY4FPDw8MDrr7+uGS7YPF+/fj0aNWoEMzMz+Pj44ODBgzqPvXHjBsaNGwcHBweo1Wp4eXnhq6++ktbx+KZpwe3xbyiFbXp//PHHUKlUuHbtmlb7rl270KlTJ1SvXh1WVlbo27cvzp07p/P4ixcvYvDgwbCzs4OZmRnatGmD7du3S2t91KVLl9CtWzc4OjpCrVbDxcUFkyZN0tr9ERERAZVKhS1btug83tLSEoGBgZrh5ORkzJw5E97e3rC0tIS1tTV69+6NmJgY6fP1OHd3d63pAUBKSgqmTZsGFxcXqNVqeHh4YPHixVrf2orabdGsWTOt16FgeR7fxdC3b1+oVCrMmzdPq13J+6C4HB0dAQDVqul+FxsxYgSio6Nx8eJFTVtiYiL279+PESNGlHieBc9RYbfH36s3b97E+PHj4eDgADMzM7Ro0QLffPNNief/JMV5nYGH60r79u1Rs2ZNmJubw8fHR/refHRd9/LyglqtxhdffIHatWsDAEJDQzXL/vhrLjNy5Ei8/PLLOH78OPbu3atplx2D2LBhA3x8fGBlZQVra2t4e3tj+fLlZbq8e/fuRceOHWFrawtLS0s0atQIb7/9ttY42dnZCAkJgYeHh2Z9nzVrluJdZ5VmC+KXX35B/fr10b59+2I/JjIyEhs3bsRrr70GtVqNzz//HL169cKff/6p+fBOSkrCs88+q3mT1a5dG7t27cL48eORlpaGadOmSaddsGkKAMHBwaVatu+++w5jxoyBv78/Fi9ejMzMTKxatQodO3bEqVOnNG/Kc+fOoUOHDqhTpw5mz56N6tWrY9OmTRg4cCC2bt2KQYMGFTqPjIwM1K1bF/3794e1tTXOnj2LlStX4saNG/jll18U1/z3339j27ZtGDJkCOrVq4ekpCSsXr0aXbp0wfnz5+Hs7KxoepmZmejSpQtu3LiBiRMnwtXVFUePHkVwcD
ASEhKwbNkyxTXKHDx4EL/++qtOe0nfB4/Kzc3F7du3ATzcxXTq1CksWbIEnTt3Rr169XTG79y5M+rWrYvw8HDMnz8fALBx40ZYWlqib9++pVtQAMOHD0efPn202h5/r96/fx9du3bF1atXMWXKFNSrVw+bN29GYGAgUlJStL50FSYvL0+z3I+6e/euTpuS13n58uUYMGAARo4ciZycHGzYsAFDhgzBjh07dJ6f/fv3Y9OmTZgyZQpq1aqFFi1aYNWqVZg8eTIGDRqEF154AQDQvHnzJy4PALz00ktYs2YN9uzZgx49ekjH2bt3L4YPH47u3btj8eLFAIALFy7gyJEjmudN38t77tw59OvXD82bN8f8+fOhVqtx9epVHDlyRDOd/Px8DBgwAIcPH8Yrr7yCJk2a4MyZM1i6dCkuX76Mbdu2Fes5AACISiA1NVUAEM8//3yxHwNAABAnTpzQtP3zzz/CzMxMDBo0SNM2fvx44eTkJG7fvq31+BdffFHY2NiIzMxMrfa3335bANAa38vLS3Tp0kVrvC5duggvLy+duj766CMBQMTFxQkhhLh3756wtbUVEyZM0BovMTFR2NjYaLV3795deHt7i6ysLE1bfn6+aN++vWjYsOETnhFdr776qrC0tNQMHzhwQAAQmzdv1hm3evXqYsyYMZrhrKwskZeXpzVOXFycUKvVYv78+Zq20NBQAUDk5+drjevm5qY1vffee09Ur15dXL58WWu82bNnC2NjYxEfH6+ZBwDx0Ucf6dT4+OtQsDwHDhzQtLVr10707t1bABAhISGadqXvg8e5ublp3nOP3jp06KAzzZCQEAFA3Lp1S8ycOVN4eHho7mvbtq0YO3asEOLhezgoKEhzX1HL/iglz9GyZcsEAPH9999r2nJycoSvr6+wtLQUaWlpRc6rS5cu0uV+9PZoHcV9nYUQOs95Tk6OaNasmejWrZtWOwBhZGQkzp07p9V+69Ytnde5wKOvgczdu3cFAK3PijFjxgg3NzfN8Ouvvy6sra3FgwcP5E9OGSzv0qVLi6xbCCG+++47YWRkJA4dOqTV/sUXXwgA4siRI4U+9nGVYhdTWloaAMDKykrR43x9feHj46MZdnV1xfPPP4/du3cjLy8PQghs3boV/fv3hxACt2/f1tz8/f2RmpqKkydPak0zKysLAGBmZvbE+Rd8s3r0lpmZqTXO3r17kZKSguHDh2uNZ2xsjHbt2uHAgQMAHu7S2b9/P4YOHYp79+5pxrtz5w78/f1x5coV3Lhx44k1paamIikpCfv27cPOnTvRuXNnnXEenX7B7XFqtRpGRkaa5bxz545mc/fR58ze3h4AcP369SLr2rx5Mzp16oQaNWpozdfPzw95eXk6uwYzMzN1aszLyytyHj/++CP++usvLFq0SKu9JO8DmXbt2mHv3r3Yu3cvduzYgQ8++ADnzp3DgAEDCu1hN2LECFy9ehV//fWX5m9pdi8p9euvv8LR0RHDhw/XtJmYmOC1115Deno6IiMjnzgNd3d3zXI/evv+++91xlXyOpubm2v+v3v3LlJTU9GpUyfpa9GlSxc0bdpU6eIXqmDvwL179wodx9bWFhkZGVq7oR6n7+W1tbUFAPz888+FHjDfvHkzmjRpgsaNG2vNs1u3bgCg+Uwpjkqxi8na2hpA0S+WzOMHBQHA09MTmZmZuHXrFoyMjJCSkoI1a9YU2vvp5s2bWsO3b9+GiYkJLCwsnjj/ixcvavaDFubKlSsAoHnxHlew7FevXoUQAnPmzMGcOXMKrbVOnTpFzs/f3x/Hjx8HAPTq1QsbN27UGWfcuHFFTgN4uBm7fPlyfP7554iLi9P6cK5Zs6bmf19fX6hUKgQHB+P999/XrHiPv7mvXLmC06dPF/p8Pf46hISEICQkRGc8BwcH6ePz8vLw9ttvY+TIkTq7GW7duqX4fSBTq1Yt+Pn5aYb79u2LRo0aYfDgwVi3bh2mTp2q85hWrVqhcePGCA
8Ph62tLRwdHQt9L5SFf/75Bw0bNtSEfYEmTZpo7n+S6tWray13gcePswHKXucdO3bg/fffR3R0tNa+c9kxLdkuvNJ
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAcUAAAIjCAYAAAByNACGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhBUlEQVR4nO3deVwU9f8H8NeCsNwgcisCCh4gikciHqmJIqmFlXeJeJSJlqKZVIqoiVre+dO8O9TMI7M0RVE8EjUPPPJICSUVkENAQDnn94fLfF13QfaABX09H499wHzmM7PvWXbmxVy7EkEQBBARERH0dF0AERFRTcFQJCIikmEoEhERyTAUiYiIZBiKREREMgxFIiIiGYYiERGRDEORiIhIhqFIREQkw1Ckl46rqyv69u1brc85YsQIuLq6Vtn8u3Xrhm7dulXZ/NURGxsLiUSC2NjYSvfdvn171RdGlSaRSDBz5kxdl1Gt1ArFhIQEfPDBB2jUqBGMjIxgYWGBTp06YenSpXj06JG2aySqFFdXV0gkEvFhZ2eHLl264JdfftF1adXu1q1bkEgk+Prrr5WOnzlzJiQSCdLT06u1rs2bN2PJkiXV+pzaVFNf1zInTpzAzJkzkZWVpTDu6fVDT08PVlZW8Pb2xvvvv49Tp05Vf7E1VB1VJ9izZw8GDBgAqVSK4cOHo0WLFigsLMTx48fxySef4O+//8bq1aurolai5/Lx8cHkyZMBAPfu3cO3336Lt956CytXrsTYsWN1VteaNWtQWlqqs+evKTZv3ozLly9j4sSJui7lhXTixAlERkZixIgRsLKyUhj/9Prx8OFDXL16Fdu2bcOaNWswadIkLFq0SK7/o0ePUKeOyjFRq6m0tImJiRg8eDBcXFxw6NAhODo6iuNCQ0Nx8+ZN7NmzR+tFElVW/fr18e6774rDw4cPh7u7OxYvXqzTUDQwMNDZc9OLLy8vD6amps/t9+z6AQDz58/H0KFDsXjxYnh4eODDDz8UxxkZGWm91ppOpcOnCxYsQG5uLtatWycXiGXc3d3x8ccfi8MSiQTjx4/Hpk2b0LRpUxgZGaFt27Y4evSowrR3797FyJEjYW9vD6lUCi8vL6xfv15pHWWHKJ59PHtOpVu3bmjRooXC9F9//TUkEglu3bol1/7HH3+gS5cuMDU1hbm5Ofr06YO///5bYfpr167hnXfegbW1NYyMjNCuXTvs3r1baa1Pu379Ol577TU4ODhAKpXC2dkZY8eORWZmptinonMrZmZmGDFihDicmZmJKVOmwNvbG2ZmZrCwsEBgYCAuXLig9PV6lqurq9z8ACArKwsTJ06Es7MzpFIp3N3dMX/+fLm9nIoOIbVo0ULu71DeeaU+ffooPV+hyvugMhwcHNC8eXMkJiYqjDt+/Djat28PIyMjNGrUCN9//7047t9//4VEIsHixYsVpjtx4gQkEgm2bNkC4Ml/3BMnToSrqyukUins7OzQs2dPnDt3TpxG2TnF0tJSLF26FN7e3jAyMoKtrS169+6NM2fOiH02bNiA1157DXZ2dpBKpfD09MTKlSvVfj2e59SpU+jduzcsLS1hYmKCrl274s8//5Trc/v2bYwbNw5NmzaFsbEx6tWrhwEDBiisT8/q1q0b9uzZg9u3b4vrrLLX5Msvv0SDBg1gZGSEHj164ObNm8+tu7xtQtlj48aNcv0PHTokrutWVlZ48803cfXq1cq8RGrR5uu6ceNGSCQSHDlyBOPGjYOdnR0aNGiAmTNn4pNPPgEAuLm5icv+vL+LsbExfvjhB1hbW+PLL7/E01+c9Ow6Wpn3uraXt6ioCJGRkfDw8ICRkRHq1auHzp0748CBA3L91N0uP0ulPcXffvsNjRo1QseOHSs9zZEjR7B161Z89NFHkEql+L//+z/07t0bp0+fFgMrNTUVHTp0EEPU1tYWf/zxB0aNGoWcnJxyD7WsXLkSZmZmAIDw8HBVFkXBDz
/8gODgYAQEBGD+/PnIz8/HypUr0blzZ5w/f15cef/++2906tQJ9evXx7Rp02Bqaoqff/4ZQUFB2LFjB/r371/uc+Tl5aFBgwbo168fLCwscPnyZaxYsQJ3797Fb7/9pnLN//77L3bt2oUBAwbAzc0Nqamp+Pbbb9G1a1dcuXIFTk5OKs0vPz8fXbt2xd27d/HBBx+gYcOGOHHiBMLDw5GcnKy1c0FHjx7F3r17FdrVfR9UpKioCP/99x/q1asn137z5k288847GDVqFIKDg7F+/XqMGDECbdu2hZeXFxo1aoROnTph06ZNmDRpkty0mzZtgrm5Od58800AwNixY7F9+3aMHz8enp6eyMjIwPHjx3H16lW0adOm3NpGjRqFjRs3IjAwEKNHj0ZxcTGOHTuGkydPol27dgCevMe9vLzwxhtvoE6dOvjtt98wbtw4lJaWIjQ09LnLn5+fr/T8Vn5+vkLboUOHEBgYiLZt2yIiIgJ6enpiKB87dgzt27cHAPz11184ceIEBg8ejAYNGuDWrVtYuXIlunXrhitXrsDExERpLZ9//jmys7Nx584d8Z+NsvW3zLx586Cnp4cpU6YgOzsbCxYswLBhwyp9zuvpbQLw5OjWjBkz5PocPHgQgYGBaNSoEWbOnIlHjx5h+fLl6NSpE86dO1epC6Jqwus6btw42NraYsaMGcjLy0NgYCD++ecfbNmyBYsXL4aNjQ0AwNbW9rnLY2Zmhv79+2PdunW4cuUKvLy8lParzHtd28s7c+ZMREVFYfTo0Wjfvj1ycnJw5swZnDt3Dj179gSg2XZZgVBJ2dnZAgDhzTffrOwkAgABgHDmzBmx7fbt24KRkZHQv39/sW3UqFGCo6OjkJ6eLjf94MGDBUtLSyE/P1+u/bPPPhMAyPX38vISunbtKteva9eugpeXl0JdX331lQBASExMFARBEB4+fChYWVkJY8aMkeuXkpIiWFpayrX36NFD8Pb2Fh4/fiy2lZaWCh07dhQ8PDye84ooGjdunGBmZiYOHz58WAAgbNu2TaGvqampEBwcLA4/fvxYKCkpkeuTmJgoSKVSYdasWWJbZGSkAEAoLS2V6+vi4iI3v9mzZwumpqbCP//8I9dv2rRpgr6+vpCUlCQ+BwDhq6++Uqjx2b9D2fIcPnxYbPP19RUCAwMFAEJERITYrur74FkuLi5Cr169hLS0NCEtLU24cOGCMHjwYAGAMGHCBLl+AISjR4+Kbffv3xekUqkwefJkse3bb78VAAhXr14V2woLCwUbGxu5183S0lIIDQ2tsLbg4GDBxcVFHD506JAAQPjoo48U+j79d1K2zAEBAUKjRo3k2rp27Sr3upf9jZ73SEtLE5/Tw8NDCAgIUHh+Nzc3oWfPnhXWFBcXJwAQvv/+e7FN2d++T58+cq/Ds32bN28uFBQUiO1Lly4VAAiXLl1SmOZpERERcstT5q+//hIACBs2bBDbfHx8BDs7OyEjI0Nsu3DhgqCnpycMHz68wuepCa/rhg0bBABC586dheLiYrn+z27bnubi4iL06dOn3GVbvHixAED49ddfxbZn19HnvderYnlbtWpVYd2CoN3tcqUPn+bk5AAAzM3NK5+4APz8/NC2bVtxuGHDhnjzzTexf/9+lJSUQBAE7NixA/369YMgCEhPTxcfAQEByM7OVtg1f/z4MYDKHe8uKSmRm2d6errCf3MHDhxAVlYWhgwZItdPX18fvr6+OHz4MIAnhysPHTqEgQMH4uHDh2K/jIwMBAQE4MaNG7h79+5za8rOzkZqaipiYmKwZ88evPrqqwp9np5/2eNZUqkUenp64nJmZGTAzMwMTZs2lXvN7OzsAAB37typsK5t27ahS5cuqFu3rtzz+vv7o6SkROGwd9l/y08/SkpKKnyOnTt34q+//sK8efPk2tV5HygTHR0NW1tb2NraolWrVti2bRvee+89zJ8/X66fp6cnunTpIg7b2tqiadOm+Pfff8W2gQ
MHwsjICJs2bRLb9u/fj/T0dLnzMlZWVjh16hTu3bv33PrK7NixAxKJBBEREQrjnj7UbWxsLP6enZ2N9PR0dO3aFf/
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAb0AAAIjCAYAAACXqsXqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABeEUlEQVR4nO3deVhU5f8+8HtAGPYtZVME3BVxw0LcFxRNLczcTVzTPq6hmZSKuKGVppVl7lrua5Z7KK6oue+mCJEKuAICiQjP7w9/nK/jDDgHZkQ89+u65tJ5zjNn3ufMnLk5u0oIIUBERKQAJsVdABER0avC0CMiIsVg6BERkWIw9IiISDEYekREpBgMPSIiUgyGHhERKQZDj4iIFIOhR0REisHQIyoB4uPjoVKpsGzZsuIuRUPfvn3h5eWld18bGxvjFkSyLFu2DCqVCvHx8cVdyitjkNCLjY3F4MGDUaFCBVhYWMDOzg6NGjXC3Llz8d9//xniLYhk8/LygkqlQmBgoM7hCxcuhEqlgkqlwokTJ4xay48//mjwwJo0aRJUKhXu3bunc7iXlxc6dOhg0Pd8mczMTEyaNAnR0dGv9H0N6XWcr8/L77sUHR0tfZ9VKhXUajVcXFzQvHlzTJ8+HXfv3n31xb6GShV1BNu2bUOXLl2gVqvRp08f1KxZE0+ePMGhQ4fw2Wef4eLFi1iwYIEhaiWSzcLCAvv27UNSUhJcXV01hq1cuRIWFhZ4/Pix0ev48ccfUbp0afTt29fo71WcMjMzERERAQBo3rx58RbzhnrZd2nEiBF4++23kZOTg7t37+LIkSMIDw/H7NmzsW7dOrRs2VLq+9FHH6F79+5Qq9WvqPriV6TQi4uLQ/fu3eHp6Ym9e/fCzc1NGjZ06FBcv34d27ZtK3KRRIXVqFEj/PXXX1i7di1Gjhwptd+8eRMHDx5Ep06dsHHjxmKskEg/mZmZsLKyemm/Jk2a4MMPP9RoO3v2LNq0aYPOnTvj0qVL0m+1qakpTE1NjVLv66pImze/+uorpKenY/HixRqBl6dSpUoaPzQqlQrDhg3DypUrUbVqVVhYWMDPzw8HDhzQeu2tW7fQv39/uLi4QK1Ww8fHB0uWLNFZR97miBcfL/6l2bx5c9SsWVPr9d98843O7do7duxAkyZNYG1tDVtbW7Rv3x4XL17Uev2VK1fw4YcfwsnJCRYWFqhfvz62bt2qs9bnXb16FS1btoSrqyvUajU8PDwwZMgQPHjwQOqTt8liw4YNWq+3sbHR+GvvwYMHGDNmDHx9fWFjYwM7Ozu0a9cOZ8+e1Tm/XuTl5aX112NKSgpGjRoFDw8PqNVqVKpUCTNnzkRubq7UJ29/0zfffKM1zpo1a2p8DnnT8+Lmr/bt20OlUmHSpEka7XK+B7pYWFjggw8+wKpVqzTaV69eDUdHRwQFBel8nT6fad7+kMOHDyM0NBRlypSBtbU1OnXqpLEpycvLCxcvXsT+/fu1vpv6fmaGkpubizlz5sDHxwcWFhZwcXHB4MGD8fDhQ41+v/32G9q3bw93d3eo1WpUrFgRU6ZMQU5OTr7jjo+PR5kyZQAAERER0rTq+kyDg4NhY2ODMmXKYMyYMQWON0/e5ur8Hs97+vQppkyZgooVK0KtVsPLywtffPEFsrKy9JxT8hh6vub9Vp08eRJNmzaFlZUVvvjiiwK/SwWpXbs25syZg5SUFPzwww9Su659eidOnEBQUBBKly4NS0tLeHt7o3///kad3mvXrqFz585wdXWFhYUFypUrh+7duyM1NVWj36+//go/Pz9YWlrCyckJ3bt3x7///vvS6X9ekdb0fv/9d1SoUAENGzbU+zX79+/H2rVrMWLECKjVavz4449o27Ytjh8/LgVScnIyGjRoIIVkmTJlsGPHDgwYMABpaWkYNWqUznH/9NNP0o7ysLCwokwafvnlF4SEhCAoKAgzZ85EZmYmfvrpJzRu3BinT5+Wdt
5fvHgRjRo1QtmyZTFu3DhYW1tj3bp1CA4OxsaNG9GpU6d83yMjIwPlypVDx44dYWdnhwsXLmDevHm4desWfv/9d9k137hxA1u2bEGXLl3g7e2N5ORk/Pzzz2jWrBkuXboEd3d3WePLzMxEs2bNcOvWLQwePBjly5fHkSNHEBYWhsTERMyZM0d2jbocOHAA27dv12ov7PfgRT179kSbNm0QGxuLihUrAgBWrVqFDz/8EGZmZlr95X6mw4cPh6OjI8LDwxEfH485c+Zg2LBhWLt2LQBgzpw5GD58OGxsbPDll18CAFxcXAAY5jN7/o+k5z3/h0mewYMHY9myZejXrx9GjBiBuLg4/PDDDzh9+jQOHz4szY9ly5bBxsYGoaGhsLGxwd69ezFx4kSkpaXh66+/1vl+ZcqUwU8//YRPPvkEnTp1wgcffAAAqFWrltQnJycHQUFB8Pf3xzfffIM///wTs2bNQsWKFfHJJ5+8dFrr1KmD0aNHa7StWLECe/bs0WgbOHAgli9fjg8//BCjR4/GsWPHEBkZicuXL2Pz5s0vfR+g+Ofr/fv30a5dO3Tv3h29e/eW9s/l9116mQ8//BADBgzA7t27MW3aNJ197ty5gzZt2qBMmTIYN24cHBwcEB8fj02bNhltep88eYKgoCBkZWVh+PDhcHV1xa1bt/DHH38gJSUF9vb2AIBp06ZhwoQJ6Nq1KwYOHIi7d+/i+++/R9OmTXH69Gk4ODjoNR8gCik1NVUAEO+//77erwEgAIgTJ05Ibf/884+wsLAQnTp1ktoGDBgg3NzcxL179zRe3717d2Fvby8yMzM12r/44gsBQKO/j4+PaNasmUa/Zs2aCR8fH626vv76awFAxMXFCSGEePTokXBwcBCDBg3S6JeUlCTs7e012lu1aiV8fX3F48ePpbbc3FzRsGFDUbly5ZfMEW3/+9//hI2NjfR83759AoBYv369Vl9ra2sREhIiPX/8+LHIycnR6BMXFyfUarWYPHmy1BYRESEAiNzcXI2+np6eGuObMmWKsLa2Fn///bdGv3HjxglTU1ORkJAgvQcA8fXXX2vV+OLnkDc9+/btk9r8/f1Fu3btBAARHh4utcv9HrzI09NTtG/fXjx9+lS4urqKKVOmCCGEuHTpkgAg9u/fL5YuXSoAiL/++kt6nb6fad5rAwMDNeblp59+KkxNTUVKSkq+8yGPvp9Z3jxeunSp1BYeHi4tU/k92rdvL/U/ePCgACBWrlyp8X47d+7Uatc1bwcPHiysrKw05ktISIjw9PSUnt+9e1frc3y+LwCN6RJCiLp16wo/Pz+t/i/K+zxfNHToUPH8T9mZM2cEADFw4ECNfmPGjBEAxN69ewt8n9dhvjZr1kwAEPPnz9fqn993qaDfijy1a9cWjo6O0vO873Deb9/mzZu1locXGXp6T58+/dK64+PjhampqZg2bZpG+/nz50WpUqW02gtS6M2baWlpAABbW1tZrwsICICfn5/0vHz58nj//fexa9cu5OTkQAiBjRs3omPHjhBC4N69e9IjKCgIqampOHXqlMY48w5EsLCweOn75+TkaIzz3r17yMzM1OizZ88epKSkoEePHhr9TE1N4e/vj3379gF49pfg3r170bVrVzx69Ejqd//+fQQFBeHatWu4devWS2tKTU1FcnIyoqKisG3bNjRt2lSrz/Pjz3u8SK1Ww8TERJrO+/fvw8bGBlWrVtWYZ87OzgCe7dcqyPr169GkSRM4OjpqvG9gYCBycnK0NktnZmZq1fiyzVabNm3CX3/9hRkzZmi0F+Z7kB9TU1N07doVq1evBvDsABYPDw80adJEq29hPtOPP/5YY/NakyZNkJOTg3/++eelten7mRVk48aN2LNnj9bjxTWA9evXw97eHq1bt9aYn35+frCxsZG+1wBgaWkp/T9vPjRp0gSZmZm4cuWKXnXlZ8iQIRrPmzRpghs3bhRpnM/L22oQGhqq0Z63hqjvcQbFPV
/VajX69eun/4TrwcbGBo8ePcp3eN7a0h9//IHs7GydfQw9vXlrcrt27dL6Lc6zadMm5ObmomvXrhrv6erqisqVK2u
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaIAAAJhCAYAAAADn7ToAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABxxklEQVR4nO3dd1gUV9sG8HspSxUQERAxiGDDLjbsBUXFqNEktgRjjcYSxWCJXZPYYtREoylGYm+xxFgRu2LvBSsKFrCLlfp8f/gxryuoLOw6KPfvuvbSnTl75tlh2ZuZOTOjEREBERGRSkzULoCIiHI3BhEREamKQURERKpiEBERkaoYREREpCoGERERqYpBREREqmIQERGRqhhERESkKgYR0Xvu0aNHmDp1qvL8/v37mDFjhnoFEb2EQZSB0NBQaDQaHDx4MN28P/74AxqNBi1btkRKSooK1RHpx8rKCsOGDcOCBQsQExODUaNGYc2aNWqXZRD379+HpaUlNBoNzpw581aXnZKSgjlz5qBu3bpwdHSEhYUFChcujE6dOmX43fEmp0+fxqhRo3D58mXDF5vDMYj0sHLlSvTs2RO1atXC4sWLYWpqqnZJRG9kamqK0aNHIygoCB988AHmzJmDYcOGqV2WQSxbtgwajQaurq5YsGDBW1vu06dP0axZM3Tu3Bkigm+//RYzZ85EUFAQIiIiUKVKFVy9elWvPk+fPo3Ro0fnyiAyU7uAd8W2bdvQrl07+Pj4YM2aNbC0tFS7JKJMGzBgANq0aYOYmBiULFkSDg4OapdkEPPnz0fTpk3h4eGBhQsX4rvvvnsryw0JCcGGDRswZcoU9OvXT2feyJEjMWXKlLdShxpSU1ORmJho2O9AoXTmzJkjAOTAgQMiInLkyBGxs7OTwoULy/Xr1zN8zdKlS6VixYpiaWkp+fLlkw4dOsjVq1czbAsgw0dUVJROm5EjR+q8buLEiQJA6tSpo0wbOXKkZPRj9PDwkI4dO+pMu3fvnnz99dfi7u4uWq1WvLy8ZPz48ZKSkqLTLiUlRaZOnSqlS5cWCwsLcXJykoCAAGV9vKr+tEdafVu3btWZrtVqpWjRovLDDz9IamqqzjIPHz4sjRs3ljx58oiNjY3Ur19fIiIiMlx/L4qMjJR69eqJi4uLaLVacXd3ly+//FLu3LmjtEmrY9myZeleb2Njo7Oe7ty5IwMGDJDSpUuLjY2N5MmTRxo3bixHjx7VeZ2h13tUVJQAkEmTJqXrs1SpUjo/87T3s3XrVp12TZs2Tfe5yajOhw8fiouLS4Z9vEqdOnUy/Fm//BkV+d/vz6s+F2kOHz4sAQEB4uTkpNMuMDAwUzVduXJFNBqNLF26VPbt2ycAZPfu3Rm2nT59unh6eoqlpaVUrlxZduzYIXXq1ElX07Nnz2TEiBHi5eWlfJ5CQkLk2bNnSpuYmBgxMzOThg0bZqrOy5cvS8+ePaVYsWJiaWkpjo6O8vHHH+v8vr9qnb3481m3bp3UrFlTrK2txdbWVpo2bSonT55Mt7ylS5dKyZIlxcLCQkqVKiUrVqyQjh07ioeHh067R48eSXBwsPK5LFasmEyaNCnd7yYA6dWrl8yfP198fHzEzMxMVqxYIR4eHtK8efN0y3/69KnY2dlJ9+7dM7V+RES4RfQGFy9eROPGjWFhYYGNGzeiQIEC6dqEhoaiU6dOqFy5MsaNG4e4uDhMmzYNu3fvxpEjRzL86/Ojjz5Cq1atAAA7d+7E77///to67t+/j3HjxmX5fTx58gR16tTBtWvX8OWXX+KDDz7Anj17MGTIENy4cUPnYHaXLl0QGhqKJk2aoGvXrkhOTsbOnTuxd+9eVKpUCfPmzVPaptU+ZcoUODk5AQBcXFx0lv3tt9+iZMmSePr0KZYsWYJvv/0Wzs7O6NKlCwDg1KlTqFWrFuzs7DBw4ECYm5vjt99+Q926dbF9+3ZUrVr1le/r8ePHcHd3x4cffg
g7OzucPHkSM2bMwLVr17J0HOTSpUtYtWoVPvnkE3h6eiIuLg6//fYb6tSpg9OnT8PNzU2v/vRZ79mxY8cOrFu3LlNtJ0+ejLi4OL2X4e7urnwGHz16hJ49e762/Yufie+//15n3oMHD9CkSROICIKDg1GoUCEAQP/+/TNdz6JFi2BjY4NmzZrBysoKXl5eWLBgAapXr67TbubMmejduzdq1aqF/v374/Lly2jZsiXy5s0Ld3d3pV1qaiqaN2+OXbt2oXv37ihZsiROnDiBKVOm4Ny5c1i1ahUAYP369UhOTsbnn3+eqToPHDiAPXv2oG3btnB3d8fly5cxc+ZM1K1bF6dPn4a1tTVq166Nvn374ueff1Z+XwAo/86bNw8dO3ZEQEAAJkyYgCdPnmDmzJmoWbMmjhw5gsKFCwMA1q5dizZt2qBMmTIYN24c7t27hy5duqBgwYI6NYkImjdvjq1bt6JLly4oX748Nm7ciJCQEFy7di3dFt2WLVuwdOlS9O7dG05OTvD09MRnn32GiRMn4u7du3B0dFTarlmzBvHx8fjss88ytX7SCqKXpP118t9//4mXl5cAkEaNGmXYNjExUZydnaV06dLy9OlTZfp///0nAGTEiBE67ZOSkgSAjB49Ot3yXrdFNHDgQHF2dhZfX1+dv+JGjx4tANL9FfPyX+Zjx44VGxsbOXfunE67wYMHi6mpqURHR4uIyJYtWwSA9O3bN917fXkZr6o9TUZ/uT979kxMTEzkq6++Uqa1bNlStFqtXLx4UZl2/fp1yZMnj9SuXTtdv2/y1Vdfia2tbbo6MrNF9OzZs3RbiFFRUWJhYSFjxoxRphl6vWd3i6hq1arSpEmTN24R3bx5U/LkyaO0zewWUfXq1aV06dLK81u3br1yi+iPP/4QAHLlyhVl2stbHxs3bhQAsmjRIp3Xenh4ZHqLqEyZMtKhQwfl+bfffitOTk6SlJSkTEtISJB8+fJJ5cqVdaaHhoam20qbN2+emJiYyM6dO3WWM2vWLJ2trf79+wsAOXLkSKbqfPLkSbppERERAkDmzp2rTFu2bFmGP5OHDx+Kg4ODdOvWTWd6bGys2Nvb60wvU6aMuLu7y8OHD5Vp27ZtEwA6W0SrVq0SAPLdd9/p9Pnxxx+LRqORCxcuKNMAiImJiZw6dUqn7dmzZwWAzJw5U2d68+bNpXDhwhl+X7wKByu8xhdffIGYmBi0b98emzZtwrJly9K1OXjwIG7evImvvvpKZ59pYGAgSpQogbVr1+q0T0xMBABYWFhkuo5r167hl19+wfDhw2Fra6szz9nZGQDeeGB02bJlqFWrFvLmzYvbt28rD39/f6SkpGDHjh0AgH/++QcajQYjR45M14dGo8l0zS968OABbt++jejoaEycOBGpqamoX78+gOcjjzZt2oSWLVuiSJEiymsKFCiA9u3bY9euXYiPj8/UMuLi4hAeHo61a9eidu3a6do8fPhQ573fvn07XRsLCwuYmJgotd25cwe2trYoXrw4Dh8+rLQz9HpP8+TJk3Q1vml05ooVK3DgwAGMHz/+te0AYOzYsbC3t0ffvn3f2PZFz549y/Qxgcx8xh8+fAgAyJcvn151pDl+/DhOnDiBdu3aKdPatWuH27dvY+PGjcq0gwcP4s6dO+jWrRvMzP63A6hDhw7ImzevTp/Lli1DyZIlUaJECZ31n/ZZ3bp1KwAon8c8efJkqlYrKyvl/0lJSbhz5w68vb3h4OCg85l6lbCwMNy/f195f2kPU1NTVK1aVanr+vXrOHHiBIKCgnS+J+rUqYMyZcro9Llu3TqYmpqm+xwMGDAAIoL169frTK9Tpw58fHx0phUrVgxVq1bVGSRy9+5drF+/Hh06dNDr+4K75l7j7t27WLx4MT766COcPn0aX3/9NRo1agR7e3ulzZUrVwAAxYsXT/f6EiVKYNeuXTrT7t+/DwDpAuV1Ro4cCTc3N3z55ZdYvny5zjw/Pz9oNBoMGT
IE3333ndJvamqqTrvz58/j+PHjyJ8/f4bLuHnzJoDnuyLd3Nx0NrWzq2XLlsr/TUxMMGzYMLRu3RoAcOvWLTx58iT
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAaEAAAIjCAYAAACqOKSMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABOA0lEQVR4nO3deXhMZ/8/8PfIMlknEWRBJMQSiVDb00bsQqj98aWIEpTyhFI8Km2JqLWt2qu20oXWrlpVa2KnsaVIEWkIKSFIYhKyzf37wy/zGDOJTOakJ8b7dV25rpx7zpz7M8s977nPnDmjEEIIEBERyaCC3AUQEdGriyFERESyYQgREZFsGEJERCQbhhAREcmGIURERLJhCBERkWwYQkREJBuGEBERyYYhRK+cdevWQaFQ4Pr163KXYhSFQoHp06fLXQaVoZiYGCgUCsTExMhdyj+mVCGUmJiId999F7Vq1YKNjQ1UKhWCgoKwaNEiPH78WOoaiUokNzcXixYtQuPGjaFSqeDs7Ax/f3+MHDkSly9flrs8PYVh+KI/b29vuUuVVeH9dPr0aYOXt23bFg0aNPiHq/qfDRs2YOHChXrt169f13kcraysULlyZbRo0QIffvghkpOT//liyyFLY6+wa9cu9O3bF0qlEoMHD0aDBg2Qm5uLo0eP4r///S8uXbqElStXlkWtRMXq06cPdu/ejQEDBmDEiBHIy8vD5cuX8csvv6BFixbw9fWVu0QdrVu3xnfffafT9s477+Bf//oXRo4cqW1zcHAAADx+/BiWlkYPWSpjGzZswMWLFzF+/HiDlw8YMABvvvkmNBoNHj58iNjYWCxcuBCLFi3CmjVr0L9/f+26rVu3xuPHj2Ftbf0PVS8/o57RSUlJ6N+/P7y8vHDw4EF4eHhoLwsPD8e1a9ewa9cuyYskepHY2Fj88ssvmDVrFj788EOdy5YuXYr09HR5CitGrVq1UKtWLZ22UaNGoVatWhg0aJDe+jY2Nv9UaVQCWVlZsLe3f+F6TZo00Xs8b9y4gU6dOmHIkCGoX78+GjVqBACoUKHCK/c4G7U77tNPP4VarcaaNWt0AqhQ7dq1MW7cOO2yQqHAmDFjsH79etSrVw82NjZo2rQpDh8+rHfdlJQUDBs2DG5ublAqlfD398fXX39tsI7p06cb3G3Rtm1bnfWKmqZ//vnnBj8T2L17N1q1agV7e3s4Ojqia9euuHTpkt71L1++jP/7v/+Di4sLbGxs0KxZM+zcudNgrc+6cuUK2rdvD3d3dyiVSnh6emLUqFF48OCBdp3CfcJbtmzRu76DgwPCwsK0yw8ePMCkSZMQEBAABwcHqFQqdOnSBXFxcQbvr+d5e3vrbA8A0tPTMX78eHh6ekKpVKJ27dqYN28eNBqNdp3C3Qyff/653jYbNGig8zgUtY+7a9euBj/jMOZ58KzExEQAQFBQkN5lFhYWqFSp0gu3IeXjX7gL6fDhw3j33XdRqVIlqFQqDB48GA8fPnxhLYY8f38VPq5Xr17FoEGD4OTkhCpVqmDq1KkQQuDmzZvo2bMnVCoV3N3dMX/+fL1t5uTkIDIyErVr19Y+JydPnoycnJwX1tO2bdtidyM+P76+/PJL+Pv7Q6lUomrVqggPDy/TNwfff/89mjZtCltbW7i4uKB///64efOmzjpHjhxB3759UaNGDe3tf//99/U+VggLC4ODgwMSExPx5ptvwtHREaGhoWjbti127dqFGzduGLX71MvLC+vWrUNubi4+/fRTbbuh8ZKQkIA+ffrA3d0dNjY2qF69Ovr374+MjIwyu7137tzB0KFDUb16dSiVSnh4eKBnz56lfs0sjlEzoZ9//hm1atVCixYtSnydQ4cOYePGjXjvvfegVCrx5ZdfonPnzvj999+1AZGamoo33nhDG1pVqlTB7t27MXz4cGRmZhY5zV2+fLl2V0VERIQxN0XPd999hyFDhiAkJATz5s1DdnY2li
9fjpYtW+LcuXPaJ9alS5cQFBSEatWqYcqUKbC3t8emTZvQq1cvbN26Fb179y6yj6ysLFSvXh3du3eHSqXCxYsXsWzZMqSkpODnn382uua//voLO3bsQN++fVGzZk2kpqZixYoVaNOmDeLj41G1alWjtpednY02bdogJSUF7777LmrUqIHjx48jIiICt2/fNrjfuzQOHz6MX3/9Va+9tM8D4OmgBoD169cjKCjI6N1WZfX4jxkzBs7Ozpg+fTquXLmC5cuX48aNG9oXGym89dZbqF+/PubOnYtdu3Zh5syZcHFxwYoVK9C+fXvMmzcP69evx6RJk9C8eXO0bt0aAKDRaNCjRw8cPXoUI0eORP369XHhwgUsWLAAV69exY4dO17Yd/Xq1TFnzhydtl9//RU//PCDTtv06dMRFRWF4OBgjB49WntfxMbG4tixY7CysnphXxkZGUhLS9Nrz8vL02ubNWsWpk6din79+uGdd97BvXv3sGTJErRu3Rrnzp2Ds7MzAGDz5s3Izs7G6NGjUalSJfz+++9YsmQJbt26hc2bN+tsMz8/HyEhIWjZsiU+//xz2NnZwd3dHRkZGbh16xYWLFgA4H+7T18kMDAQPj4+2LdvX5Hr5ObmIiQkBDk5ORg7dizc3d2RkpKCX375Benp6XByciqT29unTx9cunQJY8eOhbe3N+7evYt9+/YhOTlZOxZKOmZeSJRQRkaGACB69uxZ0qsIAAKAOH36tLbtxo0bwsbGRvTu3VvbNnz4cOHh4SHS0tJ0rt+/f3/h5OQksrOzddo//PBDAUBnfX9/f9GmTRud9dq0aSP8/f316vrss88EAJGUlCSEEOLRo0fC2dlZjBgxQme9O3fuCCcnJ532Dh06iICAAPHkyRNtm0ajES1atBB16tR5wT2i7z//+Y9wcHDQLkdHRwsAYvPmzXrr2tvbiyFDhmiXnzx5IgoKCnTWSUpKEkqlUsyYMUPbFhUVJQAIjUajs66Xl5fO9j755BNhb28vrl69qrPelClThIWFhUhOTtb2AUB89tlnejU+/zgU3p7o6Ght2+uvvy66dOkiAIjIyEhtu7HPg2dpNBrRpk0bAUC4ubmJAQMGiGXLlokbN27orbt27doyf/wL+2jatKnIzc3Vtn/66acCgPjpp58M3o7nH+NnPX9/RUZGCgBi5MiR2rb8/HxRvXp1oVAoxNy5c7XtDx8+FLa2tjrb/u6770SFChXEkSNHdPr56quvBABx7Ngxg3UUKun4unv3rrC2thadOnXSeb4uXbpUABBff/11sf0U3pfF/T1bx/Xr14WFhYWYNWuWznYuXLggLC0tddoNPafmzJkjFAqFznNnyJAhAoCYMmWK3vpdu3YVXl5eeu3FjZNCPXv2FABERkaGEEJ/vJw7d67I14Oyur0PHz58Yd3GjJkXKfHuuMzMTACAo6NjSa8C4GnaN23aVLtco0YN9OzZE3v27EFBQQGEENi6dSu6d+8OIQTS0tK0fyEhIcjIyMDZs2d1tvnkyRMAJdtHXlBQoLPNtLQ0ZGdn66yzb98+pKenY8CAATrrWVhY4PXXX0d0dDSAp7u/Dh48iH79+uHRo0fa9e7fv4+QkBAkJCQgJSXlhTVlZGQgNTUVBw4cwK5du7TvTJ/17PYL/56nVCpRoUIF7e28f/8+HBwcUK9ePZ37zNXVFQBw69atYuvavHkzWrVqhYoVK+r0GxwcjIKCAr3dqNnZ2Xo1FhQUFNvHtm3bEBsbi7lz5+q0l+Z58CyFQoE9e/Zg5syZqFixIn744QeEh4fDy8sLb731VrG7fcry8R85cqTOu/zRo0fD0tLS4EywtN555x3t/xYWFmjWrBmEEBg+fLi23dnZGfXq1cNff/2lbdu8eTPq168PX19fndvdvn17ANDeblPt378fubm5GD9+vPb5CgAjRoyASqUq8efIy5Ytw759+/T+GjZsqLPetm3boNFo0K9fP53b5e7ujjp16ujcLltbW+
3/WVlZSEtLQ4sWLSCEwLlz5/RqGD16tLE3v1iFs6ZHjx4ZvLxwprNnzx69161CUt9eW1tbWFtbIyYmpshdxyUdMyV
2024-10-11 18:41:52 +04:00
"text/plain": [
2024-10-12 12:38:44 +04:00
"<Figure size 400x600 with 1 Axes>"
2024-10-11 18:41:52 +04:00
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"# Создание диаграмм зависимости\n",
2024-10-12 12:38:44 +04:00
"for column in numeric_columns:\n",
" plt.figure(figsize=(4, 6)) # Установка размера графика\n",
2024-10-11 18:41:52 +04:00
" if pd.api.types.is_numeric_dtype(df[column]): # Проверяем, является ли колонка числовой\n",
" # Проверяем, содержит ли колонка только два уникальных значения (0 и 1)\n",
" if df[column].nunique() == 2 and set(df[column].unique()).issubset({0, 1}):\n",
2024-10-12 12:38:44 +04:00
" counts = df[column].value_counts() \n",
" counts.plot(kind='bar', width=0.4) # Создаем столбчатую диаграмму\n",
2024-10-11 18:41:52 +04:00
" plt.title(f'Количество значений для {column}')\n",
" plt.xlabel(column)\n",
" plt.ylabel('Количество повторений')\n",
" else:\n",
2024-10-12 12:38:44 +04:00
" grouped_data = df.groupby('HeartDisease')[column].mean()\n",
"\n",
" # Создаем столбчатую диаграмму\n",
" plt.bar(grouped_data.index, grouped_data.values, alpha=0.5, width=0.4)\n",
" plt.title(f'Среднее значение {column} по HeartDisease')\n",
2024-10-11 18:41:52 +04:00
" plt.xlabel('HeartDisease (0 = нет, 1 = да)')\n",
2024-10-12 12:38:44 +04:00
" plt.ylabel(f'Среднее значение {column}')\n",
2024-10-11 18:41:52 +04:00
" plt.xticks([0, 1]) # Установка меток по оси X\n",
2024-10-12 12:38:44 +04:00
" plt.grid(axis='y')\n",
2024-10-11 18:41:52 +04:00
" else:\n",
" # Если колонка не числовая, строим столбчатую диаграмму\n",
" counts = df[column].value_counts() # Считаем количество повторений каждого значения\n",
2024-10-12 12:38:44 +04:00
" counts.plot(kind='bar', width=0.4) # Создаем столбчатую диаграмму\n",
2024-10-11 18:41:52 +04:00
" plt.title(f'Количество значений для {column}')\n",
" plt.xlabel(column)\n",
" plt.ylabel('Количество повторений')\n",
"\n",
2024-10-12 12:38:44 +04:00
" plt.show() "
2024-10-11 18:41:52 +04:00
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 369,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [],
"source": [
"# Функция для создания выборок\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def split_stratified_into_train_val_test(\n",
" df_input,\n",
" stratify_colname=\"y\",\n",
" frac_train=0.6,\n",
" frac_val=0.15,\n",
" frac_test=0.25,\n",
" random_state=None,\n",
"):\n",
"\n",
" if frac_train + frac_val + frac_test != 1.0:\n",
" raise ValueError(\n",
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
" % (frac_train, frac_val, frac_test)\n",
" )\n",
"\n",
" if stratify_colname not in df_input.columns:\n",
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
" X = df_input # Contains all columns.\n",
" y = df_input[\n",
" [stratify_colname]\n",
" ] # Dataframe of just the column on which to stratify.\n",
"\n",
" # Split original dataframe into train and temp dataframes.\n",
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
" )\n",
"\n",
" # Split the temp dataframe into val and test dataframes.\n",
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
" df_val, df_test, y_val, y_test = train_test_split(\n",
" df_temp,\n",
" y_temp,\n",
" stratify=y_temp,\n",
" test_size=relative_frac_test,\n",
" random_state=random_state,\n",
" )\n",
"\n",
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
" return df_train, df_val, df_test"
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 370,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"HeartDisease\n",
2024-10-11 18:46:40 +04:00
"No 292422\n",
"Yes 27373\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n",
"\n",
2024-10-11 18:46:40 +04:00
"Обучающая выборка: (191877, 18)\n",
2024-10-11 18:41:52 +04:00
"HeartDisease\n",
2024-10-11 18:46:40 +04:00
"No 175453\n",
"Yes 16424\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-11 18:46:40 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAiMAAADECAYAAABN9CGiAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA8FklEQVR4nO3dd1xT1/sH8E8SIAkEkA0iAgVxQbUi2joAR0XEWid1i6Pan6u1to62bqtV61bU2joq2FbEUW2dVVtcWPfAgQoqKCMgmzCS8/uDb1JDAgImXMbzfr3yUm7OPfe5M0/OPeeGxxhjIIQQQgjhCJ/rAAghhBBSv1EyQgghhBBOUTJCCCGEEE5RMkIIIYQQTlEyQgghhBBOUTJCCCGEEE5RMkIIIYQQTlEyQgghhBBOGXAdACGEEN2RyWRIT0+HgYEBbG1tuQ6H6FBubi7S09MhkUhgYWHBdTg6RS0jhJBqtWPHDvB4PMTHx3MdSp1x8uRJ9OnTBw0aNIBYLIajoyM+/fRTrsOqNZYsWQKFQgEAUCgUWLp0KccR/SciIgLdunWDqakpJBIJGjdujOXLl3Mdls5VKhlRXkSUL5FIBA8PD0yePBnJycn6ipGQOm/+/Png8XiQSqVa33dxcUHv3r2rOar/hIaGYseOHRrTz5w5o3ZNEAqFsLOzg7+/P5YsWYLU1NTqD7aeCQ0NRUBAADIzM7F27VqcOHECJ06cwMKFC7kOrdbYuXMnvv/+eyQkJGDlypXYuXMn1yEBAGbNmoXg4GCYmppi69atOHHiBE6ePImJEydyHZrOVek2zcKFC+Hq6gqZTIazZ89i06ZN+PPPP3H79m0YGxvrOkZCCMdCQ0NhbW2NkJAQre9PnToVPj4+kMvlSE1Nxfnz5zFv3jysWrUKe/bsQdeuXVVlR4wYgcGDB0MoFFZT9HVXbGwsPv/8c4wfPx6hoaHg8Xhch1QrLVy4ECNHjsTMmTMhFAoRFhbGdUj4+++/sWzZMixduhSzZs3iOhy9q1IyEhgYiLZt2wIAxo0bBysrK6xatQoHDx7EkCFDdBogIYQ7eXl5FfqC0blzZwwcOFBt2o0bN9CjRw8MGDAAMTExcHBwAAAIBAIIBAK9xFvfrFu3Dvb29li3bh0lIm/go48+QpcuXfDw4UM0adIENjY2XIeE77//Hh06dKgXiQigoz4jym89cXFxAID09HR88cUX8PLygkQigZmZGQIDA3Hjxg2NeWUyGebPnw8PDw+IRCI4ODigf//+ePToEQAgPj5erRm49Mvf319Vl7LJ+LfffsNXX30Fe3t7mJiYoE+fPnj27JnGsqOjo9GzZ0+Ym5vD2NgYfn5+OHfunNZ19Pf317r8+fPna5QNCwuDt7c3xGIxLC0tMXjwYK3LL2/dXqVQKLBmzRq0bNkSIpEIdnZ2mDBhAl6+fKlWrqym/MmTJ2vUqS32FStWaGxTACgoKMC8efPg7u4OoVAIJycnzJgxAwUFBVq31av8/f016vv222/B5/Oxe/fuKm0P5UlqZWUFsVgMb29v7N27V+vyw8LC0K5dOxgbG8PCwgK+vr44fvy4WpkjR47Az88PpqamMDMzg4+Pj0ZsERERqn1qbW2N4cOHIzExUa1MSEiIWswWFhbw9/dHVFTUa7dTVVT0uDh48CCCgoLQsGFDCIVCuLm5YdGiRZDL5Wrl/P394enpiStXrsDX1xfGxsb46quv4OLigjt37uDvv//Wet6VpVWrVlizZg0yMjKwYcMG1XRtfUYuX76MgIAAWFtbQywWw9XVFWPGjNHr+sbGxmLAgAGwt7eHSCRCo0aNMHjwYGRmZqqVq+j5XJry1pvyZWpqinbt2uHAgQOvnRcArl27hsDAQJiZmUEikaBbt264ePGiWpmLFy/C29sbEydOhJ2dHYRCITw9PbF161ZVGcYYXFxc8OGHH2osQyaTwdzcHBMmTFCLuTQXFxe1VrGKXuOV5/Srt/gePH
iAfv36wcLCAmKxGD4+PhrbRHktL31eSyQSjdY5bdc3ALh37x4GDhwIS0tLiEQitG3bFr///rtamVePRVtbW9V15e2339aIW5vSXReMjY3h5eWFH3/8Ua1cSEgIJBJJuXWVviZfvHgRnp6eGDx4MCwtLcvcVgCQkpKCsWPHws7ODiKRCK1atdK41aTcF99//z1Wr14NZ2dniMVi+Pn54fbt2xrxuri4qE0LCwsDn8/Hd999pza9Itu5InQymkaZOFhZWQEAHj9+jAMHDmDQoEFwdXVFcnIytmzZAj8/P8TExKBhw4YAALlcjt69e+Ovv/7C4MGD8emnnyI7OxsnTpzA7du34ebmplrGkCFD0KtXL7Xlzp49W2s83377LXg8HmbOnImUlBSsWbMG3bt3x/Xr1yEWiwEAp06dQmBgILy9vTFv3jzw+Xxs374dXbt2RVRUFNq1a6dRb6NGjVQdm3JycvB///d/Wpc9Z84cBAcHY9y4cUhNTcX69evh6+uLa9euoUGDBhrzjB8/Hp07dwYA7Nu3D/v371d7f8KECdixYwdGjx6NqVOnIi4uDhs2bMC1a9dw7tw5GBoaat0OlZGRkaG105ZCoUCfPn1w9uxZjB8/Hs2bN8etW7ewevVqPHjwoMIXVqXt27fjm2++wcqVKzF06FCtZV63PdauXYs+ffpg2LBhKCwsxK+//opBgwbh8OHDCAoKUpVbsGAB5s+fjw4dOmDhwoUwMjJCdHQ0Tp06hR49egAouZiMGTMGLVu2xOzZs9GgQQNcu3YNR48eVcWn3PY+Pj5YunQpkpOTsXbtWpw7d05jn1pbW2P16tUAgISEBKxduxa9evXCs2fPtO770tLT07VOV3aue1VFj4sdO3ZAIpHg888/h0QiwalTpzB37lxkZWVhxYoVanWmpaUhMDAQgwcPxvDhw1X9P6ZMmQKJRIKvv/4aAGBnZ/fadQGAgQMHYuzYsTh+/Di+/fZbrWVSUlLQo0cP2NjYYNasWWjQoAHi4+Oxb98+va1vYWEhAgICUFBQgClTpsDe3h6JiYk4fPgwMjIyYG5uDqBq53Npu3btAgBIpVKEhoZi0KBBuH37Npo2bVrmPHfu3EHnzp1hZmaGGTNmwNDQEFu2bIG/vz/+/vtvtG/fHkDJ/rp8+TIMDAwwadIkuLm54cCBAxg/fjzS0tIwa9Ys8Hg8DB8+HMuXL0d6ejosLS1Vyzl06BCysrIwfPjw167Hqyp6jS8tPT0dvr6+yM7OxtSpU2Fvb4+wsDD0798f4eHhOmtZv3PnDjp27AhHR0fMmjULJiYm2LNnD/r27YvIyEj069evzHl37dqFW7duVWp5q1evhrW1NbKysrBt2zZ8/PHHcHFxQffu3au8Dmlpafjhhx8gkUgwdepU2NjYaN1W+fn58Pf3x8OHDzF58mS4uroiIiICISEhyMjI0OjI/PPPPyM7OxuTJk2CTCbD2rVr0bVrV9y6davM8/r48eMYM2YMJk+erNZS8ybbWQOrhO3btzMA7OTJkyw1NZU9e/aM/frrr8zKyoqJxWKWkJDAGGNMJpMxuVyuNm9cXBwTCoVs4cKFqmnbtm1jANiqVas0lqVQKFTzAWArVqzQKNOyZUvm5+en+vv06dMMAHN0dGRZWVmq6Xv27GEA2Nq1a1V1N2nShAUEBKiWwxhjeXl5zNXVlb3//vsay+rQoQPz9PRU/Z2amsoAsHnz5qmmxcfHM4FAwL799lu1eW/dusUMDAw0psfGxjIAbOfOnapp8+bNY6/ulqioKAaAhYeHq8179OhRjenOzs4sKChII/ZJkyax0ru6dOwzZsxgtra2zNvbW22b7tq1i/H5fBYVFaU2/+bNmxkAdu7cOY3lvcrPz09V3x9//MEMDAzY9OnTtZatyPZgrGQ/vaqwsJB5enqyrl27qtXF5/NZv379NI5F5T7PyMhgpqamrH379iw/P19rmcLCQmZra8s8PT3Vyhw+fJgBYHPnzlVNGz
VqFHN2dlar54cffmAA2KVLl7Suc+n1LO/16r6tzHFRensxxtiECROYsbExk8lkqml+fn4MANu8ebNG+dLnmpLynIu
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-10-11 18:46:40 +04:00
"Контрольная выборка: (63959, 18)\n",
2024-10-11 18:41:52 +04:00
"HeartDisease\n",
2024-10-11 18:46:40 +04:00
"No 58485\n",
"Yes 5474\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-11 18:46:40 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAi8AAADECAYAAABXyMEsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA7o0lEQVR4nO3dd1gU1/oH8O8uHRakSbOAoigiNqKxArG3EHvsPXJjSTNXYxJjicYkxm6IJjdRo+YmKpYUe9QEscSKXVHBgkqT3hZ2z+8PfruXZUEBYYfy/TzPPsrszJl36r5z5pwZmRBCgIiIiKiKkEsdABEREVFpMHkhIiKiKoXJCxEREVUpTF6IiIioSmHyQkRERFUKkxciIiKqUpi8EBERUZXC5IWIiIiqFCYvRERE1ZAQAk+fPkVkZKTUoZQ7Ji9EZFAbN26ETCZDdHS01KEQlcmVK1ewe/du7d8XL17EH3/8IV1ABaSlpeHjjz9GkyZNYGpqCgcHB3h5eeHmzZtSh1auSpW8aE46mo+5uTm8vLwwffp0xMbGVlSMRNXe/PnzIZPJkJCQUOT3Hh4e6N+/v4Gj+p+QkBBs3LhRb/ixY8d0zglmZmZwdnZGYGAgPvvsM8THxxs+WKIKlpaWhuDgYJw6dQqRkZF4++23cfnyZanDQmJiIjp06IDVq1djyJAh2LNnDw4dOoRjx47Bw8ND6vDKlXFZJlq4cCEaNGiA7OxsHD9+HN988w327t2LK1euwNLSsrxjJCKJhYSEwNHREePHjy/y+7feegtt27aFSqVCfHw8Tpw4gXnz5mH58uXYtm0bunbtqh13zJgxGD58OMzMzAwUPVH56tChg/YDAF5eXnjjjTckjgr497//jcePH+PkyZPw8fGROpwKVabkpU+fPnjppZcAAJMnT4aDgwOWL1+OPXv2YMSIEeUaIBFJJzMzs0QXJF26dMGQIUN0hkVERKBnz54YPHgwrl27BldXVwCAkZERjIyMKiReIkPZvXs3rl27hqysLPj6+sLU1FTSeOLi4rBp0yasW7eu2icuQDm1edFcVUVFRQEAnj59ivfffx++vr5QKBSwsbFBnz59EBERoTdtdnY25s+fDy8vL5ibm8PV1RWDBg3CnTt3AADR0dE61dKFP4GBgdqyNFXYv/zyCz788EO4uLjAysoKQUFBePDggd68T58+jd69e6NWrVqwtLREQEAAwsPDi1zGwMDAIuc/f/58vXG3bNkCPz8/WFhYwN7eHsOHDy9y/s9atoLUajVWrlwJHx8fmJubw9nZGcHBwUhKStIZr7hbC9OnT9crs6jYly5dqrdOASAnJwfz5s1Do0aNYGZmhnr16mHWrFnIyckpcl0VFBgYqFfe4sWLIZfL8dNPP5VpfXz11Vfo2LEjHBwcYGFhAT8/P+zYsaPI+W/ZsgXt2rWDpaUl7Ozs4O/vj4MHD+qMs2/fPgQEBMDa2ho2NjZo27atXmzbt2/XblNHR0eMHj0aMTExOuOMHz9eJ2Y7OzsEBgYiLCzsueupLEq6X+zZswf9+vWDm5sbzMzM4OnpiU8//RQqlUpnvMDAQDRv3hznzp2Dv78/LC0t8eGHH8LDwwNXr17FX3/9VeRxV5yWLVti5cqVSE5Oxtq1a7XDi2rzcvbsWfTq1QuOjo6wsLBAgwYNMHHixApd3sjISAwePBguLi4wNzdH3bp1MXz4cKSkpOiMV9LjuTDNrUDNx9raGu3atdNpK/G8aQs6evQozMzM8K9//Utn+IULF9CnTx/Y2NhAoVCgW7duOHXqlM44mnV+9uxZneEJCQk654LCMRf1OXbsGADd/aVjx47a7bZu3Tq95YmLi8OkSZPg7OwMc3NztGzZEps2bSrRetN8Ctb6Pe82q4YmxsK++uqrIttdhYSEwMfHB2ZmZnBzc8O0adOQnJysV6Zm/2/WrBn8/PwQERFR5LmquJgKLpejoyP69euHK1eu6Iwnk8kwffr0Ys
spfBydOXMGarUaSqUSL730EszNzeHg4IARI0bg/v37etMfOXIEXbp0gZWVFWxtbfHaa6/h+vXrOuNo1vONGzcwbNgw2NjYwMHBAW+//Tays7P14i34m5KXl4e+ffvC3t4e165d0xm3rMdUQWWqeSlMk2g4ODgAAO7evYvdu3dj6NChaNCgAWJjY7F+/XoEBATg2rVrcHNzAwCoVCr0798ff/75J4YPH463334baWlpOHToEK5cuQJPT0/tPEaMGIG+ffvqzHfOnDlFxrN48WLIZDLMnj0bcXFxWLlyJbp3746LFy/CwsICQP6G69OnD/z8/DBv3jzI5XJs2LABXbt2RVhYGNq1a6dXbt26dbFkyRIAQHp6Ot58880i5z137lwMGzYMkydPRnx8PNasWQN/f39cuHABtra2etNMmTIFXbp0AQDs3LkTu3bt0vk+ODgYGzduxIQJE/DWW28hKioKa9euxYULFxAeHg4TE5Mi10NpJCcna5etILVajaCgIBw/fhxTpkyBt7c3Ll++jBUrVuDWrVslOhEXtGHDBnz88cdYtmwZRo4cWeQ4z1sfq1atQlBQEEaNGgWlUomff/4ZQ4cOxe+//45+/fppx1uwYAHmz5+Pjh07YuHChTA1NcXp06dx5MgR9OzZE0D+CWDixInw8fHBnDlzYGtriwsXLmD//v3a+DTrvm3btliyZAliY2OxatUqhIeH621TR0dHrFixAgDw8OFDrFq1Cn379sWDBw+K3PaFPX36tMjharVab1hJ94uNGzdCoVDgvffeg0KhwJEjR/DJJ58gNTUVS5cu1SkzMTERffr0wfDhwzF69Ght+5UZM2ZAoVDgo48+AgA4Ozs/d1kAYMiQIZg0aRIOHjyIxYsXFzlOXFwcevbsidq1a+ODDz6Ara0toqOjsXPnzgpbXqVSiV69eiEnJwczZsyAi4sLYmJi8PvvvyM5ORm1atUCULbjubDNmzcDyE8UQkJCMHToUFy5cgVNmjQp0ToE8muxBgwYgL59++Lrr7/WDr969Sq6dOkCGxsbzJo1CyYmJli/fj0CAwPx119/4eWXXy7xPABg0KBBaNSokfbvd999F97e3pgyZYp2mLe3t/b/SUlJ6Nu3L4YNG4YRI0Zg27ZtePPNN2FqaqpNPrOyshAYGIjbt29j+vTpaNCgAbZv347x48cjOTkZb7/9dpGxaNabJo6KNn/+fCxYsADdu3fHm2++iZs3b+Kbb77BmTNnnnuenT17dqnm1bRpU3z00UcQQuDOnTtYvnw5+vbtW2SSUVKJiYkA8i9W/fz88PnnnyM+Ph6rV6/G8ePHceHCBTg6OgIADh8+jD59+qBhw4aYP38+srKysGbNGnTq1Annz5/Xax8zbNgweHh4YMmSJTh16hRWr16NpKQk/Pjjj8XGM3nyZBw7dgyHDh1Cs2bNtMPL45gCAIhS2LBhgwAgDh8+LOLj48WDBw/Ezz//LBwcHISFhYV4+PChEEKI7OxsoVKpdKaNiooSZmZmYuHChdphP/zwgwAgli9frjcvtVqtnQ6AWLp0qd44Pj4+IiAgQPv30aNHBQBRp04dkZqaqh2+bds2AUCsWrVKW3bjxo1Fr169tPMRQojMzEzRoEED0aNHD715dezYUTRv3lz7d3x8vAAg5s2bpx0WHR0tjIyMxOLFi3WmvXz5sjA2NtYbHhkZKQCITZs2aYfNmzdPFNwsYWFhAoDYunWrzrT79+/XG+7u7i769eunF/u0adNE4U1dOPZZs2YJJycn4efnp7NON2/eLORyuQgLC9OZft26dQKACA8P15tfQQEBAdry/vjjD2FsbCxmzpxZ5LglWR9C5G+ngpRKpWjevLno2rWrTllyuVwMHDhQb1/UbPPk5GRhbW0tXn75ZZGVlVXkOEqlUjg5OYnmzZvrjPP7778LAOKTTz7RDhs3bpxwd3fXKefbb78VAMQ///xT5DIXXs5nfQpu29LsF4XXlxBCBAcHC0tLS5
Gdna0dFhAQIACIdevW6Y1f+FjT0Bxz27dvL3bZWrZsKezs7LR/a84jUVFRQgghdu3aJQCIM2fOFFtGeS/vhQsXnht
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-10-11 18:46:40 +04:00
"Тестовая выборка: (63959, 18)\n",
2024-10-11 18:41:52 +04:00
"HeartDisease\n",
2024-10-11 18:46:40 +04:00
"No 58484\n",
"Yes 5475\n",
2024-10-11 18:41:52 +04:00
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-11 18:46:40 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAg0AAADECAYAAAAcYBLBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA4jUlEQVR4nO3dd1gUV9sG8Ht3gaUsvSg2RGwoVmKJKNgJaow99hajSdQ0E415Y43GqIklGkvyJsaoeROxRhN7id3YYxcVVBAVUDrLwu75/uBjw7ILLAgMyP27rr24mJ0580w5s8/MnDMjE0IIEBERERVALnUAREREVD4waSAiIiKzMGkgIiIiszBpICIiIrMwaSAiIiKzMGkgIiIiszBpICIiIrMwaSAiIiKzMGkgIiIqZvHx8bh9+zYyMzOlDqVYMWkgolL1008/QSaTISIiQupQqJyJjIzETz/9pP8/IiICGzZskC6gHDIyMrBgwQI0adIESqUSzs7OqFOnDg4cOCB1aMWqUElDdmXP/lhbW6Nu3bqYMGECHj9+XFIxEr3wZs6cCZlMhtjYWJPf16xZEz169CjlqP61YsUKg4N1tsOHDxscE5RKJSpVqoT27dvjiy++QExMTOkHSy8smUyG8ePHY8+ePYiIiMDkyZNx9OhRqcNCeno6OnfujGnTpqF9+/YIDQ3Fvn37cPDgQbz88stSh1esLIoy0ezZs+Ht7Q21Wo1jx45h5cqV+PPPP3HlyhXY2toWd4xEJLEVK1bAzc0NI0eONPn9u+++ixYtWkCr1SImJgYnTpzAjBkzsGjRImzcuBEdO3bUjzts2DAMHDgQSqWylKKnF0XVqlXx5ptv4pVXXgEAeHp64vDhw9IGBWD+/Pk4ffo09uzZg/bt20sdTokqUtIQEhKCl156CQAwZswYuLq6YtGiRdi+fTsGDRpUrAESkXRSU1PNOhFo164d+vXrZzDs0qVL6Nq1K/r27Ytr167B09MTAKBQKKBQKEokXnrxLVmyBBMnTkRsbCz8/PxgZ2cnaTyZmZlYsmQJJk2a9MInDEAxtWnIPosIDw8HADx9+hQfffQRGjVqBJVKBQcHB4SEhODSpUtG06rVasycORN169aFtbU1PD090adPH9y5cwdA1j2rnJc/c39ybqTsS6W//fYbPv30U1SuXBl2dnbo2bMnHjx4YDTv06dP45VXXoGjoyNsbW0RFBSE48ePm1zG9u3bm5z/zJkzjcZdv349/P39YWNjAxcXFwwcONDk/PNbtpx0Oh2WLFmChg0bwtraGpUqVcK4cePw7Nkzg/HyuoQ9YcIEozJNxb5w4UKjdQpkXXqbMWMGateuDaVSierVq2Py5MlIT083ua5yat++vVF5c+fOhVwuxy+//FKk9fHVV1+hTZs2cHV1hY2NDfz9/bFp0yaT81+/fj1atmwJW1tbODs7IzAwEHv37jUYZ9euXQgKCoK9vT0cHBzQokULo9hCQ0P129TNzQ1Dhw5FVFSUwTgjR440iNnZ2Rnt27cvscun5u4X27dvR/fu3VGlShUolUr4+Pjg888/h1arNRivffv28PPzw7lz5xAYGAhbW1t8+umnqFmzJq5evYq//vrLZL3LS5MmTbBkyRLEx8dj+fLl+uGm2jScPXsWwcHBcHNzg42NDby9vTF69OgSXd6wsDD07dsXlStXhrW1NapVq4aBAwciISHBYDxz63Nu2becsj/29vZo2bIltm3bVqjpTH1ynl2bexyLiorCG2+8oV8v3t7eePvtt6HRaIxuPZv65Lw9dfDgQbRr1w52dnZwcnLCa6+9huvXrxd5+c2tXzVr1gQA+Pj4oFWrVnj69ClsbGzMaiNjbv0s6FZg9u9M9ja4efMmnj17Bnt7ewQFBcHW1haOjo7o0aMHrly5YjT9hQsXEBISAgcHB6hUKnTq1AmnTp0yGCd7exw5cgTjxo2Dq6srHBwcMHz4cJPH/dxXAMeOHQtra2
ujqzC7du3Sbzd7e3t0794dV69ezXe95VakKw25Zf/Au7q6AgDu3r2Lbdu2oX///vD29sbjx4+xevVqBAUF4dq1a6hSpQoAQKvVokePHjhw4AAGDhyI9957D0lJSdi3bx+uXLkCHx8f/TwGDRqEbt26Gcx36tSpJuOZO3cuZDIZpkyZgidPnmDJkiXo3LkzLl68CBsbGwBZO31ISAj8/f0xY8YMyOVyrFmzBh07dsTRo0fRsmVLo3KrVauGefPmAQCSk5Px9ttvm5z3tGnTMGDAAIwZMwYxMTFYtmwZAgMDceHCBTg5ORlNM3bsWLRr1w4AsGXLFmzdutXg+3HjxuGnn37CqFGj8O677yI8PBzLly/HhQsXcPz4cVhaWppcD4URHx+vX7acdDodevbsiWPHjmHs2LHw9fXF5cuXsXjxYty6davAA2Bua9aswWeffYavv/4agwcPNjlOQetj6dKl6NmzJ4YMGQKNRoNff/0V/fv3x86dO9G9e3f9eLNmzcLMmTPRpk0bzJ49G1ZWVjh9+jQOHjyIrl27AsiqnKNHj0bDhg0xdepUODk54cKFC9i9e7c+vux136JFC8ybNw+PHz/G0qVLcfz4caNt6ubmhsWLFwPIarS1dOlSdOvWDQ8ePDC57XN7+vSpyeE6nc5omLn7xU8//QSVSoUPP/wQKpUKBw8exPTp05GYmIiFCxcalBkXF4eQkBAMHDgQQ4cO1bdPmDhxIlQqFf7zn/8AACpVqlTgsgBAv3798MYbb2Dv3r2YO3euyXGePHmCrl27wt3dHZ988gmcnJwQERGBLVu2lNjyajQaBAcHIz09HRMnTkTlypURFRWFnTt3Ij4+Ho6OjgCKVp9zW7duHQAgNjYWK1asQP/+/XHlyhXUq1fP5Ph9+vRB7dq19f9/8MEH8PX1xdixY/XDfH19AZh/HHv48CFatmyJ+Ph4jB07FvXr10dUVBQ2bdqE1NRUBAYG6uPMXm4A+u0NAG3atAEA7N+/HyEhIahVqxZmzpyJtLQ0LFu2DAEBATh//rz+h93c5S9M/cpt+vTpUKvVea/8XJ63fpoSFxcHIOv3qE6dOpg1axbUajW+/fZbBAQE4MyZM6hbty4A4OrVq2jXrh0cHBwwefJkWFpaYvXq1Wjfvj3++usvtGrVyqDsCRMmwMnJCTNnzsTNmzexcuVK3Lt3T5+4mDJjxgz88MMP+O233wyS+3Xr1mHEiBEIDg7G/PnzkZqaipUrV6Jt27a4cOGC0XbLkyiENWvWCABi//79IiYmRjx48ED8+uuvwtXVVdjY2IjIyEghhBBqtVpotVqDacPDw4VSqRSzZ8/WD/vxxx8FALFo0SKjeel0Ov10AMTChQuNxmnYsKEICgrS/3/o0CEBQFStWlUkJibqh2/cuFEAEEuXLtWXXadOHREcHKyfjxBCpKamCm9vb9GlSxejebVp00b4+fnp/4+JiREAxIwZM/TDIiIihEKhEHPnzjWY9vLly8LCwsJoeFhYmAAg1q5dqx82Y8YMkXOzHD16VAAQGzZsMJh29+7dRsO9vLxE9+7djWIfP368yL2pc8c+efJk4eHhIfz9/Q3W6bp164RcLhdHjx41mH7VqlUCgDh+/LjR/HIKCgrSl/fHH38ICwsLMWnSJJPjmrM+hMjaTjlpNBrh5+cnOnbsaFCWXC4XvXv3NtoXs7d5fHy8sLe3F61atRJpaWkmx9FoNMLDw0P4+fkZjLNz504BQEyfPl0/bMSIEcLLy8ugnO+++04AEH///bfJZc69nPl9cm7bwuwXudeXEEKMGzdO2NraCrVarR8WFBQkAIhVq1YZjZ+7rmXLrnOhoaF5LluTJk2Es7Oz/v/s40h4eLgQQoitW7cKAOLMmTN5llHcy3vhwoUC4y5sfc7N1L67d+9eAUBs3Lgx32lz8vLyEiNGjDAaXpjj2PDhw4VcLje5jnNOmy1nvc2tadOmwsPDQ8TFxemHXbp0ScjlcjF8+HD9MH
OW/3nq15UrV4RcLhchISEG+1NezK2feR1Hs2Xv84cOHTL4383NTcTGxurHu3XrlrC0tBR9+/bVD+vVq5ewsrISd+7
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Вывод распределения количества наблюдений по меткам (классам)\n",
"print(df.HeartDisease.value_counts())\n",
"print()\n",
"\n",
"data = df.copy()\n",
"\n",
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
" data, stratify_colname=\"HeartDisease\", frac_train=0.60, frac_val=0.20, frac_test=0.20\n",
")\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.HeartDisease.value_counts())\n",
"counts = df_train['HeartDisease'].value_counts()\n",
"plt.figure(figsize=(2, 2))# Установка размера графика\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)# Построение круговой диаграммы\n",
"plt.title('Распределение классов HeartDisease в обучающей выборке')# Добавление заголовка\n",
"plt.show()# Отображение графика\n",
"\n",
"print(\"Контрольная выборка: \", df_val.shape)\n",
"print(df_val.HeartDisease.value_counts())\n",
"counts = df_val['HeartDisease'].value_counts()\n",
"plt.figure(figsize=(2, 2))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов HeartDisease в контрольной выборке')\n",
"plt.show()\n",
"\n",
"print(\"Тестовая выборка: \", df_test.shape)\n",
"print(df_test.HeartDisease.value_counts())\n",
"counts = df_test['HeartDisease'].value_counts()\n",
"plt.figure(figsize=(2, 2))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов HeartDisease в тестовой выборке')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Сбалансируем распределение:"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "1. Балансировка данных оверсемплингом. Это метод, увеличивающий число наблюдений в миноритарном классе для достижения более равномерного распределения классов."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 371,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
2024-10-11 18:46:40 +04:00
"name": "stdout",
"output_type": "stream",
"text": [
2024-10-12 12:38:44 +04:00
"Обучающая выборка после oversampling: (352020, 51)\n",
2024-10-11 18:46:40 +04:00
"HeartDisease\n",
2024-10-12 12:38:44 +04:00
"Yes 176567\n",
2024-10-11 18:46:40 +04:00
"No 175453\n",
"Name: count, dtype: int64\n"
2024-10-11 18:41:52 +04:00
]
2024-10-11 18:46:40 +04:00
},
{
"data": {
2024-10-12 12:38:44 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsYAAAH4CAYAAABJ8Cv1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABgyUlEQVR4nO3dd3hT5cPG8TvppotZNqVsZItsGSqiDFEQcLMcOBBxvCouloqIqCgi4E8QEUUQBcXBEFBAhmwZskE2FCjdKznvH7WR0BY6OT3p93NdvZQ0PbmTJk/uPnnOOTbDMAwBAAAARZzd7AAAAABAYUAxBgAAAEQxBgAAACRRjAEAAABJFGMAAABAEsUYAAAAkEQxBgAAACRRjAEAAABJFGMAAJBNqampOn36tP755x+zowAFgmIM4Kr67LPPZLPZdOjQIbOjAIXGwoULtWXLFte/58+frx07dpgX6CJ79+7Vww8/rPLly8vX11dly5ZVq1atxIlz4YlyVIzT39DSv/z9/VWrVi0NHjxYp06dKqiMgMcbMWKEbDabIiMjM/1+1apV1a1bt6uc6j+TJk3SZ599luHyFStWuI0Jfn5+Klu2rDp06KA333xTZ86cufphAQv666+/9NRTT2nv3r1au3atHn30UcXExJgdS2vXrlXz5s21bNkyvfjii1q0aJGWLFmi+fPny2azmR0POTBp0iTZbDa1aNEiy+tcPJ57e3urZMmSatq0qZ566int3Lkzz9uPjY3V8OHDVb9+fQUGBqpUqVJq3LixnnrqKR0/flwpKSlq0KCBqlevroSEhAw/f+jQIRUrVky9e/eW9F8v9ff317FjxzJcv0OHDqpfv/5lc1/KO0fX/teoUaMUERGhxMRErVq1Sh9//LF++uknbd++XcWKFcvNJgEUYpMmTVLp0qXVv3//TL8/ZMgQNWvWTA6HQ2fOnNEff/yh4cOH691339WcOXN04403uq77wAMP6O6775afn99VSg8Ufg899JCmTZumWrVqSZJ69uypli1bmpopOTlZAwYMUK1atbR48WKFhoaamgd5M2vWLFWtWlXr16/Xvn37VKNGjUyvd/PNN6tv374yDEMXLlzQ1q1bNWPGDE2aNEljx47VM888k6vtp6SkqF27dvr777/Vr18/Pfnkk4qNjdWOHTv05ZdfqkePHqpQoYKmTp2qNm3aaPTo0XrzzTfdtjF48GD5+vrqgw8+cLs8KSlJb731lj788MM8PEL/MnJg+vTphiTjzz//dLv8mWeeMSQZX375ZU42B+Bfw4cPNyQZZ86cyfT74eHhRteuXa9yKsOIi4szDMMw6tWrZ7Rv3z7D95cvX25IMubOnZvhe1u2bDHCwsKM4sWLG8ePHy/oqIDlJSYmGhs2bDB27txpdhTDMAzjm2++MWw2m7F7926zoyCPDhw4YEgyvv32W6NMmTLGiBEjMr2eJOOJJ57IcHlkZKTRqlUrQ5Lx448/5mr7c+bMMSQZs2bNyvC9hIQE48KFC65/P/bYY4aPj4+xfft212XffPONIcmYNGmS67L0Xtq4cWPDz8/POHbsmNt227dvb9SrVy/T+5qVfFljnD4bdPDgQUnSuXPn9Nxzz6lBgwYKCgpSSEiIOnfurK1bt2b42cTERI0YMUK1atWSv7+/ypcvr549e2r//v2S0qbNL57av/SrQ4cOrm2lf6z79ddf66WXXlK5cuUUGBio7t2768iRIxlue926dbr11lsVGhqqYsWKqX379lq9enWm97FDhw6Z3v6IESMyXPeLL75Q06ZNFRAQoJIlS+ruu+/O9PYvd98u5nQ69f7776tevXry9/dX2bJlNWjQIJ0/f97tell93D548OAM28ws+7hx4zI8plLaX2LDhw9XjRo15Ofnp8qVK+v5559XUlJSpo/VxTp06JBhe2+88Ybsdru+/PLLXD0e77zzjlq3bq1SpUopICBATZs21TfffJPp7X/xxRdq3r
y5ihUrphIlSqhdu3ZavHix23V+/vlntW/fXsHBwQoJCVGzZs0yZJs7d67rd1q6dGndf//9GT626d+/v1vmEiVKqEOHDlq5cuUVH6fcyO7zYsGCBeratasqVKggPz8/Va9eXaNHj5bD4XC7XvpHThs3blS7du1UrFgxvfTSS6patap27Nih3377LdPXXVYaNWqk999/X1FRUZo4caLr8szWGG/YsEG33HKLSpcurYCAAEVERGjgwIEFen/37t2rO++8U+XKlZO/v78qVaqku+++WxcuXHC7XnZfz5dKXx6T/hUcHKzmzZtr/vz5Ofq5zL5WrFghyf131rp1a9djN3ny5Azbze7r2GazafDgwRl+vlu3bqpatarr3+mv10uX2DzxxBOy2Wxuny6k/85///13DRo0SKVKlVJISIj69u2b4fcnpX1CUa9ePfn5+alChQp64oknFBUV5XadS8fk0qVLq2vXrtq+fbvb9VJTUzV69GhVr15dfn5+qlq1ql566SW3+53VfUn/XeTm8ZGkuLg4Pfvss6pcubL8/PxUu3ZtvfPOOxnW5qaPx35+fmratKnq1q2b5XicmYsfBy8vL1WsWFGPPPKI22OW/v6Y1VgppY1hF9+HtWvXKiIiQvPmzVP16tXl6+urKlWq6Pnnn8/0Y+7s/t6y85xNz5v+XJek48ePq2rVqrruuusUGxvrujyv71GXe61dui9Edu6jlNYvunTpohIlSigwMFANGzbUhAkTMlwvu7eb23Eo3axZs1SiRAl17dpVvXr10qxZs7L9s5JUqlQpzZ49W97e3nrjjTdytf30XtemTZsM3/P391dISIjr32PGjFHp0qX16KOPyjAMxcbGaujQoWrVqpUeffTRDD//0ksvyeFw6K233srR/cpMrpZSXCr9zpYqVUqSdODAAc2fP1+9e/dWRESETp06pSlTpqh9+/bauXOnKlSoIElyOBzq1q2bfv31V91999166qmnFBMToyVLlmj79u2qXr266zbuuecedenSxe12hw0blmmeN954QzabTS+88IJOnz6t999/Xx07dtSWLVsUEBAgSVq2bJk6d+6spk2bavjw4bLb7Zo+fbpuvPFGrVy5Us2bN8+w3UqVKmnMmDGS0tbJPPbYY5ne9quvvqo+ffrooYce0pkzZ/Thhx+qXbt22rx5s4oXL57hZx555BG1bdtWkvTtt9/qu+++c/v+oEGD9Nlnn2nAgAEaMmSIDh48qIkTJ2rz5s1avXq1fHx8Mn0cciIqKsp13y7mdDrVvXt3rVq1So888ojq1q2rv/76S++995727NlzxTf5S02fPl2vvPKKxo8fr3vvvTfT61zp8ZgwYYK6d++u++67T8nJyZo9e7Z69+6thQsXqmvXrq7rjRw5UiNGjFDr1q01atQo+fr6at26dVq2bJk6deokKe0Ne+DAgapXr56GDRum4sWLa/Pmzfrll19c+dIf+2bNmmnMmDE6deqUJkyYoNWrV2f4nZYuXVrvvfeeJOno0aOaMGGCunTpoiNHjmT6u7/UuXPnMr3c6XRmuCy7z4vPPvtMQUFBeuaZZxQUFKRly5bptddeU3R0tMaNG+e2zbNnz6pz5866++67df/997vWCz/55JMKCgrSyy+/LEkqW7bsFe+LJPXq1UsPPvigFi9enOlgKkmnT59Wp06dVKZMGb344osqXry4Dh06pG+//bbA7m9ycrJuueUWJSUl6cknn1S5cuV07NgxLVy4UFFRUa6PjHPzer7UzJkzJUmRkZGaNGmSevfure3bt6t27dqZXr9nz55uH0E+/fTTqlu3rh555BHXZXXr1nX9//nz59WlSxf16dNH99xzj+bMmaPHHntMvr6+rj8u8vt1nJV9+/bpk08+yfL7gwcPVvHixTVixAjt3r1bH3/8sQ4fPuwqQVJaGR05cqQ6duyoxx57zHW9P//8M8N4V6dOHb388ssyDEP79+/Xu+++qy5durgdMeGhhx7SjB
kz1KtXLz377LNat26dxowZo127dmUYW/KTYRjq3r27li9frgcffFCNGzfWokWL9H//9386duyYa5zITFbj8eX06NF
2024-10-11 18:46:40 +04:00
"text/plain": [
"<Figure size 600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
2024-10-11 18:41:52 +04:00
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"categorical_features = ['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'] # Ваши категориальные признаки\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime'] # Ваши числовые признаки\n",
"\n",
"# Создание пайплайна для обработки категориальных данных\n",
"preprocessor = ColumnTransformer(\n",
" transformers=[\n",
" ('cat', OneHotEncoder(), categorical_features), # OneHotEncoder для категориальных данных\n",
" ('num', 'passthrough', numeric_features) # Оставляем числовые колонки без изменений\n",
" ]\n",
")\n",
"\n",
"# Создание экземпляра ADASYN\n",
"ada = ADASYN()\n",
"\n",
"# Преобразование данных с помощью пайплайна\n",
"X = preprocessor.fit_transform(df_train.drop(columns=['HeartDisease']))\n",
"y = df_train['HeartDisease']\n",
"\n",
"# Применение ADASYN\n",
"X_resampled, y_resampled = ada.fit_resample(X, y)\n",
"\n",
"# Создание нового DataFrame\n",
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
"# Восстанавливаем названия столбцов для DataFrame\n",
"ohe_columns = preprocessor.named_transformers_['cat'].get_feature_names_out(categorical_features)\n",
"new_column_names = list(ohe_columns) + numeric_features\n",
"df_train_adasyn.columns = new_column_names\n",
"\n",
"# Добавление целевой переменной\n",
"df_train_adasyn['HeartDisease'] = y_resampled\n",
"\n",
"# Вывод информации о новой выборке\n",
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
"print(df_train_adasyn['HeartDisease'].value_counts())\n",
"\n",
"# Визуализация\n",
"counts = df_train_adasyn['HeartDisease'].value_counts()\n",
"plt.figure(figsize=(6, 6))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов HeartDisease в тренировочной выборке после ADASYN')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"2. Балансировка данных андерсемплингом. Этот метод помогает сбалансировать выборку, уменьшая количество экземпляров класса большинства, чтобы привести его в соответствие с классом меньшинства."
]
},
{
"cell_type": "code",
2024-10-12 12:38:44 +04:00
"execution_count": 372,
2024-10-11 18:41:52 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка после undersampling: (32848, 18)\n",
"HeartDisease\n",
"No 16424\n",
"Yes 16424\n",
"Name: count, dtype: int64\n"
]
},
{
"data": {
2024-10-11 18:46:40 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAwAAAADECAYAAAAoEEaxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABBhklEQVR4nO3dd1xTV/8H8E8SIIyAIsgSRURx4sJtHXWgOOvice/RVmtt9bHa1q211rqtq49bWxeOn9a9quKqe6GignUzFGQIIcn5/UGTEhIgIBiUz/v1yktzOffe7z13fu8990QihBAgIiIiIqJCQWruAIiIiIiI6N1hAkBEREREVIgwASAiIiIiKkSYABARERERFSJMAIiIiIiIChEmAEREREREhQgTACIiIiKiQoQJABERERFRIcIEgIiIiEyiUqkQGRmJv//+29yhENFbYAJARO/UmjVrIJFIEBERYe5QiAqMPXv24MqVK7rvO3fuxM2bN80XUDphYWEYMmQI3N3dYWVlBVdXV9SvXx9CCHOHRgVQ06ZN0bRpU3OHUSAYO98VlPrJUQKgXRDtx9raGr6+vhgxYgRevHiRXzESffAmT54MiUSC6Ohoo38vXbo02rVr946j+teSJUuwZs0ag+HHjx/XOybI5XK4urqiadOm+OGHHxAVFfXugyV6D12/fh1ffvklwsLCcPbsWXz66aeIj483d1g4e/Ys6tSpg6NHj2LcuHE4cOAADh06hJ07d0IikZg7PMpGdueWKlWqFIiLUXr3LHIz0tSpU+Ht7Y3k5GScOnUKS5cuxd69e3Hjxg3Y2trmdYxEZGZLliyBs7Mz+vfvb/TvI0eORO3ataFWqxEVFYXTp09j0qRJmDt3LrZs2YJmzZrpyvbp0wfdu3eHXC5/R9ETFXyDBw/GqlWr4OvrCwDo3Lkz6tWrZ9aYlEolBgwYAF9fXxw8eBBFihQxazxEH4KDBw+aOwQAuUwAAgMDUatWLQBpBy0nJyfMnTsXu3btQo8ePfI0QCIyn6SkJJOS+kaNGqFr1656w65evYqAgAB06dIFt27dgru7OwBAJpNBJpPlS7xE76vixYvjxo0buhtpFStWNHdI2L17N+7cuYPbt2/z4p/MQqPRQKlUwtra2tyh5BkrKytzhwAgj94B0N7dCw8PBwC8fPkSY8aMgZ+fHxQKBRwcHBAYGIirV68ajJucnIzJkyfD19cX1tbWcHd3R+fOnXH//n0AQEREhF4Tg4yf9I+utM0RNm/ejG+//RZubm6ws7NDhw4d8OjRI4N5nzt3Dq1bt0aRIkVga2uLJk2aICQkxOgyNm3a1Oj8J0+ebFB2w4YN8Pf3h42NDYoVK4bu3bsbnX9Wy5aeRqPB/PnzUblyZVhbW8PV1RXDhg3Dq1ev9Mpl1kxkxIgRBtM0Fvvs2bMN6hQAUlJSMGnSJJQtWxZyuRwlS5bE2LFjkZKSYrSu0jPW1m3GjBmQSqX47bffclUfP//8Mxo0aAAnJyfY2NjA398f27ZtMzr/DRs2oE6dOrC1tYWjoyMaN25skH3v27cPTZo0gb29PRwcHFC7dm2D2LZu3apbp87OzujduzeePHmiV6Z///56MTs6OqJp06Y4efJktvWUG6ZuF7t27ULbtm3h4eEBuVwOHx8fTJs2DWq1Wq9c06ZNUaVKFVy8eBGNGzeGra0tvv32W5QuXRo3b97En3/+aXS/y0y1atUwf/58xMbGYvHixbrhxtpEXrhwAa1atYKzszNsbGzg7e2NgQMH5uvyhoWFoUuXLnBzc4O1tTU8PT3RvXt3xMXF6ZUzdX/OSPvoXfuxt7dHnTp1sHPnzhyNZ+xz/PhxAPrrrEGDBrq6W7ZsmcF0Td2PJRIJRowYYTB+u3btULp0ad137f6asWnY8OHDIZFI9J4Wadf5iRMnMGzYMDg5OcHBwQF9+/Y1WH9A2hOnypUrQy6Xw8PDA8OHD0dsbKxemYzHZGdnZ7Rt2x
Y3btzQK6dSqTBt2jT4+PhALpejdOnS+Pbbb/WWO7Nl0a6L3NQPACQmJmL06NEoWbIk5HI5ypcvj59//tmg7bz2eCyXy+Hv74+KFStmejw2Jn09yGQylChRAkOHDtWrM+35MbNjJZB2DEu/DGfPnoW3tzeCg4Ph4+MDKysrlCpVCmPHjsWbN28Mxjd1vZmyzWrj1W7rAPD06VOULl0atWrVQkJCgm74256jstrXMr6rZMoyAmnXF23atIGjoyPs7OxQtWpVLFiwwKCcqfPN7XEop7T1vmXLFsyYMQOenp6wtrZG8+bNce/ePYPyK1asgI+PD2xsbFCnTp1Mz3c5Pf5s3LhRV8/79+8HAGzatAn+/v66c7Wfn59enZp67Zl+GadMmYISJUrA3t4eXbt2RVxcHFJSUjBq1Ci4uLhAoVBgwIABWcZZvnx5WFtbw9/fHydOnMi2jjNeF+W0zn/55ReUKVNGr86NXWtlJ1dPADLSXqw7OTkBAB48eICdO3eiW7du8Pb2xosXL7B8+XI0adIEt27dgoeHBwBArVajXbt2OHLkCLp3744vv/wS8fHxOHToEG7cuAEfHx/dPHr06IE2bdrozXf8+PFG45kxYwYkEgm++eYbREZGYv78+WjRogWuXLkCGxsbAMDRo0cRGBgIf39/TJo0CVKpFKtXr0azZs1w8uRJ1KlTx2C6np6emDlzJgAgISEBn332mdF5T5gwAUFBQRg8eDCioqKwaNEiNG7cGJcvX0bRokUNxhk6dCgaNWoEANi+fTt27Nih9/dhw4ZhzZo1GDBgAEaOHInw8HAsXrwYly9fRkhICCwtLY3WQ07Exsbqli09jUaDDh064NSpUxg6dCgqVqyI69evY968ebh79262FzMZrV69Gt9//z3mzJmDnj17Gi2TXX0sWLAAHTp0QK9evaBUKrFp0yZ069YNe/bsQdu2bXXlpkyZgsmTJ6NBgwaYOnUqrKyscO7cORw9ehQBAQEA0i5MBg4ciMqVK2P8+PEoWrQoLl++jP379+vi09Z97dq1MXPmTLx48QILFixASEiIwTp1dnbGvHnzAACPHz/GggUL0KZNGzx69Mjous/o5cuXRodrNBqDYaZuF2vWrIFCocDXX38NhUKBo0ePYuLEiXj9+jVmz56tN82YmBgEBgaie/fu6N27t649/xdffAGFQoHvvvsOAODq6prtsgBA165dMWjQIBw8eBAzZswwWiYyMhIBAQEoXrw4xo0bh6JFiyIiIgLbt2/Pt+VVKpVo1aoVUlJS8MUXX8DNzQ1PnjzBnj17EBsbq7vbmZv9OaP169cDAKKjo7FkyRJ069YNN27cQPny5Y2W79y5M8qWLav7/tVXX6FixYoYOnSoblj6u8OvXr1CmzZtEBQUhB49emDLli347LPPYGVlpUui8no/zsy9e/fw66+/Zvr3ESNGoGjRopg8eTLu3LmDpUuX4uHDh7oTIJB20T1lyhS0aNECn332ma7cX3/9ZXC8q1ChAr777jsIIXD//n3MnTsXbdq00euhZvDgwVi7di26du2K0aNH49y5c5g5cyZCQ0MNji15SQiBDh064NixYxg0aBCqV6+OAwcO4L///S+ePHmiO04Yk9nxOCudOnVC586doVKpcObMGaxYsQJv3rzRbX+5ERMTgwcPHuDbb79F586dMXr0aFy4cAGzZ8/GjRs38Mcff+RqvZmyzWYUFxeHwMBAWFpaYu/evVAoFADyZttOf27X2rt3L37//Xe9YaYu46FDh9CuXTu4u7vjyy+/hJubG0JDQ7Fnzx58+eWXBvPXrjsAOHnyJFasWKH397w4DuXUjz/+CKlUijFjxiAuLg4//fQTevXqhXPnzunKrFy5EsOGDUODBg0watQoPHjwAB06dECxYsVQsmRJXbmcrqOjR49iy5YtGDFiBJydnVG6dGkcOnQIPXr0QPPmzTFr1iwAQGhoKEJCQnR1auq1p9bMmTNhY2ODcePG4d69e1
i0aBEsLS0hlUrx6tUrTJ48GWfPnsWaNWvg7e2NiRMn6o3/559/YvPmzRg5ciTkcjmWLFmC1q1b4/z586hSpUq+1Pn
2024-10-11 18:41:52 +04:00
"text/plain": [
"<Figure size 200x200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"rus = RandomUnderSampler()# Создание экземпляра RandomUnderSampler\n",
"\n",
"# Применение RandomUnderSampler\n",
"X_resampled, y_resampled = rus.fit_resample(df_train.drop(columns=['HeartDisease']), df_train['HeartDisease'])\n",
"\n",
"# Создание нового DataFrame\n",
"df_train_undersampled = pd.DataFrame(X_resampled)\n",
"df_train_undersampled['HeartDisease'] = y_resampled # Добавление целевой переменной\n",
"\n",
"# Вывод информации о новой выборке\n",
"print(\"Обучающая выборка после undersampling: \", df_train_undersampled.shape)\n",
"print(df_train_undersampled['HeartDisease'].value_counts())\n",
"\n",
"# Визуализация распределения классов\n",
"counts = df_train_undersampled['HeartDisease'].value_counts()\n",
"plt.figure(figsize=(2, 2))\n",
"plt.pie(counts, labels=counts.index, autopct='%1.1f%%', startangle=90)\n",
"plt.title('Распределение классов HeartDisease в тренировочной выборке после Undersampling')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}