777 lines
1.4 MiB
Plaintext
777 lines
1.4 MiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',\n",
|
|||
|
" 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',\n",
|
|||
|
" 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',\n",
|
|||
|
" 'Asthma', 'KidneyDisease', 'SkinCancer'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>HeartDisease</th>\n",
|
|||
|
" <th>BMI</th>\n",
|
|||
|
" <th>Smoking</th>\n",
|
|||
|
" <th>AlcoholDrinking</th>\n",
|
|||
|
" <th>Stroke</th>\n",
|
|||
|
" <th>PhysicalHealth</th>\n",
|
|||
|
" <th>MentalHealth</th>\n",
|
|||
|
" <th>DiffWalking</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>AgeCategory</th>\n",
|
|||
|
" <th>Race</th>\n",
|
|||
|
" <th>Diabetic</th>\n",
|
|||
|
" <th>PhysicalActivity</th>\n",
|
|||
|
" <th>GenHealth</th>\n",
|
|||
|
" <th>SleepTime</th>\n",
|
|||
|
" <th>Asthma</th>\n",
|
|||
|
" <th>KidneyDisease</th>\n",
|
|||
|
" <th>SkinCancer</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>16.60</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>3.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>55-59</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Very good</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>20.34</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>80 or older</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Very good</td>\n",
|
|||
|
" <td>7.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>26.58</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>20.0</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Male</td>\n",
|
|||
|
" <td>65-69</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>24.21</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>75-79</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>23.71</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>28.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>40-44</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Very good</td>\n",
|
|||
|
" <td>8.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>28.87</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>75-79</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>12.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>6</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>21.63</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>70-74</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>7</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>31.64</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>80 or older</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>9.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>8</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>26.45</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Female</td>\n",
|
|||
|
" <td>80 or older</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No, borderline diabetes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Fair</td>\n",
|
|||
|
" <td>5.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>9</th>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>40.69</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Male</td>\n",
|
|||
|
" <td>65-69</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Good</td>\n",
|
|||
|
" <td>10.0</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
|
|||
|
"0 No 16.60 Yes No No 3.0 \n",
|
|||
|
"1 No 20.34 No No Yes 0.0 \n",
|
|||
|
"2 No 26.58 Yes No No 20.0 \n",
|
|||
|
"3 No 24.21 No No No 0.0 \n",
|
|||
|
"4 No 23.71 No No No 28.0 \n",
|
|||
|
"5 Yes 28.87 Yes No No 6.0 \n",
|
|||
|
"6 No 21.63 No No No 15.0 \n",
|
|||
|
"7 No 31.64 Yes No No 5.0 \n",
|
|||
|
"8 No 26.45 No No No 0.0 \n",
|
|||
|
"9 No 40.69 No No No 0.0 \n",
|
|||
|
"\n",
|
|||
|
" MentalHealth DiffWalking Sex AgeCategory Race \\\n",
|
|||
|
"0 30.0 No Female 55-59 White \n",
|
|||
|
"1 0.0 No Female 80 or older White \n",
|
|||
|
"2 30.0 No Male 65-69 White \n",
|
|||
|
"3 0.0 No Female 75-79 White \n",
|
|||
|
"4 0.0 Yes Female 40-44 White \n",
|
|||
|
"5 0.0 Yes Female 75-79 Black \n",
|
|||
|
"6 0.0 No Female 70-74 White \n",
|
|||
|
"7 0.0 Yes Female 80 or older White \n",
|
|||
|
"8 0.0 No Female 80 or older White \n",
|
|||
|
"9 0.0 Yes Male 65-69 White \n",
|
|||
|
"\n",
|
|||
|
" Diabetic PhysicalActivity GenHealth SleepTime Asthma \\\n",
|
|||
|
"0 Yes Yes Very good 5.0 Yes \n",
|
|||
|
"1 No Yes Very good 7.0 No \n",
|
|||
|
"2 Yes Yes Fair 8.0 Yes \n",
|
|||
|
"3 No No Good 6.0 No \n",
|
|||
|
"4 No Yes Very good 8.0 No \n",
|
|||
|
"5 No No Fair 12.0 No \n",
|
|||
|
"6 No Yes Fair 4.0 Yes \n",
|
|||
|
"7 Yes No Good 9.0 Yes \n",
|
|||
|
"8 No, borderline diabetes No Fair 5.0 No \n",
|
|||
|
"9 No Yes Good 10.0 No \n",
|
|||
|
"\n",
|
|||
|
" KidneyDisease SkinCancer \n",
|
|||
|
"0 No Yes \n",
|
|||
|
"1 No No \n",
|
|||
|
"2 No No \n",
|
|||
|
"3 No Yes \n",
|
|||
|
"4 No No \n",
|
|||
|
"5 No No \n",
|
|||
|
"6 No Yes \n",
|
|||
|
"7 No No \n",
|
|||
|
"8 Yes No \n",
|
|||
|
"9 No No "
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd \n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\")\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"\n",
|
|||
|
"display(df.head(10))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Сегментация пациентов по рискам сердечно-сосудистых заболеваний\n",
|
|||
|
"Цель: Определить группы пациентов с различными уровнями риска развития сердечно-сосудистых заболеваний на основе их демографических данных, образа жизни и состояния здоровья.\n",
|
|||
|
"\n",
|
|||
|
"Пример:\n",
|
|||
|
"\n",
|
|||
|
"Кластер 1: Пациенты с высоким риском (курение, высокий ИМТ, низкая физическая активность).\n",
|
|||
|
"\n",
|
|||
|
"Кластер 2: Пациенты со средним риском (умеренное курение, средний ИМТ, средняя физическая активность).\n",
|
|||
|
"\n",
|
|||
|
"Кластер 3: Пациенты с низким риском (отсутствие вредных привычек, нормальный ИМТ, высокая физическая активность).\n",
|
|||
|
"\n",
|
|||
|
"Бизнес-применение:\n",
|
|||
|
"\n",
|
|||
|
"Разработка персонализированных программ профилактики и лечения для каждой группы пациентов.\n",
|
|||
|
"\n",
|
|||
|
"Таргетированная реклама медицинских услуг и продуктов для улучшения образа жизни."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"## почистила данные чут чут\n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
|
|||
|
"global df_cleaned\n",
|
|||
|
"df_cleaned = df.dropna()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAASlCAYAAADgeltjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxTVf7/8XfapknbNKUtbVhCoYCssonCuKAiCuK41n0ZQRl/zijqDDIqft0YHXEfd3HGBR03RHEdxRW3AcUFxI3VQi1LgbY0TdukaZPfHzWxIV1ps5XX8/HoQ3OXcz733Jtw7/3ce47B5/P5BAAAAAAAAAAAEOMSoh0AAAAAAAAAAABAW5DUAAAAAAAAAAAAcYGkBgAAAAAAAAAAiAskNQAAAAAAAAAAQFwgqQEAAAAAAAAAAOICSQ0AAAAAAAAAABAXSGoAAAAAAAAAAIC4QFIDAAAAAAAAAADEBZIaAAAAAAAAAAAgLpDUAACEzfTp02WxWCJa54IFC2QwGLRp06awlH/TTTfJYDCEpeyOMBgMuummm9q87MyZM8MbEAAAAOLO9OnT1a9fv2iHEWTTpk0yGAxasGBBm5e96667wh9YmPXr10/Tp0+PdhjtEqvXSgC6HpIaANAJ/DfSG//l5uZq4sSJevvtt0OW9y/zxz/+scny/u///i+wzK5duwLTo5EkaMr06dODttVqtWrUqFG6++675Xa7ox1exPXr10/HH398k/M++ugjGQwGvfTSSxGNadmyZbrpppu0e/fuiNYLAACwL2l8HfDZZ5+FzPf5fOrTp48MBkOz54ud5ccff9RNN93U6Q/3tPRAjH/7v/rqq06tszVvvfVWmx/oiTXfffedTjvtNPXt21dms1m9e/fWMcccowceeCDaoTWpX79+Ide6Tf21JekEAJ0lKdoBAEBX8ve//135+fny+XwqKSnRggULdNxxx+mNN94IuYgxm816+eWX9fDDDys5OTlo3vPPPy+z2SyXyxXJ8NvFZDLpsccekyTt3r1bL7/8smbPnq0vv/xSL7zwQtTi+sMf/qCzzjpLJpMpajHEgmXLlmnu3LmaPn26unXrFu1wAAAAujSz2aznnntOhx12WND0jz/+WMXFxRE5N/3xxx81d+5cHXnkkTH3tkVne+utt/TQQw/FXWJj2bJlmjhxovLy8nTRRRepR48e+uWXX/T555/rvvvu02WXXRbtEEPce++9cjqdgc9vvfWWnn/+ef3zn/9U9+7dA9MPOeQQnXfeebrmmmuiESaAfQxJDQDoRFOnTtWBBx4Y+DxjxgzZbDY9//zzIUmNY489Vq+//rrefvttnXTSSYHpy5YtU2FhoU499VS9/PLLEYu9vZKSknTeeecFPl9yySUaP368Fi5cqHvuuUe9evWKSlyJiYlKTEyMSt0AAADYNx133HFatGiR7r//fiUl/Xar5bnnntPYsWOD3r7Gvusf//iHMjIy9OWXX4Y8eLRjx47oBNWKk08+Oejz9u3b9fzzz+vkk09uMnnW+PgHgHCh+ykACKNu3bopJSWlyRO73r176/DDD9dzzz0XNP3ZZ5/ViBEjtP/+++9VnXfddZcMBoM2b94cMm/OnDlKTk5WeXm5JGn9+vU69dRT1aNHD5nNZtntdp111lmqqKhod70JCQk68sgjJSnklfctW7bo5JNPlsViUU5OjmbPnq36+npJDa/k9+vXLyix4+dyuZSRkaGLL744MO2BBx7Q8OHDlZqaqszMTB144IFBbdjcmBpvv/22jjjiCKWnp8tqteqggw4KWu/TTz/V6aefrry8PJlMJvXp00d//etfVVNT0+62aIstW7bowgsvlM1mk8lk0vDhw/XEE08ELVNbW6sbbrhBY8eOVUZGhtLS0jRhwgQtXbq0xbJvuukm/e1vf5Mk5efnB14J37NNXn31Ve2///6B+pcsWdKp2wgAALCvOPvss1VaWqr33nsvMK22tlYvvf
SSzjnnnCbX8Xq9uvfeezV8+HCZzWbZbDZdfPHFgXN1P39Xp5999pnGjRsns9ms/v376+mnnw4ss2DBAp1++umSpIkTJwbO/z766CNJ0muvvabf//736tWrl0wmkwYMGKCbb745cE7e2dasWaPTTjtNWVlZMpvNOvDAA/X6668HLVNWVqbZs2drxIgRslgsslqtmjp1qr799tsWy54+fboeeughSQrq/mhP//rXvzRgwACZTCYddNBB+vLLL1ss96uvvpLBYNBTTz0VMu+dd96RwWDQm2++KUmqrKzUX/7yF/Xr108mk0m5ubk65phj9M0337RYx8aNGzV8+PAm36TOzc1tcV2p4Q35v/zlL+rTp49MJpMGDhyo22+/XV6vN2i59h5b7777rkaPHi2z2axhw4Zp8eLFrcbSlKbG1PB3X7Zo0SINGzZMKSkpOvjgg/Xdd99Jkh599FENHDhQZrNZRx55ZJPdp33xxRc69thjlZGRodTUVB1xxBH63//+t1cxAugaSJ8CQCeqqKjQrl275PP5tGPHDj3wwANyOp1BbzQ0ds455+iKK66Q0+mUxWJRXV2dFi1apFmzZu1111NnnHGGrrrqKr344ouBG9t+L774oiZPnqzMzEzV1tZqypQpcrvduuyyy9SjRw9t2bJFb775pnbv3q2MjIx2171x40ZJUnZ2dmBafX29pkyZovHjx+uuu+7S+++/r7vvvlsDBgzQn//8ZxkMBp133nm64447VFZWpqysrMC6b7zxhhwOR6D9/v3vf+vyyy/XaaedpiuuuEIul0urV6/WF1980ezFotRwkXfhhRdq+PDhmjNnjrp166aVK1dqyZIlgfUWLVqk6upq/fnPf1Z2drZWrFihBx54QMXFxVq0aFGr2+7xeJp8Aq+pBFFJSYl+97vfBU7wc3Jy9Pbbb2vGjBlyOBz6y1/+IklyOBx67LHHdPbZZ+uiiy5SZWWlHn/8cU2ZMkUrVqzQ6NGjm4yloKBA69atC3ktPCcnJ7DMZ599psWLF+uSSy5Renq67r//fp166qkqKioK2n8AAABoXb9+/XTwwQfr+eef19SpUyU1PFRTUVGhs846S/fff3/IOhdffLEWLFigCy64QJdffrkKCwv14IMPauXKlfrf//4no9EYWHbDhg067bTTNGPGDE2bNk1PPPGEpk+frrFjx2r48OE6/PDDdfnll+v+++/Xtddeq6FDh0pS4L8LFiyQxWLRrFmzZLFY9OGHH+qGG26Qw+HQnXfe2er2uVyuJs91G3dL5PfDDz/o0EMPVe/evXXNNdcoLS1NL774ok4++WS9/PLLOuWUUyRJP//8s1599VWdfvrpys/PV0lJiR599FEdccQR+vHHH5t98/viiy/W1q1b9d577+k///lPk8s899xzqqys1MUXXyyDwaA77rhDBQUF+vnnn4PatbEDDzxQ/fv314svvqhp06YFzVu4cKEyMzM1ZcoUSdKf/vQnvfTSS5o5c6aGDRum0tJSffbZZ/rpp590wAEHNNuOffv21fLly/X999+3+yG26upqHXHEEdqyZYsuvvhi5eXladmyZZozZ462bdume++9N6iN2npsrV+/Xmeeeab+9Kc/adq0aXryySd1+umna8mSJTrmmGPaFWNzPv30U73++uu69NJLJUnz5s3T8ccfr6uuukoPP/ywLrnkEpWXl+uOO+7QhRdeqA8//DCw7ocffqipU6dq7NixuvHGG5WQkKAnn3xSRx11lD799FONGzeuU2IEEGd8AIAOe/LJJ32SQv5MJpNvwYIFIctL8l166aW+srIyX3Jysu8///mPz+fz+f773//6DAaDb9OmTb4bb7zRJ8m3c+fOwHrTpk3zpaWltRrPwQcf7Bs7dmzQtBUrVvgk+Z5++mmfz+fzrVy50ifJt2jRonZvrz+OnTt3+nbu3OnbsGGD79Zbb/UZDAbfyJEjg5aT5Pv73/8etP6YMWOC4lu7dq1Pku+RRx4JWu
7EE0/09evXz+f1en0+n8930kkn+YYPH95ibP59UVhY6PP5fL7du3f70tPTfePHj/fV1NQELesv1+fz+aqrq0PKmjd
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"df = df_cleaned\n",
|
|||
|
"\n",
|
|||
|
"# кодирование категориальных переменных\n",
|
|||
|
"global df_encoded\n",
|
|||
|
"df_encoded = pd.get_dummies(df, columns=['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'], drop_first=True)\n",
|
|||
|
"\n",
|
|||
|
"# числовые признаки для визуализации\n",
|
|||
|
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"\n",
|
|||
|
"# сравнение зависимости BMI и PhysicalHealth\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['BMI'], y=df_encoded['PhysicalHealth'], alpha=0.6)\n",
|
|||
|
"plt.title('BMI vs PhysicalHealth')\n",
|
|||
|
"\n",
|
|||
|
"# сравнение зависимости MentalHealth и SleepTime\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['MentalHealth'], y=df_encoded['SleepTime'], alpha=0.6)\n",
|
|||
|
"plt.title('MentalHealth vs SleepTime')\n",
|
|||
|
"\n",
|
|||
|
"# сравнение зависимости PhysicalHealth и SleepTime\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['PhysicalHealth'], y=df_encoded['SleepTime'], alpha=0.6)\n",
|
|||
|
"plt.title('PhysicalHealth vs SleepTime')\n",
|
|||
|
"\n",
|
|||
|
"# сравнение зависимости BMI и MentalHealth\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['BMI'], y=df_encoded['MentalHealth'], alpha=0.6)\n",
|
|||
|
"plt.title('BMI vs MentalHealth')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"## стандартизация\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"df_encoded[numeric_features] = scaler.fit_transform(df_encoded[numeric_features])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0wAAAJwCAYAAAC6UuHVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADBNklEQVR4nOzdd5wU9f3H8ffubbm93rg7Do7eRaSJICqEIip2xKgYQY0t2FvExFgSS9REjS2aKGKNYtTErlgjigVBrAhKkyrtgLu9Pr8//M24u7dzt7u3d7t393o+Hjy43Z2d+e7Md2a+n/k2h2EYhgAAAAAADTgTnQAAAAAASFYETAAAAABgg4AJAAAAAGwQMAEAAACADQImAAAAALBBwAQAAAAANgiYAAAAAMAGARMAAAAA2CBgAgAAAAAbBEwAAHQAX3zxhZ577jnr9dKlS/Xiiy8mLkEA0EYQMAFt0NNPPy2HwxH23+DBgxOdPABJaPfu3TrrrLO0aNEirVixQhdccIE+//zzRCcLAJKeK9EJABC7K6+8UgMHDrReX3/99QlMDYBkNmbMGOufJPXr109nnHFGglMFAMmPgAlowyZPnqzx48dbr//5z39q69atiUsQgKT23HPP6auvvpLf79fee+8tj8eT6CQBQNKjSR7QBlVXV0uSnM7ITuGdO3fqwgsvVGlpqbxer/r06aM///nPqq+vt5ZZvXq1HA6Hbr311gbfHzx4cFBgZrrmmmvCNgsMXXb8+PEaPHiwFi9erP33318+n089e/bU3//+9wbr3LJli04//XQVFRUpNTVV++yzj+bNmxe0jJnWcP8effRRSdJDDz0kh8Ohd999V2eddZby8/OVlZWlU045RTt27Aha33/+8x9NnTpVJSUl8nq96t27t/74xz+qrq6uwe9wOBw6+uijG6T7rLPOatAkMjCdgX1HJKmyslK5ubkN9vmaNWv0m9/8Rv3795fP51N+fr6mT5+u1atXN9hmOLfeeqv2339/5efny+fzacSIEXr66acbLGe3//70pz8FLTdr1qywy11zzTVBy7355ps68MADlZ6erpycHB111FH6+uuvg5Yx80thYaFqamqCPnviiSesdYcG/S+//LK17szMTE2dOlVffvllg3RmZGTo+++/15QpU5Senq6SkhJdd911Mgwj6n1kt39C8/jbb78th8Oht99+O+j7U6dObbCfQs+XzMxMjRo1qkHeMM+XULfeeqscDkeDvHDPPfdor732ktfrVUlJiWbPnq2dO3c2WKeZ5kGDBmnEiBH67LPPrLQ0JfD7puuvv15Op1OPP/540PuNnZ+hvyeSvCpJjz76qEaNGqW0tDTl5ubqoIMO0muvvSZJ6tGjR6PHqkePHtZ66uvrdfvtt2uvvfZSamqqioqKdNZZZzW4JvTo0UOHH364XnvtNQ0dOlSpqakaNGiQnnnmmaDlzOtM4DGpr6/XkCFD5HA49NBDD1nvX3PNNRo0aJAyMjKUlZWl0aNHNzj2//vf/zR9+nR169ZNXq9XpaWluuiii+T3+4OWM/N7KLO5dmB+jCWPBtqzZ4+Ki4vDrgPoKKhhAtogM2Dyer1NLltRUaFx48Zp/fr1Ouuss9StWze9//77mjNnjjZu3Kjbb7+92em59957rZv3nDlzwi6zY8cOHXbYYTr++ON14okn6qmnntI555wjj8ej0047TZLk9/s1fvx4rVy5Uueee6569uyp+fPna9asWdq5c6cuuOCCoHWeeOKJOuyww4LeGzt2bNDrc889Vzk5Obrmmmu0fPly3XvvvVqzZo1ViJB+KvRkZGTo4osvVkZGht5880394Q9/0K5du3TLLbcErS81NVUvvviitmzZosLCQivdTz75pFJTU8P+9tTUVM2dOzco0HrmmWdUWVnZYNmPP/5Y77//vk444QR17dpVq1ev1r333qvx48frq6++UlpaWthtmO644w4deeSRmjFjhqqrq/
Wvf/1L06dP1wsvvKCpU6cGLTt58mSdcsopQe8NHTq0wToLCgp02223Wa9/9atfBX2+YMECHXrooerVq5euueYa+f1+3XnnnRo7dqw+/fTToAKr9FNfmhdeeEHHHHOM9d7cuXOVmpraYJ888sgjmjlzpqZMmaI///nPqqio0L333qsDDjhAS5YsCVp3XV2dDjnkEI0ePVo333yzXnnlFV199dWqra3VddddF9U+euSRR6zl//e//+n+++/XbbfdpoKCAklSUVFRg/1kevfdd/XSSy/Zfm6ue+vWrbrnnns0ffp0ffHFF+rfv7/td+xcc801uvbaazVp0iSdc845Vh7/+OOPtXDhQrndbtvv/va3v416e6a5c+fq97//vf7yl7/opJNOCrvMmWeeqQMPPFDST/n92WefDfo80rx67bXX6pprrtH++++v6667Th6PRx9++KHefPNNHXzwwbr99tu1Z88eSdLXX3+tG264Iai5cmBgcdZZZ+mhhx7SqaeeqvPPP1+rVq3SXXfdpSVLljTYXytWrNAvf/lLnX322Zo5c6bmzp2r6dOn65VXXtHkyZNt980jjzwStm9YeXm5jjnmGPXo0UN+v18PPfSQpk2bpg8++ECjRo2SJM2fP18VFRU655xzlJ+fr48++kh33nmnfvjhB82fP7/RYxKNpvJooL/85S/avHlz3LYNtEkGgDbn9ttvNyQZn332WdD748aNM/baa6+g9/74xz8a6enpxrfffhv0/hVXXGGkpKQYa9euNQzDMFatWmVIMm655ZYG29trr72McePGNXj/yiuvNCQZW7dubXTZcePGGZKMv/zlL9Z7VVVVxtChQ43CwkKjuro66Hc9+uij1nLV1dXGmDFjjIyMDGPXrl1NptU0d+5cQ5IxYsQIa/2GYRg333yzIcn4z3/+Y71XUVHR4PtnnXWWkZaWZlRWVgb9jr322ssYMmSIceutt1rvP/LII0bXrl2NAw88MGj/m+k88cQTDZfLZWzatMn6bOLEicZJJ53U4HeES8sHH3xgSDIefvhh299r9/3q6mpj8ODBxoQJE4Lel2TMnj27yfXNmDHD6NmzZ4PvXn311dZr8zhu27bNeu+zzz4znE6nccopp1jvXX311db+OPzww63316xZYzidTuPEE080JBk//vijYRiGsXv3biMnJ8c444wzgra/adMmIzs7O+j9mTNnGpKM8847z3qvvr7emDp1quHxeKx1RrOPTGZeWrVqVYPP3nrrLUOS8dZbb1nv7bfffsahhx7aYD+Zvz/Qa6+9ZkgynnrqKeu9cOexYRjGLbfcEpSOLVu2GB6Pxzj44IONuro6a7m77rrLkGQ8+OCDQesMPC9feuklQ5JxyCGHNEhTOIHff/HFFw2Xy2VccsklYZddsWKFIcmYN29eo789kuOwYsUKw+l0Gsccc0zQbzSMn45vqHDHw/S///3PkGQ89thjQe+/8sorDd7v3r27Icn497//bb1XVlZmdO7c2Rg2bJj1XmjeqKysNLp162Yd/7lz5zZIh2nLli2GpKBrSbjz/8YbbzQcDoexZs0a672ZM2ca6enpDZadP39+g9/fnDy6ZcsWIzMz01o23H4FOgKa5AFt0LZt2yRJnTp1anLZ+fPn68ADD1Rubq62bt1q/Zs0aZLq6ur07rvvBi1fUVERtNzWrVsbNE0zmbUBdjUrgVwul8466yzrtcfj0VlnnaUtW7Zo8eLFkqSXXnpJxcXFOvHEE63l3G63zj//fO3Zs0fvvPNOk9sJdeaZZwY9NT7nnHPkcrmCnq76fD7r7927d2vr1q068MADVVFRoW+++abBOk899VTNnTvXej137lzNnDnTtonk8OHDtddee1k1C2vWrNFbb72lWbNmNVg2MC01NTXatm2b+vTpo5ycHH366adN/t7A7+/YsUNlZWU68MADI/puONXV1Y3WZG7cuFFLly7VrFmzlJeXZ70/ZMgQTZ48OexT7NNOO02vvP
KKNm3aJEmaN2+exowZo379+gUt9/rrr2vnzp068cQTg/JjSkqK9ttvP7311lsN1n3uuedafzscDp177rmqrq7WggU
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" HeartDisease BMI PhysicalHealth MentalHealth SleepTime \\\n",
|
|||
|
"0 No -1.903749 -0.178909 3.477436 -1.363066 \n",
|
|||
|
"1 No -1.334675 -0.509405 -0.432138 -0.147128 \n",
|
|||
|
"2 No -0.385204 1.693903 3.477436 0.460840 \n",
|
|||
|
"3 No -0.745820 -0.509405 -0.432138 -0.755097 \n",
|
|||
|
"4 No -0.821900 2.575227 -0.432138 0.460840 \n",
|
|||
|
"\n",
|
|||
|
" Smoking_Yes AlcoholDrinking_Yes Stroke_Yes DiffWalking_Yes Sex_Male \\\n",
|
|||
|
"0 True False False False False \n",
|
|||
|
"1 False False True False False \n",
|
|||
|
"2 True False False False True \n",
|
|||
|
"3 False False False False False \n",
|
|||
|
"4 False False False True False \n",
|
|||
|
"\n",
|
|||
|
" ... Diabetic_Yes (during pregnancy) PhysicalActivity_Yes GenHealth_Fair \\\n",
|
|||
|
"0 ... False True False \n",
|
|||
|
"1 ... False True False \n",
|
|||
|
"2 ... False True True \n",
|
|||
|
"3 ... False False False \n",
|
|||
|
"4 ... False True False \n",
|
|||
|
"\n",
|
|||
|
" GenHealth_Good GenHealth_Poor GenHealth_Very good Asthma_Yes \\\n",
|
|||
|
"0 False False True True \n",
|
|||
|
"1 False False True False \n",
|
|||
|
"2 False False False True \n",
|
|||
|
"3 True False False False \n",
|
|||
|
"4 False False True False \n",
|
|||
|
"\n",
|
|||
|
" KidneyDisease_Yes SkinCancer_Yes Cluster \n",
|
|||
|
"0 False True 2 \n",
|
|||
|
"1 False False 1 \n",
|
|||
|
"2 False False 2 \n",
|
|||
|
"3 False True 1 \n",
|
|||
|
"4 False False 0 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 39 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"df = df_encoded\n",
|
|||
|
"\n",
|
|||
|
"X = df.drop(columns=['HeartDisease'])\n",
|
|||
|
"\n",
|
|||
|
"agg_clustering = AgglomerativeClustering(n_clusters=3)\n",
|
|||
|
"clusters = agg_clustering.fit_predict(X)\n",
|
|||
|
"\n",
|
|||
|
"df_encoded['Cluster'] = clusters\n",
|
|||
|
"\n",
|
|||
|
"Z = linkage(X, 'ward')\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"dendrogram(Z)\n",
|
|||
|
"plt.title('Дендрограмма агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Объекты')\n",
|
|||
|
"plt.ylabel('Расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"print(df_encoded.head())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAASlCAYAAAALTeBgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3RU1drH8d/MZErapFda6F26BQRBEPRaLoqAHWwvV8Fe8dpRsfeLol7FAogI1isICohIURRRBKmhh/ReJsnMef+IjAxJgECSSeD7WStLZ+9TnjlnZjj7PGfvbTIMwxAAAAAAAAAAAEAjYPZ3AAAAAAAAAAAAAEeKxAYAAAAAAAAAAGg0SGwAAAAAAAAAAIBGg8QGAAAAAAAAAABoNEhsAAAAAAAAAACARoPEBgAAAAAAAAAAaDRIbAAAAAAAAAAAgEaDxAYAAAAAAAAAAGg0SGwAAAAAAAAAAIBGg8QGAKBOjR07ViEhIfW6z2nTpslkMmn79u11sv2HH35YJpOpTrZ9LEwmkx5++OEjXnbChAl1GxAAAAAanbFjxyopKcnfYfjYvn27TCaTpk2bdsTLPvvss3UfWB1LSkrS2LFj/R1GjTTUthKA4w+JDQCoJftvph/4Fxsbq0GDBmnevHmVlt+/zHXXXVfl9v797397l8nIyPCW+yNRUJWxY8f6vFen06lu3brpueeek8vl8nd49S4pKUnnnXdelXVLliyRyWTSxx9/XK8xLV++XA8//LBycnLqdb8AAAAnkgPbAcuWLatUbxiGmjVrJpPJVO31Ym1Zv369Hn744Vp/wOdQD8Xsf/+rV6+u1X0ezldffXXED/U0NL///rsuvvhitWjRQg6HQ02aNNFZZ52lV155xd+hVSkpKalSW7eqvyNJPAFAbQnwdwAAcLx59NFH1bJlSxmGodTUVE2bNk3/+Mc/9MUXX1RqyDgcDs2ZM0dTpkyRzWbzqZs5c6YcDodKSkrqM/wasdvteuuttyRJOTk5mjNnju6880799NNP+vDDD/0W15VXXqlLLrlEdrvdbzE0BMuXL9cjjzyisWPHKjw83N/hAAAAHNccDodmzJih008/3af8u+++0+7du+vl2nT9+vV65JFHNHDgwAbX66K2ffXVV/rPf/7T6JIby5cv16BBg9S8eXNdf/31io+P165du7Ry5Uq99NJLuummm/wdYiUvvviiCgoKvK+/+uorzZw5Uy+88IKio6O95X379tUVV1yhe++91x9hAjjBkNgAgFp2zjnnqHfv3t7X1157reLi4jRz5sxKiY2zzz5bn3/+uebNm6d//vOf3vLly5crOTlZI0aM0Jw5c+ot9poKCAjQFVdc4X1944036pRTTtGsWbP0/PPPKzEx0S9xWSwWWSwWv+wbAAAAJ6Z//OMfmj17tl5++WUFBPx9u2XGjBnq1auXTy9snLgef/xxhYWF6aeffqr08FFaWpp/gjqM4cOH+7zet2+fZs6cqeHDh1eZQDvw8w8AdYWhqACgjoWHhyswMLDKi7smTZpowIABmjFjhk/59OnT1bVrV3Xp0uWo9vnss8/KZDJpx44dleomTpwom82m7OxsSdLmzZs1YsQIxcfHy+FwqGnTprrkkkuUm5tb4/2azWYNHDhQkip1f9+zZ4+GDx+ukJAQxcTE6M4775Tb7ZZU0T0/KSnJJ7mzX0lJicLCwjRu3Dhv2SuvvKLOnTsrKChIERER6t27t88xrG6OjXnz5umMM85QaGionE6n+vTp47Pe999/r5EjR6p58+ay2+1q1qyZbrvtNhUXF9f4WByJPXv26JprrlFcXJzsdrs6d+6st99+22eZ0tJSPfjgg+rVq5fCwsIUHBys/v37a/HixYfc9sMPP6y77rpLktSyZUtv9/CDj8mnn36qLl26ePc/f/78Wn2PAAAAJ4pLL71UmZmZWrhwobestLRUH3/8sS677LIq1/F4PHrxxRfVuXNnORwOxc
XFady4cd5r9f32D3u6bNkynXzyyXI4HGrVqpXee+897zLTpk3TyJEjJUmDBg3yXv8tWbJEkvTZZ5/p3HPPVWJioux2u1q3bq1JkyZ5r8lr259//qmLL75YkZGRcjgc6t27tz7//HOfZbKysnTnnXeqa9euCgkJkdPp1DnnnKO1a9cecttjx47Vf/7zH0nyGQrpYG+88YZat24tu92uPn366KeffjrkdlevXi2TyaR33323Ut3XX38tk8mkL7/8UpKUn5+vW2+9VUlJSbLb7YqNjdVZZ52lX3755ZD72Lp1qzp37lxlj+rY2NhDritV9JS/9dZb1axZM9ntdrVp00ZPPfWUPB6Pz3I1/WwtWLBA3bt3l8PhUKdOnTR37tzDxlKVqubY2D+U2ezZs9WpUycFBgbqtNNO0++//y5Jmjp1qtq0aSOHw6GBAwdWOZTaqlWrdPbZZyssLExBQUE644wz9MMPPxxVjACOD6RQAaCW5ebmKiMjQ4ZhKC0tTa+88ooKCgp8ejYc6LLLLtMtt9yigoIChYSEqLy8XLNnz9btt99+1MNQjRo1Snfffbc++ugj783t/T766CMNHTpUERERKi0t1bBhw+RyuXTTTTcpPj5ee/bs0ZdffqmcnByFhYXVeN9bt26VJEVFRXnL3G63hg0bplNOOUXPPvusvvnmGz333HNq3bq1brjhBplMJl1xxRV6+umnlZWVpcjISO+6X3zxhfLy8rzH780339TNN9+siy++WLfccotKSkr022+/adWqVdU2GKWKht4111yjzp07a+LEiQoPD9eaNWs0f/5873qzZ89WUVGRbrjhBkVFRenHH3/UK6+8ot27d2v27NmHfe9lZWVVPolXVZIoNTVVp556qvciPyYmRvPmzdO1116rvLw83XrrrZKkvLw8vfXWW7r00kt1/fXXKz8/X//97381bNgw/fjjj+revXuVsVx00UXatGlTpS7iMTEx3mWWLVumuXPn6sYbb1RoaKhefvlljRgxQjt37vQ5fwAAADi8pKQknXbaaZo5c6bOOeccSRUP1uTm5uqSSy7Ryy+/XGmdcePGadq0abr66qt18803Kzk5Wa+++qrWrFmjH374QVar1bvsli1bdPHFF+vaa6/VmDFj9Pbbb2vs2LHq1auXOnfurAEDBujmm2/Wyy+/rPvuu08dO3aUJO9/p02bppCQEN1+++0KCQnRokWL9OCDDyovL0/PPPPMYd9fSUlJlde6Bw5RtN8ff/yhfv36qUmTJrr33nsVHBysjz76SMOHD9ecOXN04YUXSpK2bdumTz/9VCNHjlTLli2VmpqqqVOn6owzztD69eur7QE+btw47d27VwsXLtT7779f5TIzZsxQfn6+xo0bJ5PJpKeffloXXXSRtm3b5nNcD9S7d2+1atVKH330kcaMGeNTN2vWLEVERGjYsGGSpH/961/6+OOPNWHCBHXq1EmZmZlatmyZNmzYoJ49e1Z7HFu0aKEVK1Zo3bp1NX6QraioSGeccYb27NmjcePGqXnz5lq+fLkmTpyolJQUvfjiiz7H6Eg/W5s3b9bo0aP1r3/9S2PGjNE777yjkSNHav78+TrrrLNqFGN1vv/+e33++ecaP368JGny5Mk677zzdPfdd2vKlCm68cYblZ2draefflrXXHONFi1a5F130aJFOuecc9SrVy899NBDMpvNeuedd3TmmWfq+++/18knn1wrMQJoZAwAQK145513DEmV/ux2uzFt2rRKy0syxo8fb2RlZRk2m814//33DcMwjP/973+GyWQytm/fbjz00EOGJCM9Pd273pgxY4zg4ODDxnPaaacZvXr18in78ccfDUnGe++9ZxiGYaxZs8aQZMyePbvG73d/HOnp6UZ6erqxZcsW44knnjBMJpNx0kkn+SwnyXj00Ud91u/Ro4dPfBs3bjQkGa+99prPchdccIGRlJRkeDwewzAM45///KfRuXPnQ8a2/1wkJycbhmEYOT
k5RmhoqHHKKacYxcXFPsvu365hGEZRUVGlbU2ePNkwmUzGjh07vGX7z8uBWrRoUeX5P/DvwON87bXXGgkJCUZGRob
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация данных с учетом понимания их особенностей\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"\n",
|
|||
|
"# Взаимодействие между BMI и PhysicalHealth\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x='BMI', y='PhysicalHealth', hue='HeartDisease', data=df)\n",
|
|||
|
"plt.title('BMI vs PhysicalHealth')\n",
|
|||
|
"\n",
|
|||
|
"# Взаимодействие между MentalHealth и SleepTime\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x='MentalHealth', y='SleepTime', hue='HeartDisease', data=df)\n",
|
|||
|
"plt.title('MentalHealth vs SleepTime')\n",
|
|||
|
"\n",
|
|||
|
"# Взаимодействие между PhysicalHealth и SleepTime\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x='PhysicalHealth', y='SleepTime', hue='HeartDisease', data=df)\n",
|
|||
|
"plt.title('PhysicalHealth vs SleepTime')\n",
|
|||
|
"\n",
|
|||
|
"# Сравнение зависимости BMI и MentalHealth\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x='BMI', y='MentalHealth', hue='HeartDisease', data=df)\n",
|
|||
|
"plt.title('BMI vs MentalHealth')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAASlCAYAAAALTeBgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxTVf7/8Xdpm6Zb0tJYdqHauqBUEJdRoIobbuPI1A11FGX8OSO4DOMM4L6MLI47Lrh8FR13RNxGHRd0AMUdBZfR4jAiIGBLm7RN03TJ7w+mmaZJkybNbXLb1/Px6ENzz73nfO45N5d77yf33hSfz+cTAAAAAAAAAACACfRLdAAAAAAAAAAAAABdRWIDAAAAAAAAAACYBokNAAAAAAAAAABgGiQ2AAAAAAAAAACAaZDYAAAAAAAAAAAApkFiAwAAAAAAAAAAmAaJDQAAAAAAAAAAYBokNgAAAAAAAAAAgGmQ2AAAAAAAAAAAAKZBYgMAYKipU6cqJyenR9tcvHixUlJS9J///MeQ+q+77jqlpKQYUnd3pKSk6LrrruvyvDNmzDA2IAAAAJjO1KlTNWLEiESHEeA///mPUlJStHjx4i7Pe8sttxgfmMFGjBihqVOnJjqMqCTruRKA3ofEBgDESdvF9PZ/hYWFmjhxol577bWg+dvm+e1vfxuyviuvvNI/T2VlpX96IhIFoUydOjVgXW02m/bbbz/deuutamxsTHR4PW7EiBE68cQTQ5a9++67SklJ0XPPPdejMb3//vu67rrrVFNT06PtAgAA9CXtzwNWrVoVVO7z+TRs2DClpKR0erwYL19//bWuu+66uP/AJ9yPYtrW/5NPPolrm5G8+uqrXf5RT7JZt26dTjnlFA0fPlxWq1VDhgzR0UcfrYULFyY6tJBGjBgRdK4b6q8riScAiJe0RAcAAL3NDTfcoKKiIvl8Pm3btk2LFy/W8ccfr5dffjnoRMZqtWrp0qW69957ZbFYAsqeeuopWa1WeTyengw/KhkZGXrooYckSTU1NVq6dKkuv/xyffzxx3r66acTFtdvfvMbnXHGGcrIyEhYDMng/fff1/XXX6+pU6cqLy8v0eEAAAD0alarVU8++aTGjx8fMP2f//ynNm3a1CPHpl9//bWuv/56HX744Ul310W8vfrqq7rnnntMl9x4//33NXHiRO2666664IILNHDgQP3444/64IMPdOedd+riiy9OdIhB7rjjDtXV1fk/v/rqq3rqqad0++23y+Fw+KcfeuihOvvsszV79uxEhAmgjyGxAQBxdtxxx+mAAw7wf542bZoGDBigp556Kiixceyxx+qll17Sa6+9pl/96lf+6e+//742bNig8vJyLV26tMdij1ZaWprOPvts/+eLLrpIBx98sJ555hnddtttGjx4cELiSk1NVWpqakLaBgAAQN90/PHHa8mSJbrrrruUlva/yy1PPvmkxo4dG3AXNvqum266SXa7XR9//HHQj4+2b9+emKAiOPnkkwM+b926VU899ZROPvnkkAm09ts/ABiFR1EBgMHy8vKUmZkZ8uBuyJAhKisr05NPPhkw/YknntCoUaO07777xtTmLbfcopSUFP3www9BZXPmzJHFYlF1dbUkqaKiQuXl5Ro4cKCsVquGDh2qM844Q06nM+p2+/Xrp8MPP1ySgm5/37x5s04++WTl5ORol1120eWXX66WlhZJO2/PHzFiREByp43H45HdbteFF17on7Zw4ULts88+ysrKUn5+vg444ICAPuzsHRuvvfaaDjvsMOXm5spms+nAAw8MWG7lypU69dRTteuuuyojI0PDhg3TH/7wBzU0NETdF12xefNmnX/++RowYIAyMjK0zz776OGHHw6Yx+v16pprrtHYsWNlt9uVnZ2tCRMm6J133glb93XXXac//elPkqSioiL/7eEd++SFF17Qvvvu62//9ddfj+s6AgAA9BVTpkxRVVWV3nzzTf
80r9er5557TmeeeWbIZVpbW3XHHXdon332kdVq1YABA3ThhRf6j9XbtD32dNWqVTrooINktVq122676bHHHvPPs3jxYp166qmSpIkTJ/qP/959911J0osvvqgTTjhBgwcPVkZGhnbffXfdeOON/mPyePvXv/6lU045Rf3795fVatUBBxygl156KWCeHTt26PLLL9eoUaOUk5Mjm82m4447Tl988UXYuqdOnap77rlHkgIehdTRAw88oN13310ZGRk68MAD9fHHH4et95NPPlFKSooeffTRoLJ//OMfSklJ0SuvvCJJqq2t1WWXXaYRI0YoIyNDhYWFOvroo/XZZ5+FbeP777/XPvvsE/KO6sLCwrDLSjvvlL/ssss0bNgwZWRkqLi4WAsWLFBra2vAfNFuW2+88YZGjx4tq9WqkSNH6vnnn48YSyih3rHR9iizJUuWaOTIkcrMzNQhhxyidevWSZLuv/9+FRcXy2q16vDDDw/5KLUPP/xQxx57rOx2u7KysnTYYYfpvffeiylGAL0DKVQAiDOn06nKykr5fD5t375dCxcuVF1dXcCdDe2deeaZuvTSS1VXV6ecnBw1NzdryZIlmjlzZsyPoTrttNP05z//Wc8++6z/4nabZ599Vsccc4zy8/Pl9Xo1adIkNTY26uKLL9bAgQO1efNmvfLKK6qpqZHdbo+67e+//16SVFBQ4J/W0tKiSZMm6eCDD9Ytt9yit956S7feeqt23313/f73v1dKSorOPvts3XzzzdqxY4f69+/vX/bll1+Wy+Xy99+DDz6oSy65RKeccoouvfRSeTwerV27Vh9++GGnJ4zSzhO9888/X/vss4/mzJmjvLw8rVmzRq+//rp/uSVLlsjtduv3v/+9CgoK9NFHH2nhwoXatGmTlixZEnHdm5qaQv4SL1SSaNu2bfrFL37hP8jfZZdd9Nprr2natGlyuVy67LLLJEkul0sPPfSQpkyZogsuuEC1tbX6v//7P02aNEkfffSRRo8eHTKWX//61/ruu++CbhHfZZdd/POsWrVKzz//vC666CLl5ubqrrvuUnl5uTZu3BgwfgAAAIhsxIgROuSQQ/TUU0/puOOOk7TzhzVOp1NnnHGG7rrrrqBlLrzwQi1evFjnnXeeLrnkEm3YsEF333231qxZo/fee0/p6en+edevX69TTjlF06ZN07nnnquHH35YU6dO1dixY7XPPvuorKxMl1xyie666y5dccUV2nvvvSXJ/9/FixcrJydHM2fOVE5OjpYvX65rrrlGLpdLf/3rXyOun8fjCXms2/4RRW2++uorjRs3TkOGDNHs2bOVnZ2tZ599VieffLKWLl2qyZMnS5L+/e9/64UXXtCpp56qoqIibdu2Tffff78OO+wwff31153eAX7hhRdqy5YtevPNN/W3v/0t5DxPPvmkamtrdeGFFyolJUU333yzfv3rX+vf//53QL+2d8ABB2i33XbTs88+q3PPPTeg7JlnnlF+fr4mTZokSfrd736n5557TjNmzNDIkSNVVVWlVatW6ZtvvtH+++/faT8OHz5cq1ev1pdffhn1D9ncbrcOO+wwbd68WRdeeKF23XVXvf/++5ozZ45++ukn3XHHHQF91NVtq6KiQqeffrp+97vf6dxzz9UjjzyiU089Va+//rqOPvroqGLszMqVK/XSSy9p+vTpkqR58+bpxBNP1J///Gfde++9uuiii1RdXa2bb75Z559/vpYvX+5fdvny5TruuOM0duxYXXvtterXr58eeeQRHXHEEVq5cqUOOuiguMQIwGR8AIC4eOSRR3ySgv4yMjJ8ixcvDppfkm/69Om+HTt2+CwWi+9vf/ubz+fz+f7+97/7UlJSfP/5z3981157rU+S7+eff/Yvd+655/qys7MjxnPIIYf4xo4dGzDto48+8knyPfbYYz6fz+dbs2aNT5JvyZIlUa9vWxw///yz7+eff/atX7/eN3fuXF9KSoqvtLQ0YD5Jvh
tuuCFg+TFjxgTE9+233/ok+e67776A+U466STfiBEjfK2trT6fz+f71a9+5dtnn33CxtY2Fhs2bPD5fD5fTU2NLzc
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
"# Plot the data as-is, without yet accounting for its particular characteristics.\n",
"# Each tuple is (x column, y column) for one panel of the 2x2 grid.\n",
"feature_pairs = [\n",
"    ('BMI', 'PhysicalHealth'),\n",
"    ('MentalHealth', 'SleepTime'),\n",
"    ('PhysicalHealth', 'SleepTime'),\n",
"    ('BMI', 'MentalHealth'),\n",
"]\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"# Panels are numbered from 1 and filled row by row.\n",
"for panel, (x_col, y_col) in enumerate(feature_pairs, start=1):\n",
"    plt.subplot(2, 2, panel)\n",
"    sns.scatterplot(x=x_col, y=y_col, data=df)\n",
"    plt.title(f'{x_col} vs {y_col}')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAzgAAAJcCAYAAAA1ngF6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACgn0lEQVR4nOzdd3xc1Z338e/0JmnUuyzLvcjGBtvYVAMGYyChBdgkBEIoKcAuYUkI+6SRZAP7pEAgQEjChhCWTYBNSKMGQnkIsHRTDQYbjI3lqmJ1ae7zh30Oo7HKjLquP+/Xyy9Loylnbjn3/M75nXM9juM4AgAAAAAX8I51AQAAAABguBDgAAAAAHANAhwAAAAArkGAAwAAAMA1CHAAAAAAuAYBDgAAAADXIMABAAAA4BoEOAAAAABcgwAHAAAAgGsQ4AAARs3kyZP12c9+dqyLAQBwMQIcAHCZW2+9VR6Px/4Lh8MqLy/XypUrdd1116mpqWmsiwgAwIjxj3UBAAAj4zvf+Y5qamrU2dmpzZs369FHH9Ull1yiH//4x/rTn/6k+fPnj3URAQAYdgQ4AOBSq1at0qJFi+zvV1xxhR555BGdcMIJ+vjHP6433nhDkUhkDEvYu+bmZsVisVH5rLa2NgWDQXm9JDQAgFtQowPAPuTII4/UN77xDb333nu6/fbb7eNvvvmmPvGJTyg/P1/hcFiLFi3Sn/70px6vNalvTz75pC699FIVFRUpFovp5JNP1tatW3s813Ecfe9731NlZaWi0aiOOOIIvfbaa3uVx7znY489pi996UsqLi5WZWWl/fuNN96ouXPnKhQKqby8XBdeeKHq6+v3ep8bbrhBU6ZMUSQS0ZIlS/TEE09o+fLlWr58uX3Oo48+Ko/Ho9/+9rf6+te/roqKCkWjUTU2NmrHjh267LLLNG/ePGVlZSknJ0erVq3Syy+/3ONzzHvceeeduvLKK1VRUaHs7Gx94hOfUENDg9rb23XJJZeouLhYWVlZOuecc9Te3p7JLgIADBEjOACwj/nMZz6jf/u3f9ODDz6o888/X6+99poOPvhgVVRU6Gtf+5pisZjuvPNOnXTSSfqf//kfnXzyyT1ef/HFFysvL0/f+ta3tH79el177bW66KKL9Lvf/c4+55vf/Ka+973v6bjjjtNxxx2nF154Qcccc4w6Ojp6LdOXvvQlFRUV6Zvf/Kaam5slSd/+9rd15ZVXasWKFfriF7+oNWvW6KabbtKzzz6rJ598UoFAQJJ000036aKLLtKhhx6qL3/5y1q/fr1OOukk5eXl9QiWjO9+97sKBoO67LLL1N7ermAwqNdff1333HOPTjvtNNXU1Kiurk4333yzDj/8cL3++usqLy/v8R5XXXWVIpGIvva1r2nt2rW6/vrrFQgE5PV6tXPnTn3729/W008/rVtvvVU1NTX65je/OaR9BgDIgAMAcJVf/epXjiTn2Wef7fM58XjcWbhwoeM4jnPUUUc58+bNc9ra2uzfE4mEc9BBBznTp0/f631XrFjhJBIJ+/iXv/xlx+fzOfX19Y7jOM6WLVucYDDoHH/88T2e92//9m+OJOfss8/e6z0POeQQp6uryz5u3uOYY45xuru77eM//elPHUnOf/7nfzqO4zjt7e1OQUGBs3jxYqezs9M+79Zbb3UkOYcffrh97O9//7sjyZkyZYrT0tLSY3u0tbX1+BzHcZx169Y5oVDI+c53vrPXe9TW1jodHR328U9+8pOOx+NxVq1a1eM9li1b5lRXVzsAgNFDihoA7IOysrLU1NSkHTt26JFHHtHpp5+upqYmbdu2Tdu2bdP27du1cuVKvf3229q4cWOP115wwQXyeDz290MPPVTd3d167733JEl/+9vf1NHRoYsvvrjH8y655JI+y3P++efL5/PZ3817XHLJJT3mx5x//vnKycnRX//6V0nSc889p+3bt+v888+X3/9RUsKnP/1p5eXl9fpZZ5
999l5zj0KhkP2c7u5ubd++XVlZWZo5c6ZeeOGFvd7jrLPOsiNIknTggQfKcRx97nOf6/G8Aw88UBs2bFBXV1ef3x0AMLxIUQOAfdCuXbtUXFystWvXynEcfeMb39A3vvGNXp+7ZcsWVVRU2N8nTZrU4+8mkNi5c6ck2UBn+vTpPZ5XVFTUZ9BRU1PT43fzHjNnzuzxeDAY1JQpU+zfzf/Tpk3r8Ty/36/Jkyen9VmSlEgk9JOf/EQ33nij1q1bp+7ubvu3goKCvZ6fug3i8bgkqaqqaq/HE4mEGhoaen0fAMDwI8ABgH3MBx98oIaGBk2bNk2JREKSdNlll2nlypW9Pj81eEgeaUnmOM6gyzSaq7n19lnf//739Y1vfEOf+9zn9N3vflf5+fnyer265JJL7DZK1tc2GIltAwDIDAEOAOxjfvOb30iSVq5cqSlTpkiSAoGAVqxYMSzvX11dLUl6++237ftL0tatW+0oT7rvsWbNmh7v0dHRoXXr1tmymuetXbtWRxxxhH1eV1eX1q9fn/a9fu6++24dccQRuuWWW3o8Xl9fr8LCwrTeAwAwPjAHBwD2IY888oi++93vqqamRp/+9KdVXFys5cuX6+abb9aHH3641/NTl39Ox4oVKxQIBHT99df3GLm49tprM3qPYDCo6667rsd73HLLLWpoaNDxxx8vSVq0aJEKCgr0i1/8osc8l//6r/9KO5iSdo+8pI6y3HXXXXvNPwIAjH+M4ACAS913331688031dXVpbq6Oj3yyCN66KGHVF1drT/96U8Kh8OSdt9D5pBDDtG8efN0/vnna8qUKaqrq9NTTz2lDz74YK97wQykqKhIl112ma666iqdcMIJOu644/Tiiy/qvvvuS3s0pKioSFdccYWuvPJKHXvssfr4xz+uNWvW6MYbb9TixYt15plnSto9J+fb3/62Lr74Yh155JE6/fTTtX79et16662aOnVqj0UO+nPCCSfoO9/5js455xwddNBBeuWVV/Rf//VfPUaPAAATAwEOALiUufdKMBhUfn6+5s2bp2uvvVbnnHOOsrOz7fPmzJmj5557TldeeaVuvfVWbd++XcXFxVq4cOGg79/yve99T+FwWD/72c/097//XQceeKAefPBBO/KSjm9/+9sqKirST3/6U335y19Wfn6+LrjgAn3/+9/vsYLZRRddJMdx9KMf/UiXXXaZ9ttvP/3pT3/SP//zP9sgbiD/9m//pubmZt1xxx363e9+p/33319//etf9bWvfS3j7w4AGFseh5mPAACXSSQSKioq0imnnKJf/OIXY10cAMAoYg4OAGBCa2tr22v+zG233aYdO3Zo+fLlY1MoAMCYYQQHADChPfroo/ryl7+s0047TQUFBXrhhRd0yy23aPbs2Xr++ecVDAbHuogAgFHEHBwAwIQ2efJkVVVV6brrrtOOHTuUn5+vs846S1dffTXBDQDsgxjBAQAAAOAazMEBAAAA4BoEOAAAAABcY9zNwUkkEtq0aZOys7PTvkEbAAAAAPdxHEdNTU0qLy+X15ve2My4C3A2bdqkqqqqsS4GAAAAgHFiw4YNqqysTOu54y7AMXfX3rBhg3Jycsa4NAAAAADGSmNjo6qqqmyMkI5xF+CYtLScnBwCHAAAAAAZTV1hkQEAAAAArkGAAwAAAMA1CHAAAAAAuAYBDgAAAADXIMABAAAA4BoEOAAAAABcgwAHAAAAgGsQ4AAAAABwDQIcAAAAAK5BgAMAAADANQhwAAAAALgGAQ4AAAAA1yDAAQAAAOAaBDgAAAAAXIMABwAAAIBrEOAAAAAAcA0CHAAAAACuQYADAAAAwDUIcAAAAAC4BgEOAAAAANfwj3UB0D/HcdTa2T3WxQAAABNcJOCTx+MZ62IAI44AZxxzHEef+NlTev69nWNdFAAAMMEtqs7TXV9YRpAD1yNFbRxr7ewmuAEAAMPiufd2khWCfQIjOBPEc19foWjQN9bFAAAAE0xLR7
cWfe9vY10MYNQQ4EwQ0aBP0SC7CwAAAOgPKWoAAAAAXIMABwAAAIBrEOAAAAAAcA0CHAAAAACuQYADAAAAwDUIcAA
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
"# Columns the clustering is computed on.\n",
"cluster_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
"X = df[cluster_features]\n",
"\n",
"# Agglomerative clustering into three groups. After fit(), the per-row\n",
"# cluster ids are available on the estimator as labels_.\n",
"agg_clustering = AgglomerativeClustering(n_clusters=3)\n",
"agg_clustering.fit(X)\n",
"clusters = agg_clustering.labels_\n",
"\n",
"# Store each row's cluster id in the dataset as a new column.\n",
"df['Cluster'] = clusters\n",
|
|||
|
"\n",
|
|||
"# Ward linkage over the same feature matrix; the dendrogram below draws\n",
"# this merge tree.\n",
"linked = linkage(X, method='ward')\n",
"\n",
"dendrogram_options = {\n",
"    'orientation': 'top',\n",
"    'distance_sort': 'descending',\n",
"    'show_leaf_counts': True,\n",
"}\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(linked, **dendrogram_options)\n",
"plt.title('Dendrogram')\n",
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAAlWCAYAAAD5nOoXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wURf/A8c/s1fQCgdB7byooTYoiIE1QEREV7L09dn+PvT72x/LYFbGgFCtgQ8EKKqAggvQiHUJ6v7ud3x+bhFxyd7kUEoLf9+uVF2Rvd+a7s3ubmZ3ZWaW11gghhBBCCCGEEEIIIYQQQtQDRl0HIIQQQgghhBBCCCGEEEIIES7p2BBCCCGEEEIIIYQQQgghRL0hHRtCCCGEEEIIIYQQQgghhKg3pGNDCCGEEEIIIYQQQgghhBD1hnRsCCGEEEIIIYQQQgghhBCi3pCODSGEEEIIIYQQQgghhBBC1BvSsSGEEEIIIYQQQgghhBBCiHpDOjaEEEIIIYQQQgghhBBCCFFvSMeGEEIIIYQQQgghhBBCCCHqDenYEEIcEVq3bs0FF1xQ12HUiccee4zOnTtjmmZdh3JE+/bbb1FKMXfu3FrNVynFvffee1jS3rZtG0op3nzzzcOSflVdcMEFtG7dOux1o6OjK1zv4MGDREVF8dlnn1UzOiGEEEKI+ufee+9FKUVKSkqNpVmZOls4duzYgdvt5qeffqqxNI9WrVu3ZuzYsbWaZ00f77KGDh3K0KFDD1v6VVHcBvz222/DXjec9uLkyZOZNGlSDUQohKhL0rEhhDisNm/ezOWXX07btm1xu93ExsYycOBAnnnmGfLy8molhtzcXO69996wKkO1LTMzk0cffZTbbrsNwzh0SVZK+f1ERUXRtWtXHnzwQXJzc/3SuOCCC1BKERsbG7BMN27cWJLOE088UbK8rjoKyiqOo/jH4XDQtm1bpk6dypYtW+o0trpQUaO3LhpRNfEdatCgAZdccgl33XVXzQUmhBBCiHqpbF032E9d19+HDh1K9+7d6zSG2nT//ffTt29fBg4cWLKsuK1R/GO322nRogWTJ09m7dq1ftuXrte/8847AfMYOHAgSqly5VoXddxAWrdu7be/jRo1YtCgQXz00Ud1HVqtKx6EVboNWdrh6KwLx8yZM/nvf/9brTRuu+02PvjgA1atWlUzQQkh6oS9rgMQQhy9FixYwFlnnYXL5WLq1Kl0796dwsJCfvzxR2655RbWrFnDK6+8ctjjyM3N5b777gM44kagvPHGG3i9Xs4555xynw0fPpypU6cCkJ2dzQ8//MBdd93FqlWrmDNnjt+6drud3Nxc5s2bV27kybvvvovb7SY/P//w7UgNuO666zj++OPxeDz89ttvvPLKKyxYsIDVq1fTtGnTOosrLy8Pu/2f/eeypr5DV1xxBc8++yyLFi3i5JNPrqHohBBCCFHfvP32236/v/XWWyxcuLDc8i5dutRmWP9oBw4cYMaMGcyYMaPcZy6Xi9deew0Ar9fL5s2beemll/jiiy9Yu3Ztubq62+1m5syZnHfeeX7Lt23bxpIlS3C73YdvR2rAMcccw0033QTA7t27efnllznjjDN48cUXueKKK+osrldffVWe8sfq2Pjzzz+54YYbqpzGscceS58+fXjyySd56623ai44IUSt+mffqRFCHDZbt25l8uTJtGrVikWLFtGkSZOSz66++mo2bdrEggUL6jDC6svJySEqKqpaaUyfPp3TTjstYOW+Y8eOfo2BK664gsLCQj788EPy8/P9tnG5XAwcOJD33nuvXMfGzJkzGTNmDB988EG1Yj3cBg0axMSJEwG48MIL6dixI9dddx0zZszgjjvuqLO4jvSGV33SpUsXunfvzptvvikdG0IIIcQ/WNkb3j///DMLFy4st7ys3NxcIiMjD2do/1jvvPMOdrudcePGlfvMbreXOzb9+vVj7NixLFiwgEsvvdTvs9
GjR/Ppp5+SkpJCw4YNS5bPnDmTxo0b06FDB9LS0g7PjtSAZs2a+e3v1KlTad++PU8//XSddmw4HI46y/toNGnSJO655x5eeOGFsKbWFUIceWQqKiHEYfHYY4+RnZ3N66+/7tepUax9+/Zcf/31Qbcvfqy1rDfffBOlFNu2bStZtnz5ckaOHEnDhg2JiIigTZs2XHTRRYA1KigpKQmA++67r+SR4tLvTFi3bh0TJ04kMTERt9tNnz59+PTTTwPm+91333HVVVfRqFEjmjdvDkBWVhY33HADrVu3xuVy0ahRI4YPH85vv/0Wsoy2bt3KH3/8wSmnnBJyvdKSk5NLHgEva8qUKXz++eekp6eXLFu2bBkbN25kypQpYedR2r59+7Db7SWj9Utbv349Simef/55ADweD/fddx8dOnTA7XbToEEDTjzxRBYuXFilvItvfG/dutVvuWmaPPTQQzRv3hy3282wYcPYtGlTyef33HMPDoeDAwcOlEvzsssuIz4+vuTplVDnTrFA79jYtWsXF198MU2bNsXlctGmTRuuvPJKCgsLAUhNTeXmm2+mR48eREdHExsby6hRow7bo86mafLf//6Xbt264Xa7ady4MZdffnm5BuMnn3zCmDFjSuJu164dDzzwAD6fL2ja4XyHwCqTCRMmEB0dTVJSEjfffHPAdIcPH868efPQWld/x4UQQghx1CqeBmrFihUMHjyYyMhI/u///g8I/g60QO/tS09P54YbbqBFixa4XC7at2/Po48+WmMj3//44w8uuOCCkql3k5OTueiiizh48GDA9VNSUpg0aRKxsbE0aNCA66+/PuCT1e+88w69e/cmIiKCxMREJk+ezI4dOyqM5/3336d3797ExMQQGxtLjx49eOaZZyrc7uOPP6Zv375h3+BNTk4GCNguGT9+PC6Xq9xT5jNnzmTSpEnYbLaw8ihr7NixtG3bNuBn/fv3p0+fPiW/L1y4kBNPPJH4+Hiio6Pp1KlTyflTWcnJyXTp0qVcuwTgxx9/5IQTTsDtdtO2bVu/0f9btmxBKcXTTz9dbrslS5aglOK9994DwmtTBnrHhmmaPPPMM/To0QO3201SUhKnnnoqy5cvL1ln+vTpnHzyyTRq1AiXy0XXrl158cUXq1QW4fjll1849dRTiYuLIzIykiFDhpR7b8v27du56qqr6NSpExERETRo0ICzzjrLr50fyNChQ1mwYAHbt28vaZcEKpNQ7cViw4cPJycnp8rtVSFE3ZMnNoQQh8W8efNo27YtAwYMOKz57N+/nxEjRpCUlMTtt99OfHw827Zt48MPPwQgKSmJF198kSuvvJLTTz+dM844A4CePXsCsGbNGgYOHEizZs24/fbbiYqKYvbs2UyYMIEPPviA008/3S+/q666iqSkJO6++25ycnIA60mKuXPncs0119C1a1cOHjzIjz/+yF9//cVxxx0XNPYlS5YABF0nPz+/ZL7SnJwcfvrpJ2bMmMGUKVMCNiDOOOMMrrjiCj788MOSm/MzZ86kc+fOIeMIpXHjxgwZMoTZs2dzzz33+H02a9YsbDYbZ511FmB1Rj3yyCNccsklnHDCCWRmZrJ8+XJ+++03hg8fXum8N2/eDFjvZijtP//5D4ZhcPPNN5ORkcFjjz3Gueeeyy+//ALA+eefz/3338+sWbO45pprSrYrLCxk7ty5nHnmmbjd7grPnWB2797NCSecQHp6OpdddhmdO3dm165dzJ07l9zcXJxOJ1u2bOHjjz/mrLPOok2bNuzbt4+XX36ZIUOGBHxcP5DU1NSAywM1wi+//HLefPNNLrzwQq677jq2bt3K888/z++//85PP/1UMrrrzTffJDo6mhtvvJHo6GgWLVrE3XffTWZmJo8//njA/Cr6DgH4fD5GjhxJ3759eeKJJ/j666958sknadeuHVdeeaVfer179+bpp59mzZo1/6g5q4UQQghReQcPHmTUqFFMnjyZ8847j8aNG1dq+9
zcXIYMGcKuXbu4/PLLadmyJUuWLOGOO+5gz5491Z6nH6wb6Fu2bOHCCy8kOTm5ZLrdNWvW8PPPP5cbrDVp0iRat27
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x2400 with 8 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.preprocessing import LabelEncoder\n",
|
|||
|
"from scipy.cluster.hierarchy import linkage\n",
|
|||
|
"from scipy.cluster import hierarchy\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
"# Load the dataset and keep only its first 15000 rows.\n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(15000)\n",
"\n",
"# Replace every object-dtype column with integer codes. The fitted encoder\n",
"# for each column is stored in label_encoders under the column name.\n",
"label_encoders = {}\n",
"categorical_columns = df.select_dtypes(include=['object']).columns\n",
"for column in categorical_columns:\n",
"    le = LabelEncoder().fit(df[column])\n",
"    df[column] = le.transform(df[column])\n",
"    label_encoders[column] = le\n",
"\n",
"# Columns the clustering is computed on.\n",
"feature_columns = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
"X = df[feature_columns]\n",
|
|||
|
"\n",
|
|||
"# Build the Ward merge tree over the selected features.\n",
"linkage_matrix = linkage(X, method='ward')\n",
"\n",
"# Cut the tree into at most n_flat_clusters flat clusters. Cutting at a fixed\n",
"# merge distance of 10 is expected to leave far more than three clusters on\n",
"# 15000 rows, and relabelling those ids as 1 -> 0, 3 -> 1, anything else -> 2\n",
"# would fold all but two of them into one label, hiding the real partition.\n",
"n_flat_clusters = 3\n",
"result = hierarchy.fcluster(linkage_matrix, n_flat_clusters, criterion=\"maxclust\")\n",
"\n",
"# fcluster numbers clusters from 1. Shift to 0-based ids and keep result a\n",
"# plain list of ints, which is what the scatter plots receive as c=.\n",
"result = [int(cluster_id) - 1 for cluster_id in result]\n",
|
|||
|
"\n",
|
|||
"# Compare the cluster assignment with the HeartDisease column on a 4x2 grid.\n",
"# Each row is one feature pair: the left panel is coloured by result, the\n",
"# right panel by df['HeartDisease'].\n",
"panel_pairs = [\n",
"    ('BMI', 'PhysicalHealth'),\n",
"    ('MentalHealth', 'SleepTime'),\n",
"    ('BMI', 'MentalHealth'),\n",
"    ('PhysicalHealth', 'SleepTime'),\n",
"]\n",
"colourings = [('Clusters', result), ('True Labels', df['HeartDisease'])]\n",
"\n",
"plt.figure(figsize=(16, 24))\n",
"\n",
"# Panels are numbered from 1 and filled row by row.\n",
"panel = 0\n",
"for x_col, y_col in panel_pairs:\n",
"    for label, colours in colourings:\n",
"        panel += 1\n",
"        plt.subplot(4, 2, panel)\n",
"        plt.scatter(X[x_col], X[y_col], c=colours, cmap='viridis')\n",
"        plt.title(f'{label} ({x_col} vs {y_col})')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aimenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|