AIM-PIbd-32-Chubykina-P-P/lab_5/lab5.ipynb

777 lines
1.4 MiB
Plaintext
Raw Normal View History

2024-11-23 11:55:22 +04:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',\n",
" 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',\n",
" 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',\n",
" 'Asthma', 'KidneyDisease', 'SkinCancer'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HeartDisease</th>\n",
" <th>BMI</th>\n",
" <th>Smoking</th>\n",
" <th>AlcoholDrinking</th>\n",
" <th>Stroke</th>\n",
" <th>PhysicalHealth</th>\n",
" <th>MentalHealth</th>\n",
" <th>DiffWalking</th>\n",
" <th>Sex</th>\n",
" <th>AgeCategory</th>\n",
" <th>Race</th>\n",
" <th>Diabetic</th>\n",
" <th>PhysicalActivity</th>\n",
" <th>GenHealth</th>\n",
" <th>SleepTime</th>\n",
" <th>Asthma</th>\n",
" <th>KidneyDisease</th>\n",
" <th>SkinCancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>16.60</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>55-59</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>5.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>No</td>\n",
" <td>20.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>7.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>26.58</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>20.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>8.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>24.21</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>6.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>23.71</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>40-44</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>8.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Yes</td>\n",
" <td>28.87</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>Black</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>12.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>21.63</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>15.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>70-74</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>4.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>No</td>\n",
" <td>31.64</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>9.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>No</td>\n",
" <td>26.45</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No, borderline diabetes</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>No</td>\n",
" <td>40.69</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Good</td>\n",
" <td>10.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
"0 No 16.60 Yes No No 3.0 \n",
"1 No 20.34 No No Yes 0.0 \n",
"2 No 26.58 Yes No No 20.0 \n",
"3 No 24.21 No No No 0.0 \n",
"4 No 23.71 No No No 28.0 \n",
"5 Yes 28.87 Yes No No 6.0 \n",
"6 No 21.63 No No No 15.0 \n",
"7 No 31.64 Yes No No 5.0 \n",
"8 No 26.45 No No No 0.0 \n",
"9 No 40.69 No No No 0.0 \n",
"\n",
" MentalHealth DiffWalking Sex AgeCategory Race \\\n",
"0 30.0 No Female 55-59 White \n",
"1 0.0 No Female 80 or older White \n",
"2 30.0 No Male 65-69 White \n",
"3 0.0 No Female 75-79 White \n",
"4 0.0 Yes Female 40-44 White \n",
"5 0.0 Yes Female 75-79 Black \n",
"6 0.0 No Female 70-74 White \n",
"7 0.0 Yes Female 80 or older White \n",
"8 0.0 No Female 80 or older White \n",
"9 0.0 Yes Male 65-69 White \n",
"\n",
" Diabetic PhysicalActivity GenHealth SleepTime Asthma \\\n",
"0 Yes Yes Very good 5.0 Yes \n",
"1 No Yes Very good 7.0 No \n",
"2 Yes Yes Fair 8.0 Yes \n",
"3 No No Good 6.0 No \n",
"4 No Yes Very good 8.0 No \n",
"5 No No Fair 12.0 No \n",
"6 No Yes Fair 4.0 Yes \n",
"7 Yes No Good 9.0 Yes \n",
"8 No, borderline diabetes No Fair 5.0 No \n",
"9 No Yes Good 10.0 No \n",
"\n",
" KidneyDisease SkinCancer \n",
"0 No Yes \n",
"1 No No \n",
"2 No No \n",
"3 No Yes \n",
"4 No No \n",
"5 No No \n",
"6 No Yes \n",
"7 No No \n",
"8 Yes No \n",
"9 No No "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd \n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\")\n",
"print(df.columns)\n",
"\n",
"display(df.head(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сегментация пациентов по рискам сердечно-сосудистых заболеваний\n",
"Цель: Определить группы пациентов с различными уровнями риска развития сердечно-сосудистых заболеваний на основе их демографических данных, образа жизни и состояния здоровья.\n",
"\n",
"Пример:\n",
"\n",
"Кластер 1: Пациенты с высоким риском (курение, высокий ИМТ, низкая физическая активность).\n",
"\n",
"Кластер 2: Пациенты со средним риском (умеренное курение, средний ИМТ, средняя физическая активность).\n",
"\n",
"Кластер 3: Пациенты с низким риском (отсутствие вредных привычек, нормальный ИМТ, высокая физическая активность).\n",
"\n",
"Бизнес-применение:\n",
"\n",
"Разработка персонализированных программ профилактики и лечения для каждой группы пациентов.\n",
"\n",
"Таргетированная реклама медицинских услуг и продуктов для улучшения образа жизни."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"## почистила данные чут чут\n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
"global df_cleaned\n",
"df_cleaned = df.dropna()\n"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAASlCAYAAADgeltjAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxTVf7/8XfapknbNKUtbVhCoYCssonCuKAiCuK41n0ZQRl/zijqDDIqft0YHXEfd3HGBR03RHEdxRW3AcUFxI3VQi1LgbY0TdukaZPfHzWxIV1ps5XX8/HoQ3OXcz733Jtw7/3ce47B5/P5BAAAAAAAAAAAEOMSoh0AAAAAAAAAAABAW5DUAAAAAAAAAAAAcYGkBgAAAAAAAAAAiAskNQAAAAAAAAAAQFwgqQEAAAAAAAAAAOICSQ0AAAAAAAAAABAXSGoAAAAAAAAAAIC4QFIDAAAAAAAAAADEBZIaAAAAAAAAAAAgLpDUAACEzfTp02WxWCJa54IFC2QwGLRp06awlH/TTTfJYDCEpeyOMBgMuummm9q87MyZM8MbEAAAAOLO9OnT1a9fv2iHEWTTpk0yGAxasGBBm5e96667wh9YmPXr10/Tp0+PdhjtEqvXSgC6HpIaANAJ/DfSG//l5uZq4sSJevvtt0OW9y/zxz/+scny/u///i+wzK5duwLTo5EkaMr06dODttVqtWrUqFG6++675Xa7ox1exPXr10/HH398k/M++ugjGQwGvfTSSxGNadmyZbrpppu0e/fuiNYLAACwL2l8HfDZZ5+FzPf5fOrTp48MBkOz54ud5ccff9RNN93U6Q/3tPRAjH/7v/rqq06tszVvvfVWmx/oiTXfffedTjvtNPXt21dms1m9e/fWMcccowceeCDaoTWpX79+Ide6Tf21JekEAJ0lKdoBAEBX8ve//135+fny+XwqKSnRggULdNxxx+mNN94IuYgxm816+eWX9fDDDys5OTlo3vPPPy+z2SyXyxXJ8NvFZDLpsccekyTt3r1bL7/8smbPnq0vv/xSL7zwQtTi+sMf/qCzzjpLJpMpajHEgmXLlmnu3LmaPn26unXrFu1wAAAAujSz2aznnntOhx12WND0jz/+WMXFxRE5N/3xxx81d+5cHXnkkTH3tkVne+utt/TQQw/FXWJj2bJlmjhxovLy8nTRRRepR48e+uWXX/T555/rvvvu02WXXRbtEEPce++9cjqdgc9vvfWWnn/+ef3zn/9U9+7dA9MPOeQQnXfeebrmmmuiESaAfQxJDQDoRFOnTtWBBx4Y+DxjxgzZbDY9//zzIUmNY489Vq+//rrefvttnXTSSYHpy5YtU2FhoU499VS9/PLLEYu9vZKSknTeeecFPl9yySUaP368Fi5cqHvuuUe9evWKSlyJiYlKTEyMSt0AAADYNx133HFatGiR7r//fiUl/Xar5bnnntPYsWOD3r7Gvusf//iHMjIy9OWXX4Y8eLRjx47oBNWKk08+Oejz9u3b9fzzz+vkk09uMnnW+PgHgHCh+ykACKNu3bopJSWlyRO73r176/DDD9dzzz0XNP3ZZ5/ViBEjtP/+++9VnXfddZcMBoM2b94cMm/OnDlKTk5WeXm5JGn9+vU69dRT1aNHD5nNZtntdp111lmqqKhod70JCQk68sgjJSnklfctW7bo5JNPlsViUU5OjmbPnq36+npJDa/k9+vXLyix4+dyuZSRkaGLL744MO2BBx7Q8OHDlZqaqszMTB144IFBbdjcmBpvv/22jjjiCKWnp8tqteqggw4KWu/TTz/V6aefrry8PJlMJvXp00d//etfVVNT0+62aIstW7bowgsvlM1mk8lk0vDhw/XEE08ELVNbW6sbbrhBY8eOVUZGhtLS0jRhwgQtXbq0xbJvuukm/e1vf5Mk5efnB14J37NNXn31Ve2///6B+pcsWdKp2wgAALCvOPvss1VaWqr33nsvMK22tlYvvfSSzjnnnCbX8Xq9uvfeezV8+HCZzWbZbDZdfPHFgXN1P39Xp5999pnGjRsns9ms/v376+mnnw4ss2DBAp1++umSpIkTJwbO/z766CNJ0muvvabf//736tWrl0wmkwYMGKCbb745cE7e2dasWaPTTjtNWVlZMpvNOvDAA/X6668HLVNWVqbZs2drxIgRslgsslqtmjp1qr799tsWy54+fboeeughSQrq/mhP//rXvzRgwACZTCYddNBB+vLLL1ss96uvvpLBYNBTTz0VMu+dd96RwWDQm2++KUmqrKzUX/7yF/Xr108mk0m5ubk65phj9M0337RYx8aNGzV8+PAm36TOzc1tcV2p4Q35v/zlL+rTp49MJpMGDhyo22+/XV6vN2i59h5b7777rkaPHi2z2axhw4Zp8eLFrcbSlKbG1PB3X7Zo0SINGzZMKSkpOvjgg/Xdd99Jkh599FENHDhQZrNZRx55ZJPdp33xxRc69thjlZGRodTUVB1xxBH63//+t1cxAugaSJ8CQCeqqKjQrl275PP5tGPHDj3wwANyOp1BbzQ0ds455+iKK66Q0+mUxWJRXV2dFi1apFmzZu1111NnnHGGrrrqKr344ouBG9t+L774oiZPnqzMzEzV1tZqypQpcrvduuyyy9SjRw9t2bJFb775pnbv3q2MjIx2171x40ZJUnZ2dmBafX29pkyZovHjx+uuu+7S+++/r7vvvlsDBgzQn//8ZxkMBp133nm64447VFZWpqysrMC6b7zxhhwOR6D9/v3vf+vyyy/XaaedpiuuuEIul0urV6/WF1980ezFotRwkXfhhRdq+PDhmjNnjrp166aVK1dqyZIlgfUWLVqk6upq/fnPf1Z2drZWrFihBx54QMXFxVq0aFGr2+7xeJp8Aq+pBFFJSYl+97vfBU7wc3Jy9Pbbb2vGjBlyOBz6y1/+IklyOBx67LHHdPbZZ+uiiy5SZWWlHn/8cU2ZMkUrVqzQ6NGjm4yloKBA69atC3ktPCcnJ7DMZ599psWLF+uSSy5Renq67r//fp166qkqKioK2n8AAABoXb9+/XTwwQfr+eef19SpUyU1PFRTUVGhs846S/fff3/IOhdffLEWLFigCy64QJdffrkKCwv14IMPauXKlfrf//4no9EYWHbDhg067bTTNGPGDE2bNk1PPPGEpk+frrFjx2r48OE6/PDDdfnll+v+++/Xtddeq6FDh0pS4L8LFiyQxWLRrFmzZLFY9OGHH+qGG26Qw+HQnXfe2er2uVyuJs91G3dL5PfDDz/o0EMPVe/evXXNNdcoLS1NL774ok4++WS9/PLLOuWUUyRJP//8s1599VWdfvrpys/PV0lJiR599FEdccQR+vHHH5t98/viiy/W1q1b9d577+k///lPk8s899xzqqys1MUXXyyDwaA77rhDBQUF+vnnn4PatbEDDzxQ/fv314svvqhp06YFzVu4cKEyMzM1ZcoUSdKf/vQnvfTSS5o5c6aGDRum0tJSffbZZ/rpp590wAEHNNuOffv21fLly/X999+3+yG26upqHXHEEdqyZYsuvvhi5eXladmyZZozZ462bdume++9N6iN2npsrV+/Xmeeeab+9Kc/adq0aXryySd1+umna8mSJTrmmGPaFWNzPv30U73++uu69NJLJUnz5s3T8ccfr6uuukoPP/ywLrnkEpWXl+uOO+7QhRdeqA8//DCw7ocffqipU6dq7NixuvHGG5WQkKAnn3xSRx11lD799FONGzeuU2IEEGd8AIAOe/LJJ32SQv5MJpNvwYIFIctL8l166aW+srIyX3Jysu8///mPz+fz+f773//6DAaDb9OmTb4bb7zRJ8m3c+fOwHrTpk3zpaWltRrPwQcf7Bs7dmzQtBUrVvgk+Z5++mmfz+fzrVy50ifJt2jRonZvrz+OnTt3+nbu3OnbsGGD79Zbb/UZDAbfyJEjg5aT5Pv73/8etP6YMWOC4lu7dq1Pku+RRx4JWu7EE0/09evXz+f1en0+n8930kkn+YYPH95ibP59UVhY6PP5fL7du3f70tPTfePHj/fV1NQELesv1+fz+aqrq0PKmjd
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df = df_cleaned\n",
"\n",
"# кодирование категориальных переменных\n",
"global df_encoded\n",
"df_encoded = pd.get_dummies(df, columns=['Smoking', 'AlcoholDrinking', 'Stroke', 'DiffWalking', 'Sex', 'AgeCategory', 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'Asthma', 'KidneyDisease', 'SkinCancer'], drop_first=True)\n",
"\n",
"# числовые признаки для визуализации\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"# сравнение зависимости BMI и PhysicalHealth\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_encoded['BMI'], y=df_encoded['PhysicalHealth'], alpha=0.6)\n",
"plt.title('BMI vs PhysicalHealth')\n",
"\n",
"# сравнение зависимости MentalHealth и SleepTime\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_encoded['MentalHealth'], y=df_encoded['SleepTime'], alpha=0.6)\n",
"plt.title('MentalHealth vs SleepTime')\n",
"\n",
"# сравнение зависимости PhysicalHealth и SleepTime\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_encoded['PhysicalHealth'], y=df_encoded['SleepTime'], alpha=0.6)\n",
"plt.title('PhysicalHealth vs SleepTime')\n",
"\n",
"# сравнение зависимости BMI и MentalHealth\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_encoded['BMI'], y=df_encoded['MentalHealth'], alpha=0.6)\n",
"plt.title('BMI vs MentalHealth')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"## стандартизация\n",
"scaler = StandardScaler()\n",
"df_encoded[numeric_features] = scaler.fit_transform(df_encoded[numeric_features])"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0wAAAJwCAYAAAC6UuHVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADBNklEQVR4nOzdd5wU9f3H8ffubbm93rg7Do7eRaSJICqEIip2xKgYQY0t2FvExFgSS9REjS2aKGKNYtTErlgjigVBrAhKkyrtgLu9Pr8//M24u7dzt7u3d7t393o+Hjy43Z2d+e7Md2a+n/k2h2EYhgAAAAAADTgTnQAAAAAASFYETAAAAABgg4AJAAAAAGwQMAEAAACADQImAAAAALBBwAQAAAAANgiYAAAAAMAGARMAAAAA2CBgAgAAAAAbBEwAAHQAX3zxhZ577jnr9dKlS/Xiiy8mLkEA0EYQMAFt0NNPPy2HwxH23+DBgxOdPABJaPfu3TrrrLO0aNEirVixQhdccIE+//zzRCcLAJKeK9EJABC7K6+8UgMHDrReX3/99QlMDYBkNmbMGOufJPXr109nnHFGglMFAMmPgAlowyZPnqzx48dbr//5z39q69atiUsQgKT23HPP6auvvpLf79fee+8tj8eT6CQBQNKjSR7QBlVXV0uSnM7ITuGdO3fqwgsvVGlpqbxer/r06aM///nPqq+vt5ZZvXq1HA6Hbr311gbfHzx4cFBgZrrmmmvCNgsMXXb8+PEaPHiwFi9erP33318+n089e/bU3//+9wbr3LJli04//XQVFRUpNTVV++yzj+bNmxe0jJnWcP8effRRSdJDDz0kh8Ohd999V2eddZby8/OVlZWlU045RTt27Aha33/+8x9NnTpVJSUl8nq96t27t/74xz+qrq6uwe9wOBw6+uijG6T7rLPOatAkMjCdgX1HJKmyslK5ubkN9vmaNWv0m9/8Rv3795fP51N+fr6mT5+u1atXN9hmOLfeeqv2339/5efny+fzacSIEXr66acbLGe3//70pz8FLTdr1qywy11zzTVBy7355ps68MADlZ6erpycHB111FH6+uuvg5Yx80thYaFqamqCPnviiSesdYcG/S+//LK17szMTE2dOlVffvllg3RmZGTo+++/15QpU5Senq6SkhJdd911Mgwj6n1kt39C8/jbb78th8Oht99+O+j7U6dObbCfQs+XzMxMjRo1qkHeMM+XULfeeqscDkeDvHDPPfdor732ktfrVUlJiWbPnq2dO3c2WKeZ5kGDBmnEiBH67LPPrLQ0JfD7puuvv15Op1OPP/540PuNnZ+hvyeSvCpJjz76qEaNGqW0tDTl5ubqoIMO0muvvSZJ6tGjR6PHqkePHtZ66uvrdfvtt2uvvfZSamqqioqKdNZZZzW4JvTo0UOHH364XnvtNQ0dOlSpqakaNGiQnnnmmaDlzOtM4DGpr6/XkCFD5HA49NBDD1nvX3PNNRo0aJAyMjKUlZWl0aNHNzj2//vf/zR9+nR169ZNXq9XpaWluuiii+T3+4OWM/N7KLO5dmB+jCWPBtqzZ4+Ki4vDrgPoKKhhAtogM2Dyer1NLltRUaFx48Zp/fr1Ouuss9StWze9//77mjNnjjZu3Kjbb7+92em59957rZv3nDlzwi6zY8cOHXbYYTr++ON14okn6qmnntI555wjj8ej0047TZLk9/s1fvx4rVy5Uueee6569uyp+fPna9asWdq5c6cuuOCCoHWeeOKJOuyww4LeGzt2bNDrc889Vzk5Obrmmmu0fPly3XvvvVqzZo1ViJB+KvRkZGTo4osvVkZGht5880394Q9/0K5du3TLLbcErS81NVUvvviitmzZosLCQivdTz75pFJTU8P+9tTUVM2dOzco0HrmmWdUWVnZYNmPP/5Y77//vk444QR17dpVq1ev1r333qvx48frq6++UlpaWthtmO644w4deeSRmjFjhqqrq/Wvf/1L06dP1wsvvKCpU6cGLTt58mSdcsopQe8NHTq0wToLCgp02223Wa9/9atfBX2+YMECHXrooerVq5euueYa+f1+3XnnnRo7dqw+/fTToAKr9FNfmhdeeEHHHHOM9d7cuXOVmpraYJ888sgjmjlzpqZMmaI///nPqqio0L333qsDDjhAS5YsCVp3XV2dDjnkEI0ePVo333yzXnnlFV199dWqra3VddddF9U+euSRR6zl//e//+n+++/XbbfdpoKCAklSUVFRg/1kevfdd/XSSy/Zfm6ue+vWrbrnnns0ffp0ffHFF+rfv7/td+xcc801uvbaazVp0iSdc845Vh7/+OOPtXDhQrndbtvv/va3v416e6a5c+fq97//vf7yl7/opJNOCrvMmWeeqQMPPFDST/n92WefDfo80rx67bXX6pprrtH++++v6667Th6PRx9++KHefPNNHXzwwbr99tu1Z88eSdLXX3+tG264Iai5cmBgcdZZZ+mhhx7SqaeeqvPPP1+rVq3SXXfdpSVLljTYXytWrNAvf/lLnX322Zo5c6bmzp2r6dOn65VXXtHkyZNt980jjzwStm9YeXm5jjnmGPXo0UN+v18PPfSQpk2bpg8++ECjRo2SJM2fP18VFRU655xzlJ+fr48++kh33nmnfvjhB82fP7/RYxKNpvJooL/85S/avHlz3LYNtEkGgDbn9ttvNyQZn332WdD748aNM/baa6+g9/74xz8a6enpxrfffhv0/hVXXGGkpKQYa9euNQzDMFatWmVIMm655ZYG29trr72McePGNXj/yiuvNCQZW7dubXTZcePGGZKMv/zlL9Z7VVVVxtChQ43CwkKjuro66Hc9+uij1nLV1dXGmDFjjIyMDGPXrl1NptU0d+5cQ5IxYsQIa/2GYRg333yzIcn4z3/+Y71XUVHR4PtnnXWWkZaWZlRWVgb9jr322ssYMmSIceutt1rvP/LII0bXrl2NAw88MGj/m+k88cQTDZfLZWzatMn6bOLEicZJJ53U4HeES8sHH3xgSDIefvhh299r9/3q6mpj8ODBxoQJE4Lel2TMnj27yfXNmDHD6NmzZ4PvXn311dZr8zhu27bNeu+zzz4znE6nccopp1jvXX311db+OPzww63316xZYzidTuPEE080JBk//vijYRiGsXv3biMnJ8c444wzgra/adMmIzs7O+j9mTNnGpKM8847z3qvvr7emDp1quHxeKx1RrOPTGZeWrVqVYPP3nrrLUOS8dZbb1nv7bfffsahhx7aYD+Zvz/Qa6+9ZkgynnrqKeu9cOexYRjGLbfcEpSOLVu2GB6Pxzj44IONuro6a7m77rrLkGQ8+OCDQesMPC9feuklQ5JxyCGHNEhTOIHff/HFFw2Xy2VccsklYZddsWKFIcmYN29eo789kuOwYsUKw+l0Gsccc0zQbzSMn45vqHDHw/S///3PkGQ89thjQe+/8sorDd7v3r27Icn497//bb1XVlZmdO7c2Rg2bJj1XmjeqKysNLp162Yd/7lz5zZIh2nLli2GpKBrSbjz/8YbbzQcDoexZs0a672ZM2ca6enpDZadP39+g9/fnDy6ZcsWIzMz01o23H4FOgKa5AFt0LZt2yRJnTp1anLZ+fPn68ADD1Rubq62bt1q/Zs0aZLq6ur07rvvBi1fUVERtNzWrVsbNE0zmbUBdjUrgVwul8466yzrtcfj0VlnnaUtW7Zo8eLFkqSXXnpJxcXFOvHEE63l3G63zj//fO3Zs0fvvPNOk9sJdeaZZwY9NT7nnHPkcrmCnq76fD7r7927d2vr1q068MADVVFRoW+++abBOk899VTNnTvXej137lzNnDnTtonk8OHDtddee1k1C2vWrNFbb72lWbNmNVg2MC01NTXatm2b+vTpo5ycHH366adN/t7A7+/YsUNlZWU68MADI/puONXV1Y3WZG7cuFFLly7VrFmzlJeXZ70/ZMgQTZ48OexT7NNOO02vvPKKNm3aJEmaN2+exowZo379+gUt9/rrr2vnzp068cQTg/JjSkqK9ttvP7311lsN1n3uuedafzscDp177rmqrq7WggU
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" HeartDisease BMI PhysicalHealth MentalHealth SleepTime \\\n",
"0 No -1.903749 -0.178909 3.477436 -1.363066 \n",
"1 No -1.334675 -0.509405 -0.432138 -0.147128 \n",
"2 No -0.385204 1.693903 3.477436 0.460840 \n",
"3 No -0.745820 -0.509405 -0.432138 -0.755097 \n",
"4 No -0.821900 2.575227 -0.432138 0.460840 \n",
"\n",
" Smoking_Yes AlcoholDrinking_Yes Stroke_Yes DiffWalking_Yes Sex_Male \\\n",
"0 True False False False False \n",
"1 False False True False False \n",
"2 True False False False True \n",
"3 False False False False False \n",
"4 False False False True False \n",
"\n",
" ... Diabetic_Yes (during pregnancy) PhysicalActivity_Yes GenHealth_Fair \\\n",
"0 ... False True False \n",
"1 ... False True False \n",
"2 ... False True True \n",
"3 ... False False False \n",
"4 ... False True False \n",
"\n",
" GenHealth_Good GenHealth_Poor GenHealth_Very good Asthma_Yes \\\n",
"0 False False True True \n",
"1 False False True False \n",
"2 False False False True \n",
"3 True False False False \n",
"4 False False True False \n",
"\n",
" KidneyDisease_Yes SkinCancer_Yes Cluster \n",
"0 False True 2 \n",
"1 False False 1 \n",
"2 False False 2 \n",
"3 False True 1 \n",
"4 False False 0 \n",
"\n",
"[5 rows x 39 columns]\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.cluster import AgglomerativeClustering\n",
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
"import matplotlib.pyplot as plt\n",
"\n",
"df = df_encoded\n",
"\n",
"X = df.drop(columns=['HeartDisease'])\n",
"\n",
"agg_clustering = AgglomerativeClustering(n_clusters=3)\n",
"clusters = agg_clustering.fit_predict(X)\n",
"\n",
"df_encoded['Cluster'] = clusters\n",
"\n",
"Z = linkage(X, 'ward')\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(Z)\n",
"plt.title('Дендрограмма агломеративной кластеризации')\n",
"plt.xlabel('Объекты')\n",
"plt.ylabel('Расстояние')\n",
"plt.show()\n",
"\n",
"print(df_encoded.head())"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAASlCAYAAAALTeBgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3RU1drH8d/MZErapFda6F26BQRBEPRaLoqAHWwvV8Fe8dpRsfeLol7FAogI1isICohIURRRBKmhh/ReJsnMef+IjAxJgECSSeD7WStLZ+9TnjlnZjj7PGfvbTIMwxAAAAAAAAAAAEAjYPZ3AAAAAAAAAAAAAEeKxAYAAAAAAAAAAGg0SGwAAAAAAAAAAIBGg8QGAAAAAAAAAABoNEhsAAAAAAAAAACARoPEBgAAAAAAAAAAaDRIbAAAAAAAAAAAgEaDxAYAAAAAAAAAAGg0SGwAAAAAAAAAAIBGg8QGAKBOjR07ViEhIfW6z2nTpslkMmn79u11sv2HH35YJpOpTrZ9LEwmkx5++OEjXnbChAl1GxAAAAAanbFjxyopKcnfYfjYvn27TCaTpk2bdsTLPvvss3UfWB1LSkrS2LFj/R1GjTTUthKA4w+JDQCoJftvph/4Fxsbq0GDBmnevHmVlt+/zHXXXVfl9v797397l8nIyPCW+yNRUJWxY8f6vFen06lu3brpueeek8vl8nd49S4pKUnnnXdelXVLliyRyWTSxx9/XK8xLV++XA8//LBycnLqdb8AAAAnkgPbAcuWLatUbxiGmjVrJpPJVO31Ym1Zv369Hn744Vp/wOdQD8Xsf/+rV6+u1X0ezldffXXED/U0NL///rsuvvhitWjRQg6HQ02aNNFZZ52lV155xd+hVSkpKalSW7eqvyNJPAFAbQnwdwAAcLx59NFH1bJlSxmGodTUVE2bNk3/+Mc/9MUXX1RqyDgcDs2ZM0dTpkyRzWbzqZs5c6YcDodKSkrqM/wasdvteuuttyRJOTk5mjNnju6880799NNP+vDDD/0W15VXXqlLLrlEdrvdbzE0BMuXL9cjjzyisWPHKjw83N/hAAAAHNccDodmzJih008/3af8u+++0+7du+vl2nT9+vV65JFHNHDgwAbX66K2ffXVV/rPf/7T6JIby5cv16BBg9S8eXNdf/31io+P165du7Ry5Uq99NJLuummm/wdYiUvvviiCgoKvK+/+uorzZw5Uy+88IKio6O95X379tUVV1yhe++91x9hAjjBkNgAgFp2zjnnqHfv3t7X1157reLi4jRz5sxKiY2zzz5bn3/+uebNm6d//vOf3vLly5crOTlZI0aM0Jw5c+ot9poKCAjQFVdc4X1944036pRTTtGsWbP0/PPPKzEx0S9xWSwWWSwWv+wbAAAAJ6Z//OMfmj17tl5++WUFBPx9u2XGjBnq1auXTy9snLgef/xxhYWF6aeffqr08FFaWpp/gjqM4cOH+7zet2+fZs6cqeHDh1eZQDvw8w8AdYWhqACgjoWHhyswMLDKi7smTZpowIABmjFjhk/59OnT1bVrV3Xp0uWo9vnss8/KZDJpx44dleomTpwom82m7OxsSdLmzZs1YsQIxcfHy+FwqGnTprrkkkuUm5tb4/2azWYNHDhQkip1f9+zZ4+GDx+ukJAQxcTE6M4775Tb7ZZU0T0/KSnJJ7mzX0lJicLCwjRu3Dhv2SuvvKLOnTsrKChIERER6t27t88xrG6OjXnz5umMM85QaGionE6n+vTp47Pe999/r5EjR6p58+ay2+1q1qyZbrvtNhUXF9f4WByJPXv26JprrlFcXJzsdrs6d+6st99+22eZ0tJSPfjgg+rVq5fCwsIUHBys/v37a/HixYfc9sMPP6y77rpLktSyZUtv9/CDj8mnn36qLl26ePc/f/78Wn2PAAAAJ4pLL71UmZmZWrhwobestLRUH3/8sS677LIq1/F4PHrxxRfVuXNnORwOxcXFady4cd5r9f32D3u6bNkynXzyyXI4HGrVqpXee+897zLTpk3TyJEjJUmDBg3yXv8tWbJEkvTZZ5/p3HPPVWJioux2u1q3bq1JkyZ5r8lr259//qmLL75YkZGRcjgc6t27tz7//HOfZbKysnTnnXeqa9euCgkJkdPp1DnnnKO1a9cecttjx47Vf/7zH0nyGQrpYG+88YZat24tu92uPn366KeffjrkdlevXi2TyaR33323Ut3XX38tk8mkL7/8UpKUn5+vW2+9VUlJSbLb7YqNjdVZZ52lX3755ZD72Lp1qzp37lxlj+rY2NhDritV9JS/9dZb1axZM9ntdrVp00ZPPfWUPB6Pz3I1/WwtWLBA3bt3l8PhUKdOnTR37tzDxlKVqubY2D+U2ezZs9WpUycFBgbqtNNO0++//y5Jmjp1qtq0aSOHw6GBAwdWOZTaqlWrdPbZZyssLExBQUE644wz9MMPPxxVjACOD6RQAaCW5ebmKiMjQ4ZhKC0tTa+88ooKCgp8ejYc6LLLLtMtt9yigoIChYSEqLy8XLNnz9btt99+1MNQjRo1Snfffbc++ugj783t/T766CMNHTpUERERKi0t1bBhw+RyuXTTTTcpPj5ee/bs0ZdffqmcnByFhYXVeN9bt26VJEVFRXnL3G63hg0bplNOOUXPPvusvvnmGz333HNq3bq1brjhBplMJl1xxRV6+umnlZWVpcjISO+6X3zxhfLy8rzH780339TNN9+siy++WLfccotKSkr022+/adWqVdU2GKWKht4111yjzp07a+LEiQoPD9eaNWs0f/5873qzZ89WUVGRbrjhBkVFRenHH3/UK6+8ot27d2v27NmHfe9lZWVVPolXVZIoNTVVp556qvciPyYmRvPmzdO1116rvLw83XrrrZKkvLw8vfXWW7r00kt1/fXXKz8/X//97381bNgw/fjjj+revXuVsVx00UXatGlTpS7iMTEx3mWWLVumuXPn6sYbb1RoaKhefvlljRgxQjt37vQ5fwAAADi8pKQknXbaaZo5c6bOOeccSRUP1uTm5uqSSy7Ryy+/XGmdcePGadq0abr66qt18803Kzk5Wa+++qrWrFmjH374QVar1bvsli1bdPHFF+vaa6/VmDFj9Pbbb2vs2LHq1auXOnfurAEDBujmm2/Wyy+/rPvuu08dO3aUJO9/p02bppCQEN1+++0KCQnRokWL9OCDDyovL0/PPPPMYd9fSUlJlde6Bw5RtN8ff/yhfv36qUmTJrr33nsVHBysjz76SMOHD9ecOXN04YUXSpK2bdumTz/9VCNHjlTLli2VmpqqqVOn6owzztD69eur7QE+btw47d27VwsXLtT7779f5TIzZsxQfn6+xo0bJ5PJpKeffloXXXSRtm3b5nNcD9S7d2+1atVKH330kcaMGeNTN2vWLEVERGjYsGGSpH/961/6+OOPNWHCBHXq1EmZmZlatmyZNmzYoJ49e1Z7HFu0aKEVK1Zo3bp1NX6QraioSGeccYb27NmjcePGqXnz5lq+fLkmTpyolJQUvfjiiz7H6Eg/W5s3b9bo0aP1r3/9S2PGjNE777yjkSNHav78+TrrrLNqFGN1vv/+e33++ecaP368JGny5Mk677zzdPfdd2vKlCm68cYblZ2draefflrXXHONFi1a5F130aJFOuecc9SrVy899NBDMpvNeuedd3TmmWfq+++/18knn1wrMQJoZAwAQK145513DEmV/ux2uzFt2rRKy0syxo8fb2RlZRk2m814//33DcMwjP/973+GyWQytm/fbjz00EOGJCM9Pd273pgxY4zg4ODDxnPaaacZvXr18in78ccfDUnGe++9ZxiGYaxZs8aQZMyePbvG73d/HOnp6UZ6erqxZcsW44knnjBMJpNx0kkn+SwnyXj00Ud91u/Ro4dPfBs3bjQkGa+99prPchdccIGRlJRkeDwewzAM45///KfRuXPnQ8a2/1wkJycbhmEYOTk5RmhoqHHKKacYxcXFPsvu365hGEZRUVGlbU2ePNkwmUzGjh07vGX7z8uBWrRoUeX5P/DvwON87bXXGgkJCUZGRob
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Визуализация данных с учетом понимания их особенностей\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"# Взаимодействие между BMI и PhysicalHealth\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x='BMI', y='PhysicalHealth', hue='HeartDisease', data=df)\n",
"plt.title('BMI vs PhysicalHealth')\n",
"\n",
"# Взаимодействие между MentalHealth и SleepTime\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x='MentalHealth', y='SleepTime', hue='HeartDisease', data=df)\n",
"plt.title('MentalHealth vs SleepTime')\n",
"\n",
"# Взаимодействие между PhysicalHealth и SleepTime\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x='PhysicalHealth', y='SleepTime', hue='HeartDisease', data=df)\n",
"plt.title('PhysicalHealth vs SleepTime')\n",
"\n",
"# Сравнение зависимости BMI и MentalHealth\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x='BMI', y='MentalHealth', hue='HeartDisease', data=df)\n",
"plt.title('BMI vs MentalHealth')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAASlCAYAAAALTeBgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxTVf7/8Xdpm6Zb0tJYdqHauqBUEJdRoIobbuPI1A11FGX8OSO4DOMM4L6MLI47Lrh8FR13RNxGHRd0AMUdBZfR4jAiIGBLm7RN03TJ7w+mmaZJkybNbXLb1/Px6ENzz73nfO45N5d77yf33hSfz+cTAAAAAAAAAACACfRLdAAAAAAAAAAAAABdRWIDAAAAAAAAAACYBokNAAAAAAAAAABgGiQ2AAAAAAAAAACAaZDYAAAAAAAAAAAApkFiAwAAAAAAAAAAmAaJDQAAAAAAAAAAYBokNgAAAAAAAAAAgGmQ2AAAAAAAAAAAAKZBYgMAYKipU6cqJyenR9tcvHixUlJS9J///MeQ+q+77jqlpKQYUnd3pKSk6LrrruvyvDNmzDA2IAAAAJjO1KlTNWLEiESHEeA///mPUlJStHjx4i7Pe8sttxgfmMFGjBihqVOnJjqMqCTruRKA3ofEBgDESdvF9PZ/hYWFmjhxol577bWg+dvm+e1vfxuyviuvvNI/T2VlpX96IhIFoUydOjVgXW02m/bbbz/deuutamxsTHR4PW7EiBE68cQTQ5a9++67SklJ0XPPPdejMb3//vu67rrrVFNT06PtAgAA9CXtzwNWrVoVVO7z+TRs2DClpKR0erwYL19//bWuu+66uP/AJ9yPYtrW/5NPPolrm5G8+uqrXf5RT7JZt26dTjnlFA0fPlxWq1VDhgzR0UcfrYULFyY6tJBGjBgRdK4b6q8riScAiJe0RAcAAL3NDTfcoKKiIvl8Pm3btk2LFy/W8ccfr5dffjnoRMZqtWrp0qW69957ZbFYAsqeeuopWa1WeTyengw/KhkZGXrooYckSTU1NVq6dKkuv/xyffzxx3r66acTFtdvfvMbnXHGGcrIyEhYDMng/fff1/XXX6+pU6cqLy8v0eEAAAD0alarVU8++aTGjx8fMP2f//ynNm3a1CPHpl9//bWuv/56HX744Ul310W8vfrqq7rnnntMl9x4//33NXHiRO2666664IILNHDgQP3444/64IMPdOedd+riiy9OdIhB7rjjDtXV1fk/v/rqq3rqqad0++23y+Fw+KcfeuihOvvsszV79uxEhAmgjyGxAQBxdtxxx+mAAw7wf542bZoGDBigp556Kiixceyxx+qll17Sa6+9pl/96lf+6e+//742bNig8vJyLV26tMdij1ZaWprOPvts/+eLLrpIBx98sJ555hnddtttGjx4cELiSk1NVWpqakLaBgAAQN90/PHHa8mSJbrrrruUlva/yy1PPvmkxo4dG3AXNvqum266SXa7XR9//HHQj4+2b9+emKAiOPnkkwM+b926VU899ZROPvnkkAm09ts/ABiFR1EBgMHy8vKUmZkZ8uBuyJAhKisr05NPPhkw/YknntCoUaO07777xtTmLbfcopSUFP3www9BZXPmzJHFYlF1dbUkqaKiQuXl5Ro4cKCsVquGDh2qM844Q06nM+p2+/Xrp8MPP1ySgm5/37x5s04++WTl5ORol1120eWXX66WlhZJO2/PHzFiREByp43H45HdbteFF17on7Zw4ULts88+ysrKUn5+vg444ICAPuzsHRuvvfaaDjvsMOXm5spms+nAAw8MWG7lypU69dRTteuuuyojI0PDhg3TH/7wBzU0NETdF12xefNmnX/++RowYIAyMjK0zz776OGHHw6Yx+v16pprrtHYsWNlt9uVnZ2tCRMm6J133glb93XXXac//elPkqSioiL/7eEd++SFF17Qvvvu62//9ddfj+s6AgAA9BVTpkxRVVWV3nzzTf80r9er5557TmeeeWbIZVpbW3XHHXdon332kdVq1YABA3ThhRf6j9XbtD32dNWqVTrooINktVq122676bHHHvPPs3jxYp166qmSpIkTJ/qP/959911J0osvvqgTTjhBgwcPVkZGhnbffXfdeOON/mPyePvXv/6lU045Rf3795fVatUBBxygl156KWCeHTt26PLLL9eoUaOUk5Mjm82m4447Tl988UXYuqdOnap77rlHkgIehdTRAw88oN13310ZGRk68MAD9fHHH4et95NPPlFKSooeffTRoLJ//OMfSklJ0SuvvCJJqq2t1WWXXaYRI0YoIyNDhYWFOvroo/XZZ5+FbeP777/XPvvsE/KO6sLCwrDLSjvvlL/ssss0bNgwZWRkqLi4WAsWLFBra2vAfNFuW2+88YZGjx4tq9WqkSNH6vnnn48YSyih3rHR9iizJUuWaOTIkcrMzNQhhxyidevWSZLuv/9+FRcXy2q16vDDDw/5KLUPP/xQxx57rOx2u7KysnTYYYfpvffeiylGAL0DKVQAiDOn06nKykr5fD5t375dCxcuVF1dXcCdDe2deeaZuvTSS1VXV6ecnBw1NzdryZIlmjlzZsyPoTrttNP05z//Wc8++6z/4nabZ599Vsccc4zy8/Pl9Xo1adIkNTY26uKLL9bAgQO1efNmvfLKK6qpqZHdbo+67e+//16SVFBQ4J/W0tKiSZMm6eCDD9Ytt9yit956S7feeqt23313/f73v1dKSorOPvts3XzzzdqxY4f69+/vX/bll1+Wy+Xy99+DDz6oSy65RKeccoouvfRSeTwerV27Vh9++GGnJ4zSzhO9888/X/vss4/mzJmjvLw8rVmzRq+//rp/uSVLlsjtduv3v/+9CgoK9NFHH2nhwoXatGmTlixZEnHdm5qaQv4SL1SSaNu2bfrFL37hP8jfZZdd9Nprr2natGlyuVy67LLLJEkul0sPPfSQpkyZogsuuEC1tbX6v//7P02aNEkfffSRRo8eHTKWX//61/ruu++CbhHfZZdd/POsWrVKzz//vC666CLl5ubqrrvuUnl5uTZu3BgwfgAAAIhsxIgROuSQQ/TUU0/puOOOk7TzhzVOp1NnnHGG7rrrrqBlLrzwQi1evFjnnXeeLrnkEm3YsEF333231qxZo/fee0/p6en+edevX69TTjlF06ZN07nnnquHH35YU6dO1dixY7XPPvuorKxMl1xyie666y5dccUV2nvvvSXJ/9/FixcrJydHM2fOVE5OjpYvX65rrrlGLpdLf/3rXyOun8fjCXms2/4RRW2++uorjRs3TkOGDNHs2bOVnZ2tZ599VieffLKWLl2qyZMnS5L+/e9/64UXXtCpp56qoqIibdu2Tffff78OO+wwff31153eAX7hhRdqy5YtevPNN/W3v/0t5DxPPvmkamtrdeGFFyolJUU333yzfv3rX+vf//53QL+2d8ABB2i33XbTs88+q3PPPTeg7JlnnlF+fr4mTZokSfrd736n5557TjNmzNDIkSNVVVWlVatW6ZtvvtH+++/faT8OHz5cq1ev1pdffhn1D9ncbrcOO+wwbd68WRdeeKF23XVXvf/++5ozZ45++ukn3XHHHQF91NVtq6KiQqeffrp+97vf6dxzz9UjjzyiU089Va+//rqOPvroqGLszMqVK/XSSy9p+vTpkqR58+bpxBNP1J///Gfde++9uuiii1RdXa2bb75Z559/vpYvX+5fdvny5TruuOM0duxYXXvtterXr58eeeQRHXHEEVq5cqUOOuiguMQIwGR8AIC4eOSRR3ySgv4yMjJ8ixcvDppfkm/69Om+HTt2+CwWi+9vf/ubz+fz+f7+97/7UlJSfP/5z3981157rU+S7+eff/Yvd+655/qys7MjxnPIIYf4xo4dGzDto48+8knyPfbYYz6fz+dbs2aNT5JvyZIlUa9vWxw///yz7+eff/atX7/eN3fuXF9KSoqvtLQ0YD5JvhtuuCFg+TFjxgTE9+233/ok+e67776A+U466STfiBEjfK2trT6fz+f71a9+5dtnn33CxtY2Fhs2bPD5fD5fTU2NLzc
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Визуализация данных без учета понимания их особенностей\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"# Взаимодействие между BMI и PhysicalHealth\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x='BMI', y='PhysicalHealth', data=df)\n",
"plt.title('BMI vs PhysicalHealth')\n",
"\n",
"# Взаимодействие между MentalHealth и SleepTime\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x='MentalHealth', y='SleepTime', data=df)\n",
"plt.title('MentalHealth vs SleepTime')\n",
"\n",
"# Взаимодействие между PhysicalHealth и SleepTime\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x='PhysicalHealth', y='SleepTime', data=df)\n",
"plt.title('PhysicalHealth vs SleepTime')\n",
"\n",
"# Сравнение зависимости BMI и MentalHealth\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x='BMI', y='MentalHealth', data=df)\n",
"plt.title('BMI vs MentalHealth')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAzgAAAJcCAYAAAA1ngF6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACgn0lEQVR4nOzdd3xc1Z338e/0JmnUuyzLvcjGBtvYVAMGYyChBdgkBEIoKcAuYUkI+6SRZAP7pEAgQEjChhCWTYBNSKMGQnkIsHRTDQYbjI3lqmJ1ae7zh30Oo7HKjLquP+/Xyy9Loylnbjn3/M75nXM9juM4AgAAAAAX8I51AQAAAABguBDgAAAAAHANAhwAAAAArkGAAwAAAMA1CHAAAAAAuAYBDgAAAADXIMABAAAA4BoEOAAAAABcgwAHAAAAgGsQ4AAARs3kyZP12c9+dqyLAQBwMQIcAHCZW2+9VR6Px/4Lh8MqLy/XypUrdd1116mpqWmsiwgAwIjxj3UBAAAj4zvf+Y5qamrU2dmpzZs369FHH9Ull1yiH//4x/rTn/6k+fPnj3URAQAYdgQ4AOBSq1at0qJFi+zvV1xxhR555BGdcMIJ+vjHP6433nhDkUhkDEvYu+bmZsVisVH5rLa2NgWDQXm9JDQAgFtQowPAPuTII4/UN77xDb333nu6/fbb7eNvvvmmPvGJTyg/P1/hcFiLFi3Sn/70px6vNalvTz75pC699FIVFRUpFovp5JNP1tatW3s813Ecfe9731NlZaWi0aiOOOIIvfbaa3uVx7znY489pi996UsqLi5WZWWl/fuNN96ouXPnKhQKqby8XBdeeKHq6+v3ep8bbrhBU6ZMUSQS0ZIlS/TEE09o+fLlWr58uX3Oo48+Ko/Ho9/+9rf6+te/roqKCkWjUTU2NmrHjh267LLLNG/ePGVlZSknJ0erVq3Syy+/3ONzzHvceeeduvLKK1VRUaHs7Gx94hOfUENDg9rb23XJJZeouLhYWVlZOuecc9Te3p7JLgIADBEjOACwj/nMZz6jf/u3f9ODDz6o888/X6+99poOPvhgVVRU6Gtf+5pisZjuvPNOnXTSSfqf//kfnXzyyT1ef/HFFysvL0/f+ta3tH79el177bW66KKL9Lvf/c4+55vf/Ka+973v6bjjjtNxxx2nF154Qcccc4w6Ojp6LdOXvvQlFRUV6Zvf/Kaam5slSd/+9rd15ZVXasWKFfriF7+oNWvW6KabbtKzzz6rJ598UoFAQJJ000036aKLLtKhhx6qL3/5y1q/fr1OOukk5eXl9QiWjO9+97sKBoO67LLL1N7ermAwqNdff1333HOPTjvtNNXU1Kiurk4333yzDj/8cL3++usqLy/v8R5XXXWVIpGIvva1r2nt2rW6/vrrFQgE5PV6tXPnTn3729/W008/rVtvvVU1NTX65je/OaR9BgDIgAMAcJVf/epXjiTn2Wef7fM58XjcWbhwoeM4jnPUUUc58+bNc9ra2uzfE4mEc9BBBznTp0/f631XrFjhJBIJ+/iXv/xlx+fzOfX19Y7jOM6WLVucYDDoHH/88T2e92//9m+OJOfss8/e6z0POeQQp6uryz5u3uOYY45xuru77eM//elPHUnOf/7nfzqO4zjt7e1OQUGBs3jxYqezs9M+79Zbb3UkOYcffrh97O9//7sjyZkyZYrT0tLSY3u0tbX1+BzHcZx169Y5oVDI+c53vrPXe9TW1jodHR328U9+8pOOx+NxVq1a1eM9li1b5lRXVzsAgNFDihoA7IOysrLU1NSkHTt26JFHHtHpp5+upqYmbdu2Tdu2bdP27du1cuVKvf3229q4cWOP115wwQXyeDz290MPPVTd3d167733JEl/+9vf1NHRoYsvvrjH8y655JI+y3P++efL5/PZ3817XHLJJT3mx5x//vnKycnRX//6V0nSc889p+3bt+v888+X3/9RUsKnP/1p5eXl9fpZZ5999l5zj0KhkP2c7u5ubd++XVlZWZo5c6ZeeOGFvd7jrLPOsiNIknTggQfKcRx97nOf6/G8Aw88UBs2bFBXV1ef3x0AMLxIUQOAfdCuXbtUXFystWvXynEcfeMb39A3vvGNXp+7ZcsWVVRU2N8nTZrU4+8mkNi5c6ck2UBn+vTpPZ5XVFTUZ9BRU1PT43fzHjNnzuzxeDAY1JQpU+zfzf/Tpk3r8Ty/36/Jkyen9VmSlEgk9JOf/EQ33nij1q1bp+7ubvu3goKCvZ6fug3i8bgkqaqqaq/HE4mEGhoaen0fAMDwI8ABgH3MBx98oIaGBk2bNk2JREKSdNlll2nlypW9Pj81eEgeaUnmOM6gyzSaq7n19lnf//739Y1vfEOf+9zn9N3vflf5+fnyer265JJL7DZK1tc2GIltAwDIDAEOAOxjfvOb30iSVq5cqSlTpkiSAoGAVqxYMSzvX11dLUl6++237ftL0tatW+0oT7rvsWbNmh7v0dHRoXXr1tmymuetXbtWRxxxhH1eV1eX1q9fn/a9fu6++24dccQRuuWWW3o8Xl9fr8LCwrTeAwAwPjAHBwD2IY888oi++93vqqamRp/+9KdVXFys5cuX6+abb9aHH3641/NTl39Ox4oVKxQIBHT99df3GLm49tprM3qPYDCo6667rsd73HLLLWpoaNDxxx8vSVq0aJEKCgr0i1/8osc8l//6r/9KO5iSdo+8pI6y3HXXXXvNPwIAjH+M4ACAS913331688031dXVpbq6Oj3yyCN66KGHVF1drT/96U8Kh8OSdt9D5pBDDtG8efN0/vnna8qUKaqrq9NTTz2lDz74YK97wQykqKhIl112ma666iqdcMIJOu644/Tiiy/qvvvuS3s0pKioSFdccYWuvPJKHXvssfr4xz+uNWvW6MYbb9TixYt15plnSto9J+fb3/62Lr74Yh155JE6/fTTtX79et16662aOnVqj0UO+nPCCSfoO9/5js455xwddNBBeuWVV/Rf//VfPUaPAAATAwEOALiUufdKMBhUfn6+5s2bp2uvvVbnnHOOsrOz7fPmzJmj5557TldeeaVuvfVWbd++XcXFxVq4cOGg79/yve99T+FwWD/72c/097//XQceeKAefPBBO/KSjm9/+9sqKirST3/6U335y19Wfn6+LrjgAn3/+9/vsYLZRRddJMdx9KMf/UiXXXaZ9ttvP/3pT3/SP//zP9sgbiD/9m//pubmZt1xxx363e9+p/33319//etf9bWvfS3j7w4AGFseh5mPAACXSSQSKioq0imnnKJf/OIXY10cAMAoYg4OAGBCa2tr22v+zG233aYdO3Zo+fLlY1MoAMCYYQQHADChPfroo/ryl7+s0047TQUFBXrhhRd0yy23aPbs2Xr++ecVDAbHuogAgFHEHBwAwIQ2efJkVVVV6brrrtOOHTuUn5+vs846S1dffTXBDQDsgxjBAQAAAOAazMEBAAAA4BoEOAAAAABcY9zNwUkkEtq0aZOys7PTvkEbAAAAAPdxHEdNTU0qLy+X15ve2My4C3A2bdqkqqqqsS4GAAAAgHFiw4YNqqysTOu54y7AMXfX3rBhg3Jycsa4NAAAAADGSmNjo6qqqmyMkI5xF+CYtLScnBwCHAAAAAAZTV1hkQEAAAAArkGAAwAAAMA1CHAAAAAAuAYBDgAAAADXIMABAAAA4BoEOAAAAABcgwAHAAAAgGsQ4AAAAABwDQIcAAAAAK5BgAMAAADANQhwAAAAALgGAQ4AAAAA1yDAAQAAAOAaBDgAAAAAXIMABwAAAIBrEOAAAAAAcA0CHAAAAACuQYADAAAAwDUIcAAAAAC4BgEOAAAAANfwj3UB0D/HcdTa2T3WxQAAABNcJOCTx+MZ62IAI44AZxxzHEef+NlTev69nWNdFAAAMMEtqs7TXV9YRpAD1yNFbRxr7ewmuAEAAMPiufd2khWCfQIjOBPEc19foWjQN9bFAAAAE0xLR7cWfe9vY10MYNQQ4EwQ0aBP0SC7CwAAAOgPKWoAAAAAXIMABwAAAIBrEOAAAAAAcA0CHAAAAACuQYADAAAAwDUIcAA
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import AgglomerativeClustering\n",
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Выбор признаков для кластеризации\n",
"X = df[['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']]\n",
"\n",
"# Применение агломеративной кластеризации\n",
"agg_clustering = AgglomerativeClustering(n_clusters=3)\n",
"clusters = agg_clustering.fit_predict(X)\n",
"\n",
"# Добавление результатов кластеризации в датасет\n",
"df['Cluster'] = clusters\n",
"\n",
"# Визуализация дендрограммы\n",
"linked = linkage(X, 'ward')\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(linked,\n",
" orientation='top',\n",
" distance_sort='descending',\n",
" show_leaf_counts=True)\n",
"plt.title('Dendrogram')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjYAAAlWCAYAAAD5nOoXAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wURf/A8c/s1fQCgdB7byooTYoiIE1QEREV7L09dn+PvT72x/LYFbGgFCtgQ8EKKqAggvQiHUJ6v7ud3x+bhFxyd7kUEoLf9+uVF2Rvd+a7s3ubmZ3ZWaW11gghhBBCCCGEEEIIIYQQQtQDRl0HIIQQQgghhBBCCCGEEEIIES7p2BBCCCGEEEIIIYQQQgghRL0hHRtCCCGEEEIIIYQQQgghhKg3pGNDCCGEEEIIIYQQQgghhBD1hnRsCCGEEEIIIYQQQgghhBCi3pCODSGEEEIIIYQQQgghhBBC1BvSsSGEEEIIIYQQQgghhBBCiHpDOjaEEEIIIYQQQgghhBBCCFFvSMeGEEIIIYQQQgghhBBCCCHqDenYEEIcEVq3bs0FF1xQ12HUiccee4zOnTtjmmZdh3JE+/bbb1FKMXfu3FrNVynFvffee1jS3rZtG0op3nzzzcOSflVdcMEFtG7dOux1o6OjK1zv4MGDREVF8dlnn1UzOiGEEEKI+ufee+9FKUVKSkqNpVmZOls4duzYgdvt5qeffqqxNI9WrVu3ZuzYsbWaZ00f77KGDh3K0KFDD1v6VVHcBvz222/DXjec9uLkyZOZNGlSDUQohKhL0rEhhDisNm/ezOWXX07btm1xu93ExsYycOBAnnnmGfLy8molhtzcXO69996wKkO1LTMzk0cffZTbbrsNwzh0SVZK+f1ERUXRtWtXHnzwQXJzc/3SuOCCC1BKERsbG7BMN27cWJLOE088UbK8rjoKyiqOo/jH4XDQtm1bpk6dypYtW+o0trpQUaO3LhpRNfEdatCgAZdccgl33XVXzQUmhBBCiHqpbF032E9d19+HDh1K9+7d6zSG2nT//ffTt29fBg4cWLKsuK1R/GO322nRogWTJ09m7dq1ftuXrte/8847AfMYOHAgSqly5VoXddxAWrdu7be/jRo1YtCgQXz00Ud1HVqtKx6EVboNWdrh6KwLx8yZM/nvf/9brTRuu+02PvjgA1atWlUzQQkh6oS9rgMQQhy9FixYwFlnnYXL5WLq1Kl0796dwsJCfvzxR2655RbWrFnDK6+8ctjjyM3N5b777gM44kagvPHGG3i9Xs4555xynw0fPpypU6cCkJ2dzQ8//MBdd93FqlWrmDNnjt+6drud3Nxc5s2bV27kybvvvovb7SY/P//w7UgNuO666zj++OPxeDz89ttvvPLKKyxYsIDVq1fTtGnTOosrLy8Pu/2f/eeypr5DV1xxBc8++yyLFi3i5JNPrqHohBBCCFHfvP32236/v/XWWyxcuLDc8i5dutRmWP9oBw4cYMaMGcyYMaPcZy6Xi9deew0Ar9fL5s2beemll/jiiy9Yu3Ztubq62+1m5syZnHfeeX7Lt23bxpIlS3C73YdvR2rAMcccw0033QTA7t27efnllznjjDN48cUXueKKK+osrldffVWe8sfq2Pjzzz+54YYbqpzGscceS58+fXjyySd56623ai44IUSt+mffqRFCHDZbt25l8uTJtGrVikWLFtGkSZOSz66++mo2bdrEggUL6jDC6svJySEqKqpaaUyfPp3TTjstYOW+Y8eOfo2BK664gsLCQj788EPy8/P9tnG5XAwcOJD33nuvXMfGzJkzGTNmDB988EG1Yj3cBg0axMSJEwG48MIL6dixI9dddx0zZszgjjvuqLO4jvSGV33SpUsXunfvzptvvikdG0IIIcQ/WNkb3j///DMLFy4st7ys3NxcIiMjD2do/1jvvPMOdrudcePGlfvMbreXOzb9+vVj7NixLFiwgEsvvdTvs9GjR/Ppp5+SkpJCw4YNS5bPnDmTxo0b06FDB9LS0g7PjtSAZs2a+e3v1KlTad++PU8//XSddmw4HI46y/toNGnSJO655x5eeOGFsKbWFUIceWQqKiHEYfHYY4+RnZ3N66+/7tepUax9+/Zcf/31Qbcvfqy1rDfffBOlFNu2bStZtnz5ckaOHEnDhg2JiIigTZs2XHTRRYA1KigpKQmA++67r+SR4tLvTFi3bh0TJ04kMTERt9tNnz59+PTTTwPm+91333HVVVfRqFEjmjdvDkBWVhY33HADrVu3xuVy0ahRI4YPH85vv/0Wsoy2bt3KH3/8wSmnnBJyvdKSk5NLHgEva8qUKXz++eekp6eXLFu2bBkbN25kypQpYedR2r59+7Db7SWj9Utbv349Simef/55ADweD/fddx8dOnTA7XbToEEDTjzxRBYuXFilvItvfG/dutVvuWmaPPTQQzRv3hy3282wYcPYtGlTyef33HMPDoeDAwcOlEvzsssuIz4+vuTplVDnTrFA79jYtWsXF198MU2bNsXlctGmTRuuvPJKCgsLAUhNTeXmm2+mR48eREdHExsby6hRow7bo86mafLf//6Xbt264Xa7ady4MZdffnm5BuMnn3zCmDFjSuJu164dDzzwAD6fL2ja4XyHwCqTCRMmEB0dTVJSEjfffHPAdIcPH868efPQWld/x4UQQghx1CqeBmrFihUMHjyYyMhI/u///g8I/g60QO/tS09P54YbbqBFixa4XC7at2/Po48+WmMj3//44w8uuOCCkql3k5OTueiiizh48GDA9VNSUpg0aRKxsbE0aNCA66+/PuCT1e+88w69e/cmIiKCxMREJk+ezI4dOyqM5/3336d3797ExMQQGxtLjx49eOaZZyrc7uOPP6Zv375h3+BNTk4GCNguGT9+PC6Xq9xT5jNnzmTSpEnYbLaw8ihr7NixtG3bNuBn/fv3p0+fPiW/L1y4kBNPPJH4+Hiio6Pp1KlTyflTWcnJyXTp0qVcuwTgxx9/5IQTTsDtdtO2bVu/0f9btmxBKcXTTz9dbrslS5aglOK9994DwmtTBnrHhmmaPPPMM/To0QO3201SUhKnnnoqy5cvL1ln+vTpnHzyyTRq1AiXy0XXrl158cUXq1QW4fjll1849dRTiYuLIzIykiFDhpR7b8v27du56qqr6NSpExERETRo0ICzzjrLr50fyNChQ1mwYAHbt28vaZcEKpNQ7cViw4cPJycnp8rtVSFE3ZMnNoQQh8W8efNo27YtAwYMOKz57N+/nxEjRpCUlMTtt99OfHw827Zt48MPPwQgKSmJF198kSuvvJLTTz+dM844A4CePXsCsGbNGgYOHEizZs24/fbbiYqKYvbs2UyYMIEPPviA008/3S+/q666iqSkJO6++25ycnIA60mKuXPncs0119C1a1cOHjzIjz/+yF9//cVxxx0XNPYlS5YABF0nPz+/ZL7SnJwcfvrpJ2bMmMGUKVMCNiDOOOMMrrjiCj788MOSm/MzZ86kc+fOIeMIpXHjxgwZMoTZs2dzzz33+H02a9YsbDYbZ511FmB1Rj3yyCNccsklnHDCCWRmZrJ8+XJ+++03hg8fXum8N2/eDFjvZijtP//5D4ZhcPPNN5ORkcFjjz3Gueeeyy+//ALA+eefz/3338+sWbO45pprSrYrLCxk7ty5nHnmmbjd7grPnWB2797NCSecQHp6OpdddhmdO3dm165dzJ07l9zcXJxOJ1u2bOHjjz/mrLPOok2bNuzbt4+XX36ZIUOGBHxcP5DU1NSAywM1wi+//HLefPNNLrzwQq677jq2bt3K888/z++//85PP/1UMrrrzTffJDo6mhtvvJHo6GgWLVrE3XffTWZmJo8//njA/Cr6DgH4fD5GjhxJ3759eeKJJ/j666958sknadeuHVdeeaVfer179+bpp59mzZo1/6g5q4UQQghReQcPHmTUqFFMnjyZ8847j8aNG1dq+9zcXIYMGcKuXbu4/PLLadmyJUuWLOGOO+5gz5491Z6nH6wb6Fu2bOHCCy8kOTm5ZLrdNWvW8PPPP5cbrDVp0iRat27
"text/plain": [
"<Figure size 1600x2400 with 8 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from scipy.cluster.hierarchy import linkage\n",
"from scipy.cluster import hierarchy\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Загрузка данных\n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(15000)\n",
"\n",
"# Инициализация LabelEncoder\n",
"label_encoders = {}\n",
"\n",
"# Кодирование категориальных переменных\n",
"for column in df.select_dtypes(include=['object']).columns:\n",
" le = LabelEncoder()\n",
" df[column] = le.fit_transform(df[column])\n",
" label_encoders[column] = le\n",
"\n",
"# Выбор признаков для кластеризации\n",
"X = df[['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']]\n",
"\n",
"# Создание матрицы связей\n",
"linkage_matrix = linkage(X, method='ward')\n",
"\n",
"# Применение иерархической кластеризации\n",
"result = hierarchy.fcluster(linkage_matrix, 10, criterion=\"distance\")\n",
"\n",
"# Преобразование меток кластеров\n",
"result = [0 if val == 1 else 1 if val == 3 else 2 for val in result]\n",
"\n",
"# Визуализация результатов кластеризации и истинных меток\n",
"plt.figure(figsize=(16, 24))\n",
"\n",
"# Визуализация кластеров на основе признаков 0 и 1\n",
"plt.subplot(4, 2, 1)\n",
"plt.scatter(X['BMI'], X['PhysicalHealth'], c=result, cmap='viridis')\n",
"plt.title('Clusters (BMI vs PhysicalHealth)')\n",
"\n",
"# Визуализация истинных меток на основе признаков 0 и 1\n",
"plt.subplot(4, 2, 2)\n",
"plt.scatter(X['BMI'], X['PhysicalHealth'], c=df['HeartDisease'], cmap='viridis')\n",
"plt.title('True Labels (BMI vs PhysicalHealth)')\n",
"\n",
"# Визуализация кластеров на основе признаков 2 и 3\n",
"plt.subplot(4, 2, 3)\n",
"plt.scatter(X['MentalHealth'], X['SleepTime'], c=result, cmap='viridis')\n",
"plt.title('Clusters (MentalHealth vs SleepTime)')\n",
"\n",
"# Визуализация истинных меток на основе признаков 2 и 3\n",
"plt.subplot(4, 2, 4)\n",
"plt.scatter(X['MentalHealth'], X['SleepTime'], c=df['HeartDisease'], cmap='viridis')\n",
"plt.title('True Labels (MentalHealth vs SleepTime)')\n",
"\n",
"# Визуализация кластеров на основе признаков 0 и 2\n",
"plt.subplot(4, 2, 5)\n",
"plt.scatter(X['BMI'], X['MentalHealth'], c=result, cmap='viridis')\n",
"plt.title('Clusters (BMI vs MentalHealth)')\n",
"\n",
"# Визуализация истинных меток на основе признаков 0 и 2\n",
"plt.subplot(4, 2, 6)\n",
"plt.scatter(X['BMI'], X['MentalHealth'], c=df['HeartDisease'], cmap='viridis')\n",
"plt.title('True Labels (BMI vs MentalHealth)')\n",
"\n",
"# Визуализация кластеров на основе признаков 1 и 3\n",
"plt.subplot(4, 2, 7)\n",
"plt.scatter(X['PhysicalHealth'], X['SleepTime'], c=result, cmap='viridis')\n",
"plt.title('Clusters (PhysicalHealth vs SleepTime)')\n",
"\n",
"# Визуализация истинных меток на основе признаков 1 и 3\n",
"plt.subplot(4, 2, 8)\n",
"plt.scatter(X['PhysicalHealth'], X['SleepTime'], c=df['HeartDisease'], cmap='viridis')\n",
"plt.title('True Labels (PhysicalHealth vs SleepTime)')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}