1042 lines
855 KiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Бизнес-цель: кластеризация пациентов для выявления групп со схожими характеристиками здоровья и рисками инсульта. Результаты можно использовать, например, для следующего:\n",
"\n",
"- определение групп людей с большей предрасположенностью к возникновению инсульта\n",
"- помощь в разработке (на основе полученных данных) медицинских показаний для людей с повышенным риском возникновения инсульта"
]
},
{
"cell_type": "code",
"execution_count": 141,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>ever_married</th>\n",
" <th>work_type</th>\n",
" <th>Residence_type</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>smoking_status</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9046</th>\n",
" <td>Male</td>\n",
" <td>67.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>228.69</td>\n",
" <td>36.6</td>\n",
" <td>formerly smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51676</th>\n",
" <td>Female</td>\n",
" <td>61.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>202.21</td>\n",
" <td>NaN</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31112</th>\n",
" <td>Male</td>\n",
" <td>80.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>105.92</td>\n",
" <td>32.5</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60182</th>\n",
" <td>Female</td>\n",
" <td>49.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>171.23</td>\n",
" <td>34.4</td>\n",
" <td>smokes</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1665</th>\n",
" <td>Female</td>\n",
" <td>79.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>174.12</td>\n",
" <td>24.0</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34383</th>\n",
" <td>Male</td>\n",
" <td>46.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>88.23</td>\n",
" <td>25.8</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8646</th>\n",
" <td>Female</td>\n",
" <td>54.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>97.47</td>\n",
" <td>26.7</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46653</th>\n",
" <td>Female</td>\n",
" <td>81.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>59.28</td>\n",
" <td>28.1</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>Female</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>children</td>\n",
" <td>Rural</td>\n",
" <td>101.15</td>\n",
" <td>22.2</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61676</th>\n",
" <td>Male</td>\n",
" <td>77.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Urban</td>\n",
" <td>68.38</td>\n",
" <td>25.1</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2500 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" gender age hypertension heart_disease ever_married work_type \\\n",
"id \n",
"9046 Male 67.0 0 1 Yes Private \n",
"51676 Female 61.0 0 0 Yes Self-employed \n",
"31112 Male 80.0 0 1 Yes Private \n",
"60182 Female 49.0 0 0 Yes Private \n",
"1665 Female 79.0 1 0 Yes Self-employed \n",
"... ... ... ... ... ... ... \n",
"34383 Male 46.0 0 0 Yes Private \n",
"8646 Female 54.0 0 0 Yes Private \n",
"46653 Female 81.0 1 1 Yes Private \n",
"1099 Female 15.0 0 0 No children \n",
"61676 Male 77.0 0 0 Yes Self-employed \n",
"\n",
" Residence_type avg_glucose_level bmi smoking_status stroke \n",
"id \n",
"9046 Urban 228.69 36.6 formerly smoked 1 \n",
"51676 Rural 202.21 NaN never smoked 1 \n",
"31112 Rural 105.92 32.5 never smoked 1 \n",
"60182 Urban 171.23 34.4 smokes 1 \n",
"1665 Rural 174.12 24.0 never smoked 1 \n",
"... ... ... ... ... ... \n",
"34383 Urban 88.23 25.8 Unknown 0 \n",
"8646 Rural 97.47 26.7 never smoked 0 \n",
"46653 Rural 59.28 28.1 never smoked 0 \n",
"1099 Rural 101.15 22.2 Unknown 0 \n",
"61676 Urban 68.38 25.1 Unknown 0 \n",
"\n",
"[2500 rows x 11 columns]"
]
},
"execution_count": 141,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn import cluster\n",
"from scipy.cluster import hierarchy\n",
"\n",
"# Load the full dataset, using the patient id column as the index.\n",
"df1 = pd.read_csv(\"./csv/option4.csv\", index_col='id')\n",
"# info is a method: without the call parentheses the statement has no effect.\n",
"df1.info()\n",
"# Work on the first 2500 rows; copy() makes df an independent frame so later\n",
"# column assignments do not raise SettingWithCopyWarning.\n",
"df = df1.head(2500).copy()\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Уберём пустые значения и подготовим данные:"
]
},
{
"cell_type": "code",
"execution_count": 142,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gender 0\n",
"age 0\n",
"hypertension 0\n",
"heart_disease 0\n",
"ever_married 0\n",
"work_type 0\n",
"Residence_type 0\n",
"avg_glucose_level 0\n",
"bmi 119\n",
"smoking_status 0\n",
"stroke 0\n",
"dtype: int64\n",
"\n",
"gender False\n",
"age False\n",
"hypertension False\n",
"heart_disease False\n",
"ever_married False\n",
"work_type False\n",
"Residence_type False\n",
"avg_glucose_level False\n",
"bmi True\n",
"smoking_status False\n",
"stroke False\n",
"dtype: bool\n",
"\n"
]
}
],
"source": [
"# Build the null mask once, then print the per-column count of missing values\n",
"# followed by the per-column \"has any missing value\" flag, each followed by a blank line.\n",
"null_mask = df.isnull()\n",
"for summary in (null_mask.sum(), null_mask.any()):\n",
"    print(summary)\n",
"    print()"
]
},
{
"cell_type": "code",
"execution_count": 143,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Наличие пропущенных значений:\n",
"gender 0\n",
"age 0\n",
"hypertension 0\n",
"heart_disease 0\n",
"ever_married 0\n",
"work_type 0\n",
"Residence_type 0\n",
"avg_glucose_level 0\n",
"bmi 0\n",
"smoking_status 0\n",
"stroke 0\n",
"dtype: int64\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\elena\\AppData\\Local\\Temp\\ipykernel_68948\\1629916119.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df['bmi'] = df['bmi'].fillna(df['bmi'].median())\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>44.605296</td>\n",
" <td>0.108000</td>\n",
" <td>0.062400</td>\n",
" <td>108.630440</td>\n",
" <td>29.102840</td>\n",
" <td>0.099600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>22.817713</td>\n",
" <td>0.310443</td>\n",
" <td>0.241929</td>\n",
" <td>47.124712</td>\n",
" <td>7.804786</td>\n",
" <td>0.299526</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.080000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>55.220000</td>\n",
" <td>10.300000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>26.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>77.902500</td>\n",
" <td>23.975000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>47.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>93.200000</td>\n",
" <td>28.200000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>63.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>117.510000</td>\n",
" <td>33.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>82.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>271.740000</td>\n",
" <td>97.600000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age hypertension heart_disease avg_glucose_level \\\n",
"count 2500.000000 2500.000000 2500.000000 2500.000000 \n",
"mean 44.605296 0.108000 0.062400 108.630440 \n",
"std 22.817713 0.310443 0.241929 47.124712 \n",
"min 0.080000 0.000000 0.000000 55.220000 \n",
"25% 26.000000 0.000000 0.000000 77.902500 \n",
"50% 47.000000 0.000000 0.000000 93.200000 \n",
"75% 63.000000 0.000000 0.000000 117.510000 \n",
"max 82.000000 1.000000 1.000000 271.740000 \n",
"\n",
" bmi stroke \n",
"count 2500.000000 2500.000000 \n",
"mean 29.102840 0.099600 \n",
"std 7.804786 0.299526 \n",
"min 10.300000 0.000000 \n",
"25% 23.975000 0.000000 \n",
"50% 28.200000 0.000000 \n",
"75% 33.000000 0.000000 \n",
"max 97.600000 1.000000 "
]
},
"execution_count": 143,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Fill missing BMI values with the column median.\n",
"# assign() returns a new frame instead of writing into a slice of df1,\n",
"# which avoids the SettingWithCopyWarning raised by in-place column assignment.\n",
"df = df.assign(bmi=df['bmi'].fillna(df['bmi'].median()))\n",
"print(\"\\nНаличие пропущенных значений:\")\n",
"print(df.isnull().sum())\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 144,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ЗАВИСИМОСТЬ ЗНАЧЕНИЙ ДРУГ ОТ ДРУГА\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoMAAAHnCAYAAADD41dfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADmj0lEQVR4nOydeWAU5f3/37shu8kmmw2bQDiFkGDIAYRwqVSCVq2oXLb2UrEWvFr0Z20t9Wi1aq1WRb/VWg+0rVhrtQqKimdLpGARBCQJ4Qgm3ASSkDvZQHZ/f8RZZmfneOaZc3ef1z/KZnZmdo7n+Tyf4/1xhEKhEBgMBoPBYDAYCYnT6hNgMBgMBoPBYFgHMwYZDAaDwWAwEhhmDDIYDAaDwWAkMMwYZDAYDAaDwUhgmDHIYDAYDAaDkcAwY5DBYDAYDAYjgWHGIIPBYDAYDEYCM8DqE7AbW7duRSgUQnJystWnwmAwGAwGg0HFyZMn4XA4MGnSJMVtmWdQQCgUghk63KFQCL29vaYci2EP2D1PPNg9TzzYPU887HrP1dgzzDMogPMIjh8/3tDjdHV1oaamBvn5+fB4PIYei2EP2D1PPNg9TzzYPU887HrPKysribdlnkEGg8FgMBiMBIYZgwwGg8FgMBgJDDMGGQwGg8FgMBIYZgwyGAwGg8FgJDDMGGQwGAwGg8FIYJgxyGAwGAwGg5HAMGOQwWAwGAwGI4FhxiCDwWAwGAxGAsOMQQaDwWAwGIwEhhmDDAaDwWAwGAkMMwYZDAaDwWAwEhhmDDIYDAaDwWAkMAOsPgEGg8FgMBj2oS8Ywo6vmtDc1gN/RgqKxmQhyemw+rQYBsKMQQaDwWAwGACADdsP47lVlWhq7Ql/luVLwfXzx+OcCcMsPDOGkbAwMYPBYDAYDGzYfhi//9umCEMQAJpae/D7v23Chu2HLTozhtEwY5DBYDAYjASnLxjCc6sqZbd5/q0q9AVDJp0Rw0yYMchgMBgMRoKz46umKI+gkMaWbuz4qsmkM2KYCTMGGQwGg8FIcJrb5A1BtdsxYgtmDDIYDAaDkeD4M1J03Y4RW7BqYgaDwWCohsmPxBdFY7KQ5UuRDRVnZ6aiaEyWiWfFMAtmDDIYDAZDFUx+JP5Icjpw/fzx+P3fNkluc928EmbwxyksTMxgMBgMYpj8iDR9wRCq65pRWd+F6rrmmKu8PWfCMNxxzVRk+SJDwdmZqbjjmqnM0I9jmGeQwWAwGESQyo9MLxmacB4kobf0jQ3NyPJVx5y39JwJwzC9ZChLAUgwmGeQwWAwGEQw+RFx4s1bmuR0YHx+NsrLRmB8fjYzBBMAZgwyGAwGgwgmPxINE2tmxAPMGGQwGAwGEUx+JBrmLWXEA8wYZDAYDAYRnPyIHIkmP8K8pYx4gBmDDAaDwSCCkx+RI9HkR5i3VJ6+YAiVtY2o2HIQlbWNLFxuU1g1MYPBYDCI4eRHhDqD2ZmpuG5eSUxVzuoBE2uWhulRxg7MGGQwGAyGKpj8yGmYWLM4XIW1EK7CmukW2gsWJmYwGAyGapj8yGmYWHMkrMI69mCeQQaDwWAwNMJ5S7fUHEL1zjoUj8tFWeHwhDSS1VRYj8/PNumsGHIwY5DBYDAYDB1IcjpQnOuHs6cBhbn+hDQEAVZhHYuwMDGDwWAwGAzdYBXWsQczBhkMBoPBYOhGIulR9gVDqK5rRmV9F6rrmmM2D5KFiRkMBoPBYOhGolRYC6Vz3tjQjCxfdUxK5zDPIIPBYDAYDF2J9wprTjpHWCjDSeds2H7YojOjg3kGGQwGg8Fg6E686lGSSudMLxkaM7+VGYMMBoPBYDAMgdOjjCfiUTqHhYkZDAaDwWAwCIlH6RxmDDIYDAaDwWAQEo/SOcwYZDAYDAaDwSAkHqVzmDHIYDAYDAaDQQgnnSNHrEnnMGOQwWAwGIwEpC8YQmVtIyq2HERlbaOiYLLa7eOZeJPOYdXEDAaDwWAkGELBZA
DI8qVICiar3T4R4KRzttQcQvXOOhSPy0VZ4fCY8ghyMM8gg8FgMBgJhFrB5HgTWNaTJKcDxbl+jB/tQXGuX9EQtKt3lXkGGQwGg8FIENQKJsejwLKecL2Jq+u7EExpRllhquR1sLN3lRmDDAaDwWAkCGoFk+0isNwXDNmuk4ma3sScd1UI5121Os+QGYMMBoPBYMQpQiOqsaWb6HucYDKpcPKXe44bZqjZ0aOmxriLBe8qMwYZDAaDwYhDxIyojDQX0Xc5wWRS4eR/frw7/P96Gmp29KiRGHfPraqEJzUZre0BtLQHbOFdlYMZgwwGg8FgxAik4VIpI6qts1fxGHzBZE5gWcmY4aOXoWZXjxpJ6LyptQe/fmaDqv1a2b7OcmOwpaUFy5Ytw9q1a9HR0YGCggL8/Oc/x5QpUwAA1157LTZsiLyg06ZNw4oVKwAAgUAADz30EN5//3309PTg/PPPx1133QW/32/6b2EwGAwGwyhIw6UkRpQcfMFkTmBZzLBUQquhZpd8RSFGGW1Wtq+z3Bi87bbbcPz4cSxbtgxZWVlYsWIFFi1ahJUrV2LMmDHYtWsX7r33XlxwwQXh7yQnJ4f//95778XmzZvx5JNPwuVy4Z577sEtt9yCl19+2Yqfw2AwGAwbYscCBDWoCZeSGFEAkJGWjLbOk+F/Z2em4rp5JVHePE5gWWiIKtHY0o2q2kY4nQ6q605qdJntUTPCaLO6fZ2lxuC+ffuwfv16vPLKK5g8eTIA4Ne//jXWrVuH1atX46qrrkJTUxMmTpyIQYMGRX2/oaEBq1atwjPPPBP2JC5btgwXX3wxtm7dikmTJpn6exgMBoNhP+xYgKAGteFSUuNo8bzxyPalEhlqnMAyZ1Dvb2jHa7w8QSkeWrEJHV2nDU41153U6DLbo0YTOlfC6vZ1lopODxw4EM899xzGjz/d48/hcMDhcKCtrQ27du2Cw+FAbm6u6Pe/+OILAMBZZ50V/iw3Nxc5OTnYtEm9S5vBYDAY8UU8CCarCZcC5MZRti8V4/OzUV42AuPzsxWNkSSnI7x96dhoB40YfEMQUHfdOaNLDis8aiS9iUmxS/s6Sz2DGRkZKC8vj/jsgw8+wL59+3DnnXdi9+7d8Hq9uO+++7B+/Xp4PB5cfPHF+MlPfgKXy4WGhgYMHDgQbrc7Yh+DBw/G0aNHqc8rFAqhq6uL+vskdHd3R/yXEf+we554sHtORjAYQs2+E2hpDyDT60bhqIFw6uAlCQZDeHbldtltnlu1HRPG+HQ5HmDMPT/a2Eq8Xd4wD3KHpMKf4UZzW0By2yyfG7lDUkXnOpL7QXIMOUiv+zWzz8SyV6Xv4cKLxyLQY/77VZqfidu+PwF/fW+X6muwcPaZyEx3RVxbI2yOUCgEh4PsubY8Z5DPli1bcMcdd+Ciiy7CrFmzcOeddyIQCGDChAm49tprUVNTgz/84Q84fPgw/vCHP6C7uxsuV3SZvNvtRiBA94ACwMmTJ1FTU6PlpxBTX19vynEY9oHd88SD3XNpdhzoxvtftKCtqy/8WYYnCRdPzkTRyFRN+65r6FGcqJtaA3j/023IzdE31KjnPW9tJgtHtjYfRU1NCwDggonpeG2d9G//5oR07Nq1M+pzNfdD6RhykF73DCfw3XOzJM8pw9mMmppmqnPQSoYTWHJJNvYdD6CjOwhPigNvfXYCbd1B6e94kjDa1wmnswvoAXbtajD0HMVsJDFsYwx+/PHH+MUvfoGysjI8+uijAID77rsPS5cuhc/nAwCceeaZSE5Oxs9+9jP88pe/REpKCnp7o8vkA4EAUlPpB5Hk5GTk5+dTf5+E7u5u1NfXY/To0ZrOlRE7sHueeLB7Ls/G6ga8ti7a69PW1YfX1jXhtu9PwPTiHOr9N588AqBRcTuffwgKC4cS71fOc2bEPS8oCGH1pnWKnr6LZ5aGz6Mt2ACgSXL7EcOHo7Aw8tqqvR+FhcCI4Q1R3rH01AHo6D6l+LtIr3
thIbDgAmO8x3owhnfPBw1uk/VkLp5bjGINz7Qaamtribe1hTH48ssv43e/+x0uvvhiPPzww2FLdsCAAWFDkGPs2LE
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHnCAYAAAAsITxhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACDfElEQVR4nO3deXwU9f0/8Nduzs1NNgIBhIQgkBOIBBQV8CgVb7T2+OJRBKWX/fVrLdaqtdZ69Ot9K+J91KNcxVtbQcslhxAgEQgkCITDJOQid3Z+f8RZdjd7zLUzszuv5+PRR2Uzu/PZnd2Z93w+78/7YxMEQQARERERWY7d6AYQERERkTEYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrKoWKMbYDZff/01BEFAXFyc0U0hIiIiUqS7uxs2mw0TJkwIuh17BH0IgoBw19gWBAFdXV1h3w+ZB4+5NfG4Ww+PufWY9ZhLjWfYI+hD7AksLi4O2z7a2tpQWVmJUaNGISkpKWz7IfPgMbcmHnfr4TG3HrMe823btknajj2CRERERBbFQJCIiIjIokwVCD733HO4+uqrvR6rrKzEVVddhfHjx+Occ87Bq6++6vV3l8uFxx9/HGeddRbGjx+P66+/Hvv379ez2UREREQRyTSB4BtvvIFHH33U67Fjx45hzpw5GD58OBYvXoxf//rXePDBB7F48WL3Nk8//TTefPNN3H333Xjrrbfgcrkwb948dHV16fwOiIiIiCKL4ZNFjhw5gjvvvBPr169HTk6O19/eeecdxMXF4a9//StiY2ORl5eHffv2YeHChbjiiivQ1dWFF198ETfffDOmT58OAHjkkUdw1lln4ZNPPsFFF12k/xsiIiIiihCG9wju2LEDcXFx+Ne//oVx48Z5/W3jxo2YNGkSYmNPxKunnXYaampqUFdXh2+++QbHjx/H6aef7v57WloaCgoKsGHDBt3eAxEREVEkMrxH8JxzzsE555zj92+HDx/G6NGjvR4bOHAgAODQoUM4fPgwACA7O7vfNuLflBAEAW1tbYqfH0p7e7vX/1P04zG3Jh536+Extx6zHnNBEGCz2UJuZ3ggGExHRwfi4+O9HktISAAAdHZ2uj90f9s0NTUp3m93dzcqKysVP1+qmpqasO+DzIXH3Jp43K2Hx9x6zHjMfeMjf0wdCCYmJvab9NHZ2QkASEpKQmJiIgCgq6vL/d/iNg6HQ/F+4+LiMGrUKMXPD6W9vR01NTXIyclR1U6KHDzm1sTjbj085tZj1mNeVVUlaTtTB4KDBw/G0aNHvR4T/z1o0CD09PS4Hxs+fLjXNmPGjFG8X5vNpkt1cIfDYaoq5BR+PObWxONuPTzm1mO2Yy5lWBgwwWSRYMrKyrBp0yb09va6H1u3bh1yc3PhdDoxduxYpKSkYP369e6/Nzc3o6KiAmVlZUY0mYiIiChimDoQvOKKK9Da2orbbrsNVVVVWLJkCV5++WXMnz8fQN/Y91VXXYUHH3wQ//73v/HNN9/gf//3fzF48GDMmDHD4NYTERERmZuph4adTicWLVqEe+65B7NmzcJJJ52EBQsWYNasWe5tfvvb36Knpwe33347Ojo6UFZWhhdeeAFxcXEGtpyiQa9LQMXeejQ0dyAzLREFI52IsUvraiciIooEpgoE77///n6PlZSU4O233w74nJiYGPzhD3/AH/7wh3A2jSxmTXktFi7bhvqmDvdjzvRE3HBZMaaUDDGwZURERNox9dAwkRHWlNfivlc2eAWBAFDf1IH7XtmANeW1BrWMiIhIWwwEiTz0ugQsXLYt6DbPL9+OXpegU4uIiIjCh4EgkYeKvfX9egJ91TW2o2JvvU4tIiIiCh8GgkQeGpqDB4FytyMiIjIzBoJEHjLTEkNvJGM7IiIiM2MgSOShYKQTzvTgQV5WhgMFI506tYiIiCh8GAgSeYix23DDZcVBt7
n+0iLWEyQioqjAQJDIx5SSIbj12rJ+PYNZGQ7cem0Z6wgSEVHUMFVBaSKzmFIyBJOLsrmyCBERRTUGgkQBxNhtKB6VZXQziIiIwoZDw0REREQWxUCQiIiIyKIYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrIoBoJEREREFsVAkIiIiMiiGAgSERERWRQDQSIiIiKLYiBIREREZFEMBImIiIgsioEgERERkUUxECQiIiKyKAaCRERERBbFQJCIiIjIohgIEhEREVkUA0EiIiIii2IgSERERGRRDASJiIiILIqBIBEREZFFMRAkIiIisigGgkREREQWxUCQiIiIyKIYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrIoBoJEREREFsVAkIiIiMiiGAgSERERWRQDQSIiIiKLYiBIREREZFEMBImIiIgsioEgERERkUUxECQiIiKyKAaCRERERBbFQJCIiIjIohgIEhEREVkUA0EiIiIii2IgSERERGRRDASJiIiILIqBIBEREZFFMRAkIiIisigGgkREREQWxUCQiIiIyKIYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrIoBoJEREREFsVAkIiIiMiiGAgSERERWRQDQSIiIiKLYiBIREREZFEMBImIiIgsioEgERERkUUxECQiIiKyKAaCRERERBbFQJCIiIjIoiIiEOzp6cFjjz2Gs88+GxMmTMDs2bOxZcsW998rKytx1VVXYfz48TjnnHPw6quvGtdYIiIioggREYHgM888g3fffRd33303li1bhtzcXMybNw9Hjx7FsWPHMGfOHAwfPhyLFy/Gr3/9azz44INYvHix0c0mIiIiMrVYoxsgxWeffYaLLroIZ555JgDgj3/8I959911s2bIF1dXViIuLw1//+lfExsYiLy8P+/btw8KFC3HFFVcY3HIiIiIi84qIQNDpdOLzzz/HVVddhezsbLz99tuIj4/H2LFj8e6772LSpEmIjT3xVk477TQ899xzqKurQ1ZWluz9CYKAtrY2Ld+Cl/b2dq//p+jHY25NPO7Ww2NuPWY95oIgwGazhdwuIgLB2267Df/v//0/nHvuuYiJiYHdbscTTzyB4cOH4/Dhwxg9erTX9gMHDgQAHDp0SFEg2N3djcrKSk3aHkxNTU3Y90HmwmNuTTzu1sNjbj1mPObx8fEht4mIQLCqqgqpqal46qmnMGjQILz77ru4+eab8frrr6Ojo6PfG01ISAAAdHZ2KtpfXFwcRo0apbrdgbS3t6OmpgY5OTlwOBxh2w+ZB4+5NfG4Ww+PufWY9ZhXVVVJ2s70geChQ4fw+9//Hi+//DImTpwIACguLkZVVRWeeOIJJCYmoqury+s5YgCYlJSkaJ82m03xc+VwOBy67IfMg8fcmnjcrYfH3HrMdsylDAsDETBreOvWreju7kZxcbHX4+PGjcO+ffswePBgHD161Otv4r8HDRqkWzuJiIiIIo3pA8HBgwcDAHbu3On1+K5du5CTk4OysjJs2rQJvb297r+tW7cOubm5cDqduraVKNx6XQK2VdVh1eYD2FZVh16XYHSTiIgogpl+aLikpASnnnoqbrnlFtx5550YPHgwli1bhrVr1+If//gHhg0bhkWLFuG2227DvHnzUF5ejpdffhl33XWX0U0n0tSa8losXLYN9U0d7sec6Ym44bJiTCkZYmDLiIgoUpm+R9But+OZZ57BaaedhltvvRWXX3451q1bh5dffhnjxo2D0+nEokWLUF1djVmzZuHJJ5/EggULMGvWLKObTqSZNeW1uO+VDV5BIADUN3Xgvlc2YE15rUEtIyKiSGb6HkEASE9Px5133ok777zT799LSkrw9ttv69wqIn30ugQsXLYt6DbPL9+OyUXZiLFLSw4mIiICIqBHkMjqKvbW9+
sJ9FXX2I6KvfU6tYiIiKIFA0Eik2toDh4Eyt2OiIhIxECQyOQy0xI13Y6IiEjEQJDI5ApGOuFMDx7kZWU4UDCS5ZK
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAn4AAAHnCAYAAAAmUVB2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6r0lEQVR4nO3deVxWZf7/8ffNDiqalmKio2kIKqUmqOWajjlmm1OPptLKRFssWhTLyUzLSRtxX9ImtdLMchnNxso2y8kVs9zQcoGRFDFRUVmF8/vDr/cvhFvhcOMNXq/n4+FDuc51rvM55+KGt/e5zzkOy7IsAQAA4Irn5ekCAAAAcHkQ/AAAAAxB8AMAADAEwQ8AAMAQBD8AAABDEPwAAAAMQfADAAAwhI+nC/C0rVu3yrIs+fr6eroUAAAAW/Ly8uRwONSqVauL9jP+HT/LslTe97C2LEu5ubnlvh1UHMy5mZh38zDn5qmoc17SPGP8O37n3+mLjIwst21kZmYqMTFRTZo0UVBQULltBxUHc24m5t08zLl5Kuqcb9++vUT9jH/HDwAAwBQEPwAAAEMQ/AAAAAxB8AMAADAEwQ8AAMAQBD8AAABDEPwAAAAMQfADAAAwBMEPAADAEAQ/AAAAQxD8AAAADEHwAwAAMISPpwsAKor8Aku79h9Teka2agYHqNl1teTt5XBbfzvbdtWee7ZAn/1wQIePnVHdWlX0l1sayc/Hvf+Puxz7Z2ed0o51sWPlallWbr7eXblDh34/o2uvrqJH72ihQD/vi46VX2Bp54F07UzKVEFAulpHBMrby1HqsewcK1fbsPP9cyozT6/PWa+jx7N0zVWBemVAe1UL8nW5jeMZORo6dY0yTucpuKqv4mO76Kpgf5ftknQ47YwGx3+tvHxLvt4OzRjaTXVrV9HR9Cw9M+FrZefkK8DfW9OGdNM1NQN18nSu/j5zrY5n5OiqYH+98VRHVa/q57LWi+3HsRPZem7SNzqTdVZVAn00+flbVatGgK1tHDuRrdiJa3QmM09Vgo5o6gvnxnK1H66OyZnss5q0MEGpxzIVUitIzz/YRlUCzv16dlWXq3VczdOl9qO4Y+KqXlf9j/yeqafjv1ZOXoH8fb00fWg31bk6SJJc1uuqrtJ+H0oq9ffPnr3pGvrWWufrKP7JjmrapKbL/Sv6vfub83vX1bFytW1PcliWZXm0gj+YPXu2/vvf/2r+/Pku+xw/flxjxozR999/L4fDodtvv13Dhg1TYGCgrW1u375dkhQZGWlr/ZLIzMxUYmKiIiIiFBQUVG7bgX3rth3S28u369jJbGdbreoBGnR3pG6+4dpS9y/NnLsaq3Orevpu629F2puEVtfmXUdU8IdXrpdDurtzY/W/o4Wd3S/1/pX3WKU9Jq7Gmrdyh5Z/t6/YYyWp2GV1agbp8LHMImPVrRWkI+mZxY7V9E81i623ZrC/fj14ssRjRTWro70pJ0t1rMbM3aCNO48Uab++fnWlZ+SU6vtn/fbDxe67n6+XcvMKirR7eUkFRZtdCgrwUVbOWbnjt47DoWLHqVvr3OutuP1w5zaOZWQXe0zctY3r61fX0eNZOnE6t8gyV8c9KMBHmdlni7S7mj937ocrXl5S43rVi30d+Hg7dDa/6M672j9X+9G2eR1t2Z1W7Fju4ufrpbyzBW753q1R1U/zR/+l7ANdoKR5psIEvw8++EBjxoxRmzZtLhr8+vXrp6ysLI0ePVoZGRl6+eWXFRUVpTfffNPWdgl+WLftkMa+t9nl8uGPRBX6pVuS/i2b1CjRnF9qrNLq06Xs4a+0x8PdY9k9JheONW/lDi1bs6/U41RUxR0rV6EPQMVWHuGvpHnG45/xO3LkiJ544gnFx8erYcOGF+27detWbdq0SW+++aaaN2+u9u3b67XXXtOKFSt05Ag//FB6+QWW3l6+/aJ9/rVih/L/7+2RkvYvKLj0/6dKMlZpLf9un3LP2v/fe2
mPh7vHKssx+eNYuWcLtPy7Kyf0SUWPVVZuPqEPqKROnM7VyWLezb0cPP4Zv507d8rX11effPKJZsyYod9++81l34SEBF1zzTVq3Lixsy06OloOh0NbtmxRr169bNVgWZYyM8t+WsCVrKysQn+j4th5IL3QqbDi/H4iSz8m/qbmjWqWuP9Pv6QqUBef85KMVVoFlrRizR7dfvOfbK1f2uPh7rHKckz+ONZ/1iWrBNm0UrnwWM1ZmejhigCUxUvTv9OE2FvcNp5lWXI4Lv05bI8Hv1tvvVW33nprifoeOXJEdevWLdTm5+enGjVq6PDhw7ZryMvLU2Ji+f8QTUpKKvdtoHR2JpUs8O/cfUBe2UdK3H/vgUOKbBh00Tkv6ViltXvvb7ruKntjl/Z4uHussh6T82Pt3nu8TONUVH88Vvv+d9TD1QAoi2MnstyePfz8Ln3hiMeDX2lkZWUVu1P+/v7KycmxPa6vr6+aNGlSltIuKisrS0lJSWrYsKHti1BQPgoC0rV0Xfol+zUPb6SIRjVL3L9Jo2sl68RF57ykY5VWeJN6ioiw945faY+Hu8cq6zE5P9b+48na9OsvtsepqP54rBrvlfalpni4IgB21aoRqIiICLeNt3fv3hL1q1TBLyAgQLm5Rc+J5+TklOmiCYfDcVkuuggMDOTijgqmdUSgalXfedHTi1fXCFTriHry9nKUuH/LsBDt2XPionNekrFKy8sh3dWlqe1bu5T2eLh7rLIckz+OdVeXplrw+S9X1OneC49VzD0ttXoTwQ+orMY93VlBQe67tUtJTvNKFeDijtIICQlRWlpaobbc3FydOHFCtWvX9lBVqMy8vRwadPfFr4AaeFcL5y/bkvb3KsH97koyVmnd3blxme7nV9rj4e6xynJM/jiWn4+X85YtV4oLj1Wgn7faNq/jwYoA2FWjqp/H7udXqYJfVFSUUlNTlZyc7GzbtGmTJOmmm27yVFmo5G6+4VoNfyRKtaoHFGq/ukZgsbfQKG1/u9vu06Vxse1tm9fRhbnLy+GeW7lcqiZ37p+rsewck+LG6n9HC/Xp0tjlsXK17Px94C5Ut1aQy7Fc1Xt9/eqlGqtt8zqlOlYjHmvnMvxdX796qb9/XO27n2/xvyq8SvkbJCjARyV8U+KSXI1Tt1aQy/1w5zZcHRN3beP6+tVVw0UwcHXcgwKKP4nnqlZ37ocrXl5y+Trw8S5+513tn6ta2zav43Isd/Hz9XLb92553cevpCrMffwk6aWXXtJvv/3mvI9ffn6+0tPTVa1aNQUEBMiyLD344IPKycnRqFGjlJmZqb///e9q27atxo4da2ub3McP57nzyR2lnXOe3FHydSrykzt+TPxNO3cfUPPwRs7Tsjy540p/csc3//fkDl+e3GHckzscFerJHZXuBs5S0eCXkpKibt26aezYserTp48k6dixYxo9erTWrl0rf39/9ezZU8OHD5e/v7+tbRL8UB6YczMx7+Zhzs1TUee8pHmmQl3cMW7cuEJfh4aGas+ePYXaatWqpalTp17OsgAAAK4IleozfgAAALCP4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAA
BgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACG8Hj
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHnCAYAAAAsITxhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACYsklEQVR4nO3deXgUVdo28Ls7pJN0VpIgEJQlIGHJBrLJIogIboDoyIwLOgiCuMw3OowOIzPz6jguI6KOuIDRUVRUVGRRX0F8FdEIsghJAIGwCYTFTsi+kq7vD+y216pT1dVb+v5d11wj6erq6q7tqeec8xyDJEkSiIiIiCjiGIO9AUREREQUHAwEiYiIiCIUA0EiIiKiCMVAkIiIiChCMRAkIiIiilAMBImIiIgiFANBIiIiogjVLtgbEGp++OEHSJKE6OjoYG8KERERkSYtLS0wGAwYMGCA7HLMCLqQJAlaamxLkoTm5mZN76XQxf3aNnG/tk3cr20X9616ovEMM4IubJnAnJwcVe+rr6/Hnj170KtXL5jNZn9sGgUB92vbxP3aNnG/tl3ct+oVFxcLLceMIBEREVGEYiBIREREFKFCKhBcvHgxpk2b5vS3PXv24JZbbkF+fj7Gjh2LpUuXOr1utVrxn//8B6NGjUJ+fj7uuOMOHD16NJCbTURERBSWQiYQfPvtt/Hss886/e3MmTOYPn06unbtig8//BB33303FixYgA8//NC+zIsvvohly5bhn//8J959911YrVbMnDkTzc3NAf4GREREROEl6INFTp06hX/84x/YvHkzunfv7vTa8uXLER0djUceeQTt2rVDz549ceTIESxZsgTXX389mpub8dprr2Hu3LkYM2YMAOCZZ57BqFGjsG7dOlxzzTWB/0JEREREYSLoGcFdu3YhOjoaq1evRl5entNrW7duxZAhQ9Cu3a/x6rBhw3D48GFYLBb8+OOPqKurw8UXX2x/PSkpCf369cOWLVsC9h2IiIiIwlHQM4Jjx47F2LFjPb528uRJ9O7d2+lv5513HgDgxIkTOHnyJACgc+fObsvYXtNCkiTU19erek9DQ4PT/1PbwP3aNnG/tk3cr20X9616kiTBYDAoLhf0QFBOY2MjTCaT099iYmIAAE1NTfYDwtMyVVVVmj+3paUFe/bs0fTew4cPa/5cCl3cr20T92vbxP3adnHfquMaH3kS0oFgbGys26CPpqYmAIDZbEZsbCwAoLm52f7ftmXi4uI0f250dDR69eql6j0NDQ04fPgwunfv7tNnU2jhfm2buF/bJu7Xtov7Vr3S0lKh5UI6EOzUqRNOnz7t9Dfbvzt27IizZ8/a/9a1a1enZbKysjR/rsFg0Fy5PC4ujlXP2yDu17aJ+7Vt4n5tu7hvxYk0CwMhMFhEzuDBg7Ft2za0trba/7Zp0yb06NEDaWlp6NOnDxISErB582b769XV1di9ezcGDx4cjE0mIiIiChshHQhef/31qK2txUMPPYTS0lKsWLECr7/+OmbPng3gXNv3LbfcggULFuCLL77Ajz/+iPvuuw+dOnXC+PHjg7z1RERERKEtpJuG09LSUFBQgH/961+YMmUKOnTogAceeABTpkyxL/OHP/wBZ8+exfz589HY2IjBgwfj1VdfRXR0dBC33H9arRJ2HyxHRXUjUpNi0S8zDVFGsfQvERERkaOQCgSfeOIJt7/l5ubivffe8/qeqKgo/PnPf8af//xnf25aSCgsKsOSlcUor2q0/y0tORazrs3B8NyMIG4ZERERhaOQbhqmXxUWleHxN7Y4BYEAUF7ViMff2ILCorIgbRkRERGFKwaCYaDVKmHJymLZZV5ZVYJWqxSgLSIiIqK2gIFgGNh9sNwtE+jKUtmA3QfLA7RFRERE1BYwEAwDFdXyQaDa5YiIiIgABoJhITUpVnkhFcsRERERAQwEw0K/zDSkJcsHeekpceiXmRagLSIiIqK2gIFgGIgyGj
Dr2hzZZe6YnM16gkRERKQKA8EwMTw3A/NuG+yWGUxPicO82wazjiARERGpFlIFpUne8NwMDM3uzJlFiIiISBcMBMNMlNGAnF7pwd4MIiIiagPYNExEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFqLAIBM+ePYvnnnsOl156KQYMGICbb74ZO3bssL++Z88e3HLLLcjPz8fYsWOxdOnS4G0sERERUZgIi0DwpZdewvvvv49//vOfWLlyJXr06IGZM2fi9OnTOHPmDKZPn46uXbviww8/xN13340FCxbgww8/DPZmExEREYW0dsHeABHr16/HNddcg5EjRwIA/vKXv+D999/Hjh07cOjQIURHR+ORRx5Bu3bt0LNnTxw5cgRLlizB9ddfH+QtJyIiIgpdYREIpqWl4csvv8Qtt9yCzp0747333oPJZEKfPn3w/vvvY8iQIWjX7tevMmzYMCxevBgWiwXp6emqP0+SJNTX16t6T0NDg9P/U9vA/do2cb+2TdyvbRf3rXqSJMFgMCguFxaB4EMPPYT/9//+Hy677DJERUXBaDTi+eefR9euXXHy5En07t3bafnzzjsPAHDixAlNgWBLSwv27NmjaVsPHz6s6X0U2rhf2ybu17aJ+7Xt4r5Vx2QyKS4TFoFgaWkpEhMT8cILL6Bjx454//33MXfuXLz11ltobGx0+6IxMTEAgKamJk2fFx0djV69eql6T0NDAw4fPozu3bsjLi5O0+dS6OF+bZu4X9sm7te2i/tWvdLSUqHlQj4QPHHiBP70pz/h9ddfx6BBgwAAOTk5KC0txfPPP4/Y2Fg0Nzc7vccWAJrNZk2faTAYNL83Li5O83spdHG/tk3cr20T92vbxX0rTqRZGAiDUcM7d+5ES0sLcnJynP6el5eHI0eOoFOnTjh9+rTTa7Z/d+zYMWDbSURERBRuQj4Q7NSpEwBg7969Tn/ft28funfvjsGDB2Pbtm1obW21v7Zp0yb06NEDaWlpAd3WcNRqlVBcasGG7cdQXGpBq1UK9iYRERFRgIR803Bubi4uuugiPPjgg/jHP/6BTp06YeXKlfjuu+/wzjvv4Pzzz0dBQQEeeughzJw5E0VFRXj99dfx8MMPB3vTQ15hURmWrCxGeVWj/W9pybGYdW0OhudmBHHLiIiIKBBCPiNoNBrx0ksvYdiwYZg3bx6uu+46bNq0Ca+//jry8vKQlpaGgoICHDp0CFOmTMGiRYvwwAMPYMqUKcHe9JBWWFSGx9/Y4hQEAkB5VSMef2MLCovKgrRlREREFCghnxEEgOTkZPzjH//AP/7xD4+v5+bm4r333gvwVoWvVquEJSuLZZd5ZVUJhmZ3Rp
RRrLMpERERhZ+QzwiS/nYfLHfLBLqyVDZg98HyAG0RERERBQMDwQhUUS0fBKpdjoiIiMITA8EIlJoUq+tyREREFJ4
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from typing import List, Any\n",
"\n",
"def draw_data_2d(\n",
"    df: pd.DataFrame,\n",
"    col1: int,\n",
"    col2: int,\n",
"    y: List | None = None,\n",
"    classes: List | None = None,\n",
"    subplot: Any | None = None,\n",
"):\n",
"    \"\"\"Scatter-plot one column of ``df`` against another, both chosen by position.\n",
"\n",
"    Args:\n",
"        df: Source frame; columns are looked up positionally via ``df.columns``.\n",
"        col1: Position of the column drawn on the x axis.\n",
"        col2: Position of the column drawn on the y axis.\n",
"        y: Optional values passed to ``scatter`` as ``c`` (point colours).\n",
"        classes: Optional legend labels; a legend is drawn only when given.\n",
"        subplot: Optional axes to draw on; a new figure is created when omitted.\n",
"    \"\"\"\n",
"    # Reuse the caller's axes when given; otherwise open a fresh figure.\n",
"    axes = plt.subplots()[1] if subplot is None else subplot\n",
"    x_name, y_name = df.columns[col1], df.columns[col2]\n",
"    points = axes.scatter(df[x_name], df[y_name], c=y)\n",
"    axes.set(xlabel=x_name, ylabel=y_name)\n",
"    if classes is None:\n",
"        return\n",
"    axes.legend(\n",
"        points.legend_elements()[0], classes, loc=\"lower right\", title=\"Classes\"\n",
"    )\n",
"\n",
"# Features explored visually; df_temp is the working subset with only these columns.\n",
"columns = ['age', 'avg_glucose_level', 'bmi', 'hypertension']\n",
"df_temp = df[columns]\n",
"\n",
"sns.set_theme(style=\"whitegrid\")\n",
"print(\"ЗАВИСИМОСТЬ ЗНАЧЕНИЙ ДРУГ ОТ ДРУГА\")\n",
"# Draw all four scatter plots on ONE 2x2 figure. Opening a new 16x12 figure per\n",
"# plot left each figure with a single subplot and three empty grid cells.\n",
"fig, axes = plt.subplots(2, 2, figsize=(16, 12))\n",
"# (x, y) column positions within `columns`:\n",
"# age vs avg_glucose_level, age vs bmi, age vs hypertension, avg_glucose_level vs bmi\n",
"column_pairs = [(0, 1), (0, 2), (0, 3), (1, 2)]\n",
"for ax, (x_col, y_col) in zip(axes.flat, column_pairs):\n",
"    draw_data_2d(df_temp, x_col, y_col, subplot=ax)\n",
"fig.tight_layout()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Видно, что в раннем возрасте (грубо говоря, до полового созревания) индекс массы тела в основном ниже, а затем распределяется от нормального до 40+ (то есть вплоть до ожирения, с учётом выбросов).\n",
"\n",
"Гипертония всё-таки чаще встречается после 20 лет.\n",
"\n",
"Чем ниже индекс массы тела, тем ниже и ближе к норме уровень глюкозы (это объяснимо: люди с избыточным весом чаще болеют диабетом)."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Продолжим подготовку данных и стандартизуем числовые признаки:"
]
},
{
"cell_type": "code",
"execution_count": 145,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>hypertension</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9046</th>\n",
" <td>0.981658</td>\n",
" <td>2.548208</td>\n",
" <td>0.960777</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51676</th>\n",
" <td>0.718652</td>\n",
" <td>1.986183</td>\n",
" <td>-0.115701</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31112</th>\n",
" <td>1.551505</td>\n",
" <td>-0.057528</td>\n",
" <td>0.435353</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60182</th>\n",
" <td>0.192639</td>\n",
" <td>1.328647</td>\n",
" <td>0.678842</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1665</th>\n",
" <td>1.507670</td>\n",
" <td>1.389985</td>\n",
" <td>-0.653940</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34383</th>\n",
" <td>0.061136</td>\n",
" <td>-0.432990</td>\n",
" <td>-0.423266</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8646</th>\n",
" <td>0.411811</td>\n",
" <td>-0.236875</td>\n",
" <td>-0.307929</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46653</th>\n",
" <td>1.595339</td>\n",
" <td>-1.047440</td>\n",
" <td>-0.128516</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>-1.297729</td>\n",
" <td>-0.158769</td>\n",
" <td>-0.884614</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61676</th>\n",
" <td>1.420001</td>\n",
" <td>-0.854297</td>\n",
" <td>-0.512973</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2500 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" age avg_glucose_level bmi hypertension\n",
"id \n",
"9046 0.981658 2.548208 0.960777 0\n",
"51676 0.718652 1.986183 -0.115701 0\n",
"31112 1.551505 -0.057528 0.435353 0\n",
"60182 0.192639 1.328647 0.678842 0\n",
"1665 1.507670 1.389985 -0.653940 1\n",
"... ... ... ... ...\n",
"34383 0.061136 -0.432990 -0.423266 0\n",
"8646 0.411811 -0.236875 -0.307929 0\n",
"46653 1.595339 -1.047440 -0.128516 1\n",
"1099 -1.297729 -0.158769 -0.884614 0\n",
"61676 1.420001 -0.854297 -0.512973 0\n",
"\n",
"[2500 rows x 4 columns]"
]
},
"execution_count": 145,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"scaler = StandardScaler()\n",
"\n",
"columns_to_scale = df_temp.drop(columns=[\"hypertension\"]).columns\n",
"columns_to_keep = [\"hypertension\"]\n",
"data_scaled = scaler.fit_transform(df_temp[columns_to_scale])\n",
"df_scaled = pd.DataFrame(data_scaled, columns=columns_to_scale, index=df_temp.index)\n",
"df_scaled[columns_to_keep] = df_temp[columns_to_keep]\n",
"\n",
"df_scaled"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"самое время применить иерархический алгоритм кластеризации (когда мы создаем дерево кластеров, где каждый уровень - это объединение более мелких кластеров)"
]
},
{
"cell_type": "code",
"execution_count": 146,
"metadata": {},
"outputs": [],
"source": [
"# linkage_matrix = linkage(data_scaled, method='ward')\n",
"# plt.figure(figsize=(10,10))\n",
"# dendrogram(linkage_matrix)\n",
"# plt.title('Дендрограмма')\n",
"# plt.ylabel('')\n",
"# plt.xlabel('')\n",
"# plt.show()\n",
"\n",
"\n",
"import numpy as np\n",
"from sklearn import cluster\n",
"from scipy.cluster import hierarchy\n",
"\n",
"def run_agglomerative(\n",
" df: pd.DataFrame, num_clusters: int | None = 2\n",
") -> cluster.AgglomerativeClustering:\n",
" agglomerative = cluster.AgglomerativeClustering(\n",
" n_clusters=num_clusters,\n",
" compute_distances=True,\n",
" )\n",
" return agglomerative.fit(df)\n",
"\n",
"\n",
"def get_linkage_matrix(model: cluster.AgglomerativeClustering) -> np.ndarray:\n",
" counts = np.zeros(model.children_.shape[0]) # type: ignore\n",
" n_samples = len(model.labels_)\n",
" for i, merge in enumerate(model.children_): # type: ignore\n",
" current_count = 0\n",
" for child_idx in merge:\n",
" if child_idx < n_samples:\n",
" current_count += 1\n",
" else:\n",
" current_count += counts[child_idx - n_samples]\n",
" counts[i] = current_count\n",
"\n",
" return np.column_stack([model.children_, model.distances_, counts]).astype(float)\n",
"\n",
"def draw_dendrogram(linkage_matrix: np.ndarray):\n",
" hierarchy.dendrogram(linkage_matrix, truncate_mode=\"level\", p=3)\n",
" plt.xticks(fontsize=10, rotation=45)\n",
" plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": 147,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAm8AAAHPCAYAAAAFwj37AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVG0lEQVR4nO3dd3hT9eIG8DddScsquyBIkdEyZAkFfshUEBEQLBsuGysgW4pcZJSNcguIiiDrAiJFC3gVleEV7wXZMmS0UKBltZQNpdn5/v7ozSGBUprkpMlp38/z+BgyTt6enHPy5kyVEEKAiIiIiBTBx9MBiIiIiCj3WN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhB/Bx58sGDB9G/f/9sH6tQoQJ+/fVXXL16FbNmzcLhw4cRFBSEbt26YdSoUfD19ZUlMBEREVFB5lB5q1+/Pvbu3Wt33/HjxzFq1CiMGDECRqMRQ4YMQWhoKDZt2oTLly9jypQp8PHxwejRo2UNTkRERFQQqVy5tmlmZiY6duyIxo0bY968efjxxx8xefJk7N27F8WKFQMAxMXF4eOPP8b+/fsREBDg0PCPHTsGIQT8/f2djUhERETk9YxGI1QqFerXr//c57q0z9uXX34JrVaLSZMmAQCOHDmCWrVqScUNAJo0aYKMjAycPXvW4eELIeBCt7QbjsFgkGVYcmCenHlTHm/KAjDP8zCPMrIAzPM8zKOMLIB8eRzpPA5tNrV1584drF27FhMmTEBwcDAAIC0tDSEhIXbPK1OmDAAgNTUVdevWdeg9/P39IYRAlSpVnI0JANBqtUhOTsYLL7yAwMBAl4YlB+ZRTh5vysI8zJNfsjAP8+SXLHLmSUpKgkqlytVznS5vGzduRJEiRdCzZ0/pPp1Oh6JFi9o9T61WAwD0er1T72M0Gp1aa5ed5ORkWYYjF+bJmTfl8aYsAPM8D/M8mzdlAZjneZjn2bwpCyBPntzuXuZ0edu2bRu6dOkCjUYj3afRaGAwGOyeZy1tQUFBTr2Pv78/qlat6mxMAI9bcWhoqFe1dObx/jzelIV5mCe/ZGEe5skvWeTMk5SUlOvnOlXeEhIScOXKFXTq1Mnu/pCQEJw7d87uvvT0dABA2bJlnXkrqFQqp4vfkwIDA2UblhyYJ2felMebsgDM8zzM82zelAVgnudhnmfzpiyA63lyu8kUcPKAhSNHjqBkyZIIDw+3u79Ro0Y4c+YMMjIypPsOHDiAQoUKPfVcIiIiInKcU+XtzJkzCAsLe+r+119/HaVLl8bYsWORkJCA3bt3IzY2FoMHD3b4NCFERERE9DSnytvNmzelI0xtqdVqrFy5EhaLBT169EBMTAz69OmDESNGuJqTiIiIiODkPm9fffXVMx+rVKkSVq9e7XQgIiIiIno2XpieiIiISEFY3oiIiIgUhOWNiIiISEFY3oiIiIgUhOWNiIiISEFY3oiIiIgUhOWNiIiISEGcvjA9kbcTQkBvMLs0DJ3BDIPJAp3BDB9fk0zJmId5CnaW/J5HHeDr0HUqiRzF8kb5khACkz7bi7PJd2Qa4nWZhiMX5skZ8zybN2UB8mOeGqElsOD9V1ngyG242ZTyJb3BLGNxIyLKvbPJd1xe60+UE655o3xv/Yz20AT4OvXaTK0WiYkJCAsLR1BgoMzJmId5CmaW/JpHZzDjbzN+kTkZ0dNY3ijf0wT4QqN2blK3mH0R4Ofj0jDkxDzMkx+yMA+Ra7jZlIiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiI
iIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFMSp8rZt2zZ06NABL7/8Mt566y38/PPP0mNXr15FVFQUGjRogFdffRWLFy+G2WyWLTARERFRQeZwefv+++8xZcoU9O3bF9u3b0fHjh0xfvx4HDt2DEajEUOGDAEAbNq0CTNmzMA333yDzz//XPbgRERERAWRnyNPFkJgyZIl6N+/P/r27QsAGD58OI4cOYJDhw7h2rVruH79OjZv3oxixYqhevXquH37Nj7++GO89957CAgIcMsfQURERFRQOLTm7dKlS7h27Ro6depkd/+qVasQFRWFI0eOoFatWihWrJj0WJMmTZCRkYGzZ8/Kk5iIiIioAHNozdulS5cAAJmZmRgyZAjOnDmDChUqYPjw4WjTpg3S0tIQEhJi95oyZcoAAFJTU1G3bl2HAwohkJmZ6fDrbGm1Wrv/exrz5EyOPDrD4/0sM7VaWMy+HssiJ+bJGfM8mzdlAfJnHrmWO3LlkZM35fGmLIB8eYQQUKlUuXquQ+UtIyMDADBp0iS8//77+OCDD7Bjxw6MGDECa9asgU6nQ9GiRe1eo1arAQB6vd6Rt5IYjUbZ1tolJyfLMhy5ME/OXMljMFmk24mJCQjwc+3A6vw0btyBeXLmTXm8KQuQv/LIvdxxNY87eFMeb8oCyJMnt7uXOVTe/P39AQBDhgxB165dAQA1atTAmTNnsGbNGmg0GhgMBrvXWEtbUFCQI29l955Vq1Z16rVWWq0WycnJCA0NRWBgoEvDkgPzuD9P1i/g6wCAsLBwaAKcX/OW38YN8xTMPN6UJb/mkWu5I1ceOXlTHm/KImeepKSkXD/XofJWtmxZAED16tXt7q9atSr27NmDiIgInDt3zu6x9PR0u9c6SqVSOV38nhQYGCjbsOTAPDlzJY+Pr0m6HRQYCI3aoUld1izuwDw5Y55n86YsQP7KI/dyx9U87uBNebwpC+B6ntxuMgUcPGChVq1aKFSoEE6cOGF3/7lz5/Diiy+iUaNGOHPmjLR5FQAOHDiAQoUKITw83JG3IiIiIqJsOFTeNBoNhg4dis8//xw//vgjLl++jGXLlmHfvn0YNGgQXn/9dZQuXRpjx45FQkICdu/ejdjYWAwePJinCSEiIiKSgcPrdEeMGIHAwEAsWrQIN27cQJUqVbB06VI0btwYALBy5UrExMSgR48eKFasGPr06YMRI0bIHpyIiIioIHJqg/ygQYMwaNCgbB+rVKkSVq9e7VIoIiIiIsoeL0xPREREpCAsb0REREQKwvJGREREpCAsb0REREQKwvJGREREpCCun/6ZiIgKDCEE9DYXYHeWzmCGwWSBzmC2uzKBp8iRR2cwZXvbU3nkJGcedYCvQ1cToKexvBERUa4IITDps704m3xHxqFel3FYcpAnz99m7JBlOPlx/NQILYEF77/KAucCbjYlIqJc0RvMMhc3KojOJt+RZe1tQcY1b0RE5LD1M9pDE+Dr9OsztVokJiYgLCwcQYGBMiZjHm/NozOY8bcZv8icrGBieSMiIodpAnyhUTv/FWIx+yLAz8fl4ciFeZSVp6DjZlMiIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5Iy
IiIlIQh8vbjRs3EBYW9tR/W7ZsAQCcPXsW/fr1Q7169dCmTRusW7dO9tBEREREBZWfoy9ISEiAWq3G7t27oVKppPu
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"tree = run_agglomerative(df_scaled)\n",
"linkage_matrix = get_linkage_matrix(tree)\n",
"draw_dendrogram(linkage_matrix)"
]
},
{
"cell_type": "code",
"execution_count": 149,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/EAAANFCAYAAAAzmnz2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVRfA4d/Mtmx6LwRC6E16ERVBEQFRUWyIgBVFP3vDggiIIioiihVRURRFxYYgIIqoIL13QgklvbfNlpn5/tgkEJItSZYE9L7Po5DZmTt3JkuyZ+6950iapmkIgiAIgiAIgiAIgnDWkxu6A4IgCIIgCIIgCIIgeEcE8YIgCIIgCIIgCIJwjhBBvCAIgiAIgiAIgiCcI0QQLwiCIAiCIAiCIAjnCBHEC4IgCIIgCIIgCMI5QgTxgiAIgiAIgiAIgnCOEEG8IAiCIAiCIAiCIJwj9A3dgbPNli1b0DQNg8HQ0F0RBEEQBADsdjuSJNG1a9eG7sq/hvh9LwiCIJxNavK7XozEn0bTNDRN81lbNpvNZ+0J3hH3vWGI+94wxH1vGPV93335u0lwEr/vz23injcMcd8bhrjvDeNs/l0vRuJPU/5EvmPHjnVuq6SkhD179tCyZUv8/f3r3J7gHXHfG4a47w1D3PeGUd/3fceOHWf8HP814vf9uU3c84Yh7nvDEPe9YZzNv+vFSLwgCIIgCIIgCIIgnCNEEC8IgiAIQr364IMPGD16dKVte/bsYdSoUXTp0oX+/fvz2WefVXpdVVXeeustLr74Yrp06cLdd9/NsWPH6rPbgiAIgnBWEEG8IAiCIAj15osvvmDmzJmVtuXm5nLHHXeQkJDAwoULuf/++5k+fToLFy6s2Ofdd99l/vz5TJkyha+++gpVVRkzZgw2m62er0AQBEEQGpZYEy8IgiAIwhmXnp7OxIkTWbduHYmJiZVe+/rrrzEYDLzwwgvo9XpatGhBcnIys2fP5vrrr8dms/Hxxx/zxBNPcMkllwDwxhtvcPHFF7N8+XKuuuqq+r8gQRAEQWggIogXBEEQBOGM27VrFwaDgZ9++ol33nmHEydOVLy2ceNGevXqhV5/8mNJ7969+eCDD8jKyiIlJYXi4mIuuOCCiteDg4Np3749GzZsqHUQr2kaJSUltb+oMhaLpdKfwpkn7nnDEPe9YYj73jDq+75rmoYkSV7tK4J4QRAEQRDOuP79+9O/f/9qX0tLS6N169aVtkVHRwOQmppKWloaAHFxcVX2KX+tNux2O3v27Kn18ac7cuSIz9oSvCPuecMQ971hiPveMOrzvhuNRq/2E0G8IAiCIAgNqrS0tMoHF5PJBIDVaq0YBalun/z8/Fqf12Aw0LJly1ofX85isXDkyBESExMxm811bk/wTNzzhiHue8MQ971h1Pd9T0pK8npfEcQLgiAIgtCg/Pz8qiSos1qtAPj7++Pn5weAzWar+Hv5PnX5YCVJkk9r/5rNZlHDuZ6Je94wxH1vGOK+N4z6uu/eTqUHkZ1eEARBEIQGFhsbS0ZGRqVt5V/HxMRUTKOvbp+YmJj66aQgCIIgnCXESLwgCILgE7b0HDI/W0rOj3+hFFnwa9WY6FsHE3blhUiyeGYsuNazZ0+++uorFEVBp9MBsHbtWpo1a0ZERARBQUEEBgaybt06EhISACgoKGD37t2MGjWqIbsu1JKmOlAtBajWYtAUJJ0R2RyMZAyo0WiUIAjCf5EI4gVBEIQ6K9l1iH03TUAptICqAlCUU0DRPzsJHXIBLd59Akmva+BeCmer66+/njlz5jB+/HjGjBnD9u3bmTt3LpMnTwaca+FHjRrF9OnTCQ8PJz4+ntdee43Y2FgGDhzYwL0Xakpz2HDkp4CmnrKtFKWwFMngjy44RgTygiAIboggXhAEQagTzaFw4I6pKEUnA3ig4u95v6wlbfaPxP3vugbqoXC2i4iIYM6cObz00ksMGzaMqKgoxo0bx7
Bhwyr2eeihh3A4HDz33HOUlpbSs2dPPvroIwwGQwP2XKgpTdNwFKRVCuArvW4vQbXkofMPq+eeCYIgnDtEEC8IgiDUSd6KDdhTslzvoGlkzFlE7NhrkHRiNF6AadOmVdnWqVMnFixY4PIYnU7Hk08+yZNPPnkmuyacYZrdAqrD7T6qJR/ZHCpG4wVBEFwQixQFQRCEOinasNfjVHl7Ri621Ox66pEgCGcrzV7qxU4qqPYz3xlBEIRzlAjiBUH4T1HtDlSLFU3TGror/xqSLOHN3ZRkMaomCIK3xM8LQRAEV0QQLwjCf0LB6u3sv2USm5vfwOZWw9l58f9I//hnNIfS0F075wVd1Anc3UcJjE2iMcRG1F+nBEE4K0kGs+edZB3IYsWnIAiCKyKIFwThXy9z/q/sv/l5ClZvp3zI2JqcxrGJH5F097RzNpAvPXiCgmXr0TYfRC21NVg/gvt2xq9FPOhc/ErRIPa+YaLMnCAISAY/0Bnd7iPWwwuCILgnPlEJgvCvZkvJIvmZ95zBu3JKNmRNA00jf8UGsr5a0WD9q43SQyfYe/2z7Ox3P8cfnAmT57P/wvtInfUNmlp9xuczSZJlWn76HIboMOcM2PLP3mVBfdTowUSNHlzv/RIE4ewjSRL64BiXI+2SKRDZL7ieeyUIgnBuEXOVBEH4V8v88lfcL9iWSP/oZ6JGDaqvLtWJ9Vg6e4Y+hVJYUmm7WmThxCtf4MgppMnEO+u9X36JcZy38m2yv11Jzk9/oRSUYG6TQNTowQSe316MqgmCUEHSGdCHNUYtLUKzFqFpCpLOiOwXjGTwEz8vBEEQPBBBvCAI/2qW3Ycr1y4/naZRmnQMTVXPieneKW9+4wzgleqvKX3OT0TddgV+iXH13DPQBZqJvn0I0bcPqfdzC4JwbpEkGZ05GMxi1F0QBKGmzv5PrIIgCHUg+5lcr9UuI+n1cA6M/Kg2OznfrXIZwAMgy2R/+0e99UkQBEEQBEGoX2IkXhCEf7XQgb3I+fEv1zvoZEIHn39OTN9UCorRbB5qJ0sS9rRzrx679UQmuYtW48gvwpQQQ/jVfdAFepHFWhAEQRAE4T9GBPGCIPyrhV7RG2NCDLYTmVVHsCWcmdPHXtsQXasxXVAAkkGPZne43knTnAnmzhGq3cGx5z8k8/PlIEnOmvMOhWPPzyFh6lgib+zf0F0UBEEQBEE4q4jp9IIg/KvJRgNtvnoBU+No5wadDLLsDBgNBpq/+wQBXVo1bCe9JJsMhA3t4355gKIScf0l9danujo26SNnAK9poKoV5f5Ui5Ujj75F7rJ1DdxDQRAEQRCEs4sYiRcE4V/PlBBDhz/eJn/5evJ+34RmtePfsTmRN/VHH3ZuJVVq9PBN5C1bh2qxVrs2PnLUQGfN9nOALTWbzHlLnQF8dSSJE69+QejAXufEcgdBEARBEIT6IIJ4QRD+E2SDnrArLyTsygsbuit14te8EW2/e5nDj76JZdfhiu2SyUDM3UOJf/KWBuxdzeT+8o/78n+aRum+o1gPp+DX/Nx4MCEIgiAIgnCmiSBeEAThHOPfPpEOy96geHsS+TsPkpKdSevhgwmKjmzortWIUljiXBpQNoXe5X4FJfXUI++oVjv2jFw0q4ckg4IgCIIgCGeACOIFQRDOUQGdWiK1bETqnj3oAv0bujs15teskccAHlnC2DiqfjrkgS01m9S3viHrm9/RSm0gSxwb2Ismj4/Av11iQ3dPEARBEIT/CJHYThAEQWgQoQN7oQsJcFYJqI5OJnTQ+RgiQ+uzW9WyHs9g95DHyZy/3BnAA6gahb9uYM/V4yjatLdhOygIgiAIwn+GCOIFQRCEBiH7GUl87QGQJJBPi+R1MvqQQJpMuKNhOneaoxM+xJFTUDWZoKKi2RwcevANNLVqokFBEARBEARfE0G8IAiC0GDChlxA6/mTCOja+uRGnUzYlRfSbvFrmBJiGq5zZWwpWeSv2FhtNQAAVBXb0XQK/9
lZvx0TBEEQBOE/SayJFwRBEBpUcJ/OBPfpjC0lC6WgGENsBPrQwIbuVoXSQydcl8ErJ0mU7j9G8EWd6qdTgiAIgiD
"text/plain": [
"<Figure size 1200x1000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"result = hierarchy.fcluster(linkage_matrix, 40, criterion=\"distance\")\n",
"y_names = ['0', '1', '2']\n",
"\n",
"plt.figure(figsize=(12, 10))\n",
"\n",
"\n",
"draw_data_2d(df_temp, 0, 1, result, y_names, subplot=plt.subplot(2, 2, 1)) \n",
"draw_data_2d(df_temp, 0, 2, result, y_names, subplot=plt.subplot(2, 2, 2)) \n",
"draw_data_2d(df_temp, 0, 3, result, y_names, subplot=plt.subplot(2, 2, 3)) \n",
"draw_data_2d(df_temp, 1, 2, result, y_names, subplot=plt.subplot(2, 2, 4)) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"емае теперь переходим к НЕиерархической кластеризации\n",
"будем использовать метод К-средних (K-means), мы выбираем количество кластеров и флгоритм пытается распределить данные так, чтобы минимизировать расстояние между объектами и центром их кластера"
]
},
{
"cell_type": "code",
"execution_count": 152,
"metadata": {},
"outputs": [],
"source": [
"from typing import Tuple\n",
"\n",
"def print_cluster_result(\n",
" df: pd.DataFrame, clusters_num: int, labels: np.ndarray, separator: str = \", \"\n",
"):\n",
" for cluster_id in range(clusters_num):\n",
" cluster_indices = np.where(labels == cluster_id)[0]\n",
" print(f\"Cluster {cluster_id + 1} ({len(cluster_indices)}):\")\n",
" rules = [str(df.index[idx]) for idx in cluster_indices]\n",
" print(separator.join(rules))\n",
" print(\"\")\n",
" print(\"--------\")\n",
"\n",
"\n",
"def run_kmeans(\n",
" df: pd.DataFrame, num_clusters: int, random_state: int\n",
") -> Tuple[np.ndarray, np.ndarray]:\n",
" kmeans = cluster.KMeans(n_clusters=num_clusters, random_state=random_state)\n",
" labels = kmeans.fit_predict(df)\n",
" return labels, kmeans.cluster_centers_"
]
},
{
"cell_type": "code",
"execution_count": 153,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster 1 (1314):\n",
"31112, 53882, 10434, 27419, 60491, 12109, 12095, 12175, 27458, 4219, 70822, 38047, 33879, 14248, 712, 24977, 47306, 62602, 4651, 1261, 61960, 1845, 37937, 47472, 35626, 36338, 18587, 15102, 59190, 47167, 38829, 55927, 65842, 19557, 7356, 17013, 72366, 6118, 7371, 70676, 27169, 19773, 66159, 71673, 42117, 57419, 26015, 26727, 66638, 70042, 32399, 3253, 71796, 14499, 49130, 51169, 66315, 37726, 54385, 35512, 8154, 4639, 12363, 4712, 33175, 2346, 42072, 30456, 59125, 56546, 48405, 36706, 71639, 60744, 7547, 5563, 68798, 72918, 50522, 3352, 70943, 48796, 16817, 31563, 55824, 20439, 45965, 8045, 37651, 41241, 62861, 72081, 32503, 12482, 56939, 43054, 34567, 50931, 16590, 3512, 42899, 43364, 44993, 210, 28939, 60739, 67432, 2182, 40899, 62466, 36841, 33486, 54567, 66204, 8003, 28378, 41081, 16077, 30184, 66071, 36255, 23410, 35684, 18937, 491, 8580, 28484, 62019, 51314, 37060, 35578, 54921, 33454, 62439, 2548, 2390, 68023, 54724, 8899, 39105, 31154, 69959, 10552, 12917, 68356, 28493, 1836, 32221, 10548, 17739, 27153, 34060, 43424, 30468, 56543, 15266, 10460, 64908, 67855, 25774, 49589, 17986, 29217, 72911, 47175, 48588, 66767, 29908, 45222, 40311, 71750, 42203, 71379, 58261, 67318, 8831, 65199, 43454, 7282, 49003, 16371, 40181, 66174, 6319, 55232, 41940, 72214, 37089, 52134, 54918, 1703, 16934, 32689, 18051, 38805, 61837, 49713, 17608, 62608, 4630, 66333, 49916, 71038, 58617, 69064, 9404, 28286, 10159, 58282, 64489, 64553, 69936, 46527, 13547, 9608, 10504, 37090, 60148, 52173, 23462, 11091, 70374, 31143, 66972, 55810, 37031, 34608, 36007, 48298, 44749, 46468, 51983, 20351, 55351, 67431, 20546, 52342, 59906, 53144, 11999, 38119, 3355, 67177, 58600, 65946, 48368, 36471, 15689, 8233, 23221, 31830, 15296, 17718, 18498, 56735, 621, 5835, 11838, 38165, 71585, 3009, 32361, 53910, 67548, 33404, 50965, 21077, 66570, 29158, 34299, 7696, 34668, 68483, 6072, 51112, 69673, 71238, 63958, 34511, 24892, 29496, 19939, 27832, 27757, 25099, 33123, 21713, 6726, 17242, 16380, 9729, 56974, 
65574, 17019, 41800, 6855, 26028, 14712, 23094, 40622, 46438, 65144, 34641, 13129, 37629, 62936, 59829, 55424, 61697, 55138, 39399, 721, 40448, 58007, 11960, 24592, 67744, 8328, 32437, 55420, 53660, 56553, 30480, 31988, 45585, 52063, 27377, 7446, 65130, 30753, 22853, 12465, 64849, 24183, 39601, 46891, 38987, 21886, 5353, 44300, 46218, 39745, 13517, 36355, 22678, 52512, 3579, 3130, 5545, 63693, 33528, 11068, 62233, 7291, 36814, 48265, 10139, 12662, 43174, 72823, 30567, 21117, 50491, 61013, 71010, 23551, 12738, 57772, 16615, 68995, 53010, 967, 31145, 54338, 13223, 67932, 10255, 27012, 20541, 5892, 66883, 43196, 51514, 38184, 13997, 27796, 18390, 63409, 72882, 40866, 63561, 51422, 3590, 60665, 18430, 13365, 60983, 14615, 50277, 50811, 1246, 30712, 31308, 3325, 52808, 36109, 53336, 56831, 55592, 44583, 58227, 60810, 34612, 25595, 30550, 13367, 38609, 22159, 37413, 4169, 18888, 42503, 23645, 62382, 59521, 55386, 72547, 26973, 41033, 71442, 49624, 10572, 28910, 10381, 31956, 24665, 13683, 7387, 57011, 50053, 51125, 29077, 4970, 58291, 99, 64633, 23016, 18412, 67412, 37545, 14491, 66220, 25458, 69645, 53695, 26692, 33400, 67078, 22540, 26999, 30102, 49521, 29134, 68281, 40350, 39375, 12106, 25283, 49949, 28681, 62332, 25488, 14807, 16110, 40970, 28933, 11709, 16809, 13907, 54071, 28024, 11730, 17245, 70852, 60957, 19742, 4808, 49928, 52688, 65698, 820, 55721, 72310, 24115, 7122, 48755, 33551, 62716, 68438, 41148, 14924, 47950, 9262, 71896, 38623, 26503, 5475, 15525, 48748, 71591, 67864, 34857, 34995, 22952, 57879, 36638, 41097, 54620, 19681, 6988, 25287, 5934, 58999, 28261, 35222, 44105, 65256, 20044, 54769, 12686, 48830, 47924, 59336, 5684, 3673, 31867, 23633, 52549, 37349, 2751, 64464, 66494, 42786, 33401, 24174, 61715, 60663, 46875, 69091, 1821, 44978, 10762, 84, 45824, 61838, 57212, 62668, 33142, 17437, 38303, 36484, 60047, 16542, 18805, 17869, 6793, 49265, 6606, 23031, 22902, 4807, 9641, 10313, 12097, 45323, 39120, 68344, 66752, 5077, 49279, 42856, 10752, 42133, 
4842, 58138, 65053, 24168, 61973, 18687, 72642, 54782, 24437, 65429, 3509, 15220, 4813, 31166, 9
"\n",
"--------\n",
"Cluster 2 (813):\n",
"31720, 69768, 39912, 33943, 66866, 49669, 30669, 16523, 46136, 52800, 37893, 19584, 24447, 70336, 45801, 67210, 33759, 26325, 65460, 36811, 70970, 55680, 11014, 44338, 34974, 41648, 45538, 68249, 1686, 22284, 39038, 21956, 30171, 65535, 29865, 28799, 40840, 10449, 31091, 9487, 28102, 1506, 40670, 21284, 5973, 42996, 46785, 21408, 7559, 8171, 43232, 34402, 22706, 71539, 28637, 31741, 22537, 50611, 9637, 44862, 5708, 65357, 49465, 14123, 54975, 10913, 27029, 45719, 129, 530, 6107, 3655, 5010, 56681, 56716, 61350, 61465, 18108, 46436, 7351, 31164, 48272, 2893, 34376, 8595, 46474, 69687, 2953, 9179, 63050, 11974, 41523, 50441, 16927, 28265, 54375, 37832, 21058, 31279, 9201, 22622, 29933, 7621, 5374, 31564, 71808, 56998, 43134, 39383, 63606, 36331, 42359, 20751, 68241, 60902, 58439, 29010, 44912, 45238, 61511, 36942, 15095, 44315, 68245, 47732, 55709, 15311, 59807, 40639, 8544, 3361, 61408, 33552, 31364, 9906, 27794, 46809, 35565, 48144, 2532, 34363, 23650, 23046, 41927, 54866, 20364, 10997, 62999, 66184, 22870, 57523, 68131, 29873, 54182, 61300, 15274, 53016, 28848, 7745, 12593, 15553, 45796, 31840, 58767, 14391, 22321, 41673, 49086, 47523, 56870, 40791, 54304, 19234, 52454, 16575, 11176, 9612, 41513, 52580, 33723, 26235, 8320, 49529, 49785, 6886, 40055, 45283, 22685, 46745, 5046, 55847, 42441, 14387, 22384, 24108, 69427, 21688, 60777, 64732, 42710, 46683, 58909, 18616, 55529, 12204, 21397, 10324, 25514, 7663, 71793, 32352, 65218, 54643, 33410, 2543, 45289, 10792, 19153, 47876, 41615, 41537, 45759, 71929, 37154, 70380, 67405, 2549, 10782, 61742, 24099, 13948, 65766, 42482, 8008, 56089, 11745, 17733, 11743, 3606, 32554, 45893, 53538, 17006, 42108, 224, 56679, 27146, 16556, 36698, 57372, 30605, 13622, 39250, 2879, 59684, 56986, 16402, 40889, 4083, 48843, 5694, 44481, 19101, 11973, 59178, 44281, 55599, 45224, 54747, 6090, 46385, 46323, 28122, 50843, 60211, 53279, 37830, 2454, 3437, 6355, 58567, 62187, 72779, 12396, 69622, 58037, 34281, 7990, 57622, 11691, 13319, 53815, 
51579, 58203, 6965, 42821, 10367, 66530, 43146, 57497, 16147, 18306, 61769, 26134, 66772, 41861, 954, 23565, 57854, 66678, 56734, 20347, 809, 67052, 69224, 13323, 59940, 46093, 46072, 47848, 71440, 59734, 32733, 61338, 59275, 19996, 31517, 56245, 9225, 45955, 53943, 53276, 39661, 51162, 4683, 782, 63565, 26389, 39518, 542, 26031, 38255, 41565, 39423, 68908, 28418, 33162, 39467, 20282, 51159, 7167, 59147, 18192, 14049, 69355, 2082, 16449, 5447, 44224, 533, 45554, 55744, 25767, 71319, 23604, 46576, 31293, 6044, 45209, 43155, 11882, 45669, 59604, 33187, 44192, 728, 28952, 51916, 21857, 55976, 27572, 54184, 4702, 45048, 30084, 50650, 40571, 13072, 22969, 47537, 26242, 36226, 32723, 35737, 30677, 50453, 14241, 40144, 13504, 51959, 2092, 68235, 3956, 34436, 40513, 48836, 62387, 3807, 51339, 32826, 44179, 14563, 40237, 65970, 51109, 5984, 13062, 47770, 12687, 36858, 50373, 13191, 47330, 64750, 70259, 72132, 11726, 13736, 43913, 41870, 37907, 15987, 57166, 47627, 8723, 937, 51806, 59412, 29233, 17762, 1405, 57953, 40251, 27013, 7586, 45357, 15219, 52987, 29327, 8085, 41820, 49057, 18070, 11770, 25454, 29224, 24219, 52089, 7297, 29910, 59339, 18754, 34312, 57798, 11605, 61924, 58015, 43271, 39714, 21785, 30214, 66419, 40931, 28559, 49815, 1625, 56309, 52034, 17175, 40544, 49152, 43672, 39286, 38043, 71721, 38094, 47350, 43675, 65801, 59953, 43772, 45754, 57485, 6128, 37082, 64541, 4538, 34001, 48964, 40393, 35432, 44010, 71044, 30290, 70752, 26366, 12436, 37698, 14677, 42743, 5686, 4789, 897, 69553, 38036, 36666, 16316, 31835, 4099, 26893, 1486, 5451, 3640, 17835, 37660, 24782, 63416, 42082, 54058, 4861, 33768, 35450, 62793, 66592, 29804, 3753, 27279, 38578, 66502, 9034, 16582, 28500, 70241, 66647, 39450, 57109, 17277, 18861, 38858, 13862, 66065, 71869, 46035, 54946, 29934, 28998, 63668, 24876, 22536, 8760, 53126, 18179, 68708, 12366, 38440, 34621, 16091, 17515, 34958, 30620, 1818, 70654, 49485, 61641, 11566, 72108, 22967, 33692, 25305, 7885, 14599, 38488, 52428, 61171, 
2824, 2019, 47751, 34525, 13755, 56019, 67942, 52220, 48226, 10333, 10390, 72497, 51935, 29470,
"\n",
"--------\n",
"Cluster 3 (373):\n",
"9046, 51676, 60182, 1665, 56669, 8213, 5317, 58202, 56112, 34120, 25226, 70630, 13861, 68794, 64778, 61843, 54827, 69160, 43717, 39373, 54401, 47269, 7937, 19824, 8752, 25831, 66400, 58631, 5111, 10710, 17004, 2326, 50784, 36236, 45805, 28291, 2458, 56841, 63973, 45277, 12062, 41069, 53401, 13491, 44033, 14164, 37132, 53440, 69551, 20387, 71279, 11762, 29281, 30683, 17308, 67981, 58978, 11933, 46703, 24669, 59437, 66258, 20426, 63453, 14431, 65105, 67895, 66955, 24905, 69112, 64373, 58267, 54695, 68627, 31179, 68025, 29552, 25904, 31421, 20463, 12689, 39186, 32729, 23368, 25974, 1210, 36857, 52282, 45535, 40460, 32257, 41413, 28674, 63884, 4057, 36275, 11577, 20980, 28526, 18518, 42807, 11120, 68614, 4480, 2982, 59368, 65836, 21130, 56357, 45053, 28333, 49421, 54312, 69037, 63732, 2374, 15528, 27213, 22320, 50305, 59729, 12985, 22091, 17291, 66196, 1307, 35846, 28645, 22470, 15649, 12982, 67733, 545, 15791, 67780, 68275, 36561, 47811, 17148, 56179, 25483, 50118, 39639, 31090, 64174, 48993, 39659, 71533, 34558, 42553, 53515, 9752, 49744, 22485, 16685, 25315, 33585, 17813, 64582, 12270, 50826, 14147, 2314, 63058, 13571, 47885, 17351, 25627, 72020, 36618, 37290, 62709, 7273, 10538, 9648, 66922, 8521, 69330, 5824, 8332, 55862, 42550, 14178, 69143, 67603, 34326, 18414, 17708, 38678, 30989, 22363, 41291, 33622, 47735, 55775, 35140, 2750, 62783, 19778, 22440, 32157, 28150, 6419, 39823, 32884, 70031, 5821, 45788, 52150, 37327, 56090, 22001, 16260, 35913, 40624, 45945, 54526, 29869, 47159, 7806, 63984, 5878, 239, 33983, 29375, 62452, 47622, 50098, 46373, 71318, 49341, 4692, 40253, 44950, 42460, 41271, 21491, 43059, 46284, 38493, 5137, 39202, 4833, 19389, 34496, 49709, 31415, 71322, 72337, 7550, 57917, 41424, 54858, 49495, 15166, 16593, 63663, 15988, 35829, 53909, 5799, 16488, 44781, 29385, 48072, 32776, 50841, 26328, 52419, 70344, 6879, 58438, 12512, 2513, 45713, 24272, 54347, 54353, 15515, 48759, 32452, 35333, 15120, 11412, 24630, 24229, 9170, 47608, 63597, 5478, 43657, 
71917, 24603, 70857, 16066, 48069, 49014, 61247, 27799, 72435, 46864, 25405, 7344, 61178, 48364, 71182, 48775, 8968, 32016, 32270, 1473, 60963, 66431, 36750, 16938, 29388, 5355, 29915, 3305, 50671, 68333, 44777, 36377, 39531, 14479, 53422, 19550, 62456, 47521, 63938, 45040, 42212, 57270, 63401, 53990, 17295, 55466, 34448, 49672, 59130, 34661, 65680, 5863, 23223, 55566, 63990, 61895, 36589, 28651, 45033, 20316, 7683, 23176, 61000, 9026, 54301, 3099, 46068, 3715, 50402, 69502, 16812, 62629, 63912, 49574, 7411, 6239, 44591, 27626, 12279, 28303, 7658, 35997\n",
"\n",
"--------\n"
]
},
{
"data": {
"text/plain": [
"array([[ 0.47644409, -0.40065839, 0.33339772, 0.12947449],\n",
" [-1.10132351, -0.3102823 , -0.77003252, 0.004914 ],\n",
" [ 0.72629021, 2.08749131, 0.50685056, 0.25737265]])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"random_state = 9\n",
"\n",
"labels, centers = run_kmeans(df_scaled, 3, random_state) \n",
"print_cluster_result(df_scaled, 3, labels)\n",
"display(centers)"
]
},
{
"cell_type": "code",
"execution_count": 157,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABQ4AAAPfCAYAAACGlVSEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXwU9f0/8NduyLG5L+4odw6ScEdQqGgR1FZRqbVWUQTvSvWn1lqvWqtWe2i9/YoHoKj1QBGwatEKVRAEuSEBA4JErtx3QsjM74+4y14z+5ljZ4+8no9HH5XN7OxnPzu7O/ue9/vztsmyLIOIiIiIiIiIiIjIjT3UAyAiIiIiIiIiIqLww8AhERERERERERER+WDgkIiIiIiIiIiIiHwwcEhEREREREREREQ+GDgkIiIiIiIiIiIiHwwcEhERERERERERkQ8GDomIiIiIiIiIiMgHA4dERERERERERETkg4FDIiIiIiIiIiIi8sHAYTfzhz/8AXl5ear/u+KKK0I9TCLN9u3bhzvvvBOTJ09GYWEhj2eNKioqcOaZZ6KmpibUQ6EgmzlzJv79739rvl9tbS3OOOMMHDhwIAijIqLuhOejFK14PmoMz0e7D56PRhabLMtyqAdB1vn+++89Poife+457Ny5E88884zrtuTkZAwdOjQUwyPSpaKiAhdddBHOOussTJkyBRkZGYiNjYXD4cCQIUNgt/MaiRpZlnHllVdiypQpuOqqq0I9HAqynTt34uqrr8by5cuRlZWl6b4LFizAZ599hldffRU2my1IIySiaMfzUYpGPB81huej3QvPRyNLj1APgKx18skn4+STT3b9OzMzE3FxcRg1alToBkVk0IIFCzBp0iQ88sgjoR5KRFqxYgV2796Nl19+OdRDIQsMHz4cI0aMwPPPP497771X030vu+wyPP/881ixYgWmTZsWpBESUbTj+ShFI56PGsPz0e6F56ORhZc9yK/29nY8++yzOOecc1BcXIxp06Zh3rx5kCQJAHDFFVf4pNyvW7cOeXl5WLduHQDg6aefRl5enuvvH374IUpKSvDYY48BADo7OzFv3jycd955GDFiBEaNGoVLL70Ua9euNeU5HD58GLfddhsmTJiAkSNH4oorrsDmzZs9xubvf08//TQA4OjRo7jrrrswefJkjBgxAhdffDE+++wz1cd87733kJeXh4qKCtecTJo0CbfddhuAriuRSo/7hz/8wbWfiooK/P73v8ekSZNQWFiIU089Fb///e9RW1vr2kaWZSxYsADnnnsuRowYgalTp+Lll1+GLMtCz6+urg5//OMfcdppp6G4uBiXXHIJvvrqK4/nk5eXh0WLFuHOO+/E6NGjcdppp+Hhhx9Ge3u7ptfC33iefvpp13xt2bIFF110EUaMGIHzzz8fH3/8MQDg+PHjmDRpEm6//XaffU6bNs31JfP1119j8uTJeOqpp/CTn/wEI0aMwKWXXuo6Fp0aGxvxyCOP4KyzzkJxcTHOO+88vPvuu6pjdx7X/sqntm3bhquvvhrjx4/HmDFjcMMNN+Dbb79VvG9RURF++tOf4pVXXvF4DNH9fPnll7j88ssxYsQITJs2DW+88YbHft555x2ce+65KCoqUjy2/HnhhRdw9tlnIy4uznWb+7Hi5P2eBoANGzZg5syZGDlyJE455RTceeedHlkk3u8Jp5/+9KeucamVqgFdJW1XXHEF3n33XZx55pkYPXo0Zs2ahbKyMtXn5f059fDDD6O4uBj/+9//PJ6Pv/+5HzuffvopLrvsMowePRpFRUU455xz8Prrr3s81tGjR3HnnXfi1FNPxejRozFz5kxs2rTJ9VyVHsc5L6LzqPRecRL57ACA888/H++++65qKZDzMd3nIi4uDmeffTZeeOEF1bknIjIDz0d5PgrwfBTg+ag7no/yfJTno9ZjxiH5kGUZN9xwAzZv3oy5c+ciPz8f69atwxNPPIEDBw
7gwQcf1LzPtrY2/PnPf8Y111yD888/HwDwj3/8A2+++SZuv/125OXl4ciRI3j22Wdxyy23YOXKlXA4HLqfw7Fjx3DNNdego6MD999/P2JjY/Hcc8/hiiuuwNtvv41f/vKX+MlPfgIAeOCBBwAA999/PwCgT58+qKqqwsUXX4z4+HjceuutyMjIwHvvvYebbroJf/vb3zB9+nShcfz5z3/GOeecg5kzZ3rcfuONN+KMM85w/Xvu3Lmu/25tbcWVV16JjIwM3H///UhJScGmTZvwzDPPICEhAX/+858BAH/729+wcOFCzJ49GxMnTsS2bdvwj3/8A8ePHw/4/Nrb2zFr1ixUVVXh1ltvRa9evbB48WJcc801eOmll3Dqqae6xvPkk09i5MiReOKJJ7Bnzx488cQTqKysxBNPPCH6cri89dZbrv/u06cP1qxZAwC4/vrrMXPmTNx6661499138f/+3//DCy+8gMmTJ+PCCy/Ea6+9hqamJiQnJwMAvvnmG+zfvx+PPvooAOCHH37Aiy++iNbWVtx+++1IT0/H22+/jTlz5uDll1/GhAkT0NbWhssuuwzV1dW4+eab0b9/f3z66ae45557UFVVhRtuuEF17H/84x9RWFgIoKt8au3atbjmmmswfvx4/OUvf0F7ezteeOEFXHrppXj77bcxZMgQn/s2Nzfjww8/xF//+lfk5+fjtNNO07SfW2+9FRdeeCFuuOEGfPbZZ67X9rLLLsP69etx77334uKLL8a9996LpKQkAJ7Hlj979+7F9u3bceuttwq9hu7Wr1+P2bNnY8KECXjiiSdQX1+PJ598EldeeSXeffddJCQkCO3HeVzs2LEDf/7znz3m2qm0tBR79+7FbbfdhrS0NDz11FOutVF69eoV8DG2bt2KN998E08++SRGjx7t9/Hdx+C0cuVK3HTTTbjyyivx29/+Fm1tbXjjjTfw5z//GUVFRRg5ciSam5vx61//Gp2dnbjjjjvQu3dvvPLKK5gzZw7ef/99PPPMMzh27BgqKysxd+5cj/d/r169NM2j2ntF9LMD6Dp57OzsxIoVK/CrX/3K75ydccYZeOutt3xKBc855xy89dZb+O677zBo0KCAc09EpAfPR3k+yvNRXzwf9cXzUZ6P8nzUAjJ1a3feead85plnety2cuVKOTc3V16+fLnH7c8++6ycm5sr7969W77yyivlSy+91OPva9eulXNzc+W1a9fKsizLTz31lJybmyvLsiy/88478sSJE+Xjx4+7tr/tttvkBQsWeOzjk08+kXNzc+VNmzYZel7r1q2TCwoK5I0bN7puq66ulseMGSPPnTvXY9uZM2fKM2fO9Ljtb3/7m1xYWChXVFR43D5r1ix54sSJcmdnp9/HXbx4sZybmysfOHBAXrNmjVxUVCTX1ta6/n7gwAE5NzdXXrx4scf9zjzzTPnOO++UZVmWd+7cKf/617+Wv//+e49trr/+evnss8+WZVmW6+vr5eHDh8sPP/ywxzYPPvigfPXVVwd8fm+99Zacm5srb9682XWbJEny5ZdfLs+YMcN1W25urjxt2jS5o6PDddv8+fPl3Nxcuby83O8c+ON+LLhzztczzzzjMY4LLrhA/uUvfynLsizv3btXzs3Nld99913XNvfee688bdo0178LCgrkgoIC+bvvvvPYz/nnny9ffPHFsizL8uuvvy7n5uZ6HBOyLMt33323XFxc7PE6ufM+rp0uvvhi+Wc/+5nHMV1fXy+fcsop8s0336x436amJjk3N1d+4YUXNO/nrrvu8hjDjTfeKE+cOFGWJEl+6aWX5NzcXLmpqcljG/djyx/nvNTX13vcnpubKz/11FMet3m/jr/61a/k8847z2Pse/fulQsKCuRFixbJsuz5ngg0LqW5vvPOO+Xc3Fx5/fr1rtuOHDkiFxcXy3//+98Vn5v7sX/HHXe45lPp+fgbw4svvugzztraWo/X8LXXXpPz8vLknTt3urZpaW
mRp02bJr/99tuu25Te/1rmUe29IvLZ4e6CCy6Qb7nlFp/bA2loaJBzc3Pl119/XfN9iYj84fkoz0edeD5a63fsPB8
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def draw_cluster_results(\n",
" df: pd.DataFrame,\n",
" col1: int,\n",
" col2: int,\n",
" labels: np.ndarray,\n",
" cluster_centers: np.ndarray,\n",
" subplot: Any | None = None,\n",
"):\n",
" ax = None\n",
" if subplot is None:\n",
" ax = plt\n",
" else:\n",
" ax = subplot\n",
"\n",
" centroids = cluster_centers\n",
" u_labels = np.unique(labels)\n",
"\n",
" for i in u_labels:\n",
" ax.scatter(\n",
" df[labels == i][df.columns[col1]],\n",
" df[labels == i][df.columns[col2]],\n",
" label=i,\n",
" )\n",
"\n",
" ax.scatter(centroids[:, col1], centroids[:, col2], s=80, color=\"k\")\n",
" plt.title('Точка - это кластер, грубо говоря (центр кластера) :)')\n",
"\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"draw_cluster_results(df_scaled, 0, 1, labels, centers, plt.subplot(2, 2, 1)) # age vs avg_glucose_level\n",
"draw_cluster_results(df_scaled, 0, 2, labels, centers, plt.subplot(2, 2, 2)) # age vs bmi\n",
"draw_cluster_results(df_scaled, 0, 3, labels, centers, plt.subplot(2, 2, 3)) # age vs hypertension\n",
"draw_cluster_results(df_scaled, 1, 2, labels, centers, plt.subplot(2, 2, 4)) # avg_glucose_level vs bmi"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "vev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}