1181 lines
1.2 MiB
Plaintext
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"#### Бизнес-цель: кластеризация пациентов для выявления групп с схожими характеристиками здоровья и рисками инсульта. Что, к примеру, может использоваться для следующего:\n",
"\n",
"- определение, люди каких групп могут иметь бОльшую предрасположенность к возникновению инсульта\n",
"- помощь в медицине на основе полученных данных в разработке медицинских показаний людям с повышенным риском возникновения инсульта"
]
},
{
"cell_type": "code",
"execution_count": 160,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>gender</th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>ever_married</th>\n",
" <th>work_type</th>\n",
" <th>Residence_type</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>smoking_status</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9046</th>\n",
" <td>Male</td>\n",
" <td>67.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>228.69</td>\n",
" <td>36.6</td>\n",
" <td>formerly smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51676</th>\n",
" <td>Female</td>\n",
" <td>61.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>202.21</td>\n",
" <td>NaN</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31112</th>\n",
" <td>Male</td>\n",
" <td>80.0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>105.92</td>\n",
" <td>32.5</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60182</th>\n",
" <td>Female</td>\n",
" <td>49.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>171.23</td>\n",
" <td>34.4</td>\n",
" <td>smokes</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1665</th>\n",
" <td>Female</td>\n",
" <td>79.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Rural</td>\n",
" <td>174.12</td>\n",
" <td>24.0</td>\n",
" <td>never smoked</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34383</th>\n",
" <td>Male</td>\n",
" <td>46.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Urban</td>\n",
" <td>88.23</td>\n",
" <td>25.8</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8646</th>\n",
" <td>Female</td>\n",
" <td>54.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>97.47</td>\n",
" <td>26.7</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46653</th>\n",
" <td>Female</td>\n",
" <td>81.0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>Yes</td>\n",
" <td>Private</td>\n",
" <td>Rural</td>\n",
" <td>59.28</td>\n",
" <td>28.1</td>\n",
" <td>never smoked</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>Female</td>\n",
" <td>15.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>No</td>\n",
" <td>children</td>\n",
" <td>Rural</td>\n",
" <td>101.15</td>\n",
" <td>22.2</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61676</th>\n",
" <td>Male</td>\n",
" <td>77.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>Yes</td>\n",
" <td>Self-employed</td>\n",
" <td>Urban</td>\n",
" <td>68.38</td>\n",
" <td>25.1</td>\n",
" <td>Unknown</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2500 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" gender age hypertension heart_disease ever_married work_type \\\n",
"id \n",
"9046 Male 67.0 0 1 Yes Private \n",
"51676 Female 61.0 0 0 Yes Self-employed \n",
"31112 Male 80.0 0 1 Yes Private \n",
"60182 Female 49.0 0 0 Yes Private \n",
"1665 Female 79.0 1 0 Yes Self-employed \n",
"... ... ... ... ... ... ... \n",
"34383 Male 46.0 0 0 Yes Private \n",
"8646 Female 54.0 0 0 Yes Private \n",
"46653 Female 81.0 1 1 Yes Private \n",
"1099 Female 15.0 0 0 No children \n",
"61676 Male 77.0 0 0 Yes Self-employed \n",
"\n",
" Residence_type avg_glucose_level bmi smoking_status stroke \n",
"id \n",
"9046 Urban 228.69 36.6 formerly smoked 1 \n",
"51676 Rural 202.21 NaN never smoked 1 \n",
"31112 Rural 105.92 32.5 never smoked 1 \n",
"60182 Urban 171.23 34.4 smokes 1 \n",
"1665 Rural 174.12 24.0 never smoked 1 \n",
"... ... ... ... ... ... \n",
"34383 Urban 88.23 25.8 Unknown 0 \n",
"8646 Rural 97.47 26.7 never smoked 0 \n",
"46653 Rural 59.28 28.1 never smoked 0 \n",
"1099 Rural 101.15 22.2 Unknown 0 \n",
"61676 Urban 68.38 25.1 Unknown 0 \n",
"\n",
"[2500 rows x 11 columns]"
]
},
"execution_count": 160,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"from sklearn import cluster\n",
"from scipy.cluster import hierarchy\n",
"\n",
"df1 = pd.read_csv(\"./csv/option4.csv\", index_col='id')\n",
"df1.info\n",
"df = df1.head(2500)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"уберем пустые значения, подготовим данные:"
]
},
{
"cell_type": "code",
"execution_count": 161,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"gender 0\n",
"age 0\n",
"hypertension 0\n",
"heart_disease 0\n",
"ever_married 0\n",
"work_type 0\n",
"Residence_type 0\n",
"avg_glucose_level 0\n",
"bmi 119\n",
"smoking_status 0\n",
"stroke 0\n",
"dtype: int64\n",
"\n",
"gender False\n",
"age False\n",
"hypertension False\n",
"heart_disease False\n",
"ever_married False\n",
"work_type False\n",
"Residence_type False\n",
"avg_glucose_level False\n",
"bmi True\n",
"smoking_status False\n",
"stroke False\n",
"dtype: bool\n",
"\n"
]
}
],
"source": [
"print(df.isnull().sum())\n",
"print()\n",
"\n",
"print(df.isnull().any())\n",
"print()"
]
},
{
"cell_type": "code",
"execution_count": 162,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Наличие пропущенных значений:\n",
"gender 0\n",
"age 0\n",
"hypertension 0\n",
"heart_disease 0\n",
"ever_married 0\n",
"work_type 0\n",
"Residence_type 0\n",
"avg_glucose_level 0\n",
"bmi 0\n",
"smoking_status 0\n",
"stroke 0\n",
"dtype: int64\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\elena\\AppData\\Local\\Temp\\ipykernel_68948\\1629916119.py:1: SettingWithCopyWarning: \n",
"A value is trying to be set on a copy of a slice from a DataFrame.\n",
"Try using .loc[row_indexer,col_indexer] = value instead\n",
"\n",
"See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
" df['bmi'] = df['bmi'].fillna(df['bmi'].median())\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>hypertension</th>\n",
" <th>heart_disease</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>stroke</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" <td>2500.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>44.605296</td>\n",
" <td>0.108000</td>\n",
" <td>0.062400</td>\n",
" <td>108.630440</td>\n",
" <td>29.102840</td>\n",
" <td>0.099600</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>22.817713</td>\n",
" <td>0.310443</td>\n",
" <td>0.241929</td>\n",
" <td>47.124712</td>\n",
" <td>7.804786</td>\n",
" <td>0.299526</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>0.080000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>55.220000</td>\n",
" <td>10.300000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>26.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>77.902500</td>\n",
" <td>23.975000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>47.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>93.200000</td>\n",
" <td>28.200000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>63.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>117.510000</td>\n",
" <td>33.000000</td>\n",
" <td>0.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>82.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>271.740000</td>\n",
" <td>97.600000</td>\n",
" <td>1.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" age hypertension heart_disease avg_glucose_level \\\n",
"count 2500.000000 2500.000000 2500.000000 2500.000000 \n",
"mean 44.605296 0.108000 0.062400 108.630440 \n",
"std 22.817713 0.310443 0.241929 47.124712 \n",
"min 0.080000 0.000000 0.000000 55.220000 \n",
"25% 26.000000 0.000000 0.000000 77.902500 \n",
"50% 47.000000 0.000000 0.000000 93.200000 \n",
"75% 63.000000 0.000000 0.000000 117.510000 \n",
"max 82.000000 1.000000 1.000000 271.740000 \n",
"\n",
" bmi stroke \n",
"count 2500.000000 2500.000000 \n",
"mean 29.102840 0.099600 \n",
"std 7.804786 0.299526 \n",
"min 10.300000 0.000000 \n",
"25% 23.975000 0.000000 \n",
"50% 28.200000 0.000000 \n",
"75% 33.000000 0.000000 \n",
"max 97.600000 1.000000 "
]
},
"execution_count": 162,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df['bmi'] = df['bmi'].fillna(df['bmi'].median())\n",
"print(\"\\nНаличие пропущенных значений:\")\n",
"print(df.isnull().sum())\n",
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 163,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ЗАВИСИМОСТЬ ЗНАЧЕНИЙ ДРУГ ОТ ДРУГА\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoMAAAHnCAYAAADD41dfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADmj0lEQVR4nOydeWAU5f3/37shu8kmmw2bQDiFkGDIAYRwqVSCVq2oXLb2UrEWvFr0Z20t9Wi1aq1WRb/VWg+0rVhrtQqKimdLpGARBCQJ4Qgm3ASSkDvZQHZ/f8RZZmfneOaZc3ef1z/KZnZmdo7n+Tyf4/1xhEKhEBgMBoPBYDAYCYnT6hNgMBgMBoPBYFgHMwYZDAaDwWAwEhhmDDIYDAaDwWAkMMwYZDAYDAaDwUhgmDHIYDAYDAaDkcAwY5DBYDAYDAYjgWHGIIPBYDAYDEYCM8DqE7AbW7duRSgUQnJystWnwmAwGAwGg0HFyZMn4XA4MGnSJMVtmWdQQCgUghk63KFQCL29vaYci2EP2D1PPNg9TzzYPU887HrP1dgzzDMogPMIjh8/3tDjdHV1oaamBvn5+fB4PIYei2EP2D1PPNg9TzzYPU887HrPKysribdlnkEGg8FgMBiMBIYZgwwGg8FgMBgJDDMGGQwGg8FgMBIYZgwyGAwGg8FgJDDMGGQwGAwGg8FIYJgxyGAwGAwGg5HAMGOQwWAwGAwGI4FhxiCDwWAwGAxGAsOMQQaDwWAwGIwEhhmDDAaDwWAwGAkMMwYZDAaDwWAwEhhmDDIYDAaDwWAkMAOsPgEGg8FgMBj2oS8Ywo6vmtDc1gN/RgqKxmQhyemw+rQYBsKMQQaDwWAwGACADdsP47lVlWhq7Ql/luVLwfXzx+OcCcMsPDOGkbAwMYPBYDAYDGzYfhi//9umCEMQAJpae/D7v23Chu2HLTozhtEwY5DBYDAYjASnLxjCc6sqZbd5/q0q9AVDJp0Rw0yYMchgMBgMRoKz46umKI+gkMaWbuz4qsmkM2KYCTMGGQwGg8FIcJrb5A1BtdsxYgtmDDIYDAaDkeD4M1J03Y4RW7BqYgaDwWCohsmPxBdFY7KQ5UuRDRVnZ6aiaEyWiWfFMAtmDDIYDAZDFUx+JP5Icjpw/fzx+P3fNkluc928EmbwxyksTMxgMBgMYpj8iDR9wRCq65pRWd+F6rrmmKu8PWfCMNxxzVRk+SJDwdmZqbjjmqnM0I9jmGeQwWAwGESQyo9MLxmacB4kobf0jQ3NyPJVx5y39JwJwzC9ZChLAUgwmGeQwWAwGEQw+RFx4s1bmuR0YHx+NsrLRmB8fjYzBBMAZgwyGAwGgwgmPxINE2tmxAPMGGQwGAwGEUx+JBrmLWXEA8wYZDAYDAYRnPyIHIkmP8K8pYx4gBmDDAaDwSCCkx+RI9HkR5i3VJ6+YAiVtY2o2HIQlbWNLFxuU1g1MYPBYDCI4eRHhDqD2ZmpuG5eSUxVzuoBE2uWhulRxg7MGGQwGAyGKpj8yGmYWLM4XIW1EK7CmukW2gsWJmYwGAyGapj8yGmYWHMkrMI69mCeQQaDwWAwNMJ5S7fUHEL1zjoUj8tFWeHwhDSS1VRYj8/PNumsGHIwY5DBYDAYDB1IcjpQnOuHs6cBhbn+hDQEAVZhHYuwMDGDwWAwGAzdYBXWsQczBhkMBoPBYOhGIulR9gVDqK5rRmV9F6rrmmM2D5KFiRkMBoPBYOhGolRYC6Vz3tjQjCxfdUxK5zDPIIPBYDAYDF2J9wprTjpHWCjDSeds2H7YojOjg3kGGQwGg8Fg6E686lGSSudMLxkaM7+VGYMMBoPBYDAMgdOjjCfiUTqHhYkZDAaDwWAwCIlH6RxmDDIYDAaDwWAQEo/SOcwYZDAYDAaDwSAkHqVzmDHIYDAYDAaDQQgnnSNHrEnnMGOQwWAwGIwEpC8YQmVtIyq2HERlbaOiYLLa7eOZeJPOYdXEDAaDwWAkGELBZA
DI8qVICiar3T4R4KRzttQcQvXOOhSPy0VZ4fCY8ghyMM8gg8FgMBgJhFrB5HgTWNaTJKcDxbl+jB/tQXGuX9EQtKt3lXkGGQwGg8FIENQKJsejwLKecL2Jq+u7EExpRllhquR1sLN3lRmDDAaDwWAkCGoFk+0isNwXDNmuk4ma3sScd1UI5121Os+QGYMMBoPBYMQpQiOqsaWb6HucYDKpcPKXe44bZqjZ0aOmxriLBe8qMwYZDAaDwYhDxIyojDQX0Xc5wWRS4eR/frw7/P96Gmp29KiRGHfPraqEJzUZre0BtLQHbOFdlYMZgwwGg8FgxAik4VIpI6qts1fxGHzBZE5gWcmY4aOXoWZXjxpJ6LyptQe/fmaDqv1a2b7OcmOwpaUFy5Ytw9q1a9HR0YGCggL8/Oc/x5QpUwAA1157LTZsiLyg06ZNw4oVKwAAgUAADz30EN5//3309PTg/PPPx1133QW/32/6b2EwGAwGwyhIw6UkRpQcfMFkTmBZzLBUQquhZpd8RSFGGW1Wtq+z3Bi87bbbcPz4cSxbtgxZWVlYsWIFFi1ahJUrV2LMmDHYtWsX7r33XlxwwQXh7yQnJ4f//95778XmzZvx5JNPwuVy4Z577sEtt9yCl19+2Yqfw2AwGAwbYscCBDWoCZeSGFEAkJGWjLbOk+F/Z2em4rp5JVHePE5gWWiIKtHY0o2q2kY4nQ6q605qdJntUTPCaLO6fZ2lxuC+ffuwfv16vPLKK5g8eTIA4Ne//jXWrVuH1atX46qrrkJTUxMmTpyIQYMGRX2/oaEBq1atwjPPPBP2JC5btgwXX3wxtm7dikmTJpn6exgMBoNhP+xYgKAGteFSUuNo8bzxyPalEhlqnMAyZ1Dvb2jHa7w8QSkeWrEJHV2nDU41153U6DLbo0YTOlfC6vZ1lopODxw4EM899xzGjz/d48/hcMDhcKCtrQ27du2Cw+FAbm6u6Pe/+OILAMBZZ50V/iw3Nxc5OTnYtEm9S5vBYDAY8UU8CCarCZcC5MZRti8V4/OzUV42AuPzsxWNkSSnI7x96dhoB40YfEMQUHfdOaNLDis8aiS9iUmxS/s6Sz2DGRkZKC8vj/jsgw8+wL59+3DnnXdi9+7d8Hq9uO+++7B+/Xp4PB5cfPHF+MlPfgKXy4WGhgYMHDgQbrc7Yh+DBw/G0aNHqc8rFAqhq6uL+vskdHd3R/yXEf+we554sHtORjAYQs2+E2hpDyDT60bhqIFw6uAlCQZDeHbldtltnlu1HRPG+HQ5HmDMPT/a2Eq8Xd4wD3KHpMKf4UZzW0By2yyfG7lDUkXnOpL7QXIMOUiv+zWzz8SyV6Xv4cKLxyLQY/77VZqfidu+PwF/fW+X6muwcPaZyEx3RVxbI2yOUCgEh4PsubY8Z5DPli1bcMcdd+Ciiy7CrFmzcOeddyIQCGDChAm49tprUVNTgz/84Q84fPgw/vCHP6C7uxsuV3SZvNvtRiBA94ACwMmTJ1FTU6PlpxBTX19vynEY9oHd88SD3XNpdhzoxvtftKCtqy/8WYYnCRdPzkTRyFRN+65r6FGcqJtaA3j/023IzdE31KjnPW9tJgtHtjYfRU1NCwDggonpeG2d9G//5oR07Nq1M+pzNfdD6RhykF73DCfw3XOzJM8pw9mMmppmqnPQSoYTWHJJNvYdD6CjOwhPigNvfXYCbd1B6e94kjDa1wmnswvoAXbtajD0HMVsJDFsYwx+/PHH+MUvfoGysjI8+uijAID77rsPS5cuhc/nAwCceeaZSE5Oxs9+9jP88pe/REpKCnp7o8vkA4EAUlPpB5Hk5GTk5+dTf5+E7u5u1NfXY/To0ZrOlRE7sHueeLB7Ls/G6ga8ti7a69PW1YfX1jXhtu9PwPTiHOr9N588AqBRcTuffwgKC4cS71fOc2bEPS8oCGH1pnWKnr6LZ5aGz6Mt2ACgSXL7EcOHo7Aw8tqqvR+FhcCI4Q1R3rH01AHo6D6l+LtIr3
thIbDgAmO8x3owhnfPBw1uk/VkLp5bjGINz7Qaamtribe1hTH48ssv43e/+x0uvvhiPPzww2FLdsCAAWFDkGPs2LE
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHnCAYAAAAsITxhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACDfElEQVR4nO3deXwU9f0/8Nduzs1NNgIBhIQgkBOIBBQV8CgVb7T2+OJRBKWX/fVrLdaqtdZ69Ot9K+J91KNcxVtbQcslhxAgEQgkCITDJOQid3Z+f8RZdjd7zLUzszuv5+PRR2Uzu/PZnd2Z93w+78/7YxMEQQARERERWY7d6AYQERERkTEYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrKoWKMbYDZff/01BEFAXFyc0U0hIiIiUqS7uxs2mw0TJkwIuh17BH0IgoBw19gWBAFdXV1h3w+ZB4+5NfG4Ww+PufWY9ZhLjWfYI+hD7AksLi4O2z7a2tpQWVmJUaNGISkpKWz7IfPgMbcmHnfr4TG3HrMe823btknajj2CRERERBbFQJCIiIjIokwVCD733HO4+uqrvR6rrKzEVVddhfHjx+Occ87Bq6++6vV3l8uFxx9/HGeddRbGjx+P66+/Hvv379ez2UREREQRyTSB4BtvvIFHH33U67Fjx45hzpw5GD58OBYvXoxf//rXePDBB7F48WL3Nk8//TTefPNN3H333Xjrrbfgcrkwb948dHV16fwOiIiIiCKL4ZNFjhw5gjvvvBPr169HTk6O19/eeecdxMXF4a9//StiY2ORl5eHffv2YeHChbjiiivQ1dWFF198ETfffDOmT58OAHjkkUdw1lln4ZNPPsFFF12k/xsiIiIiihCG9wju2LEDcXFx+Ne//oVx48Z5/W3jxo2YNGkSYmNPxKunnXYaampqUFdXh2+++QbHjx/H6aef7v57WloaCgoKsGHDBt3eAxEREVEkMrxH8JxzzsE555zj92+HDx/G6NGjvR4bOHAgAODQoUM4fPgwACA7O7vfNuLflBAEAW1tbYqfH0p7e7vX/1P04zG3Jh536+Extx6zHnNBEGCz2UJuZ3ggGExHRwfi4+O9HktISAAAdHZ2uj90f9s0NTUp3m93dzcqKysVP1+qmpqasO+DzIXH3Jp43K2Hx9x6zHjMfeMjf0wdCCYmJvab9NHZ2QkASEpKQmJiIgCgq6vL/d/iNg6HQ/F+4+LiMGrUKMXPD6W9vR01NTXIyclR1U6KHDzm1sTjbj085tZj1mNeVVUlaTtTB4KDBw/G0aNHvR4T/z1o0CD09PS4Hxs+fLjXNmPGjFG8X5vNpkt1cIfDYaoq5BR+PObWxONuPTzm1mO2Yy5lWBgwwWSRYMrKyrBp0yb09va6H1u3bh1yc3PhdDoxduxYpKSkYP369e6/Nzc3o6KiAmVlZUY0mYiIiChimDoQvOKKK9Da2orbbrsNVVVVWLJkCV5++WXMnz8fQN/Y91VXXYUHH3wQ//73v/HNN9/gf//3fzF48GDMmDHD4NYTERERmZuph4adTicWLVqEe+65B7NmzcJJJ52EBQsWYNasWe5tfvvb36Knpwe33347Ojo6UFZWhhdeeAFxcXEGtpyiQa9LQMXeejQ0dyAzLREFI52IsUvraiciIooEpgoE77///n6PlZSU4O233w74nJiYGPzhD3/AH/7wh3A2jSxmTXktFi7bhvqmDvdjzvRE3HBZMaaUDDGwZURERNox9dAwkRHWlNfivlc2eAWBAFDf1IH7XtmANeW1BrWMiIhIWwwEiTz0ugQsXLYt6DbPL9+OXpegU4uIiIjCh4EgkYeKvfX9egJ91TW2o2JvvU4tIiIiCh8GgkQeGpqDB4FytyMiIjIzBoJEHjLTEkNvJGM7IiIiM2MgSOShYKQTzvTgQV5WhgMFI506tYiIiCh8GAgSeYix23DDZcVBt7
n+0iLWEyQioqjAQJDIx5SSIbj12rJ+PYNZGQ7cem0Z6wgSEVHUMFVBaSKzmFIyBJOLsrmyCBERRTUGgkQBxNhtKB6VZXQziIiIwoZDw0REREQWxUCQiIiIyKIYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrIoBoJEREREFsVAkIiIiMiiGAgSERERWRQDQSIiIiKLYiBIREREZFEMBImIiIgsioEgERERkUUxECQiIiKyKAaCRERERBbFQJCIiIjIohgIEhEREVkUA0EiIiIii2IgSERERGRRDASJiIiILIqBIBEREZFFMRAkIiIisigGgkREREQWxUCQiIiIyKIYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrIoBoJEREREFsVAkIiIiMiiGAgSERERWRQDQSIiIiKLYiBIREREZFEMBImIiIgsioEgERERkUUxECQiIiKyKAaCRERERBbFQJCIiIjIohgIEhEREVkUA0EiIiIii2IgSERERGRRDASJiIiILIqBIBEREZFFMRAkIiIisigGgkREREQWxUCQiIiIyKIYCBIRERFZFANBIiIiIotiIEhERERkUQwEiYiIiCyKgSARERGRRTEQJCIiIrIoBoJEREREFsVAkIiIiMiiGAgSERERWRQDQSIiIiKLYiBIREREZFEMBImIiIgsioEgERERkUUxECQiIiKyKAaCRERERBbFQJCIiIjIoiIiEOzp6cFjjz2Gs88+GxMmTMDs2bOxZcsW998rKytx1VVXYfz48TjnnHPw6quvGtdYIiIioggREYHgM888g3fffRd33303li1bhtzcXMybNw9Hjx7FsWPHMGfOHAwfPhyLFy/Gr3/9azz44INYvHix0c0mIiIiMrVYoxsgxWeffYaLLroIZ555JgDgj3/8I959911s2bIF1dXViIuLw1//+lfExsYiLy8P+/btw8KFC3HFFVcY3HIiIiIi84qIQNDpdOLzzz/HVVddhezsbLz99tuIj4/H2LFj8e6772LSpEmIjT3xVk477TQ899xzqKurQ1ZWluz9CYKAtrY2Ld+Cl/b2dq//p+jHY25NPO7Ww2NuPWY95oIgwGazhdwuIgLB2267Df/v//0/nHvuuYiJiYHdbscTTzyB4cOH4/Dhwxg9erTX9gMHDgQAHDp0SFEg2N3djcrKSk3aHkxNTU3Y90HmwmNuTTzu1sNjbj1mPObx8fEht4mIQLCqqgqpqal46qmnMGjQILz77ru4+eab8frrr6Ojo6PfG01ISAAAdHZ2KtpfXFwcRo0apbrdgbS3t6OmpgY5OTlwOBxh2w+ZB4+5NfG4Ww+PufWY9ZhXVVVJ2s70geChQ4fw+9//Hi+//DImTpwIACguLkZVVRWeeOIJJCYmoqury+s5YgCYlJSkaJ82m03xc+VwOBy67IfMg8fcmnjcrYfH3HrMdsylDAsDETBreOvWreju7kZxcbHX4+PGjcO+ffswePBgHD161Otv4r8HDRqkWzuJiIiIIo3pA8HBgwcDAHbu3On1+K5du5CTk4OysjJs2rQJvb297r+tW7cOubm5cDqduraVKNx6XQK2VdVh1eYD2FZVh16XYHSTiIgogpl+aLikpASnnnoqbrnlFtx5550YPHgwli1bhrVr1+If//gHhg0bhkWLFuG2227DvHnzUF5ejpdffhl33XWX0U0n0tSa8losXLYN9U0d7sec6Ym44bJiTCkZYmDLiIgoUpm+R9But+OZZ57BaaedhltvvRWXX3451q1bh5dffhnjxo2D0+nEokWLUF1djVmzZuHJJ5/EggULMGvWLKObTqSZNeW1uO+VDV5BIADUN3Xgvlc2YE15rUEtIyKiSGb6HkEASE9Px5133ok777zT799LSkrw9ttv69wqIn30ugQsXLYt6DbPL9+OyUXZiLFLSw4mIiICIqBHkMjqKvbW9+
sJ9FXX2I6KvfU6tYiIiKIFA0Eik2toDh4Eyt2OiIhIxECQyOQy0xI13Y6IiEjEQJDI5ApGOuFMDx7kZWU4UDCS5ZK
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAn4AAAHnCAYAAAAmUVB2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAA6r0lEQVR4nO3deVxWZf7/8ffNDiqalmKio2kIKqUmqOWajjlmm1OPptLKRFssWhTLyUzLSRtxX9ImtdLMchnNxso2y8kVs9zQcoGRFDFRUVmF8/vDr/cvhFvhcOMNXq/n4+FDuc51rvM55+KGt/e5zzkOy7IsAQAA4Irn5ekCAAAAcHkQ/AAAAAxB8AMAADAEwQ8AAMAQBD8AAABDEPwAAAAMQfADAAAwhI+nC/C0rVu3yrIs+fr6eroUAAAAW/Ly8uRwONSqVauL9jP+HT/LslTe97C2LEu5ubnlvh1UHMy5mZh38zDn5qmoc17SPGP8O37n3+mLjIwst21kZmYqMTFRTZo0UVBQULltBxUHc24m5t08zLl5Kuqcb9++vUT9jH/HDwAAwBQEPwAAAEMQ/AAAAAxB8AMAADAEwQ8AAMAQBD8AAABDEPwAAAAMQfADAAAwBMEPAADAEAQ/AAAAQxD8AAAADEHwAwAAMISPpwsAKor8Aku79h9Teka2agYHqNl1teTt5XBbfzvbdtWee7ZAn/1wQIePnVHdWlX0l1sayc/Hvf+Puxz7Z2ed0o51sWPlallWbr7eXblDh34/o2uvrqJH72ihQD/vi46VX2Bp54F07UzKVEFAulpHBMrby1HqsewcK1fbsPP9cyozT6/PWa+jx7N0zVWBemVAe1UL8nW5jeMZORo6dY0yTucpuKqv4mO76Kpgf5ftknQ47YwGx3+tvHxLvt4OzRjaTXVrV9HR9Cw9M+FrZefkK8DfW9OGdNM1NQN18nSu/j5zrY5n5OiqYH+98VRHVa/q57LWi+3HsRPZem7SNzqTdVZVAn00+flbVatGgK1tHDuRrdiJa3QmM09Vgo5o6gvnxnK1H66OyZnss5q0MEGpxzIVUitIzz/YRlUCzv16dlWXq3VczdOl9qO4Y+KqXlf9j/yeqafjv1ZOXoH8fb00fWg31bk6SJJc1uuqrtJ+H0oq9ffPnr3pGvrWWufrKP7JjmrapKbL/Sv6vfub83vX1bFytW1PcliWZXm0gj+YPXu2/vvf/2r+/Pku+xw/flxjxozR999/L4fDodtvv13Dhg1TYGCgrW1u375dkhQZGWlr/ZLIzMxUYmKiIiIiFBQUVG7bgX3rth3S28u369jJbGdbreoBGnR3pG6+4dpS9y/NnLsaq3Orevpu629F2puEVtfmXUdU8IdXrpdDurtzY/W/o4Wd3S/1/pX3WKU9Jq7Gmrdyh5Z/t6/YYyWp2GV1agbp8LHMImPVrRWkI+mZxY7V9E81i623ZrC/fj14ssRjRTWro70pJ0t1rMbM3aCNO48Uab++fnWlZ+SU6vtn/fbDxe67n6+XcvMKirR7eUkFRZtdCgrwUVbOWbnjt47DoWLHqVvr3OutuP1w5zaOZWQXe0zctY3r61fX0eNZOnE6t8gyV8c9KMBHmdlni7S7mj937ocrXl5S43rVi30d+Hg7dDa/6M672j9X+9G2eR1t2Z1W7Fju4ufrpbyzBW753q1R1U/zR/+l7ANdoKR5psIEvw8++EBjxoxRmzZtLhr8+vXrp6ysLI0ePVoZGRl6+eWXFRUVpTfffNPWdgl+WLftkMa+t9nl8uGPRBX6pVuS/i2b1CjRnF9qrNLq06Xs4a+0x8PdY9k9JheONW/lDi1bs6/U41RUxR0rV6EPQMVWHuGvpHnG45/xO3LkiJ544gnFx8erYcOGF+27detWbdq0SW+++aaaN2+u9u3b67XXXtOKFSt05Ag//FB6+QWW3l6+/aJ9/rVih/L/7+2RkvYvKLj0/6dKMlZpLf9un3LP2v/fe2
mPh7vHKssx+eNYuWcLtPy7Kyf0SUWPVVZuPqEPqKROnM7VyWLezb0cPP4Zv507d8rX11effPKJZsyYod9++81l34SEBF1zzTVq3Lixsy06OloOh0NbtmxRr169bNVgWZYyM8t+WsCVrKysQn+j4th5IL3QqbDi/H4iSz8m/qbmjWqWuP9Pv6QqUBef85KMVVoFlrRizR7dfvOfbK1f2uPh7rHKckz+ONZ/1iWrBNm0UrnwWM1ZmejhigCUxUvTv9OE2FvcNp5lWXI4Lv05bI8Hv1tvvVW33nprifoeOXJEdevWLdTm5+enGjVq6PDhw7ZryMvLU2Ji+f8QTUpKKvdtoHR2JpUs8O/cfUBe2UdK3H/vgUOKbBh00Tkv6ViltXvvb7ruKntjl/Z4uHussh6T82Pt3nu8TONUVH88Vvv+d9TD1QAoi2MnstyePfz8Ln3hiMeDX2lkZWUVu1P+/v7KycmxPa6vr6+aNGlSltIuKisrS0lJSWrYsKHti1BQPgoC0rV0Xfol+zUPb6SIRjVL3L9Jo2sl68RF57ykY5VWeJN6ioiw945faY+Hu8cq6zE5P9b+48na9OsvtsepqP54rBrvlfalpni4IgB21aoRqIiICLeNt3fv3hL1q1TBLyAgQLm5Rc+J5+TklOmiCYfDcVkuuggMDOTijgqmdUSgalXfedHTi1fXCFTriHry9nKUuH/LsBDt2XPionNekrFKy8sh3dWlqe1bu5T2eLh7rLIckz+OdVeXplrw+S9X1OneC49VzD0ttXoTwQ+orMY93VlBQe67tUtJTvNKFeDijtIICQlRWlpaobbc3FydOHFCtWvX9lBVqMy8vRwadPfFr4AaeFcL5y/bkvb3KsH97koyVmnd3blxme7nV9rj4e6xynJM/jiWn4+X85YtV4oLj1Wgn7faNq/jwYoA2FWjqp/H7udXqYJfVFSUUlNTlZyc7GzbtGmTJOmmm27yVFmo5G6+4VoNfyRKtaoHFGq/ukZgsbfQKG1/u9vu06Vxse1tm9fRhbnLy+GeW7lcqiZ37p+rsewck+LG6n9HC/Xp0tjlsXK17Px94C5Ut1aQy7Fc1Xt9/eqlGqtt8zqlOlYjHmvnMvxdX796qb9/XO27n2/xvyq8SvkbJCjARyV8U+KSXI1Tt1aQy/1w5zZcHRN3beP6+tVVw0UwcHXcgwKKP4nnqlZ37ocrXl5y+Trw8S5+513tn6ta2zav43Isd/Hz9XLb92553cevpCrMffwk6aWXXtJvv/3mvI9ffn6+0tPTVa1aNQUEBMiyLD344IPKycnRqFGjlJmZqb///e9q27atxo4da2ub3McP57nzyR2lnXOe3FHydSrykzt+TPxNO3cfUPPwRs7Tsjy540p/csc3//fkDl+e3GHckzscFerJHZXuBs5S0eCXkpKibt26aezYserTp48k6dixYxo9erTWrl0rf39/9ezZU8OHD5e/v7+tbRL8UB6YczMx7+Zhzs1TUee8pHmmQl3cMW7cuEJfh4aGas+ePYXaatWqpalTp17OsgAAAK4IleozfgAAALCP4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAABgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACGIPgBAAAYguAHAA
BgCIIfAACAIQh+AAAAhiD4AQAAGILgBwAAYAiCHwAAgCEIfgAAAIYg+AEAABiC4AcAAGAIgh8AAIAhCH4AAACG8Hj
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoIAAAHnCAYAAAAsITxhAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACYsklEQVR4nO3deXgUVdo28Ls7pJN0VpIgEJQlIGHJBrLJIogIboDoyIwLOgiCuMw3OowOIzPz6jguI6KOuIDRUVRUVGRRX0F8FdEIsghJAIGwCYTFTsi+kq7vD+y216pT1dVb+v5d11wj6erq6q7tqeec8xyDJEkSiIiIiCjiGIO9AUREREQUHAwEiYiIiCIUA0EiIiKiCMVAkIiIiChCMRAkIiIiilAMBImIiIgiFANBIiIiogjVLtgbEGp++OEHSJKE6OjoYG8KERERkSYtLS0wGAwYMGCA7HLMCLqQJAlaamxLkoTm5mZN76XQxf3aNnG/tk3cr20X9616ovEMM4IubJnAnJwcVe+rr6/Hnj170KtXL5jNZn9sGgUB92vbxP3aNnG/tl3ct+oVFxcLLceMIBEREVGEYiBIREREFKFCKhBcvHgxpk2b5vS3PXv24JZbbkF+fj7Gjh2LpUuXOr1utVrxn//8B6NGjUJ+fj7uuOMOHD16NJCbTURERBSWQiYQfPvtt/Hss886/e3MmTOYPn06unbtig8//BB33303FixYgA8//NC+zIsvvohly5bhn//8J959911YrVbMnDkTzc3NAf4GREREROEl6INFTp06hX/84x/YvHkzunfv7vTa8uXLER0djUceeQTt2rVDz549ceTIESxZsgTXX389mpub8dprr2Hu3LkYM2YMAOCZZ57BqFGjsG7dOlxzzTWB/0JEREREYSLoGcFdu3YhOjoaq1evRl5entNrW7duxZAhQ9Cu3a/x6rBhw3D48GFYLBb8+OOPqKurw8UXX2x/PSkpCf369cOWLVsC9h2IiIiIwlHQM4Jjx47F2LFjPb528uRJ9O7d2+lv5513HgDgxIkTOHnyJACgc+fObsvYXtNCkiTU19erek9DQ4PT/1PbwP3aNnG/tk3cr20X9616kiTBYDAoLhf0QFBOY2MjTCaT099iYmIAAE1NTfYDwtMyVVVVmj+3paUFe/bs0fTew4cPa/5cCl3cr20T92vbxP3adnHfquMaH3kS0oFgbGys26CPpqYmAIDZbEZsbCwAoLm52f7ftmXi4uI0f250dDR69eql6j0NDQ04fPgwunfv7tNnU2jhfm2buF/bJu7Xtov7Vr3S0lKh5UI6EOzUqRNOnz7t9Dfbvzt27IizZ8/a/9a1a1enZbKysjR/rsFg0Fy5PC4ujlXP2yDu17aJ+7Vt4n5tu7hvxYk0CwMhMFhEzuDBg7Ft2za0trba/7Zp0yb06NEDaWlp6NOnDxISErB582b769XV1di9ezcGDx4cjE0mIiIiChshHQhef/31qK2txUMPPYTS0lKsWLECr7/+OmbPng3gXNv3LbfcggULFuCLL77Ajz/+iPvuuw+dOnXC+PHjg7z1RERERKEtpJuG09LSUFBQgH/961+YMmUKOnTogAceeABTpkyxL/OHP/wBZ8+exfz589HY2IjBgwfj1VdfRXR0dBC33H9arRJ2HyxHRXUjUpNi0S8zDVFGsfQvERERkaOQCgSfeOIJt7/l5ubivffe8/qeqKgo/PnPf8af//xnf25aSCgsKsOSlcUor2q0/y0tORazrs3B8NyMIG4ZERERhaOQbhqmXxUWleHxN7Y4BYEAUF7ViMff2ILCorIgbRkRERGFKwaCYaDVKmHJymLZZV5ZVYJWqxSgLSIiIqK2gIFgGNh9sNwtE+jKUtmA3QfLA7RFRERE1BYwEAwDFdXyQaDa5YiIiIgABoJhITUpVnkhFcsRERERAQwEw0K/zDSkJcsHeekpceiXmRagLSIiIqK2gIFgGIgyGj
Dr2hzZZe6YnM16gkRERKQKA8EwMTw3A/NuG+yWGUxPicO82wazjiARERGpFlIFpUne8NwMDM3uzJlFiIiISBcMBMNMlNGAnF7pwd4MIiIiagPYNExEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFKAaCRERERBGKgSARERFRhGIgSERERBShGAgSERERRSgGgkREREQRioEgERERUYRiIEhEREQUoRgIEhEREUUoBoJEREREEYqBIBEREVGEYiBIREREFKEYCBIRERFFqLAIBM+ePYvnnnsOl156KQYMGICbb74ZO3bssL++Z88e3HLLLcjPz8fYsWOxdOnS4G0sERERUZgIi0DwpZdewvvvv49//vOfWLlyJXr06IGZM2fi9OnTOHPmDKZPn46uXbviww8/xN13340FCxbgww8/DPZmExEREYW0dsHeABHr16/HNddcg5EjRwIA/vKXv+D999/Hjh07cOjQIURHR+ORRx5Bu3bt0LNnTxw5cgRLlizB9ddfH+QtJyIiIgpdYREIpqWl4csvv8Qtt9yCzp0747333oPJZEKfPn3w/vvvY8iQIWjX7tevMmzYMCxevBgWiwXp6emqP0+SJNTX16t6T0NDg9P/U9vA/do2cb+2TdyvbRf3rXqSJMFgMCguFxaB4EMPPYT/9//+Hy677DJERUXBaDTi+eefR9euXXHy5En07t3bafnzzjsPAHDixAlNgWBLSwv27NmjaVsPHz6s6X0U2rhf2ybu17aJ+7Xt4r5Vx2QyKS4TFoFgaWkpEhMT8cILL6Bjx454//33MXfuXLz11ltobGx0+6IxMTEAgKamJk2fFx0djV69eql6T0NDAw4fPozu3bsjLi5O0+dS6OF+bZu4X9sm7te2i/tWvdLSUqHlQj4QPHHiBP70pz/h9ddfx6BBgwAAOTk5KC0txfPPP4/Y2Fg0Nzc7vccWAJrNZk2faTAYNL83Li5O83spdHG/tk3cr20T92vbxX0rTqRZGAiDUcM7d+5ES0sLcnJynP6el5eHI0eOoFOnTjh9+rTTa7Z/d+zYMWDbSURERBRuQj4Q7NSpEwBg7969Tn/ft28funfvjsGDB2Pbtm1obW21v7Zp0yb06NEDaWlpAd3WcNRqlVBcasGG7cdQXGpBq1UK9iYRERFRgIR803Bubi4uuugiPPjgg/jHP/6BTp06YeXKlfjuu+/wzjvv4Pzzz0dBQQEeeughzJw5E0VFRXj99dfx8MMPB3vTQ15hURmWrCxGeVWj/W9pybGYdW0OhudmBHHLiIiIKBBCPiNoNBrx0ksvYdiwYZg3bx6uu+46bNq0Ca+//jry8vKQlpaGgoICHDp0CFOmTMGiRYvwwAMPYMqUKcHe9JBWWFSGx9/Y4hQEAkB5VSMef2MLCovKgrRlREREFCghnxEEgOTkZPzjH//AP/7xD4+v5+bm4r333gvwVoWvVquEJSuLZZd5ZVUJhmZ3Rp
RRrLMpERERhZ+QzwiS/nYfLHfLBLqyVDZg98HyAG0RERERBQMDwQhUUS0fBKpdjoiIiMITA8EIlJoUq+tyREREFJ4
"text/plain": [
"<Figure size 1600x1200 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from typing import List, Any\n",
"\n",
"def draw_data_2d(\n",
" df: pd.DataFrame,\n",
" col1: int,\n",
" col2: int,\n",
" y: List | None = None,\n",
" classes: List | None = None,\n",
" subplot: Any | None = None,\n",
"):\n",
" ax = None\n",
" if subplot is None:\n",
" _, ax = plt.subplots()\n",
" else:\n",
" ax = subplot\n",
" scatter = ax.scatter(df[df.columns[col1]], df[df.columns[col2]], c=y)\n",
" ax.set(xlabel=df.columns[col1], ylabel=df.columns[col2])\n",
" if classes is not None:\n",
" ax.legend(\n",
" scatter.legend_elements()[0], classes, loc=\"lower right\", title=\"Classes\"\n",
" )\n",
"\n",
"columns = ['age', 'avg_glucose_level', 'bmi', 'hypertension']\n",
"df_temp = df[columns]\n",
"\n",
"sns.set_theme(style=\"whitegrid\")\n",
"print(\"ЗАВИСИМОСТЬ ЗНАЧЕНИЙ ДРУГ ОТ ДРУГА\")\n",
"plt.figure(figsize=(16,12))\n",
"draw_data_2d(df_temp, 0, 1, subplot=plt.subplot(2, 2, 1)) # age vs avg_glucose_level\n",
"plt.figure(figsize=(16,12))\n",
"draw_data_2d(df_temp, 0, 2, subplot=plt.subplot(2, 2, 2)) # age vs bmi\n",
"plt.figure(figsize=(16,12))\n",
"draw_data_2d(df_temp, 0, 3, subplot=plt.subplot(2, 2, 3)) # age vs hypertension\n",
"plt.figure(figsize=(16,12))\n",
"draw_data_2d(df_temp, 1, 2, subplot=plt.subplot(2, 2, 4)) # avg_glucose_level vs bmi\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"видно, что индекс массы тела в зависимости от возраста в основном держится поменьше в раннем возрасте (до полового созревания, грубо говоря), а потом уже распределяется от адекватного до 40+ (в общем, вплоть до ожирения, с выбросами-то)\n",
"\n",
"потом гипертония встречается все таки после 20 лет чаще\n",
"\n",
"ну и чем ниже индекс массы тела, тем ниже и адекватнее уровень глюкозы (ну тут ясно, почему. люди с избыточным весом и болеют диабетом чаще)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"продолжим приводить данные к нормальному виду, и теперь их стандартизуем:"
]
},
{
"cell_type": "code",
"execution_count": 164,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>age</th>\n",
" <th>avg_glucose_level</th>\n",
" <th>bmi</th>\n",
" <th>hypertension</th>\n",
" </tr>\n",
" <tr>\n",
" <th>id</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>9046</th>\n",
" <td>0.981658</td>\n",
" <td>2.548208</td>\n",
" <td>0.960777</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>51676</th>\n",
" <td>0.718652</td>\n",
" <td>1.986183</td>\n",
" <td>-0.115701</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>31112</th>\n",
" <td>1.551505</td>\n",
" <td>-0.057528</td>\n",
" <td>0.435353</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>60182</th>\n",
" <td>0.192639</td>\n",
" <td>1.328647</td>\n",
" <td>0.678842</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1665</th>\n",
" <td>1.507670</td>\n",
" <td>1.389985</td>\n",
" <td>-0.653940</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>34383</th>\n",
" <td>0.061136</td>\n",
" <td>-0.432990</td>\n",
" <td>-0.423266</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8646</th>\n",
" <td>0.411811</td>\n",
" <td>-0.236875</td>\n",
" <td>-0.307929</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>46653</th>\n",
" <td>1.595339</td>\n",
" <td>-1.047440</td>\n",
" <td>-0.128516</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1099</th>\n",
" <td>-1.297729</td>\n",
" <td>-0.158769</td>\n",
" <td>-0.884614</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>61676</th>\n",
" <td>1.420001</td>\n",
" <td>-0.854297</td>\n",
" <td>-0.512973</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2500 rows × 4 columns</p>\n",
"</div>"
],
"text/plain": [
" age avg_glucose_level bmi hypertension\n",
"id \n",
"9046 0.981658 2.548208 0.960777 0\n",
"51676 0.718652 1.986183 -0.115701 0\n",
"31112 1.551505 -0.057528 0.435353 0\n",
"60182 0.192639 1.328647 0.678842 0\n",
"1665 1.507670 1.389985 -0.653940 1\n",
"... ... ... ... ...\n",
"34383 0.061136 -0.432990 -0.423266 0\n",
"8646 0.411811 -0.236875 -0.307929 0\n",
"46653 1.595339 -1.047440 -0.128516 1\n",
"1099 -1.297729 -0.158769 -0.884614 0\n",
"61676 1.420001 -0.854297 -0.512973 0\n",
"\n",
"[2500 rows x 4 columns]"
]
},
"execution_count": 164,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.preprocessing import StandardScaler\n",
"# Standardize every feature except the 0/1 hypertension flag, which is copied over unscaled below.\n",
"scaler = StandardScaler()\n",
"\n",
"columns_to_scale = df_temp.drop(columns=[\"hypertension\"]).columns\n",
"columns_to_keep = [\"hypertension\"]\n",
"data_scaled = scaler.fit_transform(df_temp[columns_to_scale])\n",
"# fit_transform returns a plain array, so rebuild a DataFrame with the original column names and index.\n",
"df_scaled = pd.DataFrame(data_scaled, columns=columns_to_scale, index=df_temp.index)\n",
"df_scaled[columns_to_keep] = df_temp[columns_to_keep]\n",
"\n",
"df_scaled"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Самое время применить иерархический алгоритм кластеризации: он строит дерево кластеров, в котором каждый уровень — это объединение более мелких кластеров."
]
},
{
"cell_type": "code",
"execution_count": 165,
"metadata": {},
"outputs": [],
"source": [
"# linkage_matrix = linkage(data_scaled, method='ward')\n",
"# plt.figure(figsize=(10,10))\n",
"# dendrogram(linkage_matrix)\n",
"# plt.title('Дендрограмма')\n",
"# plt.ylabel('')\n",
"# plt.xlabel('')\n",
"# plt.show()\n",
"\n",
"\n",
"import numpy as np\n",
"from sklearn import cluster\n",
"from scipy.cluster import hierarchy\n",
"\n",
"def run_agglomerative(\n",
"    df: pd.DataFrame, num_clusters: int | None = 2\n",
") -> cluster.AgglomerativeClustering:\n",
"    \"\"\"Fit agglomerative (hierarchical) clustering on ``df`` and return the fitted model.\n",
"\n",
"    Args:\n",
"        df: Feature matrix, one row per sample.\n",
"        num_clusters: Number of flat clusters to stop at. ``None`` builds the\n",
"            full merge tree (``distance_threshold=0``) instead of stopping at a\n",
"            fixed cluster count.\n",
"\n",
"    Returns:\n",
"        The fitted estimator. ``compute_distances=True`` makes it expose\n",
"        ``distances_`` even when ``num_clusters`` is given.\n",
"    \"\"\"\n",
"    agglomerative = cluster.AgglomerativeClustering(\n",
"        n_clusters=num_clusters,\n",
"        # scikit-learn requires exactly one of n_clusters / distance_threshold to be set,\n",
"        # so the advertised num_clusters=None case needs an explicit threshold.\n",
"        distance_threshold=0 if num_clusters is None else None,\n",
"        compute_distances=True,\n",
"    )\n",
"    return agglomerative.fit(df)\n",
"\n",
"\n",
"def get_linkage_matrix(model: cluster.AgglomerativeClustering) -> np.ndarray:\n",
"    \"\"\"Build a linkage-style matrix from a fitted agglomerative model.\n",
"\n",
"    Each output row is ``[child_a, child_b, distance, sample_count]`` for one\n",
"    merge, taken from ``model.children_`` and ``model.distances_``; the last\n",
"    column is the number of original samples under the merged node.\n",
"    \"\"\"\n",
"    merges = model.children_\n",
"    leaf_total = len(model.labels_)\n",
"    sizes = np.zeros(merges.shape[0])\n",
"    for step, children in enumerate(merges):\n",
"        # A child index below leaf_total is an original sample (size 1);\n",
"        # otherwise it refers to the node created by an earlier merge.\n",
"        sizes[step] = sum(\n",
"            1 if child < leaf_total else sizes[child - leaf_total]\n",
"            for child in children\n",
"        )\n",
"\n",
"    return np.column_stack([merges, model.distances_, sizes]).astype(float)\n",
"\n",
"def draw_dendrogram(linkage_matrix: np.ndarray, p: int = 3):\n",
"    \"\"\"Draw a level-truncated dendrogram of ``linkage_matrix`` on the current figure.\n",
"\n",
"    Args:\n",
"        linkage_matrix: Linkage matrix passed straight to ``hierarchy.dendrogram``.\n",
"        p: Value forwarded as ``p`` together with ``truncate_mode=\"level\"``;\n",
"            defaults to 3, the depth that used to be hard-coded.\n",
"    \"\"\"\n",
"    hierarchy.dendrogram(linkage_matrix, truncate_mode=\"level\", p=p)\n",
"    plt.xticks(fontsize=10, rotation=45)\n",
"    plt.tight_layout()"
]
},
{
"cell_type": "code",
"execution_count": 166,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAm8AAAHPCAYAAAAFwj37AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVG0lEQVR4nO3dd3hT9eIG8DddScsquyBIkdEyZAkFfshUEBEQLBsuGysgW4pcZJSNcguIiiDrAiJFC3gVleEV7wXZMmS0UKBltZQNpdn5/v7ozSGBUprkpMlp38/z+BgyTt6enHPy5kyVEEKAiIiIiBTBx9MBiIiIiCj3WN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhB/Bx58sGDB9G/f/9sH6tQoQJ+/fVXXL16FbNmzcLhw4cRFBSEbt26YdSoUfD19ZUlMBEREVFB5lB5q1+/Pvbu3Wt33/HjxzFq1CiMGDECRqMRQ4YMQWhoKDZt2oTLly9jypQp8PHxwejRo2UNTkRERFQQqVy5tmlmZiY6duyIxo0bY968efjxxx8xefJk7N27F8WKFQMAxMXF4eOPP8b+/fsREBDg0PCPHTsGIQT8/f2djUhERETk9YxGI1QqFerXr//c57q0z9uXX34JrVaLSZMmAQCOHDmCWrVqScUNAJo0aYKMjAycPXvW4eELIeBCt7QbjsFgkGVYcmCenHlTHm/KAjDP8zCPMrIAzPM8zKOMLIB8eRzpPA5tNrV1584drF27FhMmTEBwcDAAIC0tDSEhIXbPK1OmDAAgNTUVdevWdeg9/P39IYRAlSpVnI0JANBqtUhOTsYLL7yAwMBAl4YlB+ZRTh5vysI8zJNfsjAP8+SXLHLmSUpKgkqlytVznS5vGzduRJEiRdCzZ0/pPp1Oh6JFi9o9T61WAwD0er1T72M0Gp1aa5ed5ORkWYYjF+bJmTfl8aYsAPM8D/M8mzdlAZjneZjn2bwpCyBPntzuXuZ0edu2bRu6dOkCjUYj3afRaGAwGOyeZy1tQUFBTr2Pv78/qlat6mxMAI9bcWhoqFe1dObx/jzelIV5mCe/ZGEe5skvWeTMk5SUlOvnOlXeEhIScOXKFXTq1Mnu/pCQEJw7d87uvvT0dABA2bJlnXkrqFQqp4vfkwIDA2UblhyYJ2felMebsgDM8zzM82zelAVgnudhnmfzpiyA63lyu8kUcPKAhSNHjqBkyZIIDw+3u79Ro0Y4c+YMMjIypPsOHDiAQoUKPfVcIiIiInKcU+XtzJkzCAsLe+r+119/HaVLl8bYsWORkJCA3bt3IzY2FoMHD3b4NCFERERE9DSnytvNmzelI0xtqdVqrFy5EhaLBT169EBMTAz69OmDESNGuJqTiIiIiODkPm9fffXVMx+rVKkSVq9e7XQgIiIiIno2XpieiIiISEFY3oiIiIgUhOWNiIiISEFY3oiIiIgUhOWNiIiISEFY3oiIiIgUhOWNiIiISEGcvjA9kbcTQkBvMLs0DJ3BDIPJAp3BDB9fk0zJmId5CnaW/J5HHeDr0HUqiRzF8kb5khACkz7bi7PJd2Qa4nWZhiMX5skZ8zybN2UB8mOeGqElsOD9V1ngyG242ZTyJb3BLGNxIyLKvbPJd1xe60+UE655o3xv/Yz20AT4OvXaTK0WiYkJCAsLR1BgoMzJmId5CmaW/JpHZzDjbzN+kTkZ0dNY3ijf0wT4QqN2blK3mH0R4Ofj0jDkxDzMkx+yMA+Ra7jZlIiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiI
iIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFITljYiIiEhBWN6IiIiIFMSp8rZt2zZ06NABL7/8Mt566y38/PPP0mNXr15FVFQUGjRogFdffRWLFy+G2WyWLTARERFRQeZwefv+++8xZcoU9O3bF9u3b0fHjh0xfvx4HDt2DEajEUOGDAEAbNq0CTNmzMA333yDzz//XPbgRERERAWRnyNPFkJgyZIl6N+/P/r27QsAGD58OI4cOYJDhw7h2rVruH79OjZv3oxixYqhevXquH37Nj7++GO89957CAgIcMsfQURERFRQOLTm7dKlS7h27Ro6depkd/+qVasQFRWFI0eOoFatWihWrJj0WJMmTZCRkYGzZ8/Kk5iIiIioAHNozdulS5cAAJmZmRgyZAjOnDmDChUqYPjw4WjTpg3S0tIQEhJi95oyZcoAAFJTU1G3bl2HAwohkJmZ6fDrbGm1Wrv/exrz5EyOPDrD4/0sM7VaWMy+HssiJ+bJGfM8mzdlAfJnHrmWO3LlkZM35fGmLIB8eYQQUKlUuXquQ+UtIyMDADBp0iS8//77+OCDD7Bjxw6MGDECa9asgU6nQ9GiRe1eo1arAQB6vd6Rt5IYjUbZ1tolJyfLMhy5ME/OXMljMFmk24mJCQjwc+3A6vw0btyBeXLmTXm8KQuQv/LIvdxxNY87eFMeb8oCyJMnt7uXOVTe/P39AQBDhgxB165dAQA1atTAmTNnsGbNGmg0GhgMBrvXWEtbUFCQI29l955Vq1Z16rVWWq0WycnJCA0NRWBgoEvDkgPzuD9P1i/g6wCAsLBwaAKcX/OW38YN8xTMPN6UJb/mkWu5I1ceOXlTHm/KImeepKSkXD/XofJWtmxZAED16tXt7q9atSr27NmDiIgInDt3zu6x9PR0u9c6SqVSOV38nhQYGCjbsOTAPDlzJY+Pr0m6HRQYCI3aoUld1izuwDw5Y55n86YsQP7KI/dyx9U87uBNebwpC+B6ntxuMgUcPGChVq1aKFSoEE6cOGF3/7lz5/Diiy+iUaNGOHPmjLR5FQAOHDiAQoUKITw83JG3IiIiIqJsOFTeNBoNhg4dis8//xw//vgjLl++jGXLlmHfvn0YNGgQXn/9dZQuXRpjx45FQkICdu/ejdjYWAwePJinCSEiIiKSgcPrdEeMGIHAwEAsWrQIN27cQJUqVbB06VI0btwYALBy5UrExMSgR48eKFasGPr06YMRI0bIHpyIiIioIHJqg/ygQYMwaNCgbB+rVKkSVq9e7VIoIiIiIsoeL0xPREREpCAsb0REREQKwvJGREREpCAsb0REREQKwvJGREREpCCun/6ZiIgKDCEE9DYXYHeWzmCGwWSBzmC2uzKBp8iRR2cwZXvbU3nkJGcedYCvQ1cToKexvBERUa4IITDps704m3xHxqFel3FYcpAnz99m7JBlOPlx/NQILYEF77/KAucCbjYlIqJc0RvMMhc3KojOJt+RZe1tQcY1b0RE5LD1M9pDE+Dr9OsztVokJiYgLCwcQYGBMiZjHm/NozOY8bcZv8icrGBieSMiIodpAnyhUTv/FWIx+yLAz8fl4ciFeZSVp6DjZlMiIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5IyIiIlIQljciIiIiBWF5Iy
IiIlIQh8vbjRs3EBYW9tR/W7ZsAQCcPXsW/fr1Q7169dCmTRusW7dO9tBEREREBZWfoy9ISEiAWq3G7t27oVKppPu
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Fit with run_agglomerative's default cluster count, then convert the fitted tree\n",
"# into a linkage matrix that hierarchy.dendrogram can draw.\n",
"tree = run_agglomerative(df_scaled)\n",
"linkage_matrix = get_linkage_matrix(tree)\n",
"draw_dendrogram(linkage_matrix)"
]
},
{
"cell_type": "code",
"execution_count": 167,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA/EAAANFCAYAAAAzmnz2AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVRfA4d/Mtmx6LwRC6E16ERVBEQFRUWyIgBVFP3vDggiIIioiihVRURRFxYYgIIqoIL13QgklvbfNlpn5/tgkEJItSZYE9L7Po5DZmTt3JkuyZ+6950iapmkIgiAIgiAIgiAIgnDWkxu6A4IgCIIgCIIgCIIgeEcE8YIgCIIgCIIgCIJwjhBBvCAIgiAIgiAIgiCcI0QQLwiCIAiCIAiCIAjnCBHEC4IgCIIgCIIgCMI5QgTxgiAIgiAIgiAIgnCOEEG8IAiCIAiCIAiCIJwj9A3dgbPNli1b0DQNg8HQ0F0RBEEQBADsdjuSJNG1a9eG7sq/hvh9LwiCIJxNavK7XozEn0bTNDRN81lbNpvNZ+0J3hH3vWGI+94wxH1vGPV93335u0lwEr/vz23injcMcd8bhrjvDeNs/l0vRuJPU/5EvmPHjnVuq6SkhD179tCyZUv8/f3r3J7gHXHfG4a47w1D3PeGUd/3fceOHWf8HP814vf9uU3c84Yh7nvDEPe9YZzNv+vFSLwgCIIgCIIgCIIgnCNEEC8IgiAIQr364IMPGD16dKVte/bsYdSoUXTp0oX+/fvz2WefVXpdVVXeeustLr74Yrp06cLdd9/NsWPH6rPbgiAIgnBWEEG8IAiCIAj15osvvmDmzJmVtuXm5nLHHXeQkJDAwoULuf/++5k+fToLFy6s2Ofdd99l/vz5TJkyha+++gpVVRkzZgw2m62er0AQBEEQGpZYEy8IgiAIwhmXnp7OxIkTWbduHYmJiZVe+/rrrzEYDLzwwgvo9XpatGhBcnIys2fP5vrrr8dms/Hxxx/zxBNPcMkllwDwxhtvcPHFF7N8+XKuuuqq+r8gQRAEQWggIogXBEEQBOGM27VrFwaDgZ9++ol33nmHEydOVLy2ceNGevXqhV5/8mNJ7969+eCDD8jKyiIlJYXi4mIuuOCCiteDg4Np3749GzZsqHUQr2kaJSUltb+oMhaLpdKfwpkn7nnDEPe9YYj73jDq+75rmoYkSV7tK4J4QRAEQRDOuP79+9O/f/9qX0tLS6N169aVtkVHRwOQmppKWloaAHFxcVX2KX+tNux2O3v27Kn18ac7cuSIz9oSvCPuecMQ971hiPveMOrzvhuNRq/2E0G8IAiCIAgNqrS0tMoHF5PJBIDVaq0YBalun/z8/Fqf12Aw0LJly1ofX85isXDkyBESExMxm811bk/wTNzzhiHue8MQ971h1Pd9T0pK8npfEcQLgiAIgtCg/Pz8qiSos1qtAPj7++Pn5weAzWar+Hv5PnX5YCVJkk9r/5rNZlHDuZ6Je94wxH1vGOK+N4z6uu/eTqUHkZ1eEARBEIQGFhsbS0ZGRqVt5V/HxMRUTKOvbp+YmJj66aQgCIIgnCXESLwgCILgE7b0HDI/W0rOj3+hFFnwa9WY6FsHE3blhUiyeGYsuNazZ0+++uorFEVBp9MBsHbtWpo1a0ZERARBQUEEBgaybt06EhISACgoKGD37t2MGjWqIbsu1JKmOlAtBajWYtAUJJ0R2RyMZAyo0WiUIAjCf5EI4gVBEIQ6K9l1iH03TUAptICqAlCUU0DRPzsJHXIBLd59Akmva+BeCmer66+/njlz5jB+/HjGjBnD9u3bmTt3LpMnTwaca+FHjRrF9OnTCQ8PJz4+ntdee43Y2FgGDhzYwL0Xakpz2HDkp4CmnrKtFKWwFMngjy44RgTygiAIboggXhAEQagTzaFw4I6pKEUnA3ig4u95v6wlbfaPxP3vugbqoXC2i4iIYM6cObz00ksMGzaMqKgoxo0bx7
Bhwyr2eeihh3A4HDz33HOUlpbSs2dPPvroIwwGQwP2XKgpTdNwFKRVCuArvW4vQbXkofMPq+eeCYIgnDtEEC8IgiDUSd6KDdhTslzvoGlkzFlE7NhrkHRiNF6AadOmVdnWqVMnFixY4PIYnU7Hk08+yZNPPnkmuyacYZrdAqrD7T6qJR/ZHCpG4wVBEFwQixQFQRCEOinasNfjVHl7Ri621Ox66pEgCGcrzV7qxU4qqPYz3xlBEIRzlAjiBUH4T1HtDlSLFU3TGror/xqSLOHN3ZRkMaomCIK3xM8LQRAEV0QQLwjCf0LB6u3sv2USm5vfwOZWw9l58f9I//hnNIfS0F075wVd1Anc3UcJjE2iMcRG1F+nBEE4K0kGs+edZB3IYsWnIAiCKyKIFwThXy9z/q/sv/l5ClZvp3zI2JqcxrGJH5F097RzNpAvPXiCgmXr0TYfRC21NVg/gvt2xq9FPOhc/ErRIPa+YaLMnCAISAY/0Bnd7iPWwwuCILgnPlEJgvCvZkvJIvmZ95zBu3JKNmRNA00jf8UGsr5a0WD9q43SQyfYe/2z7Ox3P8cfnAmT57P/wvtInfUNmlp9xuczSZJlWn76HIboMOcM2PLP3mVBfdTowUSNHlzv/RIE4ewjSRL64BiXI+2SKRDZL7ieeyUIgnBuEXOVBEH4V8v88lfcL9iWSP/oZ6JGDaqvLtWJ9Vg6e4Y+hVJYUmm7WmThxCtf4MgppMnEO+u9X36JcZy38m2yv11Jzk9/oRSUYG6TQNTowQSe316MqgmCUEHSGdCHNUYtLUKzFqFpCpLOiOwXjGTwEz8vBEEQPBBBvCAI/2qW3Ycr1y4/naZRmnQMTVXPieneKW9+4wzgleqvKX3OT0TddgV+iXH13DPQBZqJvn0I0bcPqfdzC4JwbpEkGZ05GMxi1F0QBKGmzv5PrIIgCHUg+5lcr9UuI+n1cA6M/Kg2OznfrXIZwAMgy2R/+0e99UkQBEEQBEGoX2IkXhCEf7XQgb3I+fEv1zvoZEIHn39OTN9UCorRbB5qJ0sS9rRzrx679UQmuYtW48gvwpQQQ/jVfdAFepHFWhAEQRAE4T9GBPGCIPyrhV7RG2NCDLYTmVVHsCWcmdPHXtsQXasxXVAAkkGPZne43knTnAnmzhGq3cGx5z8k8/PlIEnOmvMOhWPPzyFh6lgib+zf0F0UBEEQBEE4q4jp9IIg/KvJRgNtvnoBU+No5wadDLLsDBgNBpq/+wQBXVo1bCe9JJsMhA3t4355gKIScf0l9danujo26SNnAK9poKoV5f5Ui5Ujj75F7rJ1DdxDQRAEQRCEs4sYiRcE4V/PlBBDhz/eJn/5evJ+34RmtePfsTmRN/VHH3ZuJVVq9PBN5C1bh2qxVrs2PnLUQGfN9nOALTWbzHlLnQF8dSSJE69+QejAXufEcgdBEARBEIT6IIJ4QRD+E2SDnrArLyTsygsbuit14te8EW2/e5nDj76JZdfhiu2SyUDM3UOJf/KWBuxdzeT+8o/78n+aRum+o1gPp+DX/Nx4MCEIgiAIgnCmiSBeEAThHOPfPpEOy96geHsS+TsPkpKdSevhgwmKjmzortWIUljiXBpQNoXe5X4FJfXUI++oVjv2jFw0q4ckg4IgCIIgCGeACOIFQRDOUQGdWiK1bETqnj3oAv0bujs15teskccAHlnC2DiqfjrkgS01m9S3viHrm9/RSm0gSxwb2Ismj4/Av11iQ3dPEARBEIT/CJHYThAEQWgQoQN7oQsJcFYJqI5OJnTQ+RgiQ+uzW9WyHs9g95DHyZy/3BnAA6gahb9uYM/V4yjatLdhOygIgiAIwn+GCOIFQRCEBiH7GUl87QGQJJBPi+R1MvqQQJpMuKNhOneaoxM+xJFTUDWZoKKi2RwcevANNLVqokFBEARBEARfE0G8IAiC0GDChlxA6/mTCOja+uRGnUzYlRfSbvFrmBJiGq5zZWwpWeSv2FhtNQAAVBXb0XQK/9
lZvx0TBEEQBOE/SayJFwRBEBpUcJ/OBPfpjC0lC6WgGENsBPrQwIbuVoXSQydcl8ErJ0mU7j9G8EWd6qdTgiAIgiD
"text/plain": [
"<Figure size 1200x1000 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Cut the tree at merge distance 40 to obtain one flat cluster label per sample.\n",
"# NOTE(review): the threshold 40 was presumably read off the dendrogram above - confirm.\n",
"result = hierarchy.fcluster(linkage_matrix, 40, criterion=\"distance\")\n",
"# NOTE(review): fcluster numbers clusters from 1, while y_names starts at '0' and assumes\n",
"# exactly three clusters; verify how draw_data_2d (defined outside this cell) maps labels to names.\n",
"y_names = ['0', '1', '2']\n",
"\n",
"plt.figure(figsize=(12, 10))\n",
"\n",
"\n",
"# The projections are drawn on df_temp, i.e. the feature values before standardization.\n",
"draw_data_2d(df_temp, 0, 1, result, y_names, subplot=plt.subplot(2, 2, 1)) \n",
"draw_data_2d(df_temp, 0, 2, result, y_names, subplot=plt.subplot(2, 2, 2)) \n",
"draw_data_2d(df_temp, 0, 3, result, y_names, subplot=plt.subplot(2, 2, 3)) \n",
"draw_data_2d(df_temp, 1, 2, result, y_names, subplot=plt.subplot(2, 2, 4)) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Теперь переходим к неиерархической кластеризации.\n",
"Будем использовать метод К-средних (K-means): мы задаём количество кластеров, и алгоритм пытается распределить данные так, чтобы минимизировать расстояние между объектами и центром их кластера."
]
},
{
"cell_type": "code",
"execution_count": 168,
"metadata": {},
"outputs": [],
"source": [
"from typing import Tuple\n",
"\n",
"def print_cluster_result(\n",
"    df: pd.DataFrame, clusters_num: int, labels: np.ndarray, separator: str = \", \"\n",
"):\n",
"    \"\"\"Print the size and member index values of each cluster.\n",
"\n",
"    For every cluster id in ``range(clusters_num)`` this prints a header with\n",
"    the 1-based cluster number and its size, the ``df.index`` values of the\n",
"    rows whose label equals that id joined by ``separator``, an empty line and\n",
"    a dashed rule.\n",
"    \"\"\"\n",
"    for cluster_id in range(clusters_num):\n",
"        member_positions = np.nonzero(labels == cluster_id)[0]\n",
"        header = f\"Cluster {cluster_id + 1} ({len(member_positions)}):\"\n",
"        members = separator.join(str(df.index[pos]) for pos in member_positions)\n",
"        print(header, members, \"\", \"--------\", sep=\"\\n\")\n",
"\n",
"\n",
"def run_kmeans(\n",
"    df: pd.DataFrame, num_clusters: int, random_state: int\n",
") -> Tuple[np.ndarray, np.ndarray]:\n",
"    \"\"\"Cluster ``df`` with k-means and return ``(labels, cluster_centers)``.\n",
"\n",
"    Args:\n",
"        df: Feature matrix, one row per sample.\n",
"        num_clusters: Number of clusters passed to ``KMeans`` as ``n_clusters``.\n",
"        random_state: Seed forwarded to ``KMeans``.\n",
"    \"\"\"\n",
"    model = cluster.KMeans(n_clusters=num_clusters, random_state=random_state).fit(df)\n",
"    return model.labels_, model.cluster_centers_"
]
},
{
"cell_type": "code",
"execution_count": 169,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster 1 (1314):\n",
"31112, 53882, 10434, 27419, 60491, 12109, 12095, 12175, 27458, 4219, 70822, 38047, 33879, 14248, 712, 24977, 47306, 62602, 4651, 1261, 61960, 1845, 37937, 47472, 35626, 36338, 18587, 15102, 59190, 47167, 38829, 55927, 65842, 19557, 7356, 17013, 72366, 6118, 7371, 70676, 27169, 19773, 66159, 71673, 42117, 57419, 26015, 26727, 66638, 70042, 32399, 3253, 71796, 14499, 49130, 51169, 66315, 37726, 54385, 35512, 8154, 4639, 12363, 4712, 33175, 2346, 42072, 30456, 59125, 56546, 48405, 36706, 71639, 60744, 7547, 5563, 68798, 72918, 50522, 3352, 70943, 48796, 16817, 31563, 55824, 20439, 45965, 8045, 37651, 41241, 62861, 72081, 32503, 12482, 56939, 43054, 34567, 50931, 16590, 3512, 42899, 43364, 44993, 210, 28939, 60739, 67432, 2182, 40899, 62466, 36841, 33486, 54567, 66204, 8003, 28378, 41081, 16077, 30184, 66071, 36255, 23410, 35684, 18937, 491, 8580, 28484, 62019, 51314, 37060, 35578, 54921, 33454, 62439, 2548, 2390, 68023, 54724, 8899, 39105, 31154, 69959, 10552, 12917, 68356, 28493, 1836, 32221, 10548, 17739, 27153, 34060, 43424, 30468, 56543, 15266, 10460, 64908, 67855, 25774, 49589, 17986, 29217, 72911, 47175, 48588, 66767, 29908, 45222, 40311, 71750, 42203, 71379, 58261, 67318, 8831, 65199, 43454, 7282, 49003, 16371, 40181, 66174, 6319, 55232, 41940, 72214, 37089, 52134, 54918, 1703, 16934, 32689, 18051, 38805, 61837, 49713, 17608, 62608, 4630, 66333, 49916, 71038, 58617, 69064, 9404, 28286, 10159, 58282, 64489, 64553, 69936, 46527, 13547, 9608, 10504, 37090, 60148, 52173, 23462, 11091, 70374, 31143, 66972, 55810, 37031, 34608, 36007, 48298, 44749, 46468, 51983, 20351, 55351, 67431, 20546, 52342, 59906, 53144, 11999, 38119, 3355, 67177, 58600, 65946, 48368, 36471, 15689, 8233, 23221, 31830, 15296, 17718, 18498, 56735, 621, 5835, 11838, 38165, 71585, 3009, 32361, 53910, 67548, 33404, 50965, 21077, 66570, 29158, 34299, 7696, 34668, 68483, 6072, 51112, 69673, 71238, 63958, 34511, 24892, 29496, 19939, 27832, 27757, 25099, 33123, 21713, 6726, 17242, 16380, 9729, 56974, 
65574, 17019, 41800, 6855, 26028, 14712, 23094, 40622, 46438, 65144, 34641, 13129, 37629, 62936, 59829, 55424, 61697, 55138, 39399, 721, 40448, 58007, 11960, 24592, 67744, 8328, 32437, 55420, 53660, 56553, 30480, 31988, 45585, 52063, 27377, 7446, 65130, 30753, 22853, 12465, 64849, 24183, 39601, 46891, 38987, 21886, 5353, 44300, 46218, 39745, 13517, 36355, 22678, 52512, 3579, 3130, 5545, 63693, 33528, 11068, 62233, 7291, 36814, 48265, 10139, 12662, 43174, 72823, 30567, 21117, 50491, 61013, 71010, 23551, 12738, 57772, 16615, 68995, 53010, 967, 31145, 54338, 13223, 67932, 10255, 27012, 20541, 5892, 66883, 43196, 51514, 38184, 13997, 27796, 18390, 63409, 72882, 40866, 63561, 51422, 3590, 60665, 18430, 13365, 60983, 14615, 50277, 50811, 1246, 30712, 31308, 3325, 52808, 36109, 53336, 56831, 55592, 44583, 58227, 60810, 34612, 25595, 30550, 13367, 38609, 22159, 37413, 4169, 18888, 42503, 23645, 62382, 59521, 55386, 72547, 26973, 41033, 71442, 49624, 10572, 28910, 10381, 31956, 24665, 13683, 7387, 57011, 50053, 51125, 29077, 4970, 58291, 99, 64633, 23016, 18412, 67412, 37545, 14491, 66220, 25458, 69645, 53695, 26692, 33400, 67078, 22540, 26999, 30102, 49521, 29134, 68281, 40350, 39375, 12106, 25283, 49949, 28681, 62332, 25488, 14807, 16110, 40970, 28933, 11709, 16809, 13907, 54071, 28024, 11730, 17245, 70852, 60957, 19742, 4808, 49928, 52688, 65698, 820, 55721, 72310, 24115, 7122, 48755, 33551, 62716, 68438, 41148, 14924, 47950, 9262, 71896, 38623, 26503, 5475, 15525, 48748, 71591, 67864, 34857, 34995, 22952, 57879, 36638, 41097, 54620, 19681, 6988, 25287, 5934, 58999, 28261, 35222, 44105, 65256, 20044, 54769, 12686, 48830, 47924, 59336, 5684, 3673, 31867, 23633, 52549, 37349, 2751, 64464, 66494, 42786, 33401, 24174, 61715, 60663, 46875, 69091, 1821, 44978, 10762, 84, 45824, 61838, 57212, 62668, 33142, 17437, 38303, 36484, 60047, 16542, 18805, 17869, 6793, 49265, 6606, 23031, 22902, 4807, 9641, 10313, 12097, 45323, 39120, 68344, 66752, 5077, 49279, 42856, 10752, 42133, 
4842, 58138, 65053, 24168, 61973, 18687, 72642, 54782, 24437, 65429, 3509, 15220, 4813, 31166, 9
"\n",
"--------\n",
"Cluster 2 (813):\n",
"31720, 69768, 39912, 33943, 66866, 49669, 30669, 16523, 46136, 52800, 37893, 19584, 24447, 70336, 45801, 67210, 33759, 26325, 65460, 36811, 70970, 55680, 11014, 44338, 34974, 41648, 45538, 68249, 1686, 22284, 39038, 21956, 30171, 65535, 29865, 28799, 40840, 10449, 31091, 9487, 28102, 1506, 40670, 21284, 5973, 42996, 46785, 21408, 7559, 8171, 43232, 34402, 22706, 71539, 28637, 31741, 22537, 50611, 9637, 44862, 5708, 65357, 49465, 14123, 54975, 10913, 27029, 45719, 129, 530, 6107, 3655, 5010, 56681, 56716, 61350, 61465, 18108, 46436, 7351, 31164, 48272, 2893, 34376, 8595, 46474, 69687, 2953, 9179, 63050, 11974, 41523, 50441, 16927, 28265, 54375, 37832, 21058, 31279, 9201, 22622, 29933, 7621, 5374, 31564, 71808, 56998, 43134, 39383, 63606, 36331, 42359, 20751, 68241, 60902, 58439, 29010, 44912, 45238, 61511, 36942, 15095, 44315, 68245, 47732, 55709, 15311, 59807, 40639, 8544, 3361, 61408, 33552, 31364, 9906, 27794, 46809, 35565, 48144, 2532, 34363, 23650, 23046, 41927, 54866, 20364, 10997, 62999, 66184, 22870, 57523, 68131, 29873, 54182, 61300, 15274, 53016, 28848, 7745, 12593, 15553, 45796, 31840, 58767, 14391, 22321, 41673, 49086, 47523, 56870, 40791, 54304, 19234, 52454, 16575, 11176, 9612, 41513, 52580, 33723, 26235, 8320, 49529, 49785, 6886, 40055, 45283, 22685, 46745, 5046, 55847, 42441, 14387, 22384, 24108, 69427, 21688, 60777, 64732, 42710, 46683, 58909, 18616, 55529, 12204, 21397, 10324, 25514, 7663, 71793, 32352, 65218, 54643, 33410, 2543, 45289, 10792, 19153, 47876, 41615, 41537, 45759, 71929, 37154, 70380, 67405, 2549, 10782, 61742, 24099, 13948, 65766, 42482, 8008, 56089, 11745, 17733, 11743, 3606, 32554, 45893, 53538, 17006, 42108, 224, 56679, 27146, 16556, 36698, 57372, 30605, 13622, 39250, 2879, 59684, 56986, 16402, 40889, 4083, 48843, 5694, 44481, 19101, 11973, 59178, 44281, 55599, 45224, 54747, 6090, 46385, 46323, 28122, 50843, 60211, 53279, 37830, 2454, 3437, 6355, 58567, 62187, 72779, 12396, 69622, 58037, 34281, 7990, 57622, 11691, 13319, 53815, 
51579, 58203, 6965, 42821, 10367, 66530, 43146, 57497, 16147, 18306, 61769, 26134, 66772, 41861, 954, 23565, 57854, 66678, 56734, 20347, 809, 67052, 69224, 13323, 59940, 46093, 46072, 47848, 71440, 59734, 32733, 61338, 59275, 19996, 31517, 56245, 9225, 45955, 53943, 53276, 39661, 51162, 4683, 782, 63565, 26389, 39518, 542, 26031, 38255, 41565, 39423, 68908, 28418, 33162, 39467, 20282, 51159, 7167, 59147, 18192, 14049, 69355, 2082, 16449, 5447, 44224, 533, 45554, 55744, 25767, 71319, 23604, 46576, 31293, 6044, 45209, 43155, 11882, 45669, 59604, 33187, 44192, 728, 28952, 51916, 21857, 55976, 27572, 54184, 4702, 45048, 30084, 50650, 40571, 13072, 22969, 47537, 26242, 36226, 32723, 35737, 30677, 50453, 14241, 40144, 13504, 51959, 2092, 68235, 3956, 34436, 40513, 48836, 62387, 3807, 51339, 32826, 44179, 14563, 40237, 65970, 51109, 5984, 13062, 47770, 12687, 36858, 50373, 13191, 47330, 64750, 70259, 72132, 11726, 13736, 43913, 41870, 37907, 15987, 57166, 47627, 8723, 937, 51806, 59412, 29233, 17762, 1405, 57953, 40251, 27013, 7586, 45357, 15219, 52987, 29327, 8085, 41820, 49057, 18070, 11770, 25454, 29224, 24219, 52089, 7297, 29910, 59339, 18754, 34312, 57798, 11605, 61924, 58015, 43271, 39714, 21785, 30214, 66419, 40931, 28559, 49815, 1625, 56309, 52034, 17175, 40544, 49152, 43672, 39286, 38043, 71721, 38094, 47350, 43675, 65801, 59953, 43772, 45754, 57485, 6128, 37082, 64541, 4538, 34001, 48964, 40393, 35432, 44010, 71044, 30290, 70752, 26366, 12436, 37698, 14677, 42743, 5686, 4789, 897, 69553, 38036, 36666, 16316, 31835, 4099, 26893, 1486, 5451, 3640, 17835, 37660, 24782, 63416, 42082, 54058, 4861, 33768, 35450, 62793, 66592, 29804, 3753, 27279, 38578, 66502, 9034, 16582, 28500, 70241, 66647, 39450, 57109, 17277, 18861, 38858, 13862, 66065, 71869, 46035, 54946, 29934, 28998, 63668, 24876, 22536, 8760, 53126, 18179, 68708, 12366, 38440, 34621, 16091, 17515, 34958, 30620, 1818, 70654, 49485, 61641, 11566, 72108, 22967, 33692, 25305, 7885, 14599, 38488, 52428, 61171, 
2824, 2019, 47751, 34525, 13755, 56019, 67942, 52220, 48226, 10333, 10390, 72497, 51935, 29470,
"\n",
"--------\n",
"Cluster 3 (373):\n",
"9046, 51676, 60182, 1665, 56669, 8213, 5317, 58202, 56112, 34120, 25226, 70630, 13861, 68794, 64778, 61843, 54827, 69160, 43717, 39373, 54401, 47269, 7937, 19824, 8752, 25831, 66400, 58631, 5111, 10710, 17004, 2326, 50784, 36236, 45805, 28291, 2458, 56841, 63973, 45277, 12062, 41069, 53401, 13491, 44033, 14164, 37132, 53440, 69551, 20387, 71279, 11762, 29281, 30683, 17308, 67981, 58978, 11933, 46703, 24669, 59437, 66258, 20426, 63453, 14431, 65105, 67895, 66955, 24905, 69112, 64373, 58267, 54695, 68627, 31179, 68025, 29552, 25904, 31421, 20463, 12689, 39186, 32729, 23368, 25974, 1210, 36857, 52282, 45535, 40460, 32257, 41413, 28674, 63884, 4057, 36275, 11577, 20980, 28526, 18518, 42807, 11120, 68614, 4480, 2982, 59368, 65836, 21130, 56357, 45053, 28333, 49421, 54312, 69037, 63732, 2374, 15528, 27213, 22320, 50305, 59729, 12985, 22091, 17291, 66196, 1307, 35846, 28645, 22470, 15649, 12982, 67733, 545, 15791, 67780, 68275, 36561, 47811, 17148, 56179, 25483, 50118, 39639, 31090, 64174, 48993, 39659, 71533, 34558, 42553, 53515, 9752, 49744, 22485, 16685, 25315, 33585, 17813, 64582, 12270, 50826, 14147, 2314, 63058, 13571, 47885, 17351, 25627, 72020, 36618, 37290, 62709, 7273, 10538, 9648, 66922, 8521, 69330, 5824, 8332, 55862, 42550, 14178, 69143, 67603, 34326, 18414, 17708, 38678, 30989, 22363, 41291, 33622, 47735, 55775, 35140, 2750, 62783, 19778, 22440, 32157, 28150, 6419, 39823, 32884, 70031, 5821, 45788, 52150, 37327, 56090, 22001, 16260, 35913, 40624, 45945, 54526, 29869, 47159, 7806, 63984, 5878, 239, 33983, 29375, 62452, 47622, 50098, 46373, 71318, 49341, 4692, 40253, 44950, 42460, 41271, 21491, 43059, 46284, 38493, 5137, 39202, 4833, 19389, 34496, 49709, 31415, 71322, 72337, 7550, 57917, 41424, 54858, 49495, 15166, 16593, 63663, 15988, 35829, 53909, 5799, 16488, 44781, 29385, 48072, 32776, 50841, 26328, 52419, 70344, 6879, 58438, 12512, 2513, 45713, 24272, 54347, 54353, 15515, 48759, 32452, 35333, 15120, 11412, 24630, 24229, 9170, 47608, 63597, 5478, 43657, 
71917, 24603, 70857, 16066, 48069, 49014, 61247, 27799, 72435, 46864, 25405, 7344, 61178, 48364, 71182, 48775, 8968, 32016, 32270, 1473, 60963, 66431, 36750, 16938, 29388, 5355, 29915, 3305, 50671, 68333, 44777, 36377, 39531, 14479, 53422, 19550, 62456, 47521, 63938, 45040, 42212, 57270, 63401, 53990, 17295, 55466, 34448, 49672, 59130, 34661, 65680, 5863, 23223, 55566, 63990, 61895, 36589, 28651, 45033, 20316, 7683, 23176, 61000, 9026, 54301, 3099, 46068, 3715, 50402, 69502, 16812, 62629, 63912, 49574, 7411, 6239, 44591, 27626, 12279, 28303, 7658, 35997\n",
"\n",
"--------\n"
]
},
{
"data": {
"text/plain": [
"array([[ 0.47644409, -0.40065839, 0.33339772, 0.12947449],\n",
" [-1.10132351, -0.3102823 , -0.77003252, 0.004914 ],\n",
" [ 0.72629021, 2.08749131, 0.50685056, 0.25737265]])"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Fixed seed passed to KMeans through run_kmeans.\n",
"random_state = 9\n",
"\n",
"# Partition the standardized data into 3 clusters, list the member ids of each cluster,\n",
"# then show the cluster centers (one row per cluster).\n",
"labels, centers = run_kmeans(df_scaled, 3, random_state) \n",
"print_cluster_result(df_scaled, 3, labels)\n",
"display(centers)"
]
},
{
"cell_type": "code",
"execution_count": 170,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABQ4AAAPfCAYAAACGlVSEAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXwU9f0/8NduyLG5L+4odw6ScEdQqGgR1FZRqbVWUQTvSvWn1lqvWqtWe2i9/YoHoKj1QBGwatEKVRAEuSEBA4JErtx3QsjM74+4y14z+5ljZ4+8no9HH5XN7OxnPzu7O/ue9/vztsmyLIOIiIiIiIiIiIjIjT3UAyAiIiIiIiIiIqLww8AhERERERERERER+WDgkIiIiIiIiIiIiHwwcEhEREREREREREQ+GDgkIiIiIiIiIiIiHwwcEhERERERERERkQ8GDomIiIiIiIiIiMgHA4dERERERERERETkg4FDIiIiIiIiIiIi8sHAYTfzhz/8AXl5ear/u+KKK0I9TCLN9u3bhzvvvBOTJ09GYWEhj2eNKioqcOaZZ6KmpibUQ6EgmzlzJv79739rvl9tbS3OOOMMHDhwIAijIqLuhOejFK14PmoMz0e7D56PRhabLMtyqAdB1vn+++89Poife+457Ny5E88884zrtuTkZAwdOjQUwyPSpaKiAhdddBHOOussTJkyBRkZGYiNjYXD4cCQIUNgt/MaiRpZlnHllVdiypQpuOqqq0I9HAqynTt34uqrr8by5cuRlZWl6b4LFizAZ599hldffRU2my1IIySiaMfzUYpGPB81huej3QvPRyNLj1APgKx18skn4+STT3b9OzMzE3FxcRg1alToBkVk0IIFCzBp0iQ88sgjoR5KRFqxYgV2796Nl19+OdRDIQsMHz4cI0aMwPPPP497771X030vu+wyPP/881ixYgWmTZsWpBESUbTj+ShFI56PGsPz0e6F56ORhZc9yK/29nY8++yzOOecc1BcXIxp06Zh3rx5kCQJAHDFFVf4pNyvW7cOeXl5WLduHQDg6aefRl5enuvvH374IUpKSvDYY48BADo7OzFv3jycd955GDFiBEaNGoVLL70Ua9euNeU5HD58GLfddhsmTJiAkSNH4oorrsDmzZs9xubvf08//TQA4OjRo7jrrrswefJkjBgxAhdffDE+++wz1cd87733kJeXh4qKCtecTJo0CbfddhuAriuRSo/7hz/8wbWfiooK/P73v8ekSZNQWFiIU089Fb///e9RW1vr2kaWZSxYsADnnnsuRowYgalTp+Lll1+GLMtCz6+urg5//OMfcdppp6G4uBiXXHIJvvrqK4/nk5eXh0WLFuHOO+/E6NGjcdppp+Hhhx9Ge3u7ptfC33iefvpp13xt2bIFF110EUaMGIHzzz8fH3/8MQDg+PHjmDRpEm6//XaffU6bNs31JfP1119j8uTJeOqpp/CTn/wEI0aMwKWXXuo6Fp0aGxvxyCOP4KyzzkJxcTHOO+88vPvuu6pjdx7X/sqntm3bhquvvhrjx4/HmDFjcMMNN+Dbb79VvG9RURF++tOf4pVXXvF4DNH9fPnll7j88ssxYsQITJs2DW+88YbHft555x2ce+65KCoqUjy2/HnhhRdw9tlnIy4uznWb+7Hi5P2eBoANGzZg5syZGDlyJE455RTceeedHlkk3u8Jp5/+9KeucamVqgFdJW1XXHEF3n33XZx55pkYPXo0Zs2ahbKyMtXn5f059fDDD6O4uBj/+9//PJ6Pv/+5HzuffvopLrvsMowePRpFRUU455xz8Prrr3s81tGjR3HnnXfi1FNPxejRozFz5kxs2rTJ9VyVHsc5L6LzqPRecRL57ACA888/H++++65qKZDzMd3nIi4uDmeffTZeeOEF1bknIjIDz0d5PgrwfBTg+ag7no/yfJTno9ZjxiH5kGUZN9xwAzZv3oy5c+ciPz8f69atwxNPPIEDBw
7gwQcf1LzPtrY2/PnPf8Y111yD888/HwDwj3/8A2+++SZuv/125OXl4ciRI3j22Wdxyy23YOXKlXA4HLqfw7Fjx3DNNdego6MD999/P2JjY/Hcc8/hiiuuwNtvv41f/vKX+MlPfgIAeOCBBwAA999/PwCgT58+qKqqwsUXX4z4+HjceuutyMjIwHvvvYebbroJf/vb3zB9+nShcfz5z3/GOeecg5kzZ3rcfuONN+KMM85w/Xvu3Lmu/25tbcWVV16JjIwM3H///UhJScGmTZvwzDPPICEhAX/+858BAH/729+wcOFCzJ49GxMnTsS2bdvwj3/8A8ePHw/4/Nrb2zFr1ixUVVXh1ltvRa9evbB48WJcc801eOmll3Dqqae6xvPkk09i5MiReOKJJ7Bnzx488cQTqKysxBNPPCH6cri89dZbrv/u06cP1qxZAwC4/vrrMXPmTNx6661499138f/+3//DCy+8gMmTJ+PCCy/Ea6+9hqamJiQnJwMAvvnmG+zfvx+PPvooAOCHH37Aiy++iNbWVtx+++1IT0/H22+/jTlz5uDll1/GhAkT0NbWhssuuwzV1dW4+eab0b9/f3z66ae45557UFVVhRtuuEF17H/84x9RWFgIoKt8au3atbjmmmswfvx4/OUvf0F7ezteeOEFXHrppXj77bcxZMgQn/s2Nzfjww8/xF//+lfk5+fjtNNO07SfW2+9FRdeeCFuuOEGfPbZZ67X9rLLLsP69etx77334uKLL8a9996LpKQkAJ7Hlj979+7F9u3bceuttwq9hu7Wr1+P2bNnY8KECXjiiSdQX1+PJ598EldeeSXeffddJCQkCO3HeVzs2LEDf/7znz3m2qm0tBR79+7FbbfdhrS0NDz11FOutVF69eoV8DG2bt2KN998E08++SRGjx7t9/Hdx+C0cuVK3HTTTbjyyivx29/+Fm1tbXjjjTfw5z//GUVFRRg5ciSam5vx61//Gp2dnbjjjjvQu3dvvPLKK5gzZw7ef/99PPPMMzh27BgqKysxd+5cj/d/r169NM2j2ntF9LMD6Dp57OzsxIoVK/CrX/3K75ydccYZeOutt3xKBc855xy89dZb+O677zBo0KCAc09EpAfPR3k+yvNRXzwf9cXzUZ6P8nzUAjJ1a3feead85plnety2cuVKOTc3V16+fLnH7c8++6ycm5sr7969W77yyivlSy+91OPva9eulXNzc+W1a9fKsizLTz31lJybmyvLsiy/88478sSJE+Xjx4+7tr/tttvkBQsWeOzjk08+kXNzc+VNmzYZel7r1q2TCwoK5I0bN7puq66ulseMGSPPnTvXY9uZM2fKM2fO9Ljtb3/7m1xYWChXVFR43D5r1ix54sSJcmdnp9/HXbx4sZybmysfOHBAXrNmjVxUVCTX1ta6/n7gwAE5NzdXXrx4scf9zjzzTPnOO++UZVmWd+7cKf/617+Wv//+e49trr/+evnss8+WZVmW6+vr5eHDh8sPP/ywxzYPPvigfPXVVwd8fm+99Zacm5srb9682XWbJEny5ZdfLs+YMcN1W25urjxt2jS5o6PDddv8+fPl3Nxcuby83O8c+ON+LLhzztczzzzjMY4LLrhA/uUvfynLsizv3btXzs3Nld99913XNvfee688bdo0178LCgrkgoIC+bvvvvPYz/nnny9ffPHFsizL8uuvvy7n5uZ6HBOyLMt33323XFxc7PE6ufM+rp0uvvhi+Wc/+5nHMV1fXy+fcsop8s0336x436amJjk3N1d+4YUXNO/nrrvu8hjDjTfeKE+cOFGWJEl+6aWX5NzcXLmpqcljG/djyx/nvNTX13vcnpubKz/11FMet3m/jr/61a/k8847z2Pse/fulQsKCuRFixbJsuz5ngg0LqW5vvPOO+Xc3Fx5/fr1rtuOHDkiFxcXy3//+98Vn5v7sX/HHXe45lPp+fgbw4svvugzztraWo/X8LXXXpPz8vLknTt3urZpaW
mRp02bJr/99tuu25Te/1rmUe29IvLZ4e6CCy6Qb7nlFp/bA2loaJBzc3Pl119/XfN9iYj84fkoz0edeD5a63fsPB8
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def draw_cluster_results(\n",
"    df: pd.DataFrame,\n",
"    col1: int,\n",
"    col2: int,\n",
"    labels: np.ndarray,\n",
"    cluster_centers: np.ndarray,\n",
"    subplot: Any | None = None,\n",
"):\n",
"    \"\"\"Scatter two columns of ``df`` coloured by cluster label and mark the cluster centers.\n",
"\n",
"    Args:\n",
"        df: Data that was clustered.\n",
"        col1: Position in ``df.columns`` of the x-axis feature.\n",
"        col2: Position in ``df.columns`` of the y-axis feature.\n",
"        labels: Cluster label per row of ``df``.\n",
"        cluster_centers: 2-D array of centers, indexed with the same column\n",
"            positions as ``df``.\n",
"        subplot: Axes to draw on; the current pyplot axes are used when omitted.\n",
"    \"\"\"\n",
"    # Resolve a real Axes object so that the title and labels land on the axes\n",
"    # being drawn on rather than on whatever pyplot considers current.\n",
"    ax = plt.gca() if subplot is None else subplot\n",
"\n",
"    x_name = df.columns[col1]\n",
"    y_name = df.columns[col2]\n",
"\n",
"    for label in np.unique(labels):\n",
"        members = df[labels == label]\n",
"        ax.scatter(members[x_name], members[y_name], label=label)\n",
"\n",
"    # Cluster centers are drawn last, as larger black markers on top of the points.\n",
"    ax.scatter(cluster_centers[:, col1], cluster_centers[:, col2], s=80, color=\"k\")\n",
"    ax.set_xlabel(str(x_name))\n",
"    ax.set_ylabel(str(y_name))\n",
"    ax.set_title('Точка - это кластер, грубо говоря (центр кластера) :)')\n",
"\n",
"\n",
"# 2x2 grid of pairwise feature projections of the k-means result;\n",
"# the integer arguments are positions in df_scaled.columns.\n",
"plt.figure(figsize=(16, 12))\n",
"draw_cluster_results(df_scaled, 0, 1, labels, centers, plt.subplot(2, 2, 1)) # age vs avg_glucose_level\n",
"draw_cluster_results(df_scaled, 0, 2, labels, centers, plt.subplot(2, 2, 2)) # age vs bmi\n",
"draw_cluster_results(df_scaled, 0, 3, labels, centers, plt.subplot(2, 2, 3)) # age vs hypertension\n",
"draw_cluster_results(df_scaled, 1, 2, labels, centers, plt.subplot(2, 2, 4)) # avg_glucose_level vs bmi"
]
},
{
"cell_type": "code",
"execution_count": 172,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAygAAAJHCAYAAAB7IXLKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5gdZ3n4/e/MmdN72d5XdVe9WO5NuGEbm5aYEiAJNXHeOCE2EAIOBEjgBzExBNNi0gyYBAgG3HuXbdnqXdre9/Reprx/HO2xVrurupJW8vO5Li7kM+fMPKfu3PM8931LhmEYCIIgCIIgCIIgzAHymR6AIAiCIAiCIAjCBBGgCIIgCIIgCIIwZ4gARRAEQRAEQRCEOUMEKIIgCIIgCIIgzBkiQBEEQRAEQRAEYc4QAYogCIIgCIIgCHOGCFAEQRAEQRAEQZgzRIAiCIIgCIIgCMKcIQIUQRAEYVaJ/r/nPvEeC4JwKokARRCEk/ahD32IRYsWTfrf0qVLueKKK/jyl79MIpGY8pju7m6+9KUvcdVVV7F8+XKuuOIKPv3pT7N79+4Zj/Ptb3+bRYsW8ZWvfOWEx7V48WJWr17Nu9/9bh544IETfs6HeuWVV1i0aBGvvPLKrOzvZA0MDLBo0SJ+/etfz3if7373u1Nem+XLl3PttdfyjW98g3g8ftzHTSaTfOYzn2Hjxo0nMfrpHek5bdiwgZUrV/KOd7yDSCRSue+iRYv4xS9+Me3+UqkUy5Ytm1Pv2/E41u/P+vXr+dznPjerx963bx/vf//7Z2Vfx/JZFQThrUc50wMQBOHc0NnZyd///d9X/rtUKrFjxw7uuusudu3axc9//nMkSQLgscce4zOf+QwLFizgz/7sz2hsbGRkZIT//M//5A//8A/5/ve/z8UXXzxp/7qu85vf/IaFCxfywAMPcPvtt2O32497XJqmMTIywn/8x3/wmc98Bp/Px+WXXz5Lr8LZZ+IE3jAMstks27Zt48c//jFPPfUUP//5zwkEAse8r127dvHAAw/wnve851QNd4pXX32VT33qU7S1tfGTn/wEv9/PwMAAALIs88gjj3DLLbdMedzjjz9OsVg8beOcTSfy/ZlNjzzyCJs2bZqVfVVXV/OLX/yC5ubmWdmfIAjnBhGgCIIwK1wuFytXrpx023nnnUcmk+E73/kOW7ZsYeXKlfT19fHZz36WSy+9lH/5l3/BZDJV7n/NNdfw/ve/n89+9rM89dRTWCyWyrYXXniBkZER7rrrLv7oj/6I3//+9/zBH/zBCY0L4LLLLuPCCy/k17/+9Vs6QDn8tbn44ou56KKL+MAHPsBdd93FV7/61TMzsGPw2muv8clPfpL58+fzk5/8BI/HM2n76tWreeWVV4hGo1MCrQcffJCOjg527dp1Ood80k70+zNXWSyWab+fgiC8tYklXoIgnFJLly4FYGhoCID//u//plgs8oUvfGHSyRWA3W7ns5/9LO95z3umLAv71a9+xcKFC1mzZg3nn3/+jEt3jpXVasVisVRmdaA8S/OjH/2Iq6++mqVLl3Lttdfy3//931Mee//993PttdeyfPly/uiP/qjy3CZMLJ863KJFi/jud79b+e90Os1XvvIVLr30UlauXMl73vMennnmmUmP+d///V9uuOGGypK57373u2iaNuk+jz32GDfddBPLly/nXe961xGXyR2L5cuXc8011/Cb3/yGXC43aSzvfve7WblyJcuXL+fmm2/m4YcfBsrL3D784Q8D8OEPf5gPfehDQHnG6kc/+hE33ngjy5cvZ+XKlbzvfe9jw4YNlf1OLPM59LU5mo0bN/KJT3yCRYsW8R//8R9TghOAq6++GlmWefzxxyfdHovF2LBhAzfccMOUxwwNDfHpT3+adevWsWLFCj7ykY+wc+fOSfcZGBjgM5/5DJdccglLlizhwgsv5DOf+QyxWKxyn/Xr1/Od73yHb3zjG1x00UUsX7
6cj370o/T09FTuE41G+Zu/+Rsuvvhili1bxs0338xvfvObIz7vE/3+wMxLET/0oQ9V3i+A7du385GPfIQ1a9awatUq/viP/5jNmzcD5c/2v/7rvwKTP8/H8t350Ic+xO23385f/uVfsnLlSv7kT/5kyhKvX//613R2drJlyxZuueUWli1bxpVXXsm99947aV9jY2P89V//NevWreO8887jzjvv5Nvf/jbr168/4usnCMLZQQQogiCcUt3d3QA0NTUB8Pzzz9PZ2UlNTc2097/wwgv567/+a6qqqiq3xeNxnnrqKd75zncC8K53vYtt27axY8eOox7fMAxUVa38r1Ao0NXVxd/+7d+SyWS4+eabK/f90pe+xHe+8x1uuukmfvCDH3Ddddfxj//4j3zve9+r3Oe+++7j7//+77n88su55557WLFiBV/84heP+3XRNI0//dM/5Xe/+x2f/OQnueeee2hvb+fWW2+t5HD88Ic/5Itf/CIXXnghP/jBD/jgBz/Ij3/840nHe+qpp/jLv/xLFi1axPe+9z3e/va3c8cddxz3eA538cUXUyqV2LZtGwA//elPufPOO7nqqqv44Q9/yLe+9S0sFgu33347IyMjLFmyhDvvvBOAO++8s7Ks7lvf+hb33HMPt9xyC//2b//GV77yFeLxOLfddlsl+JlY5nMsM2IAr7/+Oh//+MdZtGgR9957Ly6Xa9r7eTweLr74Yh555JFJtz/66KPU19ezfPnySbdHo1He9773sWPHDr74xS/yz//8z+i6zgc/+EEOHDgAQC6X48Mf/jAHDhzg7//+77n33nv58Ic/zIMPPsi3v/3tSfv7r//6L7q6uvinf/onvvrVr7J9+3Y++9nPVrbfcccdHDhwgC9/+cv8+Mc/prOzk89+9rOTgrfDncj353ik02k+9rGP4ff7+e53v8u3v/1tcrkcH/3oR0mlUvzBH/wB733vewEmvWfH8t0BePjhh3E6nXz/+9/nYx/72LRj0HWdv/qrv+L666/nRz/6EatXr+b//b//x/PPPw9AsVjkIx/5CG+88Qaf//zn+ad/+id2797NT37ykxN6zoIgzD1iiZcgCLNiIhCYkEgkePXVV/n+97/PqlWrKjMpIyMjdHR0HNe+f/e736HreiWYuOaaa/iHf/gH7r///qMmzL/22mssWbJk0m2SJLFw4ULuvvturrzySqAcSP3P//wPn/70p/nEJz4BwCWXXIIkSfzwhz/kAx/4AD6fj3vuuYfrr7+ez3/+85X7pNNp7r///uN6Ts899xxbtmzhe9/7HldddRUAF1xwAf39/WzYsIFFixZVTuy/8IUvVI7l8/n4whe+wJ/8yZ+wYMECvve977F8+XK++c1vAnDppZcC8M///M/HNZ7DhUIhAMLhMAD9/f189KMf5c///M8r92loaODd7343r7/+OjfccAPz588HYP78+ZV/T1zpPvQKvdVq5f/7//4/9uzZw8qVK49rmc/mzZv56le/Si6XmzRjMZO3v/3tfP7zn5+0zOvBBx/k+uuvn3Lf//zP/yQej/Pzn/+choYGoLwU8Prrr+fuu+/mO9/5Dj09PdTW1vKNb3yjEnRfcMEFbNmyhVdffXXS/jweD/fcc09lpqOvr4/vfve7xGIx/H4/r776Krfeemvl/V+3bh0+n++IS7NO5PtzPPbv308sFuPDH/4wq1evBqC9vZ1f/OIXZDIZamtrqa2tBd5cHngs3x2/3w+A2Wzmy1/+cuU5TuQLHcowDP78z/+8EvysWbOGxx9/nGeeeYZLL72U3/72t3R1dfGrX/2q8rtywQUXVF5HQRDOfmIGRRCEWTERCEz876KLLuLTn/40S5cu5Z//+Z8rS6lMJtOUJUpH86tf/Yrzzz8fi8VCMpmkVCqxfv16fv/735NOp4/42CVLlvDLX/6SX/7yl9xzzz0sXLiQ1tZW/uVf/oXrrruucr8NGzZgGAbr16+fNOOyfv16CoUCr7/+Ol
1dXUQikUpQM+Htb3/7cT0fKM8CmM3mSUtSZFnm/vvv5y/+4i/YtGkT+Xx+2vEAvPjii+TzeXbs2DEr4zmaz33uc9x
"text/plain": [
"<Figure size 1600x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"\n",
"#до двухмерного пространсва :) вжух\n",
"pca_data = PCA(n_components=2).fit_transform(df_scaled)\n",
"pca_data\n",
"\n",
"# Визуализация сокращенных данных\n",
"plt.figure(figsize=(16, 6))\n",
"\n",
"# Визуализация для KMeans кластеризации\n",
"plt.subplot(1, 2, 1)\n",
"sns.scatterplot(x=pca_data[:, 0], y=pca_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.title('PCA Reduced Data: KMeans Clustering')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"теперь интересная штука... с помощью так называемого метода локтя посмотрим на лучшее (оптимальное) количество кластеров на основе инерции (инерция = сумма квадратов расстояний от объектов до центра их кластера)"
]
},
{
"cell_type": "code",
"execution_count": 184,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA14AAAImCAYAAABD3lvqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB9bUlEQVR4nO3dd3hUVf7H8c8kmfTeMfQAofe6C4KI2NBdwLUguiIqIooUxYa6uKKuNEVEZUGsrBX5iajYwUYJgiJJCEESEkglvU7K/P4IGRkSkhASZpK8X8+TJ8m95575ZvaYzYdz7rkGs9lsFgAAAACgyTjYugAAAAAAaOkIXgAAAADQxAheAAAAANDECF4AAAAA0MQIXgAAAADQxAheAAAAANDECF4AAAAA0MQIXgAAAADQxAheAAAAANDECF4AAAAA0MQIXgDQCG666SZFRETo+uuvP2ObuXPnKiIiQg8++OB5rAxAQyUlJSkiIkIbN260dSkAWgCCFwA0EgcHB+3bt08pKSnVzhUWFurbb7+1QVUAAMAeELwAoJH07NlTLi4u+vzzz6ud+/bbb+Xm5qaQkBAbVAYAAGyN4AUAjcTd3V2jR4+uMXh9+umnuvTSS+Xk5FTt3FdffaVJkyapT58++utf/6onn3xShYWFkqSxY8cqIiKixo+kpCRJ0o8//qgpU6Zo0KBBGjZsmObPn6/k5GSr15g/f36NfdS1hKpqCWVNH6fav3+/pk+frmHDhmngwIG68847dejQIcv5nTt3KiIiQjt37pQkxcbGaty4cbr++uv1wgsvnPE1XnjhBUnS+++/r8svv1y9e/e2Ol/Xss333nuvxn5Pva5qOVld7RpaQ33fm9pe/0znq/53ePDBBzV27Fir133nnXes3sNTX2fPnj1Wbd966y1FRERY9VFcXKxly5Zp/Pjx6t27twYOHKhp06YpOjra6toz1XXTTTdZtamqoyanj48qN910k1U/JSUlevHFF3XZZZepT58+Gj9+vNasWaOKigqra06vZefOnfW6ti5ms1kPPfSQ+vbtqx9++KHe1wGAJFX/CwAA0GBXXHGF5syZo5SUFIWGhkqS8vPztX37dq1fv17bt2+3ar9582bdd999uuqqqzRnzhwdO3ZMK1asUFxcnNavX69Vq1bJZDIpPT1dd999t2bOnKkxY8ZIkoKDg7Vp0yY98MADmjBhgmbMmKGsrCytXLlS1113nT766CMFBARIqvyD9brrrtOkSZMkydJfffTs2VOPP/645fv3339fH3zwgeX7HTt26LbbbtOwYcP01FNPqaSkRK+88oquv/56vffeewoPD6/W55IlS9S7d2/NnDlTPj4+GjVqlCRp0aJFkmR5vdDQUO3evVsLFy7UNddco4ULF8rDw0OS6lV/cXGx+vTpo4ULF1qOnem6U9/b09s1tIazeW8ee+wx9erVq8bXf/fddyVJBw4c0BNPPFGt7elycnL03HPP1XjOw8ND33zzjQYNGmQ59umnn8rBwfrfYhcsWKDIyEjNmzdP7du3V0JCgp5//nnNnz9fW7ZskcFgsLS95ppr9I9//MPyfdX/jo3JbDbrzjvv1L59+3T33Xere/fu2rlzp5577jklJibq3//+t6Xt6WM2PDy83tfW5sknn9Qnn3yiF198USNHjmz0nxFAy0bwAoBGNGbMGLm5uenzzz/XLbfcIkn68ssvFRAQYPWHrlT5h+TSpUs1atQoLV261HK8Y8eOuuWWW7Rt2zZLEKia3Wrfvr369+8vSaqoqNDSpUs1cuRILVu2zHL9wIEDdcUVV2jdunVasGCBJKmoqEgdO3a0XFvVX314enparpOk77//3ur8smXL1KFDB61Zs0aOjo6SpJEjR+qSSy7RypUr9fzzz1u1T0hI0A8//KCPP/5YXbt2lSRLSPX09JQkq9fbsmWLJOnhhx+2BB5JcnZ2rrP2oqIiBQYGWvV3putOfW9Pb/fbb781qI
azeW+6dOlyxtevOl5SUlJj29OtXLlSF1xwgbKysqqdu/DCC/X111/r/vvvlySlpKRo7969Gjx4sI4dOyZJMplMKigo0MKFC3XFFVdIkoYOHar8/Hw988wzysjIUFBQkKXP0NBQq3qq/ndsTNu3b9dPP/2k5cuX68orr5Qk/fWvf5Wrq6uef/553XzzzZbxdPqY3bZtW72vPZNly5bp3Xff1apVq3ThhRc2+s8HoOVjqSEANCJXV1eNHTvWarnhli1bdPnll1vNEEjSH3/8oZSUFI0dO1ZlZWWWjyFDhsjT01M//vhjra915MgRpaena8KECVbH27dvrwEDBmjXrl2WY8nJyfLy8mqEn9BaYWGh9u/fr8svv9wSLCTJ29tbF110kVUNVe1XrFihYcOG1fmHbpW+fftKkl599VWlpaXJZDKprKysXtc21s/dkBrO9r1pLLGxsXr33Xf16KOP1nh+7Nixio+P1x9//CFJ+vzzz9WvXz+FhYVZ2jg7O2vdunW64oorlJqaqh07duidd96xbBBjMpnOuq6KigqVlZXJbDbX2abq49S2u3btkpOTky677DKra66++mrL+TM5l2sl6e2339aaNWt05ZVXWs2KAsDZYMYLABrZ5ZdfrrvvvlspKSlycXHRzz//rDlz5lRrl52dLalyWVZNS7PS0tJqfZ2q6wMDA6udCwwMVFRUlKTKmbXjx4+rbdu2Z/eD1ENeXp7MZvMZa8jLy7M6duedd8rb29tqqWJdhgwZooULF2rNmjVatWrVWdV37NixWpfkNWUNZ/veNJYnn3xSV155pQYMGFDj+ZCQEPXu3Vtff/21OnfurE8//VQTJkywjJcq33//vZ566in98ccf8vDwUPfu3eXu7i5JtYanM1m9erVWr14tR0dHBQYGauTIkbr33nutNpypmiU+1dChQyVVLp/08/OzCrGSLDNvtb2f53KtJMXExGjkyJH65JNP9M9//lM9e/astT0A1ITgBQCN7MILL5SHh4c+//xzubu7q23bturdu3e1dt7e3pIq76Wp+uPyVD4+PrW+jq+vryQpIyOj2rn09HT5+flJkqKjo1VcXFxtQ4zG4OXlJYPBcMYaqmqssmDBAn3++eeaPXu23n777XovSbv22mv1ww8/qKysTI899pjatm2rmTNn1npNRUWFfv31V02ePLler3H6jOS51nC2701j+Oyzz/T7779bLT2tycUXX6yvv/5al19+uX7//XetWrXKKngdPXpUs2bN0rhx4/TKK6+oXbt2MhgMevvtt6stNZXqfu+kyvfv2muvVUVFhY4fP64VK1bo9ttv18cff2xps2jRIqugfOp9Wj4+PsrKylJ5eblVgKr6B4qq8V6Tc7lWku69917dfPPNuvLKK7Vw4UK9//771UIcANSFpYYA0MicnZ01btw4bd26VZ999pnlnpLTde7cWQEBAUpKSlKfPn0sHyEhIVq2bFm1GYjTderUSUFBQfrkk0+sjicmJmrfvn0aOHCgJOm7775Tjx495O/vf9Y/S0VFRa1/YLq7u6t379767LPPVF5ebjmel5en7777rtp9bb1799aqVat07NgxLVmypN51PP/88/ruu+/0zDPP6PLLL1efPn3qvL/ql19+UWFhoYYNG1Zru6rZm9M3lzjXGs72vTlXJpNJzz77rGbNmmV1/1VNxo0bp19//VVvvfWWBg0apODgYKvzv//+u0pKSnTHHXeoffv2lmBVFbqq3rOqHQHreu+kys1g+vTpo379+unyyy/XjTfeqIMHDyonJ8fSplOnTlb/LZx6P93QoUNVVlZWbdfQquBW2/t5LtdKlTOUrq6ueuyxx3TgwAGtX7++zp8XAE7HjBcANIErrrhCM2bMkIODg9WOeqdydHTU3Llz9dhjj8nR0VEXXXSRcnNztXr1aqWmpta5RM7BwUHz5s3TQw89pPnz5+vqq69WVlaWVq1aJR8fH02bNk0HDhzQ22+/rSuvvFL79u
2zXJueni6pcmYjMzOzWijLzMxUXFycEhISLAHuTObPn6/p06frjjvu0JQpU1RaWqo1a9bIZDJp1qxZ1dqHhIRozpw
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"\n",
"inertias = []\n",
"clusters_range = range(1, 23)\n",
"for i in clusters_range:\n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" kmeans.fit(data_scaled)\n",
" inertias.append(kmeans.inertia_)\n",
"\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range, inertias, marker='*')\n",
"plt.title('Метод локтя для оптимального k')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Инерция')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"отсюда можем видеть, что кривая, в принципе, перестаёт резко падать после 15 кластеров. а САМЫЙ резкий спад закончился на количестве кластеров, равном 3-5"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ТЕПЕРЬ ЧАВО ой капс\n",
"\n",
"теперь берем и считаем, насколько хорошо будут данные разделены на некое количество кластеров <br/>\n",
"делаем это с помощью коэффициента силуэта - чем ближе он к 1, тем лучше сгруппирован объект и тем дальше он от соседних кластеров. чем ближе к нулю, тем ближе объект к границе с соседним кластером.\n",
"чем ближе к -1, тем больше вероятность, что он неправильно сгруппирован в кластер"
]
},
{
"cell_type": "code",
"execution_count": 186,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACzuUlEQVR4nOzdd3hU1dbH8e9k0htJaIGE3kILRUBKkCpYUBERRYoXRRTR2EDlBVGxIaAoIEUFFQTxKggWVBS9KqBU6SQ0QwmBBNJILzPvHzEjkZYJM5yU3+d58pCcOefMOsuos9h7r22yWq1WRERERERExGFcjA5ARERERESkvFGhJSIiIiIi4mAqtERERERERBxMhZaIiIiIiIiDqdASERERERFxMBVaIiIiIiIiDqZCS0RERERExMFUaImIiIiIiDiYCi0REREREREHU6ElIhXGsGHDGDZsWJFjW7Zs4dZbb6VZs2Z89tlnTn3/Z599lp49e9p9Xc+ePXn22WedEJGIOEuTJk2YNWuW0WGIiIFcjQ5ARMQoZ86c4aGHHqJ58+YsWLCAJk2aGB2SiIiIlBMqtESkwvrggw/Iyspi6tSpVK9e3ehwREREpBzR1EERqZCSkpJYunQpt9xyy3lFVkxMDJGRkXTp0oXWrVszbNgwtm7dWuSc//3vfwwYMIBWrVrRuXNnnn/+ec6ePVvknCVLltCjRw9atWrFE088QVpaGgBz586lU6dOtGvXjueff56cnBzbNTk5Obz44ou0b9+ea6+91jb1KD09nXHjxtG6dWu6devGkiVLbNccP36cJk2asGLFCtux7OxsevXqVWSU7kJTJzdu3EiTJk3YuHHjBX+GgpG/du3anTft8bPPPuPmm2+mRYsWdO/enVmzZpGfn297/UJTJc+NtfC9LvRVGOflpk1e6Jn+LT4+nmeeeYZOnTrRpk0bhg4dyp9//ml7/d9TvKxWK3fffTdNmjTh+PHjRc67VKyRkZFcd911WCyWIu8/YcIE+vbtC8DJkyd58skn6dixI61atWLYsGFs374dgFmzZl30PQrji4qK4pFHHqFjx440b96crl278vLLL5OVlXXJHKxfv/6SsRf3GQHWrl3L7bffTqtWrS55r3OtWLGCJk2asGPHDm6//XbCw8O55ZZb+O6774qcd/z4cZ5++mkiIiJo3rw5nTp14umnnyYpKcl2zr59+xgyZAht2rShd+/eLFu2zPbahX5/4fzfk8tN6zv3927RokXn/fv1xx9/EBYWxjvvvHPRe/zbzJkzadq0KV988UWxrxGRsk0jWiJSoVitVuLi4nj55ZfJy8vjwQcfLPL6wYMHGTRoEHXr1mXixIm4ubmxaNEi7r33XhYuXEiHDh3YvHkzo0eP5tZbb+Wpp57iwIEDvPXWW+zfv5+PP/4Ys9nMjz/+yOTJkxk2bBjXXXcdn376KT/++CMAq1ev5uWXXyY2Npbp06fj6enJ+PHjAZg2bRrLly/n6aefJjg4mBkzZhAbG0tsbCw33HADM2fO5Ndff2Xy5MkEBwfTq1evCz7n+++/X6RIuBJvvPEGZ8+exd/f33Zs/vz5zJgxg6FDhzJ+/Hj27dvHrFmziIuL49VXXy3WfZs3b86nn34KFBRtn3/+ue1nX19fh8Senp7O4MGDyc/PZ9y4cVSvXp2FCxdy33338cUXX1C3bt3zrlm1alWRQuxcAwcO5M4777T9/OKLLxZ57fvvv2fjxo106tQJgKysLL777jseeOABcnJyGDlyJLm5uTz//PO4ubkxZ84chg0bxn//+1/uvPNOunbtWuS+zz//PADBwcHEx8czZMgQWrduzZQpU3B3d+fXX3/lgw8+oFq1aowaNeqiecjKyiI4OJi33377grEX9xmPHj3KY489RteuXXniiSdsvxMXu9e/PfjggwwdOpQnnniCzz//nMcff5z58+fTrVs3MjMzGT58OIGBgTz//PP4+fnx559/Mn
v2bDw9PZk8eTKZmZk88MADhISEMGvWLLZt28bzzz9PzZo1ue6664oVg72GDRvGmjVreP311+nevTvu7u783//9H61bt+ahhx4q1j0WLFjAnDlzePnll7n99tudEqeIlD4qtESkQtm8eTPdu3fHzc2N995777wP2rNnz8bd3Z1FixbZPux3796dfv36MXXqVD7//HNWrlxJ3bp1ee2113BxcaFLly54eXkxadIkfvnlF3r27Mm8efO49tprmThxIgDXXnstXbp04ezZs7z22mu0aNECgNTUVN577z0efvhhLBYLn376KaNGjWLo0KEAVKlShbvuuouAgACmT5+Om5sb1113Hfv372f+/PkXLLTi4uJ47733aN68OXv27LmifO3atYtVq1bRtGlTUlNTATh79ixz5szhrrvusj1fREQEAQEBTJw4kREjRtCoUaPL3tvX15fWrVsD8NtvvwHYfnaUL774gtjYWL744guaNm0KQNu2benfvz+bN28+759/eno606dPv2jugoODi8R4bkEYERFBcHAwK1eutBVaP/zwAxkZGfTv35/t27dz+PBhlixZQps2bWyxXH/99cyZM4dZs2YRHBxc5L7nvte6deto2rQpb7/9tu31zp07s379ejZu3HjJQiszMxN/f/+Lxl7cZ9y7dy+5ubk88cQTNG7c+LL3+rdhw4YxZswYALp27crtt9/OO++8Q7du3YiJiSE4OJjXX3+dWrVqAdCxY0d27NjBpk2bAIiNjaVly5b83//9H7Vq1SIiIoKlS5fy22+/Oa3QMplMvPbaa9x6661MmzYNs9lMcnIyH330EWaz+bLXf/LJJ0ybNo3JkyczcOBAp8QoIqWTpg6KSIXSrFkzpkyZQqVKlRg/fvx5oz6bNm2iR48eRT44urq6cvPNN7N7927S09N55ZVXWLlyJS4uLuTl5ZGXl0ffvn1xcXFh8+bN5OXlsXfvXiIiImz38PDwoFWrVnh5edmKLCj4cJ6VlUV0dDTR0dFkZ2fbRjWg4IO2h4cH4eHhuLm5Fbluz549RabqFXr99ddp164dPXr0uKJcWa1WXn75ZQYOHEhYWJjt+J9//klWVhY9e/a0PX9eXp5tmuD69euL3Ofcc/49ra64cZT02q1btxIaGmorsgC8vLz4/vvvi4zaFJozZw6BgYEMHjzY7vdycXHh9ttvZ82aNWRmZgIFhV7nzp0JDg6mQ4cObN++ndatW5Ofn09eXh7+/v506dKFzZs3X/b+ERERfPzxx3h4eHDw4EHWrl3L3LlzSUxMLDL99ELi4uLw8/Oz+5n+rXnz5ri6uvLxxx8TGxtLTk4OeXl5WK3WYl1/7miOyWTi+uuvZ+fOnWRlZdG0aVOWLl1KSEgIMTEx/PLLLyxYsIDDhw/bnq9hw4bMnTuXWrVqkZOTw6+//kpKSgoNGjQo8j4Wi6XI792F4is8pzix16pVi7Fjx/LFF1/w2WefMXHiRFsxeCk///wzL774Iu3atWPQoEGXPV9EyheNaIlIheLr68vtt99O/fr1GTx4MI8//jiffvqp7W+mU1JSqFKlynnXValSBavVSlpaGj4+Pnh4eAAFHzzPlZqaypkzZ8jPzycwMLDIawEBAVSqVKnIscKpV6dPn7YVTf++rlKlSgQEBJx3XV5eXpG1K1BQKP744498+eWXfPPNN8VJyUWtXLmSmJgY5s2bx+uvv247npycDHDREZT4+Hjb97GxseflqCRxrFy5EpPJROXKlbnmmmt47LHHzvtwfSHJyclUrly5WO8TExPDRx99xPvvv8+JEydKFOsdd9zBvHnzWLNmDR07duT3339n+vTpttfd3d2BgnVb567VKc7IiMVi4c0332TJkiVkZGRQo0YNwsPDbb+LlxIbG0tISEgJnqioWrVqMW3aNN58803bNM9CHTp0uOz11apVK/Jz5cqVsVqtpKam4unpyQcffMC8efNITk6mSpUqtGjRAi8vr/PWP6amptK+fX
sAqlatyo033ljk9f/85z/nvfe/45szZw5z5szBbDZTpUoVIiIieOyxxy7aGOemm25iypQpAHTp0uWyzwqwZ88eunf
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.metrics import silhouette_score\n",
"\n",
"silhouette_scores = []\n",
"for i in clusters_range[1:]: \n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" labels = kmeans.fit_predict(data_scaled)\n",
" score = silhouette_score(data_scaled, labels)\n",
" silhouette_scores.append(score)\n",
"\n",
"# Построение диаграммы значений силуэта\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range[1:], silhouette_scores, marker='4')\n",
"plt.title('Коэффициенты силуэта для разных k')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Коэффициент силуэта')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"видим, что при количестве кластеров, равном трем, самое лучшее разбиение получается. вот и отличненько) ведь я как раз разбивала данные на 3 кластера"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "vev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}