903 lines
2.1 MiB
Plaintext
903 lines
2.1 MiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Лабораторная 5"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Загрузка набора данных о диабете у индейцев пима (Pima Indians Diabetes)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 76,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',\n",
|
|||
|
" 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Pregnancies</th>\n",
|
|||
|
" <th>Glucose</th>\n",
|
|||
|
" <th>BloodPressure</th>\n",
|
|||
|
" <th>SkinThickness</th>\n",
|
|||
|
" <th>Insulin</th>\n",
|
|||
|
" <th>BMI</th>\n",
|
|||
|
" <th>DiabetesPedigreeFunction</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>Outcome</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>148</td>\n",
|
|||
|
" <td>72</td>\n",
|
|||
|
" <td>35</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>33.6</td>\n",
|
|||
|
" <td>0.627</td>\n",
|
|||
|
" <td>50</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>85</td>\n",
|
|||
|
" <td>66</td>\n",
|
|||
|
" <td>29</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>26.6</td>\n",
|
|||
|
" <td>0.351</td>\n",
|
|||
|
" <td>31</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>183</td>\n",
|
|||
|
" <td>64</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>23.3</td>\n",
|
|||
|
" <td>0.672</td>\n",
|
|||
|
" <td>32</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>89</td>\n",
|
|||
|
" <td>66</td>\n",
|
|||
|
" <td>23</td>\n",
|
|||
|
" <td>94</td>\n",
|
|||
|
" <td>28.1</td>\n",
|
|||
|
" <td>0.167</td>\n",
|
|||
|
" <td>21</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>137</td>\n",
|
|||
|
" <td>40</td>\n",
|
|||
|
" <td>35</td>\n",
|
|||
|
" <td>168</td>\n",
|
|||
|
" <td>43.1</td>\n",
|
|||
|
" <td>2.288</td>\n",
|
|||
|
" <td>33</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>763</th>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>101</td>\n",
|
|||
|
" <td>76</td>\n",
|
|||
|
" <td>48</td>\n",
|
|||
|
" <td>180</td>\n",
|
|||
|
" <td>32.9</td>\n",
|
|||
|
" <td>0.171</td>\n",
|
|||
|
" <td>63</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>764</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>122</td>\n",
|
|||
|
" <td>70</td>\n",
|
|||
|
" <td>27</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>36.8</td>\n",
|
|||
|
" <td>0.340</td>\n",
|
|||
|
" <td>27</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>765</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>121</td>\n",
|
|||
|
" <td>72</td>\n",
|
|||
|
" <td>23</td>\n",
|
|||
|
" <td>112</td>\n",
|
|||
|
" <td>26.2</td>\n",
|
|||
|
" <td>0.245</td>\n",
|
|||
|
" <td>30</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>766</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>126</td>\n",
|
|||
|
" <td>60</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>30.1</td>\n",
|
|||
|
" <td>0.349</td>\n",
|
|||
|
" <td>47</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>767</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>93</td>\n",
|
|||
|
" <td>70</td>\n",
|
|||
|
" <td>31</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>30.4</td>\n",
|
|||
|
" <td>0.315</td>\n",
|
|||
|
" <td>23</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>768 rows × 9 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
|
|||
|
"0 6 148 72 35 0 33.6 \n",
|
|||
|
"1 1 85 66 29 0 26.6 \n",
|
|||
|
"2 8 183 64 0 0 23.3 \n",
|
|||
|
"3 1 89 66 23 94 28.1 \n",
|
|||
|
"4 0 137 40 35 168 43.1 \n",
|
|||
|
".. ... ... ... ... ... ... \n",
|
|||
|
"763 10 101 76 48 180 32.9 \n",
|
|||
|
"764 2 122 70 27 0 36.8 \n",
|
|||
|
"765 5 121 72 23 112 26.2 \n",
|
|||
|
"766 1 126 60 0 0 30.1 \n",
|
|||
|
"767 1 93 70 31 0 30.4 \n",
|
|||
|
"\n",
|
|||
|
" DiabetesPedigreeFunction Age Outcome \n",
|
|||
|
"0 0.627 50 1 \n",
|
|||
|
"1 0.351 31 0 \n",
|
|||
|
"2 0.672 32 1 \n",
|
|||
|
"3 0.167 21 0 \n",
|
|||
|
"4 2.288 33 1 \n",
|
|||
|
".. ... ... ... \n",
|
|||
|
"763 0.171 63 0 \n",
|
|||
|
"764 0.340 27 0 \n",
|
|||
|
"765 0.245 30 0 \n",
|
|||
|
"766 0.349 47 1 \n",
|
|||
|
"767 0.315 23 0 \n",
|
|||
|
"\n",
|
|||
|
"[768 rows x 9 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 76,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\".//scv//diabetes.csv\")\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
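"Очищаем столбец DiabetesPedigreeFunction от выбросов: значения вне интервала [Q1 − 1.5·IQR, Q3 + 1.5·IQR] заменяем медианой столбца"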
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 77,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Выбросы в датасете:\n",
|
|||
|
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
|
|||
|
"4 0 137 40 35 168 43.1 \n",
|
|||
|
"12 10 139 80 0 0 27.1 \n",
|
|||
|
"39 4 111 72 47 207 37.1 \n",
|
|||
|
"45 0 180 66 39 0 42.0 \n",
|
|||
|
"58 0 146 82 0 0 40.5 \n",
|
|||
|
"100 1 163 72 0 0 39.0 \n",
|
|||
|
"147 2 106 64 35 119 30.5 \n",
|
|||
|
"187 1 128 98 41 58 32.0 \n",
|
|||
|
"218 5 85 74 22 0 29.0 \n",
|
|||
|
"228 4 197 70 39 744 36.7 \n",
|
|||
|
"243 6 119 50 22 176 27.1 \n",
|
|||
|
"245 9 184 85 15 0 30.0 \n",
|
|||
|
"259 11 155 76 28 150 33.3 \n",
|
|||
|
"292 2 128 78 37 182 43.3 \n",
|
|||
|
"308 0 128 68 19 180 30.5 \n",
|
|||
|
"330 8 118 72 19 0 23.1 \n",
|
|||
|
"370 3 173 82 48 465 38.4 \n",
|
|||
|
"371 0 118 64 23 89 0.0 \n",
|
|||
|
"383 1 90 62 18 59 25.1 \n",
|
|||
|
"395 2 127 58 24 275 27.7 \n",
|
|||
|
"445 0 180 78 63 14 59.4 \n",
|
|||
|
"534 1 77 56 30 56 33.3 \n",
|
|||
|
"593 2 82 52 22 115 28.5 \n",
|
|||
|
"606 1 181 78 42 293 40.0 \n",
|
|||
|
"618 9 112 82 24 0 28.2 \n",
|
|||
|
"621 2 92 76 20 0 24.2 \n",
|
|||
|
"622 6 183 94 0 0 40.8 \n",
|
|||
|
"659 3 80 82 31 70 34.2 \n",
|
|||
|
"661 1 199 76 43 0 42.9 \n",
|
|||
|
"\n",
|
|||
|
" DiabetesPedigreeFunction Age Outcome \n",
|
|||
|
"4 2.288 33 1 \n",
|
|||
|
"12 1.441 57 0 \n",
|
|||
|
"39 1.390 56 1 \n",
|
|||
|
"45 1.893 25 1 \n",
|
|||
|
"58 1.781 44 0 \n",
|
|||
|
"100 1.222 33 1 \n",
|
|||
|
"147 1.400 34 0 \n",
|
|||
|
"187 1.321 33 1 \n",
|
|||
|
"218 1.224 32 1 \n",
|
|||
|
"228 2.329 31 0 \n",
|
|||
|
"243 1.318 33 1 \n",
|
|||
|
"245 1.213 49 1 \n",
|
|||
|
"259 1.353 51 1 \n",
|
|||
|
"292 1.224 31 1 \n",
|
|||
|
"308 1.391 25 1 \n",
|
|||
|
"330 1.476 46 0 \n",
|
|||
|
"370 2.137 25 1 \n",
|
|||
|
"371 1.731 21 0 \n",
|
|||
|
"383 1.268 25 0 \n",
|
|||
|
"395 1.600 25 0 \n",
|
|||
|
"445 2.420 25 1 \n",
|
|||
|
"534 1.251 24 0 \n",
|
|||
|
"593 1.699 25 0 \n",
|
|||
|
"606 1.258 22 1 \n",
|
|||
|
"618 1.282 50 1 \n",
|
|||
|
"621 1.698 28 0 \n",
|
|||
|
"622 1.461 45 0 \n",
|
|||
|
"659 1.292 27 1 \n",
|
|||
|
"661 1.394 22 1 \n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA00AAAImCAYAAACPR2EBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADNAklEQVR4nOzdeXhU5cE28HsmyWRfTIJAUEkIioEQloIoKlCkvlprEVC7aHGh4qtWP7e6W4t7rWvVIkjFvSgVUVSq1Sr6ilIQkQQCGEhUdpNAQvZkZr4/0hlnJjPnPGc/Z+b+XZdXS3LmOc96kidz5j4uv9/vBxEREREREUXltroCREREREREdsZNExERERERkQRumoiIiIiIiCRw00RERERERCSBmyYiIiIiIiIJ3DQRERERERFJ4KaJiIiIiIhIAjdNREREREREErhpIiIiIiIikpBsdQWIiNS46aab8Prrr0f93owZM3D//febXCMiIiKKV9w0EZFj9evXD0888UTY1373u99ZVBsiIiKKV9w0EZEjeb1eZGRkYPTo0WFf93g81lSIiIiI4hY/00REjtTT04O0tDTZ42666SZMnTo17GtLlizBsGHD8Pjjjwe/tmXLFvzud7/D8ccfjxEjRuDkk0/G3XffjY6OjuAxw4YN6/MfAEydOhWPPPII7r33XowfPx4TJkzADTfcgIMHD4add+nSpZg5cyZGjx6NiooKTJ8+HStXrgx+f9myZcFy9+3bF/baP/3pTxg2bBh+85vf9KnP/Pnzw47dtm1b8Hs7d+4UPn80v/nNb3DTTTfhqaeewsSJE/GjH/0Il19+OXbt2hV23Pvvv49f//rXGDNmDMrLy3HaaafhpZdeCjtm//79uPHGG3HCCSdgzJgxOP/88/Hll18Gv9/V1YVHH30Up5xyCioqKvCzn/2szy2Y77//PmbOnImRI0fixBNPxN133422trbg9x9//PHguAR88sknGDZsGG666SYAwM6dO8PGcPjw4TjppJPwwAMPwOfzBV+3du1azJkzB+PHj0d5eTmmTp2Kxx9/PHhMoJxly5aFnS9yzk2dOjV47oBrr70Ww4YNw5o1awAAzc3NuOWWW3DSSSf1mWOBY6KZOnVq1HkZ2QfvvPMOZs6ciTFjxuDEE0/EH/7wBzQ1NYUds2HDBlx88cUYO3Ysjj/+eFx77bVh8/Cmm26Kea7QebZu3Tqcf/75GDVqFI477jjceOONaGxsjNmGwLwPLSOy36L1dWdnJ0455ZQ+bV2+fDlmzJiBUaNGYcqUKXjooYfQ1dUVtr4i/wucJ/S60Nrait/85jcYPnw4Ojs7ha8lRBSf+E4TETlSe3s7cnNzFb+uqakJjz76aNjX9u/fj/POOw+jR4/G/fffD4/Hg48//hiLFy/G4Ycfjrlz5waPPfvss3HOOef0Kffll1/G4MGDcd9996GxsREPPfQQvvnmGyxZsgQulwsvvfQS7r77blx55ZX40Y9+hKamJjz99NO4/vrrMWbMGAwYMCBYVmZmJj744AP8+te/BgD4/X6sXLkSbnffv3NlZmbi3//+Ny677LLg19555x243e6wDYCS80f64IMPcNhhh+G2226Dz+fDQw89hN/85jd4++23kZ6ejo8++ghXXHEFZs+ejSuvvBIdHR14+eWXceedd6K8vByjRo1Ca2srfvWrX8Hr9eL3v/89+vfvj2eeeQYXX3wxXn/9dRQXF+P666/HqlWrcNlll2HUqFFYtWoVbrrpJqSkpOBnP/sZVqxYgeuvvx5nnnkmrr76auzatQuPPPIIampqsHjxYrhcrj517+7uxr333hu1XZdddhmmTJmC9vZ2fPrpp3j66adRUlKCc845B1u2bMGFF16I0047DY888gj8fj9WrFiBJ554AkOGDMEZZ5wRs7/krFu3Dm+//XbY1+6//37885//xI033ojS0lIkJydj06ZNuPPOO2XLmzx5Mi6//PLgv5
cuXYp//OMfwX//9a9/xV/+8hf8+te/xjXXXIPvvvsOjz32GDZs2IBXX30VaWlp2Lx5c3Cj88ADD8Dr9eKhhx7CnDlzsHz5ciQn9/66EHlL7EcffRS2aV+7di0uuugiHH/88Xj00UfR1NSExx57DLNnz8Y//vEPoT90iFq0aFGfjdZLL72EO++8E+eccw6uvfZafPfdd3jggQfQ1NSEq6++Gq+88kqwTzZv3hxsS35+fp/yX375ZdTX1+O5556L+g52tGsJEcUvbpqIyJEOHjyIww8/XPHr/vKXv6CoqAgHDhwIfm3btm0oKyvDY489hqysLADAxIkT8emnn2LNmjVhm6YBAwb0uSUQANxuNxYvXozs7GwAvb+EXXHFFfjkk08wadIkfPfdd5gzZ07YL7eDBg3CzJkz8cUXX4T9Ej5p0qSwTdOXX36JpqYmjBgxos95J02ahH/+85/Yv39/sD9WrlyJ8ePHh71DoeT8kdrb27Fs2TIceeSRAIAhQ4ZgxowZWL58OX71q1+hpqYGM2bMwK233hp8zZgxYzBhwgSsWbMGo0aNwuuvv45du3bh9ddfR1lZGQBg7NixOOuss7B27Vp0dXXh3XffxS233IILLrgAAHDCCSdg165dWLNmDc444ww8+OCDOPnkk/Hggw8Gz1NcXIwLL7wQq1atwpQpU/rU/YUXXkBbWxsKCwv7fO+oo44KjuUJJ5yApUuXoqqqKrhpmjhxIv785z8HN6snnngi/v3vfwfro4bP58Pdd9+NESNGYNOmTcGvb9y4ESeddBJ+8YtfBL/W2dkpVGZ+fn7YnPzkk0+C/7+pqQnz58/Hueeeiz/84Q/Brx9zzDE477zz8Nprr+G8887DU089hby8PDzzzDNITU0FABx++OG47rrr8PXXXwfHzOPxhJ1rx44dYXV56KGHUFJSggULFiApKQkAMGrUKJxxxhnBc0UK9K/X6xVqLwDs2bMHTz/9dFg/+nw+PPnkk5g2bRruvvvu4LHt7e14++23kZ2dHax7fn5+n7aE8nq9+Pvf/x58pzGaaNcSIopfvD2PiBxp//796N+/v6LXbNu2Da+88gpuv/32sK+fdNJJePHFF5Gamoqamhp88MEHmD9/PhobG9HV1SVU9tSpU4MbpsC/k5OTsXbtWgC9tzZdf/31aG5uxoYNG/DGG28Eb1+LPMcpp5yCNWvWoKWlBUDvO0dTp05Fenp6n/OWlpZi8ODB+Pe//w0A2Lx5M/bs2YNp06aFHafk/JHGjh0b3DABwPDhw3HkkUcG2/bb3/4W999/P1pbW1FVVYV33nkHCxYsCCv7iy++wBFHHBH85RsA0tPT8e677+Kcc87BF198AQA49dRTw879+OOP46677sKOHTuwd+9eTJ06FT09PcH/xo8fj6ysLHz66ad96l1fX48nn3wSN954Y3AjEMrn86GnpwcdHR1YsWIFmpqaUF5eDgA466yz8PTTT6O7uxtbtmzBu+++i7/85S/wer3o7u6OWk7gP7/fH7MvlyxZgu+//x5XXHFF2NdHjhyJ//znP/jss89w6NAh9PT0hL1TqNaGDRvQ1dWFn/3sZ2FfHzduHAYNGoT//Oc/AHrHZ9KkSWH9NGbMGPz73/8OGzMp7e3t+OqrrzB58mT4/f5gfxx55JEoLS2NOkYAUFBQAADYu3evcLv+9Kc/Ydy4cfjxj38c/FptbS0aGhrwk5/8JOzYOXPmYNmyZUhJSREqu6enBy+88AJaW1vx85//POoxsa4lRBS/+E4TETlOV1cX9u7diyFDhih63d13340zzjgDY8aMCfu6z+fDww8/jJdeegltbW0YOHAgKioqov6iHUvkBs7tduOwww4Lfm7k22+/xR/+8Ad89tlnSElJwZAhQ3DssccCQJ9fskePHo3c3Fx8/PHHOO200/DPf/4T8+bNw7PPPhv13Keccgo++OAD/PKXv8Q777yDSZMmBd8xC1Byfrm2Ab2/6Aba1tjYiDvuuA
Pvv/8+XC4XBg8ejHHjxoWVffDgweAvx9EEPv8V65jA9+fNm4d58+b1+f7+/fv7fO2hhx7C8OHD8dOf/jTs3amAW2+
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"Q1 = df[\"DiabetesPedigreeFunction\"].quantile(0.25)\n",
|
|||
|
"Q3 = df[\"DiabetesPedigreeFunction\"].quantile(0.75)\n",
|
|||
|
"\n",
|
|||
|
"IQR = Q3 - Q1\n",
|
|||
|
"\n",
|
|||
|
"threshold = 1.5 * IQR\n",
|
|||
|
"lower_bound = Q1 - threshold\n",
|
|||
|
"upper_bound = Q3 + threshold\n",
|
|||
|
"\n",
|
|||
|
"outliers = (df[\"DiabetesPedigreeFunction\"] < lower_bound) | (df[\"DiabetesPedigreeFunction\"] > upper_bound)\n",
|
|||
|
"\n",
|
|||
|
"# Вывод выбросов\n",
|
|||
|
"print(\"Выбросы в датасете:\")\n",
|
|||
|
"print(df[outliers])\n",
|
|||
|
"\n",
|
|||
|
"# Заменяем выбросы на медианные значения\n",
|
|||
|
"median_score = df[\"DiabetesPedigreeFunction\"].median()\n",
|
|||
|
"df.loc[outliers, \"DiabetesPedigreeFunction\"] = median_score\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация данных после обработки\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.scatter(df['DiabetesPedigreeFunction'], df['Age'])\n",
|
|||
|
"plt.xlabel('Функция родословной диабета')\n",
|
|||
|
"plt.ylabel('Возраст')\n",
|
|||
|
"plt.title('Диаграмма рассеивания после чистки')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация взаимосвязей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 78,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAASgCAYAAACAO9vxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXhjZdk/8G+Sk31rm25pO12mnYXp7DvLDDCAIOAoy+urLP5UEFQQF0TgBUVEUQQE2RRkkRdBZPEFZVEEkRF1hllYhplh1nZm2qZb2qbZk5Pk90dJJmubtUk73891eWFzcs55nuecZNrzPPd9S0KhUAhEREREREREREREREQlQlrsBhAREREREREREREREUXj5AUREREREREREREREZUUTl4QEREREREREREREVFJ4eQFERERERERERERERGVFE5eEBERERERERERERFRSeHkBRERERERERERERERlRROXhARERERERERERERUUnh5AUREREREREREREREZUUTl4QEREREREREREREVFJ4eQFEVEJ279/P2655RacfvrpWLRoEZYtW4bPfe5zeOqppyCKYuR9XV1dmDNnDv74xz8WsbVTw7p163DddddN+nnjr9Ef//hHzJkzB11dXZPeFiIiIiKiiy++GHPmzIn53/Lly/GFL3wB77zzTsx7r7vuOsyZMwdr165FKBRKerw77rgDc+bMwcUXXxx57d5778WcOXMK2o/xeDwe/Pa3v8V///d/Y9WqVViwYAFOO+003HLLLejt7Y1578UXXxzTdiIiKj6h2A0gIqLkXnnlFVx//fVobW3Fl770JbS0tMDj8eCtt97Crbfein/+85944IEHIJFIit1UysJJJ52EP/zhD6iuri52U4iIiIjoKDVv3jzcdNNNAIBAIIDh4WH8/ve/xyWXXII//vGPmDVrVuS9UqkUfX192LZtG5YtW5ZwrFdeeWXS2p2Ovr4+XHrppbBYLLjgggtwxRVXQKVS4aOPPsLjjz+OV155BU8++SRmzpxZ7KYSEVEKnLwgIipB+/fvx/XXX481a9bg7rvvhiAc+bo+8cQTsWrVKlx11VV49dVXceaZZxaxpZStiooKVFRUFLsZRERERHQU0+l0WLx4ccxrxx13HI499lj88Y9/xLXXXht53Ww2IxQK4dVXX02YvHjvvffQ19eH2bNnT0azJxQKhfC9730Pvb29eP7559HU1BTZtnLlSqxfvx7nnHMObr31Vjz88MNFbCkREY2HaaOIiErQww8/DKlUiptvvjlm4iLs9NNPx2c+85mU+6cKz54zZw7uvffeyM8OhwO33HIL1qxZg8WLF+O8887DP/7xj8j2QCCAJ598Ep/61KewcOFCnHTSSbjjjjvg9Xoj7xkaGsLVV1+N448/HgsWLMCnP/1pvPDCCzHn7enpwXe+8x2sXLkSixYtwv/7f/8PO3fuTNn+bdu2Yc6cOXjzzTdjXt+1axfmzJmDv/3tbwCAl156CevXr8fChQuxevVqfPe730VfX1/K4yZz8cUX44YbbsBDDz2Ek046CQsWLMDnPvc5fPDBB5H3eDwe/PCHP8TatWsxf/58nHHGGXjkkUci21OlgBovRVX8Ptdddx2++MUv4vnnn8fpp5+O+fPn49Of/jQ2bNiQUX+IiIiIiHKhVquhVCqTRnifccYZeO211xJSR73yyis47rjjUFZWltG5fv3rX2P+/Pmw2Wwxr//2t79Fe3s7rFYrgsEg7rrrLqxbtw7z58/HunXrcOedd8Lv96c87pYtW7Bx40Z861vfipm4CCsrK8NVV12F+vp6BIPBhO2p0vJed911WLduXcxrL7zwAs455xwsWrQIJ510Eu688074fL7I9u3bt+OSSy7BqlWrsHTpUnz1q1/F3r17Y47x+OOP44wzzsCCBQuwZs0a/PCHP4TD4YhsDwaDeOihh3Daaadh/vz5OP300/HEE0+k7D8R0XTByQ
siohL0xhtvYPXq1TCZTCnfc9ttt+UUdREIBPDlL38Zf/7zn3H55ZfjgQcewMyZM3HFFVdgy5YtAIAf/OAH+OlPf4pTTz0Vv/rVr3DhhRfid7/7Hb7+9a9H/mC55pprsH//ftx88834zW9+g3nz5uHaa6/Fxo0bAYxNbnzuc5/Djh078P3vfx933nkngsEgLrzwQuzfvz9p25YuXYrGxka8/PLLMa+/9NJLKCsrw4knnoitW7fie9/7Hj7xiU/gN7/5Da6//nps3LgRV199dcZj8de//hVvvPEGbrzxRvziF7/A4OAgvvGNbyAQCAAAbr31VmzYsAHXXnstHnnkEZxyyin4+c9/jueffz7jc43nww8/xCOPPIKrrroK999/P2QyGb7xjW8k/DFHRERERJQPoVAIoihCFEX4/X4MDAxEHr6fd955Ce8/88wzI6mjwoLBIP7yl7/grLPOyvj8n/rUpyCKIl577bWY119++WWccMIJMJlM+M1vfoPf//73uOKKK/Doo4/i85//PB555BH86le/Snnc119/HRKJZNw2nXPOObj55pshlWb/aOzJJ5/Etddei/b2dtx333247LLL8MQTT+DHP/4xAGDjxo34/Oc/D2Dsb4of//jHsFgs+NznPhf5W+ill17C7bffjgsvvBCPPPIIrrjiCrz44ou45ZZbIuf54Q9/iHvuuQfr16/Hr3/9a5xxxhm49dZbcf/992fddiKiqYBpo4iISozNZoPNZkNzc3PCtugi3QAgkUggk8myOs+GDRvw/vvv4/7778epp54KAFi9ejUOHz6MjRs3oqysDM899xyuvvpqXHbZZQCA448/HtXV1fje976HDRs24MQTT8Q777yDK664InKMlStXoqysDAqFAsDYKqKRkRH8/ve/R319PQBg7dq1OPPMM/HLX/4S99xzT9L2rV+/Ho8++ig8Hg9UKhVCoRBeeeUVnHHGGVAoFNi6dStUKhUuu+yyyLnKysqwfft2hEKhjGqBiKKIRx55BDqdDgDgdDpx7bXXYteuXZg/fz7eeecdHH/88ZE/flatWgWNRjPu5FI27HY7/vjHP6KxsREAoNFocNFFF2Hjxo04/fTT83ouIiIiIqLNmzejvb094fXvfOc7aG1tTXh9wYIFmDFjRkzqqC1btmBkZASnnnpqxot76uvrsWLFCrz00kv4r//6LwDAoUOH8MEHH+Cuu+4CALzzzjuYP39+ZDJl5cqVUKvV0Ov1KY976NAhlJWVJUSCBAKBhKgRmUyWVR3BYDAY+VsqPFkBAG63Gy+//DL8fj/uvPNONDU14aGHHor83XbCCSfgtNNOwz333INf/vKXeOedd9DQ0IALL7wQUqkUK1euhEajiSxg6ujowDPPPIPvfOc7kb/LTjjhBEgkEjz44IO44IILUF5ennH7iYimAkZeEBGVmGRhywBw8OBBtLe3x/zvtNNOy/o8W7duhVwujwl7lkqlePrpp3HllVfinXfeAYCE1UpnnXUWZDIZNm3aBGDsQf69996Lq666Cs8++ywGBwdx7bXXYunSpQCA//znPzjmmGNQU1MTWdUllUqxdu1a/Pvf/07ZvvXr18PlckVSR23btg09PT349Kc/DQBYsWIF3G43zj77bNx5553YsmULTjjhBFx55ZUZ//HR1tYWmbgAgJqaGgBjf3iE+/jMM8/gK1/5Cn73u9/h8OHDuOKKK3DSSSdldJ6JVFRURCYuAKC2tjamHURERERE+dTe3o7nnnsOzz33HJ599lk88sgj+H//7//hrrvuikwexDvzzDNjUke9/PLLOOmkk2J+n87E+vXrsXnzZgwMDESOp9PpIn+nrFq1Cv/6179wwQUX4OGHH8a+fftw0UUXRf4uSCZ+giLsoosuSvibKvx3T6Y6OjpgtVoT/iYLFzv3+/3Yvn07PvnJT8YsODMYDDj55JMj5129ejU6Ojpw7rnn4r777sP27dvxqU99ChdffDGAseiNUCiEde
vWRf6eEkUR69atg9frxdatW7NqPxHRVMDJCyKiElNeXg6NRoPu7u6Y181mc+QPi+eeew4nn3xyTucZGRlBWVlZyjD
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"sns.set(style=\"whitegrid\")\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df['Glucose'], y=df['Insulin'], alpha=0.6)\n",
|
|||
|
"plt.title('Glucose vs Insulin')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df['BMI'], y=df['Glucose'], alpha=0.6)\n",
|
|||
|
"plt.title('BMI vs Glucose')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df['Age'], y=df['BMI'], alpha=0.6)\n",
|
|||
|
"plt.title('Age vs BMI')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df['Outcome'], y=df['Glucose'], alpha=0.6)\n",
|
|||
|
"plt.title('Outcome vs Glucose')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Стандартизация данных для кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 79,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(df)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 80,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAAImCAYAAABATALrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d3ikV3n3/3nqVM2o70qr7dpdd2xscME2BowhYAy23zcB/MIbAgEChJaAwy8Qk0IwBPwCBhMcCN1ggo2xgQRwwQWDK2tc19tXbdWnl6f+/ng0syNpJM2MRtJodT7X5WutKc9z5jzte+7zPfctua7rIhAIBAKBQCAQrELklW6AQCAQCAQCgUBQK0LMCgQCgUAgEAhWLULMCgQCgUAgEAhWLULMCgQCgUAgEAhWLULMCgQCgUAgEAhWLULMCgQCgUAgEAhWLULMCgQCgUAgEAhWLULMCgQCgUAgEAhWLULMCgQCwRpG1M2ZjugPgWD1IcSsQCBYEt7ylrewa9euaf+dcsopXHTRRfzjP/4j8Xh81ncOHjzIJz/5SS6++GJOO+00LrroIj784Q/z3HPPzbmf//f//h+7du3in//5nxds09ve9jZe/OIXYxjGnJ953etex1VXXQXArl27uP766yv4tfXl7/7u73j5y19e/PvlL385f/d3f1fXfRw9epR3vvOdDAwMLOl+FkMqleLd7343L3jBC3jRi17EoUOHKv5uLcfurrvu4uqrr66ylQKBYKVRV7oBAoHg+OWkk07immuuKf5tmiZPP/001113Hc8++yw/+MEPkCQJgF/96ld89KMfZceOHfzVX/0VPT09HD16lG9/+9v86Z/+KV/96ld5yUteMm37juNw2223sXPnTn7605/yt3/7twQCgTnbc+WVV/Lggw9y3333cfHFF896/+mnn+b555/nM5/5DAA333wz69evr0dXLIovf/nLhMPhum7zwQcf5N57713y/SyG2267jXvuuYd/+Id/YMeOHfT09Czp/r71rW8t6fYFAsHSIMSsQCBYMsLhMKeffvq01170oheRTqf50pe+xBNPPMHpp5/OkSNHuPrqq7ngggv4whe+gKIoxc9fcsklvOlNb+Lqq6/m7rvvRtf14nsPPPAAR48e5brrruP//J//w89+9jP+9//+33O255WvfCXRaJTbb7+9rJj9yU9+Qjgc5lWvehXArLavFCeddNJxtZ9KicViALz5zW8uDnoEAoFgJsJmIBAIlp1TTjkFgMHBQQC++93vYhgGH//4x6cJWYBAIMDVV1/NlVdeOcuacMstt7Bz507OPPNMzj77bG6++eZ59+vz+bj00kv5zW9+QyqVmvaeaZr8/Oc/57WvfW0xujtzqvrb3/42r371qzn11FO54IIL+OQnP1ncTn9/P7t27eLWW2+dtt2ZlgHbtrnxxhu59NJLOe200zj99NN54xvfyO9///s52106/X/99dfPsm8U/iu0daF93HrrrXzsYx8D4BWveEVx2zNtBslkkk9/+tNcfPHFnHrqqVx66aX8+Mc/ntW2L33pS3zmM5/hvPPO47TTTuPtb3/7gpaAfD7PV77ylWJ/XnLJJdx44404jgN4NpXC7znhhBPmtT88/PDD/Nmf/RkveMELeNWrXsWDDz446zP9/f189KMf5fzzz+fkk0/m3HPP5aMf/SiTk5PF/T388MM8/PDD7Nq1i4ceegiA5557jve9732cc845nHzyyVxwwQX8y7/8C7lcbt7fJxAIlg8RmRUIBMvOwYMHAdi4cSMA999/PyeddBLr1q0r+/lzzz2Xc889d9prsViMu+++mw9+8IMAXH755Xz0ox/l6aef5uSTT55z31deeSXf//73+eUvf8mVV15ZfP2+++5jYmJizsjuz372M/7t3/6Nq6++ml27dnHgwAE+85nPkM1mi7aESvjc5z7HD37wA/7mb/6GXbt2MTw8zFe+8hU+8IEP8Jvf/G
ZemwTA//7f/5sLLrhg2muf/exnee6553jNa15T0T4uuugi/uqv/oqvfvWrfPnLX2bXrl2z9pPL5Xjzm9/M+Pg473//+9mwYQN33nknf//3f8/Y2Bjvfve7i5/9zne+w5lnnsmnP/1p4vE4n/rUp7j66qvnHFy4rsu73/1udu/ezfve9z5OOOEEHnroIb7whS/Q19fHP//zP3PNNdfwzW9+kx//+MfcfPPNtLa2lt3W008/zV/8xV9wzjnn8KUvfYn+/n4+/OEPT/tMNpvlrW99Ky0tLVxzzTU0NTXxhz/8gS9/+cv4/X7+6Z/+iWuuuYaPfOQjAFxzzTX09vYyMjLCVVddxemnn861116Lruvcd999fPOb36Szs5N3vvOd8x4rgUCwPAgxKxAIlgzXdbEsq/h3PB7n4Ycf5qtf/SpnnHFGMUJ79OhRTjzxxKq2fccdd+A4Dq9//esBz47wT//0T/zwhz+cdzHYySefzIknnsgdd9wxTczedttt7Nq1i1NPPbXs9x5++GF6enq46qqrkGWZF7/4xQSDwbIL2eZjZGSED33oQ7zlLW8pvubz+fjrv/5r9uzZs6C1Yf369dN8vN/61rd4/PHH+fKXv8z27dsr3semTZsAOPHEE8t6UW+99Vaef/55fvjDH3LGGWcAcMEFF2BZFjfccANvfOMbaW5uBiASiXDDDTcUo+pHjhzh+uuvZ3JykpaWllnbvu+++3jwwQe57rrreO1rXwvAS17yEvx+P1/84hd561vfyo4dO4q/c74++drXvkZbWxtf/epX0TQNgJaWFj70oQ8VP3Po0CHWr1/PZz7zmeIA6pxzzuGJJ57g4YcfBqC3t7foFy7sb/fu3Zx44ol88YtfLL533nnn8dvf/paHHnpIiFmBoEEQYlYgECwZjzzyyKwoqSzLnHfeefzTP/1T0QepKAq2bVe17VtuuYWzzz4bXddJJBKAN+X9s5/9jKuvvnrehUxXXnkl//qv/8rw8DDr1q0jFotxzz338NGPfnTO75xzzjncfPPNXHHFFVx88cW89KUv5XWve13VXs7Pf/7zAExMTHDgwAEOHz7MPffcAzBvloVy3H///Xz2s5/lPe95zzQPcD328fDDD7Nhw4aikC1w2WWX8eMf/5gnnniCl770pQCceuqp0+whBRGazWbLitmHH34YVVV59atfPWvbX/ziF3n44YfZsWNHRe187LHHeNnLXlYUsuANbErbc+KJJ3LTTTfhOA6HDh3i8OHD7Nu3jwMHDkwbbM3k/PPP5/zzz8c0Tfbt28fhw4d5/vnnmZiYKAp5gUCw8ggxKxAIloyTTz6Zf/zHfwRAkiR8Ph9dXV2zhGZ3d3fRP1sO0zSJx+O0t7cD8Mwzz/Dss88C3oKymdx+++28+c1vnnN7r3vd6/jsZz/LL37xC972trfx85//HEmSuOyyy+b8zmte8xocx+Gmm27ihhtu4Prrr2fDhg387d/+bXF6vxKefPJJ/vEf/5Enn3ySQCBAb28v3d3dQHU5Tg8cOMCHP/xhLrzwQv76r/+67vuIx+N0dHTMer1wDAoDCGCWNUKWveUYBf9ruW23tLTM8kcX9pdMJitqY+m2SlFVddZr3/zmN/n3f/93YrEY7e3tnHLKKQQCgXn35TgO1113Hd///vfJZDJ0dXVx2mmn4fP5Km6fQCBYeoSYFQgES0YoFJpz2r6U888/n29/+9uMjo6WFVD33nsv733ve/nyl7/MK1/5Sm699VaCwSA33HBDUTgV+Id/+AduvvnmecVsc3MzF198MXfccQdve9vb+OlPf8orX/nKBaNtl156KZdeeinJZJIHHniA//iP/+AjH/kIZ555ZjFCOzPCnMlkiv+fSqV4xzvewa5du/j5z3/Otm3bkGWZe++9l1/+8pcLdVOReDzOX/3VX9He3s7nPve5adHheu0jGo1y+PDhWa+Pjo4ClI24VrPtyclJbNueJmhHRkaq3nZzczNjY2PTXnNdd5
r944477uDaa6/lIx/5CFdccUXRf/uBD3yAJ598cs5t33jjjXzrW9/iH//xH7nkkktoamoC4H/9r/9VcfsEAsHSI7I
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df_scaled = pd.DataFrame(data_scaled, columns=df.columns)\n",
|
|||
|
"\n",
|
|||
|
"# Понижение размерности до 2 компонент\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"kc_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(kc_pca[:, 0], kc_pca[:, 1], alpha=0.6)\n",
|
|||
|
"plt.title(\"PCA Visualization of data\")\n",
|
|||
|
"plt.xlabel(\"Principal Component 1\")\n",
|
|||
|
"plt.ylabel(\"Principal Component 2\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"Иерархическая агломеративная кластеризация\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 81,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1AAAAJxCAYAAABMnFMWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADC/ElEQVR4nOzdeXxU1f3/8fckM9n3FQghkLCDYV+sFQRcC7ZFK+7b79u6gVqXqq21m1pt1SqLWtGv4oKKinWrW0VEqwiCCu6oQICwhBCykJBkJpnfH3znOjPMJHcmk8xM8no+Hj4Ms5655y7nc885n2NxOp1OAQAAAADaFRPuAgAAAABAtCCAAgAAAACTCKAAAAAAwCQCKAAAAAAwiQAKAAAAAEwigAIAAAAAkwigAAAAAMAkAigAAAAAMIkACgAAAABMIoACYMoNN9ygIUOG+PzvhhtuCHfxALipqanRuHHjtHHjRtXU1OjSSy/Vww8/HO5iAUC3YA13AQBEj9zcXC1atMjjsXnz5oWpNAD8SU9P14UXXqg5c+bI6XRqyJAh+tvf/hbuYgFAt0AABcCUlpYWJSUlafTo0R6Px8XFhadAANo0b948nXHGGaqtrVVRUZFiY2PDXSQA6BYYwgfAFIfDoYSEBFOvXbdunc455xyNGjVKEydO1PXXX6+qqirj+eeff15DhgzRjh07PN43ffp0j+GAdrvd77BB78/asGGDZs+erdLSUp188sl6/fXXPT67rq5Ot912m4499lgdccQRmjVrlp577rnDvt/7e3bs2KFzzz1XN9xwg/75z3/qRz/6kcaNG6fLLrtM5eXlHu9/6623dNZZZ2nMmDEaOXKkTjzxRC1dutR4fs2aNcbnrl+/3uO9TzzxhIYMGaLp06cfVp7f//73Hq+tqanRyJEjNWTIEK1Zs8b09/vz7LPP6pRTTtHo0aNVWlqqn/3sZ3rttdcO28a+hm36q59zzz3X4zteffVVnXLKKRozZoyOOuoo/eEPf1BNTY3x/MKFCzVkyBCNGTNGzc3NHu+94oorDhsq2tTUpL///e+aOnWqRo4cqZNPPlmvvvqqx/umT5+uu+++W3/96181YcIETZo0Sdddd52qq6tN//62hq4+//zzRp2618O+ffs0fvx4n3U5ZMgQDR06VBMmTNDll1+u/fv3G68ZMmSIFi5c6FE213YJZltKUk5OjoqLi/XBBx+0O9zW+7v+/e9/a8KECbrrrrskee6/3v+5l/vrr7/WvHnzNHnyZI0YMUJHH320brnlFjU2NhqvaW5u1j333KMZM2aotLRUs2bN0r/+9S9T21ySdu7cqauvvloTJ07UqFGjdP755+vLL780Pn/Hjh0aMmSI/v3vf+uSSy7RqFGjdMwxx+jee+9Va2urR714b5Orr77ao06dTqfmz5+vo48+WuPGjdMll1yiXbt2Ga9vaWnR4sWLNWvWLJWWlmr06NE644wz9OGHH7ZZj9Lhde79b6fTqTPOOMPjfHnDDTd47FuS9PTTT/vcfwB0DnqgAJhy8OBBpaent/u6jz76SBdeeKEmT56se+65RzU1NZo/f77OO+88Pffcc6aDMOlQI1mS7r//fmVlZUk61Nj1Dnwk6eKLL9Y555yjq666Ss8995x+/etf64EHHtDUqVPV2Nios846S/v27dMVV1yhgoICvfXWW7rxxhtVWVmpSy65xPicqVOn6rLLLjP+nZeXJ0lasWKFMjMz9fvf/16tra266667dO655+rf//63EhMT9c4772ju3Lk677zzdPnll6uxsVFPPvmk/vKXv2jkyJEaNWqU8ZnJycl6++23NW7cOOOxV199VTExh9/TSk5O1jvvvCOn0ymLxSJJevPNN9XS0uLxukC+393SpUt1yy236PLLL9e4ceNUU1OjBx98UNdee63GjBmjXr16Ga9dtGiRcnNzJcmoD0n6xS9+odNOO83495///GeP77jvvvu0YMECnX
XWWbrqqqu0fft2zZ8/X59++qmeeeYZj33CYrFo9erVmjp1qiSpvr5eq1at8tg2TqdTc+fO1ccff6wrrrhCJSUl+s9//qOrrrpKzc3N+vnPf2689sknn1RRUZFuu+02VVVV6a677lJZWZmefvppWSyWdn//ZZddpjPOOEPSoR6d4cOHG/tHv3799O233x62Te+66y7V1dUpLS3N43HXvmW32/X999/r73//u2699VbdeeedPuvGl0C2pYvdbtdf//pX098hSY2NjfrLX/6iX/7ylzr55JM9nvvDH/6gESNGGP8+/fTTjb8rKip09tlna/To0br99tsVFxend999V4888ojy8vJ00UUXSZKuvfZarVq1SpdeeqlGjRqlVatW6YYbbpDNZmt3m1dVVemMM85QYmKibrrpJiUmJurRRx/V2Wefreeee04lJSVGef70pz9p6tSpWrhwodavX69FixapoaFBv/nNb3z+7nXr1unf//63x2NLlizRAw88oOuuu04DBgzQ7bffriuvvFLPPPOMJOnOO+/UU089pWuuuUZDhgzRnj17dO+99+rKK6/UO++8o8TExIC2vbsXX3xRn3zySZuvqamp0T333BP0dwAIHAEUAFOqq6uNYKItd911lwYMGKAHHnjAGDI0atQozZw5U8uXL9fZZ59t+jsbGhokSWPGjFFmZqYk6b333vP52nPPPVdz586VJB199NGaPXu27r33Xk2dOlXPP/+8Nm3apKefflpjxowxXuNwOHTffffpjDPOUEZGhqRDgYH3MEXpUAD5/PPPq7CwUJJUXFys2bNn64UXXtCZZ56p7777TrNnz9aNN95ovGfMmDGaNGmS1qxZ4xHATJkyRStWrDAacbt379Ynn3yi8ePHH9ardeSRR2rVqlXasGGDUa7XXntNEyZM8Oj1COT73W3fvl3/8z//4xE0FhQU6JRTTtH69es1c+ZM4/Fhw4apb9++h31Gr169PLZZSkqK8XdNTY3uv/9+zZkzR3/4wx+MxwcPHqyzzz77sH3CtW1cAdTbb7+t3Nxcj16DDz74QO+9957uvvtu/eQnP5F0qD4PHjyoO++8U7NmzZLVeujyFhMTo0ceeUSpqamSDtXv3Llz9d5772nKlCmmfn+/fv0kHRqu6m//cPnss8/04osvatiwYaqtrfV4zv29EyZM0AcffKAvvvjC72d5C3Rbujz++ONqaGhQTk6O6e965ZVXZLPZ9Mtf/vKwoX8DBw70uw02bdqkYcOGaf78+cZ+8KMf/Ujvv/++1qxZo4suukibNm3SG2+8od/97nc6//zzJR3az8vLy7VmzRrNmjWrzW1+9913q7q6Wk899ZQKCgokHdpvfvKTn2j+/PlasGCB8doRI0YYAeqUKVPU0NCgRx99VJdeeqnHfipJra2tuuWWWzRixAiPemloaNBll12mCy64QNKh3q2//OUvqq2tVVpamioqKnTVVVd59LrGx8fr8ssv1zfffNPm/tKW+vp63XnnnYeVx9uCBQvUp08fj95MAJ2LIXwATKmoqFB+fn6brzl48KA2bNigqVOnyul0yuFwyOFwqLCwUCUlJXr//fc9Xt/a2mq8xuFwHPZ5u3fvVkxMzGENHV9mz55t/G2xWHTcccdp48aNamxs1Nq1a1VQUGAETy4//elP1dTUpA0bNrT7+WPHjjWCJ0kaPny4CgsL9dFHH0mSfvnLX+r2229XfX29Pv/8c7366qt64IEHJOmwIWnTp0/X1q1btXnzZknS66+/rlGjRhmNQXepqamaOHGiVqxYIUmqqqrSmjVrPAKbQL/f3Q033KBrr71WtbW1+vTTT/Xiiy8aw/7aep9Zn376qZqbmzVr1iyPx8ePH6+CggKtXbvW4/EZM2bo7bffltPplHSoZ84VJLmsXr1aFotFU6dO9dh/pk+frr1793r0Ck2fPt0Inlz/tlqtRr2F8vc7nU7dcsst+sUvfqGhQ4f6fN7hcKi5uVkbN27U+v
XrNXLkSI/XeB8T7oFjoNtSkiorK3Xvvffq+uuvV3x8vKnfsWfPHj344IM666yzAp439eMf/1hPPPGE4uPj9d1332n
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[4 7 4 7 2 6 2 5 1 5 6 3 6 1 4 5 2 3 8 2 8 6 3 4 4 4 3 7 6 6 6 4 7 6 6 7 6\n",
|
|||
|
" 4 2 2 8 6 6 4 6 2 6 7 4 5 7 7 7 4 4 7 2 8 6 8 5 3 6 7 3 7 2 6 7 8 2 8 4 8\n",
|
|||
|
" 8 7 6 8 5 7 7 5 6 7 3 7 6 8 4 7 7 7 6 3 7 8 7 7 7 2 3 6 6 7 6 7 6 8 8 2 2\n",
|
|||
|
" 1 8 6 2 3 3 6 7 7 2 8 8 6 3 2 8 8 2 3 2 4 2 6 7 8 7 7 7 8 6 8 7 3 8 5 6 8\n",
|
|||
|
" 6 7 8 7 4 1 3 4 7 7 7 4 6 6 8 7 4 2 8 7 6 7 3 2 5 8 7 4 6 2 6 4 6 7 7 6 6\n",
|
|||
|
" 4 1 2 2 2 6 6 3 5 6 2 6 3 2 2 7 6 7 7 6 7 4 3 8 4 7 8 6 2 4 4 2 8 2 3 1 3\n",
|
|||
|
" 5 4 7 8 6 2 1 8 3 2 7 6 7 3 4 2 4 7 7 7 3 2 8 3 6 1 8 7 6 6 7 8 4 2 7 7 8\n",
|
|||
|
" 4 8 5 7 6 3 6 5 8 6 5 4 7 6 8 6 8 2 7 6 7 3 6 8 3 3 6 1 2 7 8 8 2 2 2 6 8\n",
|
|||
|
" 2 7 4 6 5 2 8 3 6 8 4 8 2 2 6 8 2 7 4 8 7 3 8 3 7 2 2 4 8 8 2 6 2 8 6 7 5\n",
|
|||
|
" 6 7 8 5 3 4 3 7 7 7 6 6 6 7 5 7 7 6 6 8 7 6 3 2 5 6 4 4 6 6 3 8 7 3 7 7 2\n",
|
|||
|
" 1 5 7 8 8 4 7 8 3 8 7 7 7 7 8 7 4 4 4 7 8 3 8 7 4 8 7 2 7 2 3 6 2 6 3 8 3\n",
|
|||
|
" 7 4 1 6 7 8 8 2 1 7 4 7 2 8 7 8 7 2 2 5 2 8 2 5 7 7 6 7 5 6 6 7 6 2 7 8 4\n",
|
|||
|
" 3 2 7 8 2 7 7 3 8 5 8 4 6 7 4 6 6 7 6 7 6 7 7 8 5 8 8 8 8 6 6 6 2 7 8 6 2\n",
|
|||
|
" 8 7 8 5 2 1 4 7 6 7 8 8 4 5 6 6 7 4 4 7 8 7 6 8 6 2 7 7 6 4 8 6 7 7 2 4 6\n",
|
|||
|
" 6 4 7 8 5 4 6 7 7 8 7 7 7 6 8 5 7 5 6 6 8 2 4 2 4 8 8 2 4 8 6 6 7 7 6 7 7\n",
|
|||
|
" 8 8 6 6 6 3 2 8 7 7 7 8 6 8 2 6 6 7 7 8 8 7 3 6 2 2 7 6 6 1 7 3 6 4 5 4 8\n",
|
|||
|
" 3 7 4 4 6 7 3 7 7 5 8 4 5 8 2 7 8 8 7 2 4 6 4 6 6 7 4 5 8 7 6 8 6 8 6 6 6\n",
|
|||
|
" 7 4 8 6 8 6 4 6 7 4 7 7 6 3 5 7 8 2 2 4 7 7 8 8 6 8 1 7 8 3 2 6 2 2 4 4 8\n",
|
|||
|
" 3 4 6 6 4 7 6 8 6 3 3 6 3 7 7 2 8 3 5 7 6 7 7 2 6 3 8 4 6 1 2 5 7 6 7 4 2\n",
|
|||
|
" 5 8 8 5 8 3 2 8 6 4 8 6 4 4 6 8 4 7 8 2 6 6 8 7 8 6 6 2 3 2 7 6 8 7 6 8 3\n",
|
|||
|
" 4 7 7 4 4 6 2 8 2 3 4 8 7 1 4 2 6 3 6 3 7 4 6 6 8 8 3 8]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"linkage_matrix = linkage(data_scaled, method='ward')\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title('Дендрограмма агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Индекс образца')\n",
|
|||
|
"plt.ylabel('Расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"result = fcluster(linkage_matrix, t=20, criterion='distance')\n",
|
|||
|
"print(result) "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 82,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ8AAAMQCAYAAACJzMTyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzde5wT1d3H8W92s7t4WQQBAbkKVtQCig8oaqWAaL1XindEtPUBFBRBRKxaUVoEXURwvVPlUqiI4N1qq6K09YKIPtUqttxh5SLichN2s7vz/LEkTLKTZCab2cxkP+/Xy5eTzJzJOUk2ZH75nd8JGIZhCAAAAAAAAHBBTqY7AAAAAAAAgOxF8AkAAAAAAACuIfgEAAAAAAAA1xB8AgAAAAAAgGsIPgEAAAAAAMA1BJ8AAAAAAADgGoJPAAAAAAAAcA3BJwAAAAAAALiG4BMAAAAAAABcQ/AJAOBLgwYN0qBBgyz3vfzyy+rUqZPGjRtXx70CkKphw4ZpwYIFme4G0uSFF17QkCFDMt0NAIBHEHwCAGSVbdu2aeLEiZnuBgAHFi1apC1btmjAgAGZ7grSZMCAAfruu+/0wgsvZLorAAAPIPgEAMgq9913n3788UcdfPDBme4KABv27dunoqIiDRs2TDk5fDXNFoFAQEOHDtVDDz2kffv2Zbo7AIAM4194AEDW+Otf/6q33npLw4cPV+PGjaP2VVVV6amnntJZZ52lzp076xe/+IXmzJkTdcygQYM0btw4PfHEEzrttNP0P//zP7rxxhtVUlISddzbb7+tq666St26dVPnzp11zjnnaO7cuZH9H3/8sTp16qR//OMfGjhwoLp27aqzzz5b8+bNixxz//33q1OnTvroo48i9y1atEidOnXSSy+9FOlP7NTCKVOmqFOnTlq0aJEkqVOnTnrkkUeijnnkkUfUqVOnGn3+1a9+pS5duuj000/X73//e/34449Rx3z++ef69a9/rZNOOkk9e/bU6NGjtWXLlqgxffzxx5Kk//znP+rXr5+uuOIK28+LJD311FM688wzdfzxx6tTp06R/2LHYDZu3Dj17ds3cvuZZ55Rt27d9Nxzz0U9b1b/hZ8nSfrkk0/0m9/8Rj169FDnzp3Vt29fPfLII6qqqoocs3v3bk2YMEFnnHGGTjzxRA0YMEDvvfde5PWI9zjm52Xo0KE66aSTdNJJJ2n48OHasGFD5Px23htSzdfVMAxdccUV6tSpkzZu3ChJKisr0/jx43XqqafqlFNO0ZgxY7Rjx45Im3379mnKlCk6++yz1blzZ5100km67rrr9PXXX8d9biVp48aNUc9d7O3wY5955plR77P//ve/kec39vlJZOHChSorK1OfPn2i7p86darlcx37XlmwYIHOP/98de7cWb1799YjjzyiyspKR2OUpH/+85+Wj2f+Gxw3bpwGDRqkF154QX369FG3bt00ePBgrVixIur8a9eu1c0336zTTz9dJ554ogYNGqRPP/20xuOb/wv3sVOnTvrTn/6k22+/Xd26ddNpp52mP/zhDyorK4u0r6ys1FNPPaULLrhAXbt21Yknnqgrrrgi6vMk/DnQrVs3lZeXR/Xv5ptvjpqabO7PK6+8EnXs4sWLa7yOdh5fkvr06aOysjItXLhQAID6LZjpDgAAkA6lpaW699579dOf/lTXX3+9nn/++aj948eP16JFizR06FB169ZNn3zyiSZOnKidO3dq+PDhkePeeecdNW7cWHfddZeqqqo0ZcoUDRo0SK+//roOOuggvffeexo+fLiuueYa3XTTTdq3b5/mzZun++67T507d9YJJ5wQOdeoUaN08cUXa9iwYXrnnXd07733SpKuuuoqjRo1Su+9957uuecevfrqq9q2bZv+8Ic/6Nxzz9XFF19sOcb169dr5syZjp+bV199VWPGjNGFF16oW265RSUlJZo6dapWrlypZ599VoFAQF999ZWuvvpqnX
DCCXrggQdUWVmpKVOm6De/+U0kGGb24IMPqnPnzrrhhhskydbz8tJLL2nKlCkaOnSoTj31VB100EGSpMsvv9z2WLZs2aKHHnpI9913n37+859H7SsuLlazZs0kSd99951GjBgR2bdixQpde+21OuecczR16lQZhqFXX31VxcXF6tChg84//3xVVlbq17/+dSRw0KFDB7344osaPny4Zs2apXvuuUe7d++O9PmSSy7RpZdeKkk6+uijtWbNGl1xxRXq0KGDJk+erIqKCj3++OO68sor9fLLL6tJkyaR/iR6b1h5+eWX9dlnn9V4DV566SXdfffdatiwoe69916NHz9eU6dOlSSNHTtWy5Yt0+jRo9W2bVutW7dO06ZN06233qrXX39dgUDA9vMea8aMGZEgWNgNN9yg/Px8TZgwQUcccYRycnK0YMGCpNOuXnnlFfXu3Vv5+flR9+/bt099+/bV0KFDI/fFvleefPJJTZ06VVdffbXuuOMOff3113rkkUe0adMmx9Nv9+3bpxYtWmjatGmR+8Kvi9nXX3+t1atXa/To0TrssMM0ffp0XX311XrjjTd0xBFHaOXKlbrsssvUvn173XXXXcrLy9Ps2bM1ePBgPfPMMzr55JOjnrPevXtLUtT4p02bphNOOEEPP/ywVq1apYcffljfffedHn74YUlSUVGR/vznP+vWW29Vp06dtGXLFj366KMaOXKk3nvvvcjfllSdgfThhx9G/l727Nmj999/3zLL7JBDDtG7776riy66KHLfG2+8oZycnKggrd3HLygoUJ8+ffTqq69q4MCBTl4OAECWIfgEAMgKEydO1I4dO/THP/5RwWD0P29r1qzR888/r9GjR0cK4P7sZz9TIBDQk08+qauuuiqSKbV3714tWrRIbdq0kSR16NBB/fv310svvaQrr7xSK1euVP/+/XXnnXdGzt+tWzedcsop+vjjj6OCT2eddVbkuDPOOENbt27VY489piuvvFINGjTQpEmTdNVVV+mpp57S8uXLdeihh1pe7JrH+JOf/ET//ve/I/fl5OSooqIibhvDMFRUVKQzzjhDRUVFkfvbt2+va6+9Vu+//7569+6tJ554Qo0aNdIzzzyjgoICSdIRRxyhW2+9Vf/973+jzrlu3Tr94x//0CuvvKKf/OQnkmTrefnXv/6lRo0aafTo0XH7m8xzzz2nY489Vr/61a9q7DvuuOPUunVrSaoRGFmxYoVOO+00Pfjgg5GL7tNPP13vvvuuPv74Y51//vlasmSJ/u///k+PPvqo+vXrJ0nq2bOnNmzYoI8++igqmCVJLVq00Iknnhi5fc899+iggw7SzJkzdeihh0qSTj31VPXr108zZszQ7bffHjk20XsjNii0Z88eFRUV6ac//WnUa28YhsaOHRupk7R8+fJIwe7y8nLt2bNHd911l8477zxJ0sknn6zdu3dr0qRJ2rZtWyRQ59SmTZv09NNPR/Vn+/bt2rBhg+6++26dc845kWP//ve/JzzX7t279cUXX+jcc8+tsW/v3r068sgjo55js127dumxxx7T5ZdfrrvuuktS9d91o0aNdNddd+m6666LvD/t2Lt3rxo2bBj1eOHXMfZxn3jiCXXv3l2S1LVrV/Xr10+zZ8/WmDFjVFxcrPz8fM2ePTvSvnfv3rrgggv0wAMPRAXj2rZtazm+ww8/XE888YSCwaB+/vOfKycnR/fff79uuukmdezYUVu3btWoUaOisrIKCgp000036Ztvvok6Z69evfTOO+9Egk/vvvuumjVrFhVMMh/797//XeXl5crPz1dZWZneeecd9ejRI5LdJ8nR43fp0kVvvPGGdu/ebfl8AgDqB6bdAQB87/3339fLL7+sIUOG6Nhjj62x/6OPPpJhGOrbt68qKioi//Xt21dlZWVR02FOOumkSOBJko4//ni1adNGn3zyiSTp+uuv16RJk7Rnzx59+eWXeuONN/Tkk09KUo2pLf3794+6ff
bZZ+u7777TmjVrJFUHZ6699lo9+uij+uCDDzRp0iQddthhlmNcsmSJPvjgg6gAhiQ1adIkMjXOyurVq7V58+YaY+/
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x800 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Выбираем подмножество данных для кластеризации\n",
|
|||
|
"features = df[['BMI', 'Glucose', 'Age', 'Outcome', 'Insulin']]\n",
|
|||
|
"\n",
|
|||
|
"scaled_features = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(scaled_features, method='ward') # Метод \"Ward\"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(12, 8))\n",
|
|||
|
"dendrogram(linkage_matrix, labels=df.index, leaf_rotation=90, leaf_font_size=10)\n",
|
|||
|
"plt.title('Иерархическая кластеризация (дендрограмма)')\n",
|
|||
|
"plt.xlabel('Индекс дома')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Визуализация кластеров"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 89,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAASgCAYAAACAO9vxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gc5bX48e/M9r7qvdiSLfdeAdvg0EK9BnIviUPCvZQUCEkgtAsJEAIJP3oPhBIuIYSAE0jokNA7LtjG3ZZs9d63787vD1lrrbWSZbWV7fN5Hj+geae8M6Oy8555z1E0TdMQQgghhBBCCCGEEEIIIYQYI9REd0AIIYQQQgghhBBCCCGEEKInCV4IIYQQQgghhBBCCCGEEGJMkeCFEEIIIYQQQgghhBBCCCHGFAleCCGEEEIIIYQQQgghhBBiTJHghRBCCCGEEEIIIYQQQgghxhQJXgghhBBCCCGEEEIIIYQQYkyR4IUQQgghhBBCCCGEEEIIIcYUCV4IIYQQQgghhBBCCCGEEGJMkeCFEEIIcYjTNC3RXRBCCCGEEEKMUfK8IIQ4VEnwQggh+rBz505uvvlmTjrpJGbOnMncuXM599xz+fOf/0woFIquV1FRQUlJCX/7298S2NtDw/Lly7nmmmtG/bj736O//e1vlJSUUFFRMSz7Ly0t5cYbb+T4449nxowZHHvssVx++eVs2bIlZr2ROP/t27fz7W9/e1j3KYQQQgghRt55551HSUlJzL958+bxve99j88//zxm3WuuuYaSkhKWLl3a50D0HXfcQUlJCeedd1502f33309JScmInkd/fD4ff/zjH/mv//ovFi5cyPTp0znhhBO4+eabqampiVn3vPPOi+n74eSNN97gggsu4KijjmLWrFmcdtppPPTQQ3R0dETX+eyzzygpKeGzzz4b1mM///zz3HbbbcO6TyGEGC36RHdACCHGoldffZVrr72WoqIi/vu//5tx48bh8/l47733uPXWW/nggw946KGHUBQl0V0Vg3Dsscfy3HPPkZ6ePuR9vfnmm1x11VVMmDCBH/3oR+Tm5lJTU8NTTz3Ff/7nf/Lwww9z9NFHD0Ov43v99ddZu3btiO1fCCGEEEKMnClTpnDDDTcAEA6HaW5u5tlnn+WCCy7gb3/7GxMmTIiuq6oqtbW1rFmzhrlz5/ba16uvvjpq/R6I2tpaLrzwQqqrq/nOd77DJZdcgtlsZsuWLTz11FO8+uqrPPPMM4wfPz7RXR0xkUiEK6+8ktdff52zzz6bb3/729hsNtatW8fjjz/O22+/zR//+EecTueI9eHhhx9mwYIFI7Z/IYQYSRK8EEKI/ezcuZNrr72WJUuWcM8996DX7/tVuWzZMhYuXMhll13Ga6+9ximnnJLAnorBSk5OJjk5ecj72bNnD1dffXX0e0Wn00XbTjzxRL797W9z9dVX8+9//xuj0Tjk4wkhhBBCiMOL3W5n1qxZMcuOOuooFi9ezN/+9jeuvvrq6PKsrCw0TeO1117rFbxYt24dtbW1TJw4cTS6fUCapnHVVVdRU1PDqlWrKCgoiLYtWLCAM844gxUrVnDrrbfy2GOPJbCnI+uxxx7j5Zdf5oEHHuCEE06ILl+8eDELFixg5cqVPPjgg1x77bUJ7KUQQoxdkjZKCCH289hjj6GqKjfddFNM4KLbSSedxH/8x3/0uX1fU7NLSkq4//77o193dHRw8803s2TJEmbNmsXZZ5/Nu+++G20Ph8M888wznH766dFURHfccQd+vz+6TlNTE1dccQVHH30006dP58wzz+TFF1+MOW5VVRWXX345CxYsYObMmXz/+99n06ZNffZ/zZo1lJSU8M4778Qs37x5MyUlJbz11lsAvPzyy5xxxhnMmDGDRYsW8Ytf/ILa2to+9xvPeeedx3XXXcejjz7Ksccey/Tp0zn33HNZv359dB2fz8eNN97I0qVLmTZtGieffDKPP/54tL
2vFFD9pWjaf5trrrmG888/n1WrVnHSSScxbdo0zjzzTN5///1++//0008TCAS4/vrrYwIXABaLhauvvpqzzz6b1tbWXtv2NS18/+nyGzdu5Pvf/z5z585l9uzZnH/++axbtw7o+l574IEHgNjvr0gkwqOPPsoJJ5zAtGnTOOmkk3j66ad7HecXv/gFl112GbNmzeK///u/geG5r0IIIYQQYvAsFgsmkynuLO+TTz6ZN998s1fqqFdffZWjjjoKt9t9UMf6/e9/z7Rp03p9Xv3jH//I1KlTaWxsJBKJcPfdd7N8+XKmTZvG8uXLufPOOwkGg33u98svv+TTTz/lZz/7WUzgopvb7eayyy4jJyeHSCTSq72v1LzXXHMNy5cvj1n24osvsmLFCmbOnMmxxx7LnXfeSSAQiLZv2LCBCy64gIULFzJnzhx++MMfsn379ph9PPXUU5x88slMnz6dJUuWcOONN8akdBrI5+v9BYNBnnjiCZYuXRoTuOg2d+5cLrvsMoqLi+NuH+9c412X/vq+fPlyKisr+fvf/x7z/HOgZ8Tu4zz55JOcfPLJzJw5k1WrVh3w2UwIIYabzLwQQoj9/Otf/2LRokWkpKT0uc5Qc4aGw2H+53/+h7KyMi677DLGjx/P3//+dy655BKeeuop5s2bx69+9SteeuklLrroIubNm8emTZt48MEH2bx5M4899hiKonDllVfS2NjITTfdhN1u56WXXuLqq68mMzOTRYsW0dTUxLnnnovFYuGXv/wlFouFp556ipUrV/LCCy9QVFTUq29z5swhPz+fV155heOOOy66/OWXX8btdrNs2TJWr17NVVddxY9//GPmz59PTU0Nt99+O1dccQV/+tOfDupavPHGGxQVFXH99dejaRq33XYbP/nJT/j3v/+NTqfj1ltv5cMPP+Tqq68mNTWV999/n//3//4fbrebs88+e0j3oaeNGzdSV1fHZZddht1u59577+UnP/kJ77//Pi6XK+42H3zwAVOmTCEjIyNu++LFi1m8ePGg+9TR0cGFF17IokWLuP/++wkEAjz88MNccMEFvPvuu3zrW9+ipqaGF154geeee47MzEwAbrzxRv72t7/xgx/8gNmzZ/PFF19w66230tbWxiWXXBLd/2uvvcYZZ5zBww8/TCQSGdb7KoQQQgghDkzTtGg9PU3TaGlp4amnniIQCMT9rHvKKafwxBNPxKSOikQivP7661x++eWsWrXqoI5/+umnc8899/Dmm2/yrW99K7r8lVde4ZhjjiElJYVHHnmEZ599lquvvpq8vDy++uor7r77bgwGA5dddlnc/b799tsoisKpp57a57FXrFjBihUrDqq/+3vmmWf49a9/zbe+9S0uv/xyysvL+X//7//R2trKr3/9az799FMuvPBCFi5cyK233orf7+eRRx7h3HPP5a9//StFRUW8/PLL3H777Vx99dWUlJSwa9cubrvtNrxeb/S5b6Cfr3v6+uuvaW5ujnmm2t+Pf/zjIZ3/gfr+wAMPcPHFFzNlyhR+/OMfk56eflDPiPfffz/XXXcddrudmTNnjtqzmRBCdJPghRBC9NDa2kprayuFhYW92noW6QZQFKXX2/YD9f777/PVV1/x4IMPcvzxxwOwaNEiysvL+fTTT3G73bzwwgtcccUVXHzxxQAcffTRpKenc9VVV/H++++zbNkyPv/8cy655JLoPhYsWIDb7Y6mKHrqqadoaWnh2WefJScnB4ClS5dyyimncO+993LffffF7d8ZZ5zBE088gc/nw2w2o2kar776KieffDJGo5HVq1djNpu5+OKLo8dyu91s2LABTdMOqhZIKBTi8ccfx263A9DZ2cnVV1/N5s2bmTZtGp9//jlHH3109MFn4cKFWK3WfoNLg9He3s7f/vY38vPzAbBarXz3u9/l008/5aSTToq7TU1NDZMnTx7WfvS0Y8cOmpub+d73vsecOXMAGD9+PM899xydnZ1kZmZGAx
bd6QZKS0v561//yuWXXx793jnmmGNQFIVHHnmE73znOyQlJQFgMBi46aabovfw0UcfHbb7KoQQQgghDuyLL75g6tS
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"\n",
|
|||
|
"sns.scatterplot(x=df['Glucose'], y=df['Insulin'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Glucose vs Insulin Clusters')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df['BMI'], y=df['Glucose'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('BMI vs Glucose Clusters')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df['Age'], y=df['BMI'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Age vs BMI Clusters')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df['Outcome'], y=df['Glucose'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Outcome vs Glucose Clusters')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"KMeans (неиерархическая кластеризация) для сравнения"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 84,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Центры кластеров:\n",
|
|||
|
" [[ 3.39431138e+01 1.05173653e+02 4.19700599e+01 1.27970060e+02\n",
|
|||
|
" -1.11022302e-16]\n",
|
|||
|
" [ 3.53015209e+01 1.01726236e+02 3.71749049e+01 1.42802281e+02\n",
|
|||
|
" 1.00000000e+00]\n",
|
|||
|
" [ 2.84541420e+01 5.02011834e+01 2.58668639e+01 1.00352071e+02\n",
|
|||
|
" 1.47928994e-02]]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAASgCAYAAACAO9vxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xUVfr48c/0nt4bCS0QOkgTBcECYltdd9W1rK69t1XXr30Ve0ex+9tiL7t2UXEVKyAo0nuA9F4mmX7n/v6IGRgyCaROgOf9evmS3HPn3jNnJpM597nPeTSqqqoIIYQQQgghhBBCCCGEEEL0E9pod0AIIYQQQgghhBBCCCGEEGJ3ErwQQgghhBBCCCGEEEIIIUS/IsELIYQQQgghhBBCCCGEEEL0KxK8EEIIIYQQQgghhBBCCCFEvyLBCyGEEEIIIYQQQgghhBBC9CsSvBBCCCGEEEIIIYQQQgghRL8iwQshhBBCCCGEEEIIIYQQQvQrErwQQgghhBBCCCGEEEIIIUS/IsELIYQ4SKmqGu0uCCGEEEIIIQ5wMu8QQgjRVRK8EEL0mbPPPpuzzz67zfampib++Mc/MnLkSBYtWhTaNz8/n9NPP73d41177bXk5+fzt7/9rdf63Fu8Xi//+Mc/+P3vf8+ECROYNGkSp59+Ou+9917Yl/v58+eTn5/fo+f2+Xzce++9fPjhhz1yvPZe1762cOFCLrroIg4//HBGjhzJYYcdxtVXX82qVavC9uuNMT0QLV26lPz8fJYuXdrn597zNeov7zEhhBBC7B9k3rGLzDt6RnFxMfn5+WH/jRgxgunTp3P77bdTW1sbtn/rPo8++mjE4wWDQQ4//HDy8/P5z3/+E9o+a9asqL7Ptm7dyt13383s2bMZM2YMEyZM4PTTT+e1114jEAiE9msdj937LoQQvUEf7Q4IIQ5uTU1NXHDBBWzYsIGnn36aGTNmhNq0Wi0rV66kvLyctLS0sMe5XC6++uqrvu5uj6iuruaCCy6grKyMs88+m9GjRxMMBvnqq6/429/+xvLly7n77rvRaDS9cv7Kykr++c9/ct999/XI8e64444eOU5XBQIBrr/+er744gtOPPFEbrvtNuLj4yktLeWtt97i9NNP5+GHH2bu3LlR7afoumi/x4QQQgix/5N5h8w7esKll17KEUccAbQEhgoLC5k/fz5btmzhtddeC9tXq9WycOFCrrvuujbH+emnn6isrOyLLu+zTz75hJtvvplBgwZx3nnnkZeXh8fjYfHixdx77718++23LFiwoNfeL0IIEYkEL4QQUdM6gVi/fj3PPPMM06ZNC2svKChgy5YtLFy4kHPPPTes7auvvsJisRATE9OHPe4ZN910E+Xl5bz55pvk5uaGth9xxBFkZGTw6KOPMnPmTI488sjodbITBg8eHNXzP/vssyxcuJAnn3yS2bNnh7WdcMIJXH755dx1113MmjULs9kcpV6K7oj2e0wIIYQQ+zeZd8i8o6fk5OQwduzY0M+TJ0/GYDDwf//3f2zevJkhQ4aE2saPH8/y5ctZt24dBQUFYcf5+OOPGT58OOvXr++rrndo69at3HzzzRx++OE8/vjj6PW7LhfOmDGDyZMnc9VVV/Hpp5/KTWFCiD4ly0YJIaKiubmZCy+8kI0bN/L888+3mUAAWK1WZsyYwcKFC9u0ffLJJ8yePTvsSxW0pN8+//zzHH300YwcOZLZs2fz73//O2wfRVF4/vnnOf744xk9ejRjx47l9NNPZ8mSJaF95s+fz9FHH83XX3/NCSecEDrWe++9F3asf/7zn8yZM4dRo0Zx+OGHc+edd9LU1NTu816/fj3fffcd559/ftgEotW5557LmWeeidVqjfj4SGnE//nPf8jPz6e4uBgAj8fDnXfeyfTp0xk5ciRz5szhpZdeAlrSe1snJzfffDOzZs0KHWf58uWcddZZjBkzhkmTJnHTTTeFpT//5z//oa
CggLfffptp06YxadIktmzZ0iZ9Oz8/n1dffZVbbrmFSZMmMW7cOK6++mqqq6vD+v3SSy9x5JFHMnr0aE4//XT+97//hS1T1JqKPH/+/HbH0+1289JLLzFnzpw2gQtoudvpmmuuYfLkydTU1HR5TAFWrlzJX/7yF8aPH8+UKVO47rrrqKioCLVXVlZy8803M2PGDEaPHs2pp57Kl19+GXbc77//nj/+8Y+MGzeOiRMncumll7J169awfRYtWsQpp5zCqFGjmDZtGvfccw8ul6vdMbjtttuYNm0aiqKEbZ83bx6TJ0/G7/d3+J7YV62vx6effspVV13FuHHjmDRpErfeemtY/9asWcOf//xnJkyYwLhx4zj33HNZuXJlqD1Suv/elqjq6ntMCCGEEELmHTLv6Il5R0diY2MB2mQkTJw4kaSkpDbvq0AgwOeff85xxx3X6XPNnj2bq666qs32k046iUsvvRSAnTt3cskllzB58mTGjBnDaaedxuLFizs87osvvohWq+Wuu+5q815vPe/vfve7dh/f3rJje45rU1MTd999N4cffjhjx47l97//PV9//XWoXVEUXn31VU444QRGjx7NEUccwcMPP4zX6w3tU1tby/XXX8+0adMYNWoUJ510Upvfl9LSUq677jomTZrEmDFj+POf/8y6des6HAMhRP8kwQshRJ9zuVxcdNFFrFu3jhdeeIHJkye3u+/cuXNDKdytmpqa+Oabbzj++OPb7H/nnXfy5JNPcuKJJ/Lss88yZ84c7r33Xp5++unQPg8//DALFizgtNNO48UXX+Tuu++mvr6eq6++GrfbHdqvqqqKv//975xzzjk8//zzZGVlcdNNN4UuNn/00Uc89NBDnHnmmbz00ktcfvnlvP/++9x9993tPp9vv/0WIOzL++5MJhO33347U6dObfcYe3PvvffyzTffcNNNN4W+qD/44IO8++67pKSk8NRTTwEtKc+t//7pp58499xzMZvNPP744/zf//0fy5Yt45xzzsHj8YSOrSgKL7/8MvPmzQulFEfy2GOPEQwGefTRR7nxxhv56quvuPfee0PtTz31FA8//DDHHnssCxYsYMyYMVxzzTVhx0hJSeHNN9/kD3/4Q7vP9YcffsDlckV8L7TKz8/nySefJDMzc69j155169Zx1lln4fV6efDBB7nrrrtYs2YN559/PoFAgOrqak499VSWL1/Otddey/z588nMzOTyyy/ngw8+AKCoqIjLLruMkSNH8swzzzBv3jwKCwu56KKLCAaDAHz44YdcfvnlDBw4kKeffporrriCDz74gMsuu6zdQocnnXQS1dXVYRf+g8Egn376KccddxwGg6HD90Rn3XHHHWRmZrJgwQLOP/983nnnHZ555hlg112N8fHxzJ8/n8ceewy3283555+P0+ns9Lk6srf3mBBCCCGEzDtk3tFT845WwWCQQCBAIBDA4/GwYcMGFixYwJQpU9pkhuh0OmbPnt0mePHjjz/i9XrbfW06cuKJJ7J48eKwwNXWrVvZsGEDJ510EsFgkIsvvhi3282DDz7IggULiIuL49JLL2XHjh3tHvfLL79kypQpJCYmtrvPAw880K2sC0VR+Mtf/sKHH37IxRdfzIIFCxg4cCCXX345y5cvB+D222/nvvvu46ijjuKZZ57hzDPP5JVXXgmbD91www1s3bqVu+66ixdeeIGCggJuuummUFCwtraW008/nbVr13LbbbfxyCOPEAwGOfPMM9vcOCaE6P9k2SghRJ9qnUCsWLEi9HNHjjjiCCwWS1gK9xdffEFiYiITJkwI27ewsJC33nqL6667josuugiAww47DI1Gw3PPPcef/vQn4uPjqays5Nprrw27a8dkMnHllVeycePGUBqw2+1m3rx5oS/0ubm5zJw5k8WLFzNo0CCWLVtGVlYWZ555JlqtlkmTJmG1WmloaGj3+ZSVlQGQlZW174PWScuWLWPatGmhO3kmT56M1WolMT
ERo9HI8OHDgZaU59b05UceeYS8vDyee+45dDodAGPGjOG4447j3Xff5cwzzwwd/5JLLgmt89qeoUOHhq1tu2rVqtC
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"features_used = ['BMI', 'Insulin', 'Age', 'Glucose', 'Outcome']\n",
|
|||
|
"data_to_scale = df[features_used]\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(data_to_scale)\n",
|
|||
|
"\n",
|
|||
|
"random_state = 9\n",
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=random_state)\n",
|
|||
|
"labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
"centers = kmeans.cluster_centers_\n",
|
|||
|
"\n",
|
|||
|
"# Отображение центроидов\n",
|
|||
|
"centers_original = scaler.inverse_transform(centers) # Обратная стандартизация\n",
|
|||
|
"print(\"Центры кластеров:\\n\", centers_original)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов кластеризации KMeans\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df['Glucose'], y=df['Insulin'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers[:, 0], centers[:, 1], s=300, c='red', label='Centroids')\n",
|
|||
|
"plt.title('KMeans Clustering: Glucose vs Insulin')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df['BMI'], y=df['Glucose'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers[:, 2], centers[:, 3], s=300, c='red', label='Centroids')\n",
|
|||
|
"plt.title('KMeans Clustering: BMI vs Glucose')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df['Age'], y=df['BMI'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers[:, 1], centers[:, 4], s=300, c='red', label='Centroids')\n",
|
|||
|
"plt.title('KMeans Clustering: Age vs BMI')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df['Outcome'], y=df['Glucose'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers[:, 3], centers[:, 4], s=300, c='red', label='Centroids')\n",
|
|||
|
"plt.title('KMeans Clustering: Outcome vs Glucose')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"PCA для визуализации сокращенной размерности"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 85,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAAJHCAYAAAA+Dx+UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdZ3gc1fn38e9sX/VmFUuWe+82tjHdBozpgUCA0APBlIQE4gQINcmfQBJKEkqAhJJQHiChV2NCDRiDcaG5W+62ei9b53mx1tqSVrJkS7sr+fe5Ll225uzMObs7u5oz9znnNkzTNBEREREREREREREREYkjllg3QEREREREREREREREpDUFMEREREREREREREREJO4ogCEiIiIiIiIiIiIiInFHAQwREREREREREREREYk7CmCIiIiIiIiIiIiIiEjcUQBDRERERERERERERETijgIYIiIiIiIiIiIiIiISdxTAEBERERERERERERGRuKMAhohIH2CaZqybIO04kN+bA/m5i4iIiEhLujY8MOh9FpHupgCGSB92/vnnM3LkyBY/48aN46ijjuI3v/kN1dXVbfYpKiritttu45hjjmHChAkcddRRXHvttaxatardeu69915GjhzJ7373u558Ou267777GDlyZEzqjuTFF19k5MiRbN26tcf383q9/P73v+e1117rajO75Oyzz2bkyJEsWLCgR+uJt/dyf9TU1PCrX/2KJUuWhLedf/75nH/++VFrQ2c/z7Nnz+b666/v1rrXrl3LOeec0y3H2rp1KyNHjuTFF1/sluOJiIhI/FCfJTb6Up9l5MiR3HfffW22r1mzhpkzZ3LkkUeycePG8GNHjhzJPffcE/FYwWCQww8/vNdeexYXF/PHP/6RuXPnMnHiRA477DAuv/zyFn0S6Jl+yc6dO7nsssvYtm1btxyvvfdVRA48CmCI9HFjxozhueeeC/88/vjjXHTRRbzwwgvMmzevxeiId955h9NOO41vv/2WK664gr///e9cc801bNy4kR/84Ad88sknbY4fDAZ5+eWXGTFiBK+88gqNjY3RfHoHvJKSEv75z3/i9/t7rI4NGzawbNkyRowYwbPPPttj9fQ1K1eu5JVXXiEYDIa33Xrrrdx6661RqX9fPs/d6e2332bZsmXdcqzs7Gyee+45jjrqqG45noiIiMQX9Vn6tmj0WVpbu3YtF110EW63m6eeeopBgwaFyywWC2+//XbE/b744gtKSkqi1Mru9eWXX3Lqqafy/vvvc8EFF/DQQw9x44030tTUxPnnn8/LL7/co/V/+umnfPjhh912vOeee44zzzyz244nIr2XLdYNEJGelZSUxKRJk1psmzZtGvX19fz1r39lxYoVTJo0ic2bN3Pddddx+OGH8+c//xmr1Rp+/Jw5czjnnHO47rrreO+993A4HOGy//3vf+zcuZN77rmH8847j9dff10XGX3Miy++SH5+PvPmzWP+/Pls2rSJgQMHxrpZvdKwYcOiUs++fp7jlcPhaPM9JiIiIn2H+izSndavX8+FF15IYmIi//znP+nfv3+L8ilTprBkyRK+++47xowZ06LsjTfeYPTo0axcuTKaTd5vVVVV/PznP2fQoEE8/vjjuN3ucNlxxx3HZZddxi233MJhhx1GVlZWDFvaebr+F5FmmoEhcoAaN24cANu3bwfgySefxOv1ctNNN7XoCAC43W6uu+46vv/977eZwv3CCy8wYsQIpk6dyowZM3juuef2Wvfs2bP5/e9/z4UXXsiECRO48cYbgdBF1y233MIhhxzC+PHj+cEPfsCiRYta7OvxeLjjjjs49NBDmTx5MjfccAMej6fFYyJNh128eDEjR45k8eLF4W0bNmzgJz/5CdOnT2fatGnMmzeP9evXt6jrj3/8I0ceeSTjxo3j5JNP5s0332xx3GAwyIMPPshRRx3FxIkTufLKKyNOc2+ts/u9++67/PCHP2
Ty5MmMGzeOuXPn8vTTTwOhZXWOPvpoAG644QZmz54d3u/f//43p59+OpMmTWLChAmceuqpvPXWWy2OPXLkyL0uGxQIBHj55ZeZNWsWxxxzDAkJCRHfY5/Px1133cURRxzBhAkTuOSSS3j55ZfbTC9/6aWXOOGEExg/fjynnHIKixYtYsyYMR1Oz37zzTc5/fTTmTx5Moceeii33HJLi9fqvvvuY+7cuSxcuJCTTjqJ8ePHc+qpp7Js2TKWL1/OmWeeyYQJEzjppJPanE9r1qxh3rx5TJkyhSlTpnDVVVexZcuWcHnzefPss88ya9YspkyZEh7V19FrvHjxYi644AIALrjggvD5uOe5+aMf/YjTTz+9zfO98sorOeWUU8K/L1myhPPOO4+JEycyffp0rrvuOioqKtp9vWDfP897Puc9Pyut2w7wzTffcOGFFzJ16lQmT57MRRddxPLly4HQe3L//fcDLad+B4NBHnnkEY499ljGjRvHcccdx5NPPtmmnvnz53P11VczadIkLr744jZLSL344ouMGTOGFStWcNZZZzF+/HhmzZrFo48+2uJYJSUlXHPNNeHP+C233MK9997b4rMiIiIi8Ut9FvVZOtNn2dP69eu54IILSE5O5qmnnmoTvIBQcCwrK6vNLAy/388777zDiSee2GafzrzvFRUV/OY3v2HWrFmMGzeO6dOnc9VVV7XoD51//vnceOONPPLIIxx11FGMHz+es88+m6+++ir8mKamJm677TaOOOKI8OvZ+jq3tZdffpmSkhJ+/etftwheQGjGyfz58zn33HOpq6trs297y7Vef/31Ld6vzZs3c/nllzNjxgwmTpzIWWedFZ5x8eKLL3LDDTcAcPTRR7d4z/79739z4oknhpeGu++++wgEAi3qufDCC7n11luZMmUKJ5xwAoFAoEU/ovmzsWjRIn70ox8xceJEDj30UP70pz+1OFZdXR233HILM2fOZPLkyVxzzTU88cQTcbV8m4h0nQIYIgeooqIiAAYMGADAxx9/zJgxY8jJyYn4+JkzZ3LNNdfQr1+/8Laqqiree+89vve97wFw2mmn8fXXX/Ptt9/utf6nn36a8ePH8+CDD3LGGWfg8Xi48MIL+e9//8s111zD/fffT25uLpdeemmLC8Nf/vKXPP/888ybN48///nPVFdX88QTT3T5+RcXF3PWWWexceNGbrvtNv70pz9RVlbGhRdeSFVVFaZpctVVV/Hss89y8cUX87e//S18AbTn1Ns//elPPPDAA5xxxhncf//9pKWlcffdd++1/s7s98EHH3DVVVcxduxYHnzwQe677z4GDBjAb3/7W1asWEF2dnb4JvEVV1wR/v/TTz/NLbfcwjHHHMPDDz/MXXfdhcPhYP78+ezcuTN8/Oeee44rr7yyw3Z+9NFHlJaW8r3vfQ+Xy8Xxxx/PSy+9hNfrbfG4W265hX/+85+cd955PPDAA2RlZXHzzTe3eMzLL7/M9ddfz5QpU3jwwQc57rjjuPLKK1tccLb24IMPcu211zJp0iT++te/ctVVV7FgwQLOP/98mpqawo/buXMnd955J5dffjl/+ctfqKmp4eqrr+baa6/lzDPP5IEHHsA0Ta655prwfkVFRZx99tmUl5fzhz/8gdtvv50tW7ZwzjnnUF5e3qId999/P9dddx233HILkydP3utrPHbsWG655ZbwaxNp2ahTTjmFb7/9lk2bNoW31dTU8NFHH3HqqacCoSnsF110ES6Xiz//+c/8+te/5vPPP+eCCy5o8fxb25fPc1fU1dVx6aWXkp6ezn333ce9995LY2Mjl1xyCbW1tZx55pmcccYZQMup37fddht//etfOeWUU3jooYeYO3cuv//973nggQdaHP+tt94iMTGRv/3tb1x66aUR2xAMBvn5z3/OCSecwCOPPMKUKVP44x//yMcffwyE1lq+8MILWbp0Kb/+9a+54447WLVqFY899tg+PWcRERGJPvVZ1G
fpTJ+l2YYNG7jwwgtJSkriqaeeavc8sVqtHHfccW0CGIsWLcLj8bQZ7NKZ9900TebNm8cnn3zC/PnzefTRR/nJT37
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"reduced_data = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация сокращенных данных\n",
|
|||
|
"plt.figure(figsize=(16, 6))\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: KMeans Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 86,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA14AAAImCAYAAABD3lvqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB/1UlEQVR4nOzdd3RVVd7G8efe9E4aCQECIaGTUEOTLioIKmXsoviKWEBGQFFGdOw6FKmiYkUFRUURxTIoVWpoUhJKCKEnIR1IT+77R8gdYyghJDkp389aWZJz9jn3dy97mDzZ++xtslgsFgEAAAAAKozZ6AIAAAAAoKYjeAEAAABABSN4AQAAAEAFI3gBAAAAQAUjeAEAAABABSN4AQAAAEAFI3gBAAAAQAUjeAEAAABABSN4AQAAAEAFI3gBAAAAQAUjeAFAORgxYoSaN2+uu+6665Jtxo8fr+bNm+vZZ5+txMoAlNWJEyfUvHlzffvtt0aXAqAGIHgBQDkxm83atWuX4uLiSpzLyMjQ6tWrDagKAABUBQQvACgnrVq1koODg3755ZcS51avXi0nJyf5+fkZUBkAADAawQsAyomzs7N69+590eD1008/6aabbpKtrW2Jc7/99puGDRum0NBQXXfddXr11VeVkZEhSerXr5+aN29+0a8TJ05IkjZs2KB77rlHHTt2VJcuXTRx4kSdPn262GtMnDjxove40hSqoimUF/v6qz179uihhx5Sly5d1KFDBz366KM6dOiQ9fyWLVvUvHlzbdmyRZJ08OBB9e/fX3fddZfmzp17ydeYO3euJOnrr7/WwIED1aZNm2LnrzRt86uvvrroff96XdF0siu1K2sNpf1sLvf6lzpf9Pfw7LPPql+/fsVe98svvyz2Gf71dbZv316s7eeff67mzZsXu0dWVpZmzJihG2+8UW3atFGHDh304IMPKioqqti1l6prxIgRxdoU1XExf+8fRUaMGFHsPtnZ2Xr77bc1YMAAhYaG6sYbb9SCBQtUUFBQ7Jq/17Jly5ZSXXslFotFkydPVlhYmP74449SXwcAklTyJwAAQJndfPPNevLJJxUXFyd/f39J0rlz57Ru3Tp9/PHHWrduXbH2P/zwg5566indcsstevLJJ3Xy5EnNnDlT0dHR+vjjjzVv3jzl5OTozJkzGjt2rB577DH16dNHklS3bl0tW7ZMzzzzjAYPHqxHHnlEKSkpmjNnju68805999138vb2llT4A+udd96pYcOGSZL1fqXRqlUr/fvf/7Z+//XXX+ubb76xfr9582aNGjVKXbp00euvv67s7Gy99957uuuuu/TVV18pODi4xD2nTZumNm3a6LHHHpOHh4d69uwpSXrppZckyfp6/v7+ioiI0JQpU/SPf/xDU6ZMkYuLiySVqv6srCyFhoZqypQp1mOXuu6vn+3f25W1hqv5bF544QW1bt36oq+/ZMkSSdK+ffv08ssvl2j7d2lpaZo1a9ZFz7m4uGjVqlXq2LGj9dhPP/0ks7n472InTZqkbdu2acKECQoMDNTRo0c1e/ZsTZw4UStWrJDJZLK2/cc//qHbb7/d+n3R32N5slgsevTRR7Vr1y6NHTtWLVq00JYtWzRr1iwdP35cr7zyirXt3/tscHBwqa+9nFdffVU//vij3n77bfXo0aPc3yOAmo3gBQDlqE+fPnJyctIvv/yikSNHSpJWrlwpb2/vYj/oSoU/SE6fPl09e/bU9OnTrccbN26skSNHau3atdYgUDS6FRgYqHbt2kmSCgoKNH36dPXo0UMzZsywXt+hQwfdfPPN+vDDDzVp0iRJUmZmpho3bmy9tuh+peHq6mq9TpLWr19f7PyMGTPUqFEjLViwQDY2NpKkHj166IYbbtCcOXM0e/bsYu2PHj2qP/74Q8uXL1fTpk0lyRpSXV1dJanY661YsUKS9K9//csaeCTJ3t7+irVnZmbKx8en2P0udd1fP9u/t9u9e3
eZariazyYkJOSSr190PDs7+6Jt/27OnDkKCAhQSkpKiXO9evXS77//rqefflqSFBcXp507d6pTp046efKkJCknJ0fnz5/XlClTdPPNN0uSOnfurHPnzunNN99UYmKifH19rff09/cvVk/R32N5WrdunTZu3Ki33npLgwYNkiRdd911cnR01OzZs3X//fdb+9Pf++zatWtLfe2lzJgxQ0uWLNG8efPUq1evcn9/AGo+phoCQDlydHRUv379ik03XLFihQYOHFhshECSYmJiFBcXp379+ikvL8/6FR4eLldXV23YsOGyr3XkyBGdOXNGgwcPLnY8MDBQ7du319atW63HTp8+LTc3t3J4h8VlZGRoz549GjhwoDVYSJK7u7v69u1brIai9jNnzlSXLl2u+INukbCwMEnSRx99pISEBOXk5CgvL69U15bX+y5LDVf72ZSXgwcPasmSJXr++ecver5fv36KjY1VTEyMJOmXX35R27ZtVb9+fWsbe3t7ffjhh7r55psVHx+vzZs368svv7QuEJOTk3PVdRUUFCgvL08Wi+WKbYq+/tp269atsrW11YABA4pdc+utt1rPX8q1XCtJixYt0oIFCzRo0KBio6IAcDUY8QKAcjZw4ECNHTtWcXFxcnBw0KZNm/Tkk0+WaJeamiqpcFrWxaZmJSQkXPZ1iq738fEpcc7Hx0eRkZGSCkfWTp06pQYNGlzdGymFs2fPymKxXLKGs2fPFjv26KOPyt3dvdhUxSsJDw/XlClTtGDBAs2bN++q6jt58uRlp+RVZA1X+9mUl1dffVWDBg1S+/btL3rez89Pbdq00e+//64mTZrop59+0uDBg639pcj69ev1+uuvKyYmRi4uLmrRooWcnZ0l6bLh6VLmz5+v+fPny8bGRj4+PurRo4f++c9/FltwpmiU+K86d+4sqXD6pKenZ7EQK8k68na5z/NarpWk/fv3q0ePHvrxxx/1wAMPqFWrVpdtDwAXQ/ACgHLWq1cvubi46JdffpGzs7MaNGigNm3alGjn7u4uqfBZmqIfLv/Kw8Pjsq9Tp04dSVJiYmKJc2fOnJGnp6ckKSoqSllZWSUWxCgPbm5uMplMl6yhqMYikyZN0i+//KJx48Zp0aJFpZ6Sdscdd+iPP/5QXl6eXnjhBTVo0ECPPfbYZa8pKCjQn3/+qeHDh5fqNf4+InmtNVztZ1Mefv75Z+3du7fY1NOLuf766/X7779r4MCB2rt3r+bNm1cseB07dkxjxoxR//799d5776lhw4YymUxatGhRiamm0pU/O6nw87vjjjtUUFCgU6dOaebMmXr44Ye1fPlya5uXXnqpWFD+63NaHh4eSklJUX5+frEAVfQLiqL+fjHXcq0k/fOf/9T999+vQYMGacqUKfr6669LhDgAuBKmGgJAObO3t1f//v3166+/6ueff7Y+U/J3TZo0kbe3t06cOKHQ0FDrl5+fn2bMmFFiBOLvgoKC5Ovrqx9//LHY8ePHj2vXrl3q0KGDJGnNmjVq2bKlvLy8rvq9FBQUXPYHTGdnZ7Vp00Y///yz8vPzrcfPnj2rNWvWlHiurU2bNpo3b55OnjypadOmlbqO2bNna82aNXrzzTc1cOBAhYaGXvH5qh07digjI0NdunS5bLui0Zu/Ly5xrTVc7WdzrXJycjR16lSNGTOm2PNXF9O/f3/9+eef+vzzz9WxY0fVrVu32Pm9e/cqOztbo0ePVmBgoDVYFYWuos+saEXAK312UuFiMKGhoWrbtq0GDhyoe++9VwcOHFBaWpq1TVBQULH/Lfz1ebrOnTsrLy+vxKqhRcHtcp/ntVwrFY5QOjo66oUXXtC+ffv08ccfX/H9AsDfMeIFABXg5ptv1iOPPCKz2VxsRb2/srGx0fjx4/XCCy/IxsZGffv2VXp6uubPn6/4+PgrTpEzm82aMGGCJk+erIkTJ+rWW29VSkqK5s2bJw8PDz344IPat2+fFi1apEGDBmnXrl3Wa8
+cOSOpcGQjOTm5RChLTk5WdHS0jh49ag1wlzJx4kQ99NBDGj16tO655x7l5uZqwYIFysnJ0ZgxY0q09/Pz05NPPqn
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"inertias = []\n",
|
|||
|
"clusters_range = range(1, 11)\n",
|
|||
|
"for i in clusters_range:\n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" kmeans.fit(data_scaled)\n",
|
|||
|
" inertias.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range, inertias, marker='o')\n",
|
|||
|
"plt.title('Метод локтя для оптимального k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Оптимальное количество кластеров согласно методу локтя: 2"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Расчет коэффициентов силуэта"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 87,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACl6UlEQVR4nOzdd1yV5f/H8ddhg2xUUFBBTXHhBPdI/dq0zMqWWpZZZtnUsmxbliNL+1lWWo7MhmU5KpvO3FvciCKiiCxlwzm/P8hT5AI8h5vxfj4ePtL73Pd93vflMfhwXffnNlksFgsiIiIiIiJiMw5GBxAREREREalsVGiJiIiIiIjYmAotERERERERG1OhJSIiIiIiYmMqtERERERERGxMhZaIiIiIiIiNqdASERERERGxMRVaIiIiIiIiNqZCS0RERERExMZUaIlIlTFo0CAGDRpUZNumTZu46aabaNq0KV9//bVd3/+5556jZ8+eJT6uZ8+ePPfcc3ZIJCL20rhxY6ZNm2Z0DBExkJPRAUREjHL69GkefvhhmjVrxsyZM2ncuLHRkURERKSSUKElIlXWp59+SnZ2NhMmTCAwMNDoOCIiIlKJaOmgiFRJKSkpzJ8/n759+55XZMXGxjJy5Eg6d+5Mq1atGDRoEJs3by6yz59//kn//v1p2bIlnTp14uWXX+bMmTNF9vn888+5+uqradmyJU8++SRnz54F4IMPPqBjx460a9eOl19+mdzcXOsxubm5vPrqq0RGRtK+fXvr0qOMjAxGjRpFq1at6N69O59//rn1mGPHjtG4cWO+/fZb67acnBx69epVZJbuQksn169fT+PGjVm/fv0F/wyFM3/t2rU7b9nj119/zQ033EDz5s3p0aMH06ZNo6CgwPr6hZZK/jvrufe60K9zOS+3bPJC1/RfiYmJPPvss3Ts2JHWrVszcOBAtm7dan39v0u8LBYLd955J40bN+bYsWNF9rtU1pEjR9KtWzfMZnOR93/hhRe45pprADhx4gRPPfUUHTp0oGXLlgwaNIht27YBMG3atIu+x7l8e/fu5dFHH6VDhw40a9aMrl27Mm7cOLKzsy85BmvWrLlk9uJeI8Bvv/3GLbfcQsuWLS95rn/79ttvady4Mdu3b+eWW24hIiKCvn378tNPPxXZ79ixY4wePZouXbrQrFkzOnbsyOjRo0lJSbHus2fPHu655x5at25N7969WbBggfW1C31+4fzPyeWW9f37czdnzpzz/n2tW7eO8PBw/u///u+i5/ivqVOn0qRJE7777rtiHyMiFZtmtESkSrFYLCQkJDBu3Djy8/N56KGHirx+8OBBBgwYQGhoKGPHjsXZ2Zk5c+Zw7733MmvWLKKioti4cSPDhw/npptu4umnn+bAgQO8++677N+/n3nz5uHo6Mivv/7Ka6+9xqBBg+jWrRtffvklv/76KwDLli1j3LhxxMfHM2nSJNzc3BgzZgwAEydOZOHChYwePZqgoCCmTJlCfHw88fHxXHvttUydOpWVK1fy2muvERQURK9evS54nZ988kmRIuFKTJ48mTNnzuDt7W3dNmPGDKZMmcLAgQMZM2YMe/bsYdq0aSQkJPDmm28W67zNmjXjyy+/BAqLtm+++cb6Z09PT5tkz8jI4K677qKgoIBRo0YRGBjIrFmzuP/++/nuu+8IDQ0975jvv/++SCH2b7fddhu333679c+vvvpqkdd+/vln1q9fT8eOHQHIzs7mp59+4sEHHyQ3N5ehQ4eSl5fHyy+/jLOzM9OnT2fQoEF89dVX3H777XTt2rXIeV9++WUAgoKCSExM5J577qFVq1a89dZbuLi4sHLlSj799FNq1qzJsGHDLjoO2dnZBAUF8d57710we3Gv8ejRozz++ON07dqVJ5980vqZuNi5/uuhhx5i4MCBPPnkk3zzzTc88cQTzJgxg+7du5OVlcXgwYPx8/Pj5ZdfxsvLi61bt/L+++/j5u
bGa6+9RlZWFg8++CDBwcFMmzaNLVu28PLLL1O7dm26detWrAwlNWjQIJYvX87bb79Njx49cHFx4fnnn6dVq1Y8/PDDxTrHzJkzmT59OuPGjeOWW26xS04RKX9UaIlIlbJx40Z69OiBs7MzH3/88XnfaL///vu4uLgwZ84c6zf7PXr04MYbb2TChAl88803LFq0iNDQUMaPH4+DgwOdO3fG3d2dl156iRUrVtCzZ08+/PBD2rdvz9ixYwFo3749nTt35syZM4wfP57mzZsDkJ6ezscff8wjjzyC2Wzmyy+/ZNiwYQwcOBCA6tWrc8cdd+Dr68ukSZNwdnamW7du7N+/nxkzZlyw0EpISODjjz+mWbNm7N69+4rGa+fOnXz//fc0adKE9PR0AM6cOcP06dO54447rNfXpUsXfH19GTt2LEOGDOGqq6667Lk9PT1p1aoVAKtWrQKw/tlWvvvuO+Lj4/nuu+9o0qQJAG3atKFfv35s3LjxvL//jIwMJk2adNGxCwoKKpLx3wVhly5dCAoKYtGiRdZC65dffiEzM5N+/fqxbds2YmJi+Pzzz2ndurU1y//+9z+mT5/OtGnTCAoKKnLef7/X6tWradKkCe+995719U6dOrFmzRrWr19/yUIrKysLb2/vi2Yv7jVGR0eTl5fHk08+SaNGjS57rv8aNGgQI0aMAKBr167ccsst/N///R/du3cnNjaWoKAg3n77berUqQNAhw4d2L59Oxs2bAAgPj6eFi1a8Pzzz1OnTh26dOnC/PnzWbVqld0KLZPJxPjx47npppuYOHEijo6OpKamMnv2bBwdHS97/BdffMHEiRN57bXXuO222+ySUUTKJy0dFJEqpWnTprz11lv4+PgwZsyY82Z9NmzYwNVXX13kG0cnJyduuOEGdu3aRUZGBm+88QaLFi3CwcGB/Px88vPzueaaa3BwcGDjxo3k5+cTHR1Nly5drOdwdXWlZcuWuLu7W4ssKPzmPDs7m3379rFv3z5ycnKssxpQ+I22q6srERERODs7Fzlu9+7dRZbqnfP222/Trl07rr766isaK4vFwrhx47jtttsIDw+3bt+6dSvZ2dn07NnTev35+fnWZYJr1qwpcp5/7/PfZXXFzVHaYzdv3kxISIi1yAJwd3fn559/LjJrc8706dPx8/PjrrvuKvF7OTg4cMstt7B8+XKysrKAwkKvU6dOBAUFERUVxbZt22jVqhUFBQXk5+fj7e1N586d2bhx42XP36VLF+bNm4erqysHDx7kt99+44MPPiA5ObnI8tMLSUhIwMvLq8TX9F/NmjXDycmJefPmER8fT25uLvn5+VgslmId/+/ZHJPJxP/+9z927NhBdnY2TZo0Yf78+QQHBxMbG8uKFSuYOXMmMTEx1utr2LAhH3zwAXXq1CE3N5eVK1eSlpZGgwYNiryP2Wwu8rm7UL5z+xQne506dXjmmWf47rvv+Prrrxk7dqy1GLyUP/74g1dffZV27doxYMCAy+4vIpWLZrREpErx9PTklltuoX79+tx111088cQTfPnll9afTKelpVG9evXzjqtevToWi4WzZ89SrVo1XF1dgcJvPP8tPT2d06dPU1BQgJ+fX5HXfH198fHxKbLt3NKrpKQka9H03+N8fHzw9fU977j8/Pwi965AYaH466+/8sMPP7B06dLiDMlFLVq0iNjYWD788EPefvtt6/bU1FSAi86gJCYmWn8fHx9/3hiVJseiRYswmUwEBATQtm1bHn/88fO+ub6Q1NRUAgICivU+sbGxzJ49m08++YTjx4+XKuutt97Khx9+yPLly+nQoQN//fUXkyZNsr7u4uICFN639e97dYozM2I2m3nnnXf4/PPPyczMpFatWkRERFg/i5cSHx9PcHBwKa6oqDp16jBx4kTeeecd6zLPc6Kioi57fM2aNYv8OSAgAIvFQnp6Om5ubnz66ad8+OGHpKamUr16dZo3b467u/t59z+mp6cTGRkJQI
0aNbjuuuuKvH7fffed997/zTd9+nSmT5+Oo6Mj1atXp0uXLjz++OMXbYxz/fXX89ZbbwHQuXPny14rwO7du+nRowd
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for i in clusters_range[1:]: \n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
" score = silhouette_score(data_scaled, labels)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Построение диаграммы значений силуэта\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициенты силуэта для разных k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 88,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта: 0.191\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAJzCAYAAAA4M0NGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5gkVbn48W917uk4OcedzTnvwgJLRgkKBkRZQEFActIrV+5Vr+IPFQzkJCJJEQREAclxgc05707OeXp6OnfX74/ZaaZ3etLuxN338zw87NTprjpdUzNTb533vEdRVVVFCCGEEEIIIUSfNGPdASGEEEIIIYQY7yRwEkIIIYQQQogBSOAkhBBCCCGEEAOQwEkIIYQQQgghBiCBkxBCCCGEEEIMQAInIYQQQgghhBiABE5CCCGEEEIIMQAJnIQQQgghhBBiABI4CSGEEEIIIcQAJHASQhy2VatWMXXq1Jj/Fi1axCWXXMLatWvHuntCiKPc1KlTue+++3pt37t3L8uXL+ekk06irKysz/ffd999TJ06ldmzZ+N2u+O+5q9//StTp07llFNOGa5uCyEmKAmchBBHZMaMGTz//PM8//zzPPfcc9x1113o9Xouv/xy9u3bN9bdE0IcY/bt28dll12G2WzmmWeeoaCgYMD3hEIh3nvvvbhtr7/++jD3UAgxUUngJIQ4IlarlXnz5jFv3jwWLlzIaaedxn333YdGo+Gll14a6+4JIY4hBw4c4NJLL8VisfDMM8+Qm5s7qPctWLCAN954o9f2+vp61q9fz/Tp04e7q0KICUgCJyHEsDObzRiNRhRFiW5btWoVq1atinndPffcw9SpU2MCrGeeeYZTTz2V+fPnc/HFF7N3714Ann32WaZOnUppaWnMPv75z38yffp0amtrAXjnnXf49re/zfz585k1axZnnXUWzz77bMx7fvzjH/dKMez+r6qqKvqaQ1Nz/va3v/VKDXr99df58pe/zLx587jgggtYv359zHsG6s+aNWuYOnUqa9asiXnfoedrMOcvEAjw61//mpNOOonp06fHfK7+gthD933nnXcye/ZsPvroI+CLdKZ4//Xs92DOfUNDA//1X//F8uXLo9/jTZs2AXDKKacM+H1Zv349F198MXPnzmXJkiX813/9Fy0tLdH9v/TSS0ydOpUtW7Zw/vnnM2fOHM4991z+85//xPSjo6OD//f//h+nnXYas2fP5pxzzuHFF1+MeU3P/kybNo3Fixdz/fXX09ra2ue5BCgpKeG6665jyZIlLF68mKuuuooDBw70+fr+zm/P71tZWRk33HADxx9/PPPmzWPVqlVs2LAh2l5VVRV936uvvhpzjPfffz/a1tPrr7/OBRdcwPz58zn++OP53//9X9rb23v1rad41+Ipp5zCj3/84z6/PlR3X3t+vo0bN3LhhRcye/Zsjj/+eH7xi1/g8/n63MehDhw4wCWXXILNZuOZZ54hKytr0O/98pe/zCeffNIrXe8///kPhYWFTJs2rdd73nnnHS644IJof3/5y1/i8Xh6vWYwP/+fffYZ3/ve95g7dy7HH388v/3tbwmHw9HXrV69mm9+85vMnz+fxYsX84Mf/KDfa0oIMTIkcBJCHBFVVQmFQoRCIYLBII2Njdxzzz0EAgG+9rWv9fm+iooKnnzyyZhtb731Fr/4xS84++yzeeCBBwiHw1x99dUEAgHOPfdcjEYj//znP2Pe88orr7B8+XIyMzP54IMPuPbaa5k5cyYPPvgg9913H7m5ufzf//0fW7ZsiXlfampqNMXw+eef5wc/+EG/n7O9vZ0//OEPMdu2bt3Kbbfdxrx583jooYfIzMzk6quvpqmpCWBI/RmqeOfvscce4y9/+QuXXnopf/nLX3j++ee5//77h7TfrVu38te//pU//OEPzJ8/P6at5/n63//935i2wXzWzs5OLrroItasWcMPf/hD7r//foxGI9/73vcoKy
vj/vvvj+nzD37wg+jx0tLSWLduHZdddhkmk4k//OEP/Pd//zdr167lkksu6XWDfdVVV3Hqqady//33U1hYyE033cSHH34IgM/n49vf/jb/+te/uOKKK3jwwQdZuHAhP/nJT3j44Ydj9nPSSSfx/PPP8/TTT3PrrbeyevVq7rzzzj7PX319PRdeeCFlZWX87Gc/47e//S1NTU1ceumltLW19Xvue57fQ79v+/fv54ILLqCqqoo77riDu+++G0VRuPTSS3vNJ7RYLL3Szl5//XU0mtg/+Q8++CC33HIL8+bN49577+Xaa6/lzTffZNWqVUMKWIZDbW0tl19+OYmJidx///3ccMMN/POf/+RHP/rRoN5fUlLCpZdeitVq5ZlnniE9PX1Ixz/zzDMJh8Nxz9vZZ5/d6/X/+te/uPbaaykqKuKBBx7guuuu49VXX+Waa65BVVVgaD//t912GwsXLuThhx/mnHPO4fHHH+eFF14AoLKykmuuuYZZs2bx0EMPceedd1JaWsqVV15JJBIZ0ucUQhwZ3Vh3QAgxsa1bt46ZM2f22n7LLbcwadKkPt/3q1/9ismTJ7Njx47otpaWFr797W9zyy23AF0jKN1P66dPn87pp5/Oq6++yo033oiiKNTV1fH555/z29/+Fui6uTz//PP5yU9+Et3n/PnzWbp0KWvWrGHu3LnR7QaDgXnz5kW/Likp6fdz3nvvvWRlZcWMNtTV1XHmmWfyy1/+Eo1GQ0pKCueccw6bN2/mtNNOG1J/hire+du6dSvTpk3je9/7XnRb90jNYHWP+J166qm92nqeL7/fH9M2mM/68ssvU11dzcsvvxxNfVqwYAFf/epXWbduHd/4xjdi+pyXlxdzzHvuuYfCwkIeeeQRtFotAHPnzuXss8/mH//4B9/5zneir121ahXXXnstACeccALnn38+DzzwACeddBIvvfQSe/fu5W9/+1s0ODzhhBMIhUI8+OCDfOtb38LpdAKQlJQU7cPixYv59NNPY875oZ588kkCgQB//vOfSU1NBWDatGlcdNFFbNmyhZNOOqnP9/b8rId+3+6//34MBgNPPfUUVqsVgJUrV3LOOefwm9/8Jma07MQTT+Tjjz8mEAhgMBjw+/28++67LF68ODpC2N7ezkMPPcQ3v/nNmCB4ypQpfOc73+l1PkfaY489RmJiIg888ED0e6vRaLjjjjvYs2dPr1GvnsrKyrjkkktoamoiGAweVjCRkpLC4sWLeeONNzjvvPMAqK6uZsuWLfzmN7/hoYceir5WVVXuvvtuTjjhBO6+++7o9oKCAi677DI+/PBDVq5cOaSf/2984xvR63X58uW88847fPDBB3zrW99i69at+Hw+rrrqqmhAmJGRwbvvvovH44leD0KIkSeBkxDiiMycOZOf//znQNcNhcvl4qOPPuL3v/89Ho+Hm2++udd7PvroIz799FMee+wxLrnkkuj2b33rWwBEIhE8Hg9vvfUWJpOJ7OxsAL7+9a/z73//m/Xr17N48WJeeeUVLBYLp59+OgBXXHEF0DWyUVpaSkVFBdu2bQO6grDDtXfv3uioQ3cfAc444wzOOOMMVFXF4/HwxhtvoNFoKCwsHNH+9HX+Zs+ezaOPPsqbb77JsmXLsFgsg76JVFWVTZs28frrr/cayRqMwXzWDRs2kJOTEzNfxGw28+abbw64f6/Xy5YtW7j88sujo5wAubm5TJo0idWrV8fc6J9//vnRfyuKwumnn859992Hz+dj7dq1ZGdn9xpRO++883jxxRdjApzuY0UiEXbv3s2GDRs47rjj+uznhg0bmDdvXjRogq6b3Pfff3/Az9iftWvXcvLJJ8fcJOt0uujobGdnZ3T7smXL+Oijj1izZg0nnHACH330EVarlUWLFkUDp82bNxMIBDjnnHNijrNo0SKys7NZu3btEQdO3edOo9H0Gu3qFolECIVCrF+/nhUrVkSDJugKAKHrnPYXOP373/9m1qxZ/P
73v+d73/seP/zhD3nyySdjjhkOh6MjQdB1TfQ8FnSl6/3yl7/E7XZjtVp57bXXmDlzJvn5+TGvKykpoa6ujquuuip
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
|
|||
|
"df_clusters = kmeans.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg = silhouette_score(df_scaled, df_clusters)\n",
|
|||
|
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью K-Means')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
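"Средний коэффициент силуэта, равный 0.191, близок к нулю и указывает на слабое разделение кластеров: кластеры заметно перекрываются, поэтому качество кластеризации следует считать низким, а не умеренно хорошим."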
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|