PredictiveAnalytics/lab5.ipynb

677 lines
214 KiB
Plaintext
Raw Permalink Normal View History

2025-01-08 18:47:28 +04:00
{
"cells": [
{
"attachments": {},
"cell_type": "markdown",
"metadata": {},
"source": [
"##### Кластеризация.\n",
"Распределение студентов по типам учебных заведений"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Education Level</th>\n",
" <th>Institution Type</th>\n",
" <th>Gender</th>\n",
" <th>Age</th>\n",
" <th>Device</th>\n",
" <th>IT Student</th>\n",
" <th>Location</th>\n",
" <th>Financial Condition</th>\n",
" <th>Internet Type</th>\n",
" <th>Network Type</th>\n",
" <th>Flexibility Level</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>University</td>\n",
" <td>Private</td>\n",
" <td>Male</td>\n",
" <td>23</td>\n",
" <td>Tab</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Wifi</td>\n",
" <td>4G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>University</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>23</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Mobile Data</td>\n",
" <td>4G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>College</td>\n",
" <td>Public</td>\n",
" <td>Female</td>\n",
" <td>18</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Wifi</td>\n",
" <td>4G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>11</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Mobile Data</td>\n",
" <td>4G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>18</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Poor</td>\n",
" <td>Mobile Data</td>\n",
" <td>3G</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1200</th>\n",
" <td>College</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>18</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Wifi</td>\n",
" <td>4G</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1201</th>\n",
" <td>College</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>18</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Rural</td>\n",
" <td>Mid</td>\n",
" <td>Wifi</td>\n",
" <td>4G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1202</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>Male</td>\n",
" <td>11</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Mobile Data</td>\n",
" <td>3G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1203</th>\n",
" <td>College</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>18</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Rural</td>\n",
" <td>Mid</td>\n",
" <td>Wifi</td>\n",
" <td>4G</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1204</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>Female</td>\n",
" <td>11</td>\n",
" <td>Mobile</td>\n",
" <td>No</td>\n",
" <td>Town</td>\n",
" <td>Poor</td>\n",
" <td>Mobile Data</td>\n",
" <td>3G</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1205 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" Education Level Institution Type Gender Age Device IT Student \\\n",
"0 University Private Male 23 Tab No \n",
"1 University Private Female 23 Mobile No \n",
"2 College Public Female 18 Mobile No \n",
"3 School Private Female 11 Mobile No \n",
"4 School Private Female 18 Mobile No \n",
"... ... ... ... ... ... ... \n",
"1200 College Private Female 18 Mobile No \n",
"1201 College Private Female 18 Mobile No \n",
"1202 School Private Male 11 Mobile No \n",
"1203 College Private Female 18 Mobile No \n",
"1204 School Private Female 11 Mobile No \n",
"\n",
" Location Financial Condition Internet Type Network Type Flexibility Level \n",
"0 Town Mid Wifi 4G Moderate \n",
"1 Town Mid Mobile Data 4G Moderate \n",
"2 Town Mid Wifi 4G Moderate \n",
"3 Town Mid Mobile Data 4G Moderate \n",
"4 Town Poor Mobile Data 3G Low \n",
"... ... ... ... ... ... \n",
"1200 Town Mid Wifi 4G Low \n",
"1201 Rural Mid Wifi 4G Moderate \n",
"1202 Town Mid Mobile Data 3G Moderate \n",
"1203 Rural Mid Wifi 4G Low \n",
"1204 Town Poor Mobile Data 3G Moderate \n",
"\n",
"[1205 rows x 11 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Education Level</th>\n",
" <th>Institution Type</th>\n",
" <th>Age</th>\n",
" <th>Location</th>\n",
" <th>Financial Condition</th>\n",
" <th>Flexibility Level</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>University</td>\n",
" <td>Private</td>\n",
" <td>23</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>University</td>\n",
" <td>Private</td>\n",
" <td>23</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>College</td>\n",
" <td>Public</td>\n",
" <td>18</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>11</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>18</td>\n",
" <td>Town</td>\n",
" <td>Poor</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1200</th>\n",
" <td>College</td>\n",
" <td>Private</td>\n",
" <td>18</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1201</th>\n",
" <td>College</td>\n",
" <td>Private</td>\n",
" <td>18</td>\n",
" <td>Rural</td>\n",
" <td>Mid</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1202</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>11</td>\n",
" <td>Town</td>\n",
" <td>Mid</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1203</th>\n",
" <td>College</td>\n",
" <td>Private</td>\n",
" <td>18</td>\n",
" <td>Rural</td>\n",
" <td>Mid</td>\n",
" <td>Low</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1204</th>\n",
" <td>School</td>\n",
" <td>Private</td>\n",
" <td>11</td>\n",
" <td>Town</td>\n",
" <td>Poor</td>\n",
" <td>Moderate</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1205 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" Education Level Institution Type Age Location Financial Condition \\\n",
"0 University Private 23 Town Mid \n",
"1 University Private 23 Town Mid \n",
"2 College Public 18 Town Mid \n",
"3 School Private 11 Town Mid \n",
"4 School Private 18 Town Poor \n",
"... ... ... ... ... ... \n",
"1200 College Private 18 Town Mid \n",
"1201 College Private 18 Rural Mid \n",
"1202 School Private 11 Town Mid \n",
"1203 College Private 18 Rural Mid \n",
"1204 School Private 11 Town Poor \n",
"\n",
" Flexibility Level \n",
"0 Moderate \n",
"1 Moderate \n",
"2 Moderate \n",
"3 Moderate \n",
"4 Low \n",
"... ... \n",
"1200 Low \n",
"1201 Moderate \n",
"1202 Moderate \n",
"1203 Low \n",
"1204 Moderate \n",
"\n",
"[1205 rows x 6 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Import the required libraries\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.cluster import KMeans, AgglomerativeClustering\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.compose import ColumnTransformer\n",
"\n",
"# Load the raw dataset and show it before any column is removed\n",
"df = pd.read_csv(\"data/students_education.csv\")\n",
"display(df)\n",
"\n",
"# Drop the columns that are not used in this clustering experiment.\n",
"# The cell re-reads the CSV on every run, so reassigning `df` stays idempotent.\n",
"df = df.drop(['Gender', 'IT Student', 'Device', 'Internet Type','Network Type'], axis=1)\n",
"\n",
"# Remove the target variable 'Institution Type' to obtain the feature matrix\n",
"X = df.drop(['Institution Type'], axis=1)\n",
"\n",
"# Feature columns grouped by type for the preprocessing step\n",
"categorical_features = ['Education Level','Location','Financial Condition','Flexibility Level']\n",
"numerical_features = ['Age']\n",
"display(df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAr4AAAIjCAYAAADlfxjoAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAABsR0lEQVR4nO3deXhU5d3/8c/sWcjKkoCAYRFwwQUo/AAFFQQRW/UR14qgItbW5yl1BXe0CGptbatVKSK41kpFWyiCIhQFBBEpqOwmhC1sIfsymTnn90fMSCDLJExmfb+ua66rc849Z76ZxvDJnft8b4tpmqYAAACAKGcNdQEAAABAMBB8AQAAEBMIvgAAAIgJBF8AAADEBIIvAAAAYgLBFwAAADGB4AsAAICYQPAFAABATCD4AgAAICYQfAEAABATCL4AAACICQRfABFr3rx5slgsdT7OOuusUJeHFvb444/X+v88ISFBZ5xxhh5++GEVFRWdMH7nzp2644471LVrV8XFxSk5OVmDBw/WH//4R5WXl58w3uv1qkOHDrJYLFq0aFEwviQALcwe6gIA4GQ9+OCDOv30033Pp02bFsJqEGwvvfSSWrVqpZKSEi1ZskTTpk3Tp59+qpUrV8pisUiSFi5cqGuuuUYul0s333yzzjrrLLndbn3++ee677779O2332rmzJm1rvvpp59q//79ysrK0ltvvaVRo0aF4ssDEEAEXwAR75JLLtGFF17oez5r1iwdPnw4dAUhqMaMGaM2bdpIkn7xi1/o6quv1vvvv68vvvhCAwcOVHZ2tq6//nqdeuqp+vTTT9W+fXvfa3/1q19px44dWrhw4QnXffPNN9WnTx+NGzdODz74oEpLS5WYmBi0rwtA4LHUAUDEcrvdkiSrtfEfZfn5+br33nvVu3dvtWrVSsnJyRo1apT++9//1hq3fPlyWSwWzZs374RrtGrVSuPHj/c9nzNnjiwWi9atW1dr3OHDh2WxWPT444/7jtX8Wb6hQJ6VlVXr+pJUUFCgSZMmqVOnTnK5XOrevbuefvppGYbR6NeclZVV71KQmpnQGh6PR08++aS6desml8ulrKwsPfjgg6qsrDzhuosWLdLQoUOVlJSk5ORk/eQnP9Hbb79da0xOTo5f72sYhp5//nmdeeaZiouLU0ZGhu644w4dPXq00a+vPhdffLEkKTs7W5L0zDPPqKSkRK+++mqt0Fuje/fu+vWvf13rWHl5uebPn6/rr79e1157rcrLy/Xhhx82uyYA4YEZXwARqyb4ulyuRsd+//33+uCDD3TNNdeoS5cuOnDggF555RUNHTpU3333nTp06NDS5TZZWVmZhg4dqr179+qOO+5Q586dtWrVKk2ZMkX79+/X888/3+g1zj33XN1zzz21jr3++uv6+OOPax2bMGGC5s6dqzFjxuiee+7RmjVrNH36dG3evFnz58/3jZszZ45uvfVWnXnmmZoyZYpSU1P19ddf66OPPtKNN954wvtPnDhRF1xwgSTp/fffr3UtSbrjjjs0Z84c3XLLLfq///s/ZWdn64UXXtDXX3+tlStXyuFw+Ptx+ezcuVOS1Lp1a0nSv/71L3Xt2lWDBg3y+xr//Oc/VVJSouuvv16ZmZm68MIL9dZbb9X5NQKIHARfABGrsLBQkhQfH9/o2N69e2vbtm21ZofHjh2rXr166dVXX9UjjzzSYnU21+9//3vt3LlTX3/9tU477TRJ1UGxQ4cOevbZZ3XPPfeoU6dODV7jlFNO0U033VTr2BdffFEr+P73v//V3LlzNWHCBP31r3+VJP3yl79Uu3bt9Lvf/U7Lli3TRRddpMLCQv3f//2f+vfvr+XLlysuLs53DdM0a72Hx+ORJA0ePNj3/jt27KgVfD///HPNmjXrhEB50UUX6dJLL9V7773nV9DMz8+XJN8a37/85S/KyMjQBRdcoKKiIu3du1dXXHFFo9c51ptvvqlBgwb5Pt/rr7
9ev/zlL3Xo0CG1bdu2SdcCED5Y6gAgYh05ckSS/AoiLpfLF3q9Xq+OHDmiVq1aqWfPnlq/fv0J44uLi3X48OFaj/oUFhbWGlcTxOqSn5+vw4cPq7S0tNGa33vvPV1wwQVKS0urdf3hw4fL6/VqxYoVjV7DH//+978lSXfffXet4zUzxTXrXz/++GMVFxdr8uTJtUKvpBOWMPgzG//ee+8pJSVFl1xySa2vr2/fvmrVqpWWLVvmV/09e/ZU27Zt1aVLF91xxx3q3r27Fi5cqISEBF93h6SkJL+uJVV/Xy1evFg33HCD79jVV18ti8Wiv//9735fB0D4YcYXQMTatWuX7Ha7X8HXMAz98Y9/1F/+8hdlZ2fL6/X6ztX8SfxYt956q991DB8+3O+xPXv29P3vdu3a6fbbb9fUqVNls9lOGLt9+3Zt3Lix3q/v4MGDfr9vQ3bt2iWr1aru3bvXOp6ZmanU1FTt2rVL0o9LCPxpFVdQUCCpel10fbZv367CwkK1a9euzvP+fn3/+Mc/lJycLIfDoY4dO6pbt26+c8nJyZKqf5Hx17vvvquqqiqdd9552rFjh+/4gAED9NZbb+lXv/qV39cCEF4IvgAi1tatW9W1a1fZ7Y3/KHvqqaf0yCOP6NZbb9WTTz6p9PR0Wa1WTZo0qc4bxR599FHf2tQaP/3pT+u89osvvqgePXr4nhcVFenqq6+uc2xNSCsrK9P8+fM1bdo0JScn6/777z9hrGEYuuSSS+o8J6nWewbC8bO2JyMvL09SdXiuj2EYateund566606z/u7pGDIkCG+rg7HS05OVocOHfTNN9/4dS1JvnoGDx5c5/nvv/9eXbt29ft6AMIHwRdARKqsrNSGDRt05ZVX+jV+3rx5uuiii/Tqq6/WOl5QUFBnaOrdu/cJM7l1zcpKUv/+/dWvXz/f84aWRRwb0n72s59p5cqV+uijj+oMt926dVNJSUmTZpSb49RTT5VhGNq+fXutfsgHDhxQQUGBTj31VF89kvTNN9+cMDt8vO+++04Wi6XWDPfxunXrpk8++USDBw/2a512c11++eWaOXOmVq9erYEDBzY4Njs7W6tWrdJdd92loUOH1jpnGIbGjh2rt99+Ww8//HCL1Qug5bDGF0BEevvtt1VZWalhw4b5Nd5ms51wA9Z7772nvXv3tkR5fjFNU6Zp1huor732Wq1evVqLFy8+4VxBQYHvBrKTddlll0nSCV0ifv/730uSRo8eLUkaMWKEkpKSNH36dFVUVNQae+xn6/F49I9//EP9+/dvcKnDtddeK6/XqyeffPKEcx6Px7dc4mTdf//9SkxM1IQJE3TgwIETzu/cuVN//OMfJf0423v//fdrzJgxtR7XXnuthg4dWu8MNYDwx4wvgIhSWlqqP//5z3riiSd8YfbNN9+sNebAgQMqKSnRm2++qUsuuUQZGRm6/PLL9cQTT+iWW27RoEGDtGnTJr311ltB/5P1p59+Wmupw44dOzRp0qQ6x95333365z//qcsvv1zjx49X3759VVpaqk2bNmnevHnKycmp90/8TXHOOedo3LhxmjlzpgoKCjR06FCtXbtWc+fO1ZVXXqmLLrpIUvWygT/84Q+aMGGCfvKTn+jGG29UWlqa/vvf/6qsrExz587VJ598okceeUQbN27Uv/71rwbfd+jQobrjjjs0ffp0bdiwQSNGjJDD4dD27dv13nvv6Y9//KPGjBlz0l9ft27d9Pbbb+u6667T6aefXmvntlWrVum9997z9U9+6623dO6559bbLeNnP/uZ/vd//1fr169Xnz59Tro2AMFF8AUQUQ4dOqQpU6b4nt9xxx31jh07dqyWLVumjIwM385bb7/9tt5991316dNHCxcu1OTJk4NRts91110nqboFW5cuXfSHP/yh3pulEhIS9J///EdPPfWU3nvvPb3++utKTk5Wjx49NHXqVKWkpASsrlmzZqlr166aM2eO5s+fr8zMTE2ZMk
WPPfZYrXG33Xab2rVrpxkzZujJJ5+Uw+FQr1699Jvf/EZSdf9bp9Opf//73xo5cmSj7/vyyy+rb9++euWVV/Tggw/
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Preprocessing: standardise the numerical column and one-hot encode the categorical ones.\n",
"# OneHotEncoder produces sparse output by default, and ColumnTransformer then returns a\n",
"# sparse or dense result depending on the data density. sparse_threshold=0 always yields\n",
"# a dense array, so later estimators that do not accept sparse matrices keep working.\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        ('num', StandardScaler(), numerical_features),\n",
"        ('cat', OneHotEncoder(), categorical_features)\n",
"    ],\n",
"    sparse_threshold=0\n",
")\n",
"\n",
"# Fit the preprocessor and transform the feature matrix\n",
"X_scaled = preprocessor.fit_transform(X)\n",
"\n",
"# Reduce to two principal components (used for visualisation)\n",
"pca = PCA(n_components=2)\n",
"X_pca = pca.fit_transform(X_scaled)\n",
"\n",
"# Scatter plot of the data in the PCA plane\n",
"plt.figure(figsize=(8, 6))\n",
"plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)\n",
"plt.title('Данные после PCA')\n",
"plt.xlabel('PC1')\n",
"plt.ylabel('PC2')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsAAAAIjCAYAAAAN/63DAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAABp/ElEQVR4nO3dd3hUZd7G8XvSQyohpJEAwSAQQkcwoiiKBEQExYJS7fiiLsi6yOqKruuy4q5dUVdXbNgFBZWASBGN9AAJRUowQCoJqZA65/0DMzrSQpjkJJnv57rm2jfnPHPmdyb7wr0Pv/M8FsMwDAEAAABOwsXsAgAAAICGRAAGAACAUyEAAwAAwKkQgAEAAOBUCMAAAABwKgRgAAAAOBUCMAAAAJwKARgAAABOhQAMAAAAp0IABgAAgFMhAAMAAMCpEIAB4AzmzZsni8Uii8WiNWvWnHDeMAxFRUXJYrHo6quvNqFCAMDZIAADQC15eXlp/vz5JxxftWqVDh48KE9PTxOqAgCcLQIwANTSVVddpU8++URVVVV2x+fPn68+ffooLCzMpMoAAGeDAAwAtXTzzTcrLy9Py5Ytsx2rqKjQp59+qltuueWk77FarXruuefUtWtXeXl5KTQ0VHfffbeOHDliG9O+fXtbi8XJXu3bt7eNLS0t1fTp0xUVFSVPT0916tRJ//73v2UYxgmfvXLlylNes7YmTZp00vc/9thjduO+++47XXLJJfLx8VFgYKBGjhypHTt22I157LHHTvjsFStWyNPTU5MnT7Ybc7rXypUrbe+fO3eu4uLi1KJFC7sxn376aa3vEYDzcTO7AABoKtq3b6/4+Hh98MEHGjZsmCTpm2++UWFhocaMGaMXXnjhhPfcfffdmjdvnm699Vbdf//9SktL00svvaTNmzfrhx9+kLu7u5577jmVlJRIknbs2KF//vOf+utf/6ouXbpIknx9fSUd7zW+5pprtGLFCt1+++3q2bOnEhMT9eCDD+rQoUN69tlnT1r3/fffrwsuuECS9M4779gF+NoIDg62u/b48ePtzn/77bcaNmyYOnTooMcee0zHjh3Tiy++qAEDBmjTpk12Af73tmzZolGjRumqq67Syy+/LEm67rrrFBMTYxszbdo0denSRXfddZftWM338tFHH+n//u//dNlll+m+++6Tj4+P7fsDgNMyAACn9dZbbxmSjPXr1xsvvfSS4efnZxw9etQwDMO44YYbjEGDBhmGYRjt2rUzhg8fbnvf999/b0gy3n//fbvrLVmy5KTHDcMwVqxYYUgyVqxYccK5hQsXGpKMf/zjH3bHr7/+esNisRh79uyxO7506VJDkvHpp5/ajk2ZMsU4mz/6x44da0RHR9sdk2TMmjXL9nPPnj2NkJAQIy8vz3Zsy5YthouLizFhwgTbsVmzZtk+e//+/UZ4eLhx8cUXG8eOHTvl57dr186YOHHiSc/dfPPNRmBgoN37a76/Tz75pNb3CMD50AIBAGfhxhtv1LFjx7R48WIVFxdr8eLFp2x/+OSTTxQQEKArr7xShw8ftr369OkjX19frVix4qw+++uvv5arq6vuv/9+u+PTp0+XYRj65ptv7I6XlZVJOv7wXl1VVFSc9uG+zMxMJScna9KkSQoKCrId7969u6688kp9/fXXJ7wnLy9PCQkJ8vPz05dfflnn+oqLi9WiRYtzuj8AzokADABnoXXr1ho8eLDmz5+vzz//XNXV1br++utPOnb37t0qLCxUSEiIWrdubfcqKSlRTk7OWX32L7/8ooiICPn5+dkdr2kJ+OWXX+yOHz58WJIUEBBwVp/zewUFBbYWjFPVJEmdOnU64VyXLl10+PBhlZaW2h2/+uqrtWvXLhUUFJy0d7m24uPjlZGRoccee0zp6ek6fPiwCgsL63w9AM6DHmAAOEu33HKL7rzzTmVlZWnYsGEKDAw86Tir1aqQkBC9//77Jz3funXreqxS2r9/vy
Sdsge3NrKystSuXTvHFPSrnTt36ptvvtGNN96o6dOn66233qrTdaZNm6Zdu3bpiSee0OOPP+7QGgE0b8wAA8BZuvbaa+Xi4qKffvrplO0PknTeeecpLy9PAwYM0ODBg0949ejR46w+t127dsrIyFBxcbHd8Z07d9rO/96GDRsUFhamyMjIs/qcGpWVldqzZ49thvlUNUnSrl27Tji3c+dOBQcHy8fHx+74l19+qaFDh2r27NmaN2+eli9fXqf6vL299d///lddu3bVxRdfrGXLlunf//53na4FwLkQgAHgLPn6+mru3Ll67LHHNGLEiFOOu/HGG1VdXa0nnnjihHNVVVUqKCg4q8+96qqrVF1drZdeesnu+LPPPiuLxWJbmUI63me7YsUKXXPNNWf1Gb/3xRdf6NixY7r88stPOSY8PFw9e/bU22+/bXc/KSkpWrp0qa666qoT3nPJJZdIkv7v//5PF110ke6++24dO3asTjXOnDlT6enpeu+99zR48GD16dOnTtcB4FxogQCAOpg4ceIZx1x66aW6++67NXv2bCUnJ2vIkCFyd3fX7t279cknn+j5558/Zf/wyYwYMUKDBg3Sww8/rP3796tHjx5aunSpvvjiC02dOlXnnXeeJCkpKUkPPfSQjh07ptatW+u9996zXePnn3+WJL333nu69tprT5idlaSjR49q1qxZeuWVV3TRRRdpyJAhp63r6aef1rBhwxQfH6/bb7/dtgxaQEDACesF/57FYtEbb7yhnj17atasWZozZ06tvwvp+PJrzz77rN59912Ht2kAaN4IwABQj1599VX16dNHr732mv7617/Kzc1N7du317hx4zRgwICzupaLi4u+/PJLPfroo/roo4/01ltvqX379nr66ac1ffp027jXXntNq1evliQ9+eSTJ73W+PHjlZaWdtIAfOTIEX300Ue666679Pjjj8vF5fT/WDh48GAtWbJEs2bN0qOPPip3d3ddeumleuqppxQdHX3a93bp0kUPP/ywnnjiCd18883q1avXmb4GScdnuCdOnKgxY8Zo7NixtXoPANSwGOfyCC4AoNGZNGmSJGnevHmnHGOxWJSWlnZOD8gBQFNFDzAAAACcCi0QANDMXHTRRWccM3bs2NOu7wsAzRktEAAAAHAqtEAAAADAqRCAAQAA4FToAa4Fq9WqjIwM+fn5yWKxmF0OAAAA/sAwDBUXFysiIuKMyzcSgGshIyNDUVFRZpcBAACAMzhw4MAZt4AnANeCn5+fpONfqL+/v8nVAAAA4I+KiooUFRVly22nQwCuhZq2B39/fwIwAABAI1abdlUeggMAAIBTIQADAADAqRCAAQAA4FQIwAAAAHAqBGAAAAA4FQIwAAAAnAoBGAAAAE6FAAwAAACnQgAGAACAUyEAAwAAwKkQgAEAAOBUCMAAAABwKgRgAAAAOBU3swvAiaqthtal5SunuEwhfl7qFx0kVxeL2WUBAAA0CwTgRmZJSqYeX7RdmYVltmPhAV6aNSJWQ+PCTawMAACgeaAFohFZkpKpe97bZBd+JSmrsEz3vLdJS1IyTaoMAACg+SAANxLVVkOPL9ou4yTnao49vmi7qq0nGwEAAIDaIgA3EuvS8k+Y+f09Q1JmYZnWpeU3XFEAAADNEAG4kcgpPnX4rcs4AAAAnBwBuJEI8fNy6DgAAACcHAG4kegXHaTwAC+darEzi46vBtEvOqghywIAAGh2CMCNhKuLRbNGxErSCSG45udZI2JZDxgAAOAcEYAbkaFx4Zo7rrfCAuzbHFr5emjuuN6sAwwAAOAAbITRyAyNC9eVsWFal5avp5bsUPKBQt3QN5LwCwAA4CDMADdCri4WxZ/XSrcOiJYkLU3NNrkiAACA5sPUADx37lx1795d/v7+8vf3V3x8vL755hvb+csuu0wWi8XuNXnyZLtrpKena/jw4WrRooVCQkL04IMPqqqqym7MypUr1bt3b3l6eiomJkbz5s1riN
s7Z4M6h8jd1aK9uaXak1NsdjkAAADNgqkBODIyUv/617+0ceNGbdiwQZdffrlGjhyp1NRU25g777xTmZmZttecOXN
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Elbow method: fit K-Means for k = 1..10 and record the inertia of every fit\n",
"cluster_counts = range(1, 11)\n",
"inertia = [\n",
"    KMeans(n_clusters=k, random_state=42).fit(X_scaled).inertia_\n",
"    for k in cluster_counts\n",
"]\n",
"\n",
"# Plot inertia against the number of clusters\n",
"plt.figure(figsize=(8, 6))\n",
"plt.plot(cluster_counts, inertia, marker='o')\n",
"plt.title('Метод локтя')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Инерция')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArwAAAIjCAYAAADhisjVAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAACCxklEQVR4nOzdd1xV9f8H8Ne9l3HZCMgUGaIgoqIyNLeSIwdqw8wEMW24oyzN1NRvkdXPTDMtc+QobbgrHORORUFUHKiIimxEtqx7z+8P8yYBymWdy+X1fDzOI/mccV/naPL28D6fIxEEQQARERERkZaSih2AiIiIiKg+seAlIiIiIq3GgpeIiIiItBoLXiIiIiLSaix4iYiIiEirseAlIiIiIq3GgpeIiIiItBoLXiIiIiLSaix4iYiIiEirseAlIiIiIq3GgpeI6t2GDRsgkUhw9uzZCuvWrFkDiUSCESNGQKFQNEieoUOHwtnZWe39pk6dColEUveBiIioXrHgJSLR7NixA2+99RZ69uyJrVu3QiaTiR2JiIi0EAteIhLF4cOHMWbMGHh6emLPnj2Qy+ViRyIiIi3FgpeIGlxMTAwCAwNhZ2eHffv2wczMrMI2v/zyC7p06QIDAwNYWVnh1VdfRVJSkmp9UlISxowZAwcHB+jr68PV1RXvvfce8vLyKhxr06ZNcHR0hLm5OcLCwlTj27Ztg729PaysrLBkyZIK++3btw9t2rSBsbExpk+fDkEQADws1lu1agVTU1OEhoaWa8U4fPgwJBIJDh8+XO5YQ4YMgUQiwUcffaQa++ijjyCRSJCZmVlu27Nnz0IikWDDhg2qsVu3blUYA4ApU6ZAIpFg/Pjx5cazs7Mxc+ZMODo6Ql9fH25ubliyZAmUSmWFY37xxRcVzt3Lywt9+vQpd05PWh4/r8oUFRXho48+Qps2bSCXy2FnZ4dRo0YhPj6+RucHAH369Kk0y6NjLFiwALq6usjIyKiw7+uvvw5zc3MUFRXh2LFjCAgIgJWVFQwMDNCpUyesWrVK9fv9pM96fHlk/fr16NevH6ytraGvrw9PT0+sWrXqideHiOqXjtgBiKhpiY+Px6BBg6Cvr499+/bBzs6uwjYbNmxASEgIfH19ERYWhrS0NHz11Vc4ceIEzp07B3Nzc8THxyMtLQ3Tpk1Ds2bNcOnSJSxfvhwRERE4fvw4DAwMAAAnTpxAcHAwnnnmGYwZMwabNm3CzZs38eDBAyxatAgffPAB9u/fj9mzZ6Nly5YYM2YMAODmzZsYMWIE3Nzc8MknnyA8PFzVgzxlyhRMmzYN586dw5dffonmzZtjzpw5VZ7z0aNH8ccff9T5tbxx4wbWrFlTYbywsBC9e/dGUlIS3njjDbRs2RJ///035syZg5SUFCxbtkytz2nbti02bdqk+vq7777DlStX8OWXX6rGOnToUOX+CoUCQ4cORUREBF5++WXMmDEDeXl5OHDgAGJjY9GqVSu1zu9xHh4emDt3LgAgMzMTb7/9tmrduHHjsGjRImzbtg1Tp05VjZeUlODXX3/F888/D7lcjr///hvW1tb48MMPIZPJcOTIEUyePBkXLlxQFapz587FxIkTy33O66+/jp49e1bItGrVKrRr1w7Dhw+Hjo4O9uzZg8mTJ0OpVGLKlClPPB8iqicCEVE9W79+vQBA2Lt3r9CqVSsBgDBgwIBKty0pKRGsra0FLy8v4cGDB6rxvXv3CgCE+fPnV/k5Bw4cEAAIixYtUo0NHz5ccHFxEYqKigRBEIS8vDzBxcVFMDQ0FG7evCkIgiAolUqhe/fuQseOHVX7TZ8+XTAxMREyMzMFQRCE0tJSoWvXrgIA4fTp06rtxowZI1hbW6uOf+jQIQGAcOjQIdU2/v7+wuDBgwUAwoIFC1TjCxYsEAAIGRkZ5c7jzJkzAgBh/fr1qrGEhIQKYy+99JLg5eUlODo6CsHBwarxxYsXC0ZGRsK1a9fKHX
f27NmCTCYT7ty5U+6Yn3/+eYVr2a5dO6F3794VxgVBEIKDgwUnJ6dK11Vm3bp1AgBh6dKlFdYplUq1z++R7t27C3379lV9XdkxunXrJvj7+5fbb/v27RV+j/5r7ty5AgDh6NGjFdZV9jmPKywsrDA2cOBAwdXVtcrPI6L6xZYGImow48ePR2JiIl555RXs378fv/zyS4Vtzp49i/T0dEyePLlcX++QIUPg4eGB33//XTVWWlqKzMxM1eLt7Q0fH59yx42IiMBzzz0HfX19AICxsTE8PT3RvHlzuLi4AIBqlojz58/j3r17qv169eoFS0tLAICOjg66dOkCAPDz81Mdf9SoUUhPT0dsbGyl57x9+3acOXMGn376aY2uWVWioqLwyy+/ICwsDFJp+b/Kf/nlF/Ts2RPNmjUrd30CAgKgUChw9OjRctsXFhaW2y4zM7NOZ8z47bffYGVlhWnTplVYV9WsF086v0dKSkpUv69VCQoKwunTp1WtEwCwZcsWODo6onfv3qqx/16DSZMmQVdXt9I/o0/z6KcLAJCTk4PMzEz07t0bN2/eRE5OjtrHI6LaY8FLRA0mKysLmzdvxg8//ABvb2/MmDGjQgFw+/ZtAIC7u3uF/T08PFTrgYftCs2bNy+3nD17Fjdu3AAA3L9/HwUFBXBwcHhqtkfbJCYmqv5bk/0ep1Ao8MEHH2Ds2LFP/JF/TcyePRs9e/bE0KFDK6y7fv06wsPDK1ybgIAAAEB6enq57RcsWFBh26tXr9ZZ1vj4eLi7u0NHp/pddE86v0eys7NhbGz8xOOMHj0a+vr62LJlC4CHBejevXsxduzYcsX2Z599Vu78nZ2dUVpaqvqzpI4TJ04gICAARkZGMDc3R/PmzfHBBx+oPp+IGh57eImowXz++ed48cUXATzsA+3atSvmzJmDb775pkbH69ixIw4cOFBuLCwsDCdPngTw8EEpdT148KBG+z7a73Fr167FrVu3sG/fPrVzPMn+/ftx8OBB1Xn+l1KpxLPPPov33nuv0vVt2rQp9/Xrr7+u+n15ZNKkSXUTtgaedn6PpKamYuDAgU/cplmzZhg6dCi2bNmC+fPn49dff0VxcTFeffXVctsFBQWhR48e5cZefvlltbPHx8ejf//+8PDwwNKlS+Ho6Ag9PT388ccf+PLLL8s9NEhEDYcFLxE1mF69eql+7evriylTpmDlypUICgpC165dAQBOTk4AgLi4OPTr16/c/nFxcar1wMNi5tFdy0dCQ0NVD0FZWVlBV1cXycnJT832aAYIe3t7AICdnV2N9nuksLAQCxcuxOTJk8tlri1BEDB79myMHDlSdc3+q1WrVsjPz69wbarSunXrCtsaGRnVOuvjeU6fPo3S0lLo6uo+cdvqnB8A3L17F3l5eWjbtu1TPz8oKAiBgYE4c+YMtmzZgk6dOqFdu3bltnF1dYWrq6vq68zMTGRlZVX5QF1V9uzZg+LiYuzevRstW7ZUjR86dEit4xBR3WJLAxGJ5uOPP4adnR1ef/11lJWVAQB8fHxgbW2N1atXo7i4WLXtn3/+iStXrmDIkCEAUGmP6Z49e3Dx4kWMGjUKAKCrq4uuXbvijz/+QElJCQAgPz8fly9fRkZGBm7dugXgYZG1a9cutGzZUlWc9urVC0ePHkVWVpbq86KiogAAkZGRqs/cuXMnDAwM4OPjUy7LV199hYKCAtUMAnVl69atuHDhQrnp1f7rpZdewsmTJyu9s5ydna261g3l+eefR2ZmJr7++usK64THpv4Cqnd+j7YDUOEfRZUZPHiwauq5I0eOVLi7W9mfpbCwMAiCoPqzVF2PXp7y+Hnl5ORg/fr1ah2HiOoW7/ASkWhMTEywYsUKjBo1Cv/3f/+H999/H7q6uliyZAlCQkLQu3dvjBkzRjUtmbOzs2raqWPHjmH27NkYPnw4LC0tERkZiR9++AGenp6YNWuW6jM++OADDB48GAEBAXj55ZexceNG5OfnQy
aTYejQoXjrrbewf/9+HD9+HKtXr1bt9+6772Lbtm3o06cPJk2ahD///BM3b94EAEyYMAGTJk1CTEwMtmzZgtmzZ1e
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Silhouette analysis: score the K-Means labelling for k = 2..10\n",
"# (the silhouette coefficient is not defined for k = 1)\n",
"silhouette_counts = range(2, 11)\n",
"silhouette_scores = [\n",
"    silhouette_score(\n",
"        X_scaled,\n",
"        KMeans(n_clusters=k, random_state=42).fit_predict(X_scaled)\n",
"    )\n",
"    for k in silhouette_counts\n",
"]\n",
"\n",
"# Plot the silhouette coefficient against the number of clusters\n",
"plt.figure(figsize=(8, 6))\n",
"plt.plot(silhouette_counts, silhouette_scores, marker='o')\n",
"plt.title('Коэффициент силуэта')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Коэффициент силуэта')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"# Final K-Means clustering\n",
"optimal_k = 3  # pick the number of clusters based on the plots\n",
"kmeans = KMeans(n_clusters=optimal_k, random_state=42).fit(X_scaled)\n",
"kmeans_labels = kmeans.labels_"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"# Agglomerative clustering with the same number of clusters as K-Means\n",
"agg_clustering = AgglomerativeClustering(n_clusters=optimal_k).fit(X_scaled)\n",
"agg_labels = agg_clustering.labels_"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'PC2')"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAoYAAAK9CAYAAAC5N60XAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB9rUlEQVR4nO3dd3xT9eLG8ScnbdLdMltAZFMQEZAlqIDKduDEdWU4UK8LxQHXiQsVceEWFf2J817EjSKKE9koouw9Wmab7jY55/dHS6V0paXNadrP+/XKS3LON988CbF9OCsOy7IsAQAAoM4z7A4AAACAmoFiCAAAAEkUQwAAABSgGAIAAEASxRAAAAAFKIYAAACQRDEEAABAAYohAAAAJFEMAQAAUIBiCAAAAEkUQ6BKzJw5Uw6HQ0uXLi1x/YABA3T88ccHOBUAABVDMQQAAIAkiiEAAAAKUAwBG73zzjvq3r27wsPDVb9+fV1yySXavn17kTEDBgzQgAEDiixbsmSJHA6HHA5Hpec8/vjjtWzZMvXt21fh4eFq1aqVXn755SLjcnNzdd9996l79+6KjY1VZGSkTj31VH3//fdFxm3ZskUOh0NPPvmknn76abVo0ULh4eHq37+//vzzz8Jxe/bsUaNGjTRgwABZllW4fMOGDYqMjNTFF19c4de9YMECORwOLViwoMjYMWPGqGXLlkWWmaapZ555Rp06dVJYWJji4+N17bXX6uDBg8Xex6+++kr9+/dXdHS0YmJi1LNnT7377rtl5nvkkUdkGEaRcT/99JMuuugiHXvssXK73WrevLluvfVWZWVlFXvO//73v+rRo4eio6MLX+eh97Ushw5l2LJlS+Gy1atXq169ejrrrLPk9XqLjB8wYECR+Q/dZs6cWanca9as0ciRI9WoUSOFh4crMTFRd999tyTpgQceKPG5Dr8d/ne3aNEiDR06VLGxsYqIiFD//v31yy+/FHm+Q3Meet6YmBg1aNBAt9xyi7Kzs4uMdTgceuCBB4osmzp1qhwOR5G/v59//lmnnHKKGjZsqLCwMLVu3Vp33XVXkfkOHDig22+/XZ07d1ZUVJRiYmI0bNgw/f7770XmP/SZ/O9//1vsvYqKitKYMWOKLGvZsmWxZR999JEcDkexz/CePXt01VVX6dhjj5XT6Sx8D6Oiooo9F1AZIXYHAGqT1NRU7du3r9jyvLy8YsseeeQR3XvvvRo5cqSuvvpq7d27V9OnT1e/fv20YsUKxcXFlfo8d911V4nLKzLnwYMHNXz4cI0cOVKXXnqpPvzwQ11//fVyuVy68sorJUkej0czZszQpZdeqmuuuUZpaWl6/fXXNWTIEC1evFhdu3Yt8vxvv/220tLSdMMNNyg7O1vPPvusTj/9dK1atUrx8fFq3LixXnrpJV100UWaPn26br75ZpmmqTFjxig6Olovvvhime9vaa/bX9dee61mzpypsWPH6uabb9bmzZv1/PPPa8WKFfrll18UGhoqKb9oXXnllerUqZMmTZqkuLg4rVixQnPnztVll11W4txvvvmm7rnnHk2bNq3ImI8++kiZmZm6/vrr1aBBAy1evFjTp0/Xjh079NFHHxWOW7hwoUaOHKkuXbroscceU2xsrPbt26dbb721wq9z+/btGjp0qDp06KAPP/xQISHFf9R36NChsLyV9Dz+5v7jjz906qmnKjQ0VOPGjVPLli21ceNGffbZZ3rkkUd0/vnnq23btoXjb731VnXs2FHjxo0rXNaxY0dJ0nfffadhw4ape/fuuv/++2UYht58802dfvrp+umnn9SrV68iGUeOHKmWLVtqypQp+u233/Tcc8/p4MGDevvtt0t9b1JSUjRlypRiy9PS0tSxY0eNHDlSERERWrhwoZ544gllZmZq+vTpkqRNmzZpzpw5uuiii9SqVSslJyfrlVdeUf/+/fXXX3+padOmpT5vRXi93sK/myONHj1a3377rW666S
Z16dJFTqdTr776qpYvX14lzw3IAnDU3nzzTUtSmbdOnToVjt+yZYvldDqtRx55pMg8q1atskJCQoos79+/v9W/f//C+19++aUlyRo6dKh1+P/CFZ1TkjVt2rTCZTk5OVbXrl2txo0bW7m5uZZlWZbX67VycnKKzHfw4EErPj7euvLKKwuXbd682ZJkhYeHWzt27ChcvmjRIkuSdeuttxaZ49JLL7UiIiKsdevWWVOnTrUkWXPmzCkyxt/X/cMPP1iSrO+++67I40ePHm21aNGi8P5PP/1kSbJmzZpVZNzcuXOLLE9JSbGio6Ot3r17W1lZWUXGmqZZYr4vvvjCCgkJsSZMmGAdKTMzs9iyKVOmWA6Hw9q6dWvhskmTJlmSrN27dxcuO/S+Tp06tdgchzv0+du8ebN14MAB67jjjrMSExOtffv2lTj+5JNPtk477bRiz/Pmm29WOHe/fv2s6OjoIsssq+h7dbgWLVpYo0ePLrbcNE2rXbt21pAhQ4o8NjMz02rVqpU1aNCgwmX333+/Jck655xziszx73//25Jk/f7774XLJFn3339/4f0777zTaty4sdW9e/cin6+SDB8+3Dr++OML72dnZ1s+n6/ImM2bN1tut9t68MEHC5d9//33liTro48+KjZnZGRksdd/5Hvy4osvWm632zrttNOKfIazsrIswzCsa6+9tsjjR48ebUVGRpb5WgB/sSsZqEIvvPCC5s2bV+x2wgknFBk3e/ZsmaapkSNHat++fYW3hIQEtWvXrtiu2kMsy9KkSZN0wQUXqHfv3kc1Z0hIiK699trC+y6XS9dee6327NmjZcuWSZKcTqdcLpek/N2wBw4ckNfrVY8ePUrcQnHuueeqWbNmhfd79eql3r1768svvywy7vnnn1dsbKwuvPBC3Xvvvbriiis0YsSIUt/Xsl5348aNJUk7duwo9fFS/haw2NhYDRo0qMj70717d0VFRRW+P/PmzVNaWpomTpyosLCwInOUtOt+8eLFGjlypC644AJNnTq12Prw8PDCP2dkZGjfvn3q27evLMvSihUrCtelpaXJMIwytxSXJzs7W+ecc4727t2ruXPnqkGDBiWOy83NldvtLnMuf3Lv3btXP/74o6688kode+yxRR5f0ntVlpUrV2r9+vW67LLLtH///sK/n4yMDJ1xxhn68ccfZZpmkcfccMMNRe7fdNNNklTs83bIzp07NX36dN17772l7no9cOCAdu/erTlz5mjhwoXq169f4Tq32y3DyP+16fP5tH//fkVFRSkxMbHKtthlZmbqwQcf1I033ljsPc3IyJBpmqX+vQJVgV3JQBXq1auXevToUWx5vXr1iuxiXr9+vSzLUrt27Uqc59AuzSPNmjVLq1ev1ocffljkOLbKzNm0aVNFRkYWWda+fXtJ+ccMnnTSSZKkt956S9OmTdOaNWuK7BJv1apVseco6bnbt2+vDz/8sMiy+vXr67nnntNFF12k+Ph4PffccyVmPqSs1926dWslJCToySefVJcuXQp35+Xk5BQZt379eqWmphYWySPt2bNHkrRx40ZJ8uvyQjt37tSZZ56pjIwM7d+/v8QytG3bNt1333369NNPix3LmJqaWvjnPn366Pnnn9ctt9yiO++8U7GxsSUe+1iWsWPH6rffflNYWFix4woPl5KSohYtWpQ5lz+5N23aJMm/96o869evl5S/q7Q0qampqlevXuH9Iz9vbdq0kWEYRY61PNz999+vpk2b6tprry3x+D9JOu6445ScnCwp/zjVZ599tnCdaZp69tln9eKLL2rz5s3y+XyF66qqrD311FPKzs7Wf/7zH912221F1jVo0EDt2rXTjBkz1L9/f3Xt2lWGYRT7rANHg2II2MA0TTkcDn311VdyOp3F1pe0NSM3N1f33nuvrrrqqsICd7Rzluedd97RmDFjdO655+qOO+5Q48aN5XQ6NWXKlMICVVlff/21pPxjHXfs2FHqlrLyXr
fL5dJrr72myy67TF26dCmy7vDyY5qmGjdurFmzZpX4PI0aNarwa9iwYYNOPPFEPf3007riiiv01ltvFSk2Pp9PgwY
"text/plain": [
"<Figure size 1600x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Visualise the K-Means clusters in the PCA plane.\n",
"# The figure is created and shown entirely inside this cell: a figure left open for a\n",
"# later cell is rendered half empty, because the later cell draws into a new figure.\n",
"plt.figure(figsize=(8, 6))\n",
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis', alpha=0.5)\n",
"plt.title('Неиерархическая кластеризация')\n",
"plt.xlabel('PC1')\n",
"plt.ylabel('PC2')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAUsAAAHHCAYAAAA2xXo9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAABm/ElEQVR4nO3dd3xT1f/H8Vdukqa7pVBaQPbeMgQZCiJLRAEVUFSGAuoPVARFceFCVEQRRFBRwC+48CugiChTHAgyylAEWZbVFijdI03u/f1Rmi+haZtCk5u0n+fjkYfm3pPknTT5cO69555r0DRNQwghRLEUvQMIIYQ/kGIphBBukGIphBBukGIphBBukGIphBBukGIphBBukGIphBBukGIphBBukGIphBBukGIphBBuKFfFctGiRRgMBrZv315o3ciRIzEYDLRo0UKHZEIIf1euimVRDh06xJIlS/SOIYTwYya9A3jDtGnTMJvNNGjQQO8oQgg/Ve57locPH2bJkiU88MADxMbGumyzZMkS2rVrR1BQEFFRUdx5550cP37cqU337t1p0aIFO3bsoHPnzgQFBVG3bl3mz5/v1M5qtfL888/Trl07IiIiCAkJ4brrrmPjxo1O7Y4dO4bBYODNN9/k7bffpnbt2gQFBdGtWzf27dvnaJeUlER0dDTdu3fn4gmiDh06REhICEOHDnXK2L17d6fX+eOPPzAYDBgMBseyTZs2YTAY2LRpk1PbkSNHUqdOHadlqqoya9YsmjdvTmBgIDExMTzwwAOcP3++0Of4/fff061bN8LCwggPD+eaa67h008/LTbftGnTUBTFqd3PP//M4MGDqVWrFhaLhZo1a/LYY4+RnZ1d6DW/+uor2rdvT1hYmON9FnyuxSnYZXPs2DHHsj///JNKlSrRv39/bDabU/vu3bs7PX/BbdGiRZeV+++//2bIkCFER0cTFBRE48aNeeaZZwB44YUXXL7WxbeL/3Zbt26lb9++REREEBwcTLdu3fj111+dXq/gOQteNzw8nMqVK/Poo4+Sk5Pj1NZgMPDCCy84LZsxYwYGg8Hp7/fLL7/QtWtXqlSpQmBgIPXq1ePJJ590er7k5GQef/xxWrZsSWhoKOHh4dx0003s3r3b6fkLvpNfffVVoc8qNDSUkSNHOi2rU6dOoWXLli3DYDAU+g4nJSVx//33U6tWLYxGo+MzDA0NLfRaxSn3PctXXnkFk8nEk08+yV133VVo/bRp03juuecYMmQIo0eP5syZM8yZM4frr7+eXbt2ERkZ6Wh7/vx5+vXrx5AhQ7jrrrv48ssveeihhwgICOC+++4DIC0tjQULFnDXXXcxZswY0tPT+eijj+jTpw/btm3j6quvdnr9Tz75hPT0dMaNG0dOTg7vvPMOPXr0YO/evcTExFC1alXmzZvH4MGDmTNnDo888giqqjJy5EjCwsJ47733in3/Tz755BV9fg888ACLFi1i1KhRPPLIIxw9epR3332XXbt28euvv2I2m4H84nPffffRvHlzpkyZQmRkJLt27WLNmjUMGzbM5XMvXLiQZ599lpkzZzq1WbZsGVlZWTz00ENUrlyZbdu2MWfOHE6cOMGyZcsc7bZs2cKQIUNo3bo1r732GhEREZw9e5bHHnus1O/z+PHj9O3blyZNmvDll19iMhX+aTRp0sRR0Fy9jru59+zZw3XXXYfZbGbs2LHUqVOHw4cP8+233zJt2jRuu+02p62gxx57jKZNmzJ27FjHsqZNmwKwYcMGbrrpJtq1a8fUqVNRFIWFCxfSo0cPfv75Zzp06OCUcciQIdSpU4fp06fz+++/M3v2bM6fP88nn3xS5GeTkpLC9OnTCy1PT0+nadOmDBkyhODgYLZs2cIbb7xBVlYWc+bMAeDIkSOsWLGCwYMHU7duXRITE3n//ffp1q0bf/31F9WrVy/ydUvDZrM5/jaXGjFiBOvWrePhhx+mdevWGI1GPvjgA3
bu3Fm6F9HKkYULF2qA9scff2iapmmHDx/WTCaT9sgjj2iapmndunXTmjdv7mh/7NgxzWg0atOmTXN6nr1792omk8lpebdu3TRAmzlzpmNZbm6udvXVV2tVq1bVrFarpmmaZrPZtNzcXKfnO3/+vBYTE6Pdd999jmVHjx7VAC0oKEg7ceKEY/nWrVs1QHvsscecnuOuu+7SgoODtYMHD2ozZszQAG3FihVObbp166Z169bNcX/16tUaoPXt21e7+E/9008/aYC2YcMGp8ePGDFCq127tuP+zz//rAHa0qVLndqtWbPGaXlKSooWFhamdezYUcvOznZqq6qqy3zfffedZjKZtEmTJmmXysrKKrRs+vTpmsFg0P7991/HsilTpmiAdvr0aceygs91xowZhZ7jYgXflaNHj2rJyclas2bNtMaNG2tnz5512b5Lly7aDTfcUOh1Fi5cWOrc119/vRYWFua0TNOcP6uL1a5dWxsxYkSh5aqqag0bNtT69Onj9NisrCytbt26Wq9evRzLpk6dqgHarbfe6vQc//d//6cB2u7dux3LAG3q1KmO+5MnT9aqVq2qtWvXzun75Uq/fv20Fi1aOO7n5ORodrvdqc3Ro0c1i8WivfTSS45lGzdu1ABt2bJlhZ4zJCSk0Pu/9DN57733NIvFot1www1O3+Hs7GxNURTtgQcecHr8iBEjtJCQkGLfy6XK9WZ4Qa/yqaeecrn+66+/RlVVhgwZwtmzZx232NhYGjZsWGjT2WQy8cADDzjuBwQE8MADD5CUlMSOHTsAMBqNBAQEAPmbsMnJydhsNtq3b+/yX7KBAwdSo0YNx/0OHTrQsWNHVq9e7dTu3XffJSIigjvuuIPnnnuOe++9lwEDBhT53jVNY8qUKdx+++107NjRaV3VqlUBOHHiRJGPh/yeUkREBL169XL6fNq1a0doaKjj81m7di3p6ek89dRTBAYGOj3HxZv/BbZt28aQIUO4/fbbmTFjRqH1QUFBjv/PzMzk7NmzdO7cGU3T2LVrl2Ndeno6iqI49f5LKycnh1tvvZUzZ86wZs0aKleu7LKd1WrFYrEU+1zu5D5z5gybN2/mvvvuo1atWk6Pd/VZFScuLo5//vmHYcOGce7cOcffJzMzkxtvvJHNmzejqqrTY8aNG+d0/+GHHwYo9H0rcPLkSebMmcNzzz1X5GZrcnIyp0+fZsWKFWzZsoXrr7/esc5isaAo+WXGbrdz7tw5QkNDady4cel7dkXIysripZdeYvz48YU+08zMTFRVLfLvWhrltlgeOXKE//znP4wdO5Zq1aq5bPPPP/+gaRoNGzYkOjra6bZ//36SkpKc2levXp2QkBCnZY0aNQJw2ve1ePFiWrVqRWBgIJUrVyY6OprvvvuO1NTUQhkaNmxYaFmjRo2cng8gKiqK2bNns2fPHiIiIpg9e3ax73/p0qX8+eefvPrqq4XW1atXj9jYWN5880327Nnj+JHl5uYW+nxSU1OpWrVqoc8nIyPD8fkcPnwYwK1hWSdPnuTmm28mMzOTc+fOuSwQ8fHxjBw5kqioKEJDQ4mOjqZbt24ATp9hp06dUFWVRx99lMOHD3P27FmX+1KLM2rUKH755RfS09ML7ae8WEpKSon7uNzJfeTIEcC9z6ok//zzD5C/mXnp32fBggXk5uYW+s5d+n2rX78+iqIU+r4VmDp1KtWrV3fqJFyqWbNmVK9enUGDBjFgwADeeecdxzpVVXn77bdp2LAhFouFKlWqEB0dzZ49e1z+Hi7HW2+9RU5ODk8//XShdZUrV6Zhw4YsWLCAH3/8kaSkJJffdXeU232W06ZNc+yrLIqqqhgMBr7//nuMRmOh9aXdAQz5B4tGjhzJwIEDeeKJJ6hatSpGo5Hp06c7isrl+uGHH4D8facnTpwoskdltVp57rnnuP/++x3F/GIBAQF8+OGHDBs2jNatWzutq127tuP/VVWlatWqLF261OXrREdHl/o9HDp0iL
Zt2/L2229z7733snjxYkaMGOFYb7fb6dWrF8nJyTz55JM0adKEkJAQTp48yciRI516SnfeeSc7d+5kzpw5fPDBB6X
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Visualise the agglomerative clusters in the PCA plane.\n",
"# Use a dedicated full-size figure instead of plt.subplot(1, 2, 2), which would create\n",
"# a default-size figure here and draw only in its right half.\n",
"plt.figure(figsize=(8, 6))\n",
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=agg_labels, cmap='viridis', alpha=0.5)\n",
"plt.title('Иерархическая кластеризация')\n",
"plt.xlabel('PC1')\n",
"plt.ylabel('PC2')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Коэффициент силуэта для K-Means: 0.3456393814525895\n",
"Коэффициент силуэта для Agglomerative Clustering: 0.3584732406902143\n"
]
}
],
"source": [
"# Clustering quality: silhouette coefficient of each labelling on the preprocessed features\n",
"kmeans_silhouette = silhouette_score(X_scaled, kmeans_labels)\n",
"agg_silhouette = silhouette_score(X_scaled, agg_labels)\n",
"print(\"Коэффициент силуэта для K-Means:\", kmeans_silhouette)\n",
"print(\"Коэффициент силуэта для Agglomerative Clustering:\", agg_silhouette)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}