1062 lines
2.7 MiB
Plaintext
1062 lines
2.7 MiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Анализ размера складских помещений магазина с применением метода кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Store ID ', 'Store_Area', 'Items_Available', 'Daily_Customer_Count',\n",
|
|||
|
" 'Store_Sales'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Stores.csv\")\n",
|
|||
|
"\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Store ID</th>\n",
|
|||
|
" <th>Store_Area</th>\n",
|
|||
|
" <th>Items_Available</th>\n",
|
|||
|
" <th>Daily_Customer_Count</th>\n",
|
|||
|
" <th>Store_Sales</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1659</td>\n",
|
|||
|
" <td>1961</td>\n",
|
|||
|
" <td>530</td>\n",
|
|||
|
" <td>66490</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>1461</td>\n",
|
|||
|
" <td>1752</td>\n",
|
|||
|
" <td>210</td>\n",
|
|||
|
" <td>39820</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1340</td>\n",
|
|||
|
" <td>1609</td>\n",
|
|||
|
" <td>720</td>\n",
|
|||
|
" <td>54010</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>1451</td>\n",
|
|||
|
" <td>1748</td>\n",
|
|||
|
" <td>620</td>\n",
|
|||
|
" <td>53730</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>1770</td>\n",
|
|||
|
" <td>2111</td>\n",
|
|||
|
" <td>450</td>\n",
|
|||
|
" <td>46620</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Store ID Store_Area Items_Available Daily_Customer_Count Store_Sales\n",
|
|||
|
"0 1 1659 1961 530 66490\n",
|
|||
|
"1 2 1461 1752 210 39820\n",
|
|||
|
"2 3 1340 1609 720 54010\n",
|
|||
|
"3 4 1451 1748 620 53730\n",
|
|||
|
"4 5 1770 2111 450 46620"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Store ID</th>\n",
|
|||
|
" <th>Store_Area</th>\n",
|
|||
|
" <th>Items_Available</th>\n",
|
|||
|
" <th>Daily_Customer_Count</th>\n",
|
|||
|
" <th>Store_Sales</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>896.000000</td>\n",
|
|||
|
" <td>896.000000</td>\n",
|
|||
|
" <td>896.000000</td>\n",
|
|||
|
" <td>896.000000</td>\n",
|
|||
|
" <td>896.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>448.500000</td>\n",
|
|||
|
" <td>1485.409598</td>\n",
|
|||
|
" <td>1782.035714</td>\n",
|
|||
|
" <td>786.350446</td>\n",
|
|||
|
" <td>59351.305804</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>258.797218</td>\n",
|
|||
|
" <td>250.237011</td>\n",
|
|||
|
" <td>299.872053</td>\n",
|
|||
|
" <td>265.389281</td>\n",
|
|||
|
" <td>17190.741895</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>775.000000</td>\n",
|
|||
|
" <td>932.000000</td>\n",
|
|||
|
" <td>10.000000</td>\n",
|
|||
|
" <td>14920.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>224.750000</td>\n",
|
|||
|
" <td>1316.750000</td>\n",
|
|||
|
" <td>1575.500000</td>\n",
|
|||
|
" <td>600.000000</td>\n",
|
|||
|
" <td>46530.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>448.500000</td>\n",
|
|||
|
" <td>1477.000000</td>\n",
|
|||
|
" <td>1773.500000</td>\n",
|
|||
|
" <td>780.000000</td>\n",
|
|||
|
" <td>58605.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>672.250000</td>\n",
|
|||
|
" <td>1653.500000</td>\n",
|
|||
|
" <td>1982.750000</td>\n",
|
|||
|
" <td>970.000000</td>\n",
|
|||
|
" <td>71872.500000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>896.000000</td>\n",
|
|||
|
" <td>2229.000000</td>\n",
|
|||
|
" <td>2667.000000</td>\n",
|
|||
|
" <td>1560.000000</td>\n",
|
|||
|
" <td>116320.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Store ID Store_Area Items_Available Daily_Customer_Count \\\n",
|
|||
|
"count 896.000000 896.000000 896.000000 896.000000 \n",
|
|||
|
"mean 448.500000 1485.409598 1782.035714 786.350446 \n",
|
|||
|
"std 258.797218 250.237011 299.872053 265.389281 \n",
|
|||
|
"min 1.000000 775.000000 932.000000 10.000000 \n",
|
|||
|
"25% 224.750000 1316.750000 1575.500000 600.000000 \n",
|
|||
|
"50% 448.500000 1477.000000 1773.500000 780.000000 \n",
|
|||
|
"75% 672.250000 1653.500000 1982.750000 970.000000 \n",
|
|||
|
"max 896.000000 2229.000000 2667.000000 1560.000000 \n",
|
|||
|
"\n",
|
|||
|
" Store_Sales \n",
|
|||
|
"count 896.000000 \n",
|
|||
|
"mean 59351.305804 \n",
|
|||
|
"std 17190.741895 \n",
|
|||
|
"min 14920.000000 \n",
|
|||
|
"25% 46530.000000 \n",
|
|||
|
"50% 58605.000000 \n",
|
|||
|
"75% 71872.500000 \n",
|
|||
|
"max 116320.000000 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Store ID 0\n",
|
|||
|
"Store_Area 0\n",
|
|||
|
"Items_Available 0\n",
|
|||
|
"Daily_Customer_Count 0\n",
|
|||
|
"Store_Sales 0\n",
|
|||
|
"dtype: int64\n",
|
|||
|
"Store ID False\n",
|
|||
|
"Store_Area False\n",
|
|||
|
"Items_Available False\n",
|
|||
|
"Daily_Customer_Count False\n",
|
|||
|
"Store_Sales False\n",
|
|||
|
"dtype: bool\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Percentage of missing values per feature; a line is printed only for\n",
|
|||
|
"# columns that actually contain missing values.\n",
|
|||
|
"for column in df.columns:\n",
|
|||
|
"    null_rate = df[column].isnull().sum() / len(df) * 100\n",
|
|||
|
"    if null_rate > 0:\n",
|
|||
|
"        print(f'{column} Процент пустых значений: {null_rate:.2f}%')\n",
|
|||
|
"\n",
|
|||
|
"# Absolute number of missing values in each column\n",
|
|||
|
"print(df.isnull().sum())\n",
|
|||
|
"\n",
|
|||
|
"# Boolean flag per column: does it contain any missing value at all?\n",
|
|||
|
"print(df.isnull().any())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Store ID int64\n",
|
|||
|
"Store_Area int64\n",
|
|||
|
"Items_Available int64\n",
|
|||
|
"Daily_Customer_Count int64\n",
|
|||
|
"Store_Sales int64\n",
|
|||
|
"dtype: object"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 33,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Проверка типов столбцов\n",
|
|||
|
"df.dtypes"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Атрибуты \n",
|
|||
|
"\n",
|
|||
|
"- Store ID: уникальный идентификатор магазина\n",
|
|||
|
"- Store_Area: физическая площадь магазина в квадратных ярдах\n",
|
|||
|
"- Items_Available: количество различных товаров, доступных в соответствующем магазине\n",
|
|||
|
"- Daily_Customer_Count: количество клиентов, посещавших магазин в день (в среднем за месяц)\n",
|
|||
|
"- Store_Sales: продажи магазина (в долларах США)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"**Цель:** Кластеризация магазинов на группы для определения схожих характеристик. \n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Очистка данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 34,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Store_Area Items_Available Daily_Customer_Count Store_Sales\n",
|
|||
|
"0 1659 1961 530 66490\n",
|
|||
|
"1 1461 1752 210 39820\n",
|
|||
|
"2 1340 1609 720 54010\n",
|
|||
|
"3 1451 1748 620 53730\n",
|
|||
|
"4 1770 2111 450 46620\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Удалим несущественные столбцы\n",
|
|||
|
"columns_to_drop = [\"Store ID \"]\n",
|
|||
|
"df_cleaned = df.drop(columns=columns_to_drop)\n",
|
|||
|
"\n",
|
|||
|
"print(df_cleaned.head()) # Вывод очищенного DataFrame"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Визуализация парных взаимосвязей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 35,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABcsAAAPYCAYAAAAFBqHZAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzde3zT9b0/8Fdza9ImbUJ6p5TSlkKFtqKiOPECQ8UJOmUXdYLzzOmUo+Lm3M70p3POzTPd5oaX6byj4GW6KcPppse7oojaohZ6o7RQekuTNGmS5vr7oyY0TdJcmss3zev5eJzHmU36zTef74VvX9/39/3J8ng8HhARERERERERERERZTBRqleAiIiIiIiIiIiIiCjVGJYTERERERERERERUcZjWE5EREREREREREREGY9hORERERERERERERFlPIblRERERERERERERJTxGJYTERERERERERERUcZjWE5EREREREREREREGY9hORERERERERERERFlPIblRERJ5vF4Ur0KREREREQZjdfkREQUDMNyIqI4aG1txXXXXYeTTjoJixcvxvLly7Fp0ybs3bvX7327d+/G5ZdfnqK19Gc2m9HY2IhFixZhcHAw1auTcP/3f/+HSy65BMcddxzq6+tx+umn4/bbb4dOp4t6WStXrsTPf/7zBKwlEREREcXb5s2bsWDBAgDAyMgIbrjhBnz88ccpXqvYXXDBBViwYAFeffXVhH3GxDEDYrv+jeR31q9fj/Xr18e0jkREicCwnIhomtra2vDd734XBoMBN910Ex555BHccMMN6O3txXe+8x189tlnvvc+99xz6OjoSN3KTvDPf/4TKpUKeXl5+Nvf/pbq1Umov//977jqqqswb9483HnnnfjrX/+KSy65BK+++iq++93vwmg0pnoViYiIiCgJWlpa8OKLL8Ltdqd6VWLS2dmJTz/9FLW1tXj66acT9jnf/va38cwzzyRs+UREQiVJ9QoQEaW7Rx99FBqNBn/9618hkRw5ra5atQqrV6/GfffdhwcffDCFaxjcCy+8gJNPPhlSqRTPPfccrrjiCohEM/Me6r333ouzzz4bv/zlL30/W7ZsGY477jice+65eO6553DZZZelbgWJiIiIiCLwwgsvYPbs2bjiiitw/fXX48CBA5g7d27cP6ekpAQlJSVxXy4RkdDNzFSEiCiJhoaG4PF4AqpTcnJy8Itf/AJnnXUWAODnP/85/v73v+PQoUNYsGABXnjhBQCAyWTCb3/7W6xatQr19fVYs2ZNQKX3ypUr8Zvf/AaXXHIJGhoacOONNwIADAYDbr75Znzta19DfX09vvOd7+CDDz4Iu87t7e1oamrCaaedhnPOOQeHDh3CO++84/eegwcPYsGCBXj00UexevVqNDY24vnnnwcw3nbmiiuuwDHHHINjjjkGGzduRE9Pj9/v7927F//93/+NZcuWYdGiRTj55JPx61//GjabLeg69fX1oa6uDk8++aTfz4eHh7Fo0SI89thjAID33nsP3/nOd7BkyRIsXboUV155Zdhqfe82mmzhwoX4n//5HyxevNjv82699VasWLECixcvxvHHH4+NGzfi4MGDIZc/NjaG3/3udzj11FOxePFirF27Fi+//LLfez7//HNccsklOPbYY7FkyRJ8//vf93vqgIiIiIgS68MPP8SGDRsAABs2bPBr//Haa6/h/PPPR319PU466ST8+te/hsVi8b2+efNmrF69Gv/5z3+wZs0a1NfX49xzz8Wnn36Kzz77DN/+9rfR0NCANWvW+F2P22w2/PKXv8Qpp5yCxYsXY/Xq1Xj44YdjWn+Xy4V//OMfWLFiBVatWoWcnJyA6u8zzzwT11xzTcDvnnvuubjyyit9y3nwwQexZs0aNDQ04Oijj8YFF1yAnTt3+n3fiW1YJjt48CBuuOEGLF++HIsWLcKJJ56IG264AXq93u99DocDv/71r7F06VIcd9
xx+NnPfobh4eGQy3W73XjwwQdx+umnY/HixTjzzDOxZcuWiMaHiCgeWFlORDRNp512Gt566y1ccMEFWLduHZYtW4aqqipkZWVh9erVvvddddVVGB4expdffol77rkHFRUVsNlsuOiii6DT6XDNNddg9uzZeO2113DjjTdiaGgIP/rRj3y//9RTT+HSSy/FD3/4Q+Tm5mJsbAyXXHIJhoaGcN1116GoqAjPP/88LrvsMjz00EM48cQTQ67z888/D7VajRUrVkAmk2Hu3LnYtm0bTj311ID3bt68GTfeeCOUSiUaGxuxf/9+XHDBBaiqqsL//u//wul04v7778eFF16IF198EVqtFgMDA/je976Ho48+GnfccQdkMhnefvttPProoygqKgrat72kpATHH388duzYgYsvvtj381deeQUejwdnn302enp6cNVVV2HdunX48Y9/jJGREfzhD3/A5Zdfjv/85z8hK+NPO+007NixA2NjYzjrrLOwdOlSFBcXAwC+//3v+97n8XhwxRVXwGg04vrrr0dBQQH27duHu+++G7fcckvQP2w8Hg82btyITz75BNdccw2qq6vxn//8B9dddx3sdju++c1vwmw247LLLsOyZcuwefNm2O123H///fjBD36AN998EyqVKuS2IiIiIqL4WLRoEW6++Wb86le/ws0334wTTjgBALB9+3Zcf/31WLt2LTZt2oRDhw7hj3/8I9rb2/Hoo48iKysLwHhxxx133IHrrrsOOTk5uO2223DNNddAKpXiRz/6EUpLS32vv/nmm5DL5fjNb36Dd999Fz/72c9QUFCAt99+G7/73e+gVquxbt26qNb/7bffxuDgIL75zW9CLpfjrLPOwt///nds2rQJMpkMAHDOOefgwQcfhNlshlKpBAB0dHRg7969vrD8rrvuwrZt2/CTn/wECxYsQH9/P+69915ce+21ePPNN6FQKKZcD6vVig0bNkCj0eCWW26BSqXCp59+invuuQdyuRy/+tWvfO/917/+hcbGRtxxxx0YHh7GXXfdhfb2djz77LMQi8UBy/7lL3+JF154AVdccQWWLFmCXbt24Te/+Q1GRkawcePGqMaLiCgWDMuJiKbpoosuwuDgIB5++GHfhaFGo8Hy5cuxYcMGNDQ0AAAqKiowa9YsyGQyHH300QCArVu3orW1FU8//TSWLFkCADj55JPhdDpx33334YILLoBarQYAlJWV4frrr/d97rPPPou9e/fi2WefRWNjIwDglFNOwfr163HXXXf5qsAnczqdeOmll7BmzRrfRfV5552HzZs34/DhwygtLfV7/1lnneV3If+Tn/wECoUCjz32mO8C/MQTT8SqVavw0EMP4Wc/+xlaW1tRV1eHP/3pT773fO1rX8N7772HDz/8MOQkp+eeey5+8YtfoLe3F2VlZQCAHTt24Gtf+xoKCwuxY8cO2Gw2XHHFFb6wu6SkBK+//josFovvsya77bbb4Ha78e9//xuvvfaab3t8/etfx6WXXupb1sDAABQKBX72s5/huOOOAwCccMIJ6O7uDtmz8f3338c777yDP/7xj/jGN74BYHwbWq1W3HXXXVizZg3a29uh1+uxYcMGHHPMMQCAqqoqPPPMMxgdHWVYTkRERJQESqUSNTU1AICamhrU1NTA4/Hgrrvuwsknn4y77rrL997Kykp8//vfx1tvvYXTTjsNwHhIfMstt+CUU04BMP605u9//3vcfvvt+Na3vgUAsFgsuOaaa7B//37U1dXho48+wkknnYSzzz4bwPi1ZU5ODrRabdTr/8ILL6C2thb19fUAgPPPPx9/+9vf8Oqrr2Lt2rUAxsPyzZs347XXXsM3v/lNAONzFeXl5WHlypUAxq95r7vuOr/K+uzsbFx99dXYt2+f72+VULq6ulBSUoL//d//xZw5cwCMtzhsamrCRx995PdejUaDhx9+GDk5Ob7/3rhxI95++22sWLHC77379+/Hs88+ix//+M
e+vxeWL1+OrKwsPPDAA7joooug0WiiHjciomiwDQsRURxce+21eOedd/D73/8e3/rWt6BUKrF9+3Z85zvfwRNPPBH
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1500x1000 with 3 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Настройка стиля графиков\n",
|
|||
|
"sns.set(style=\"whitegrid\")\n",
|
|||
|
"\n",
|
|||
|
"# Создание фигуры\n",
|
|||
|
"plt.figure(figsize=(15, 10))\n",
|
|||
|
"\n",
|
|||
|
"# График 1: Площадь vs Продажи\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Store_Area\"], y=df_cleaned[\"Store_Sales\"], alpha=0.6, color=\"purple\"\n",
|
|||
|
")\n",
|
|||
|
"plt.title('Store Area vs Sales')\n",
|
|||
|
"plt.xlabel('Store Area')\n",
|
|||
|
"plt.ylabel('Sales')\n",
|
|||
|
"\n",
|
|||
|
"# Plot 2: number of items available vs. sales\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
"    x=df_cleaned[\"Items_Available\"],\n",
|
|||
|
"    y=df_cleaned[\"Store_Sales\"],\n",
|
|||
|
"    alpha=0.6,\n",
|
|||
|
"    color=\"green\",\n",
|
|||
|
")\n",
|
|||
|
"# Title and x label name the plotted x variable (Items_Available), matching the other panels\n",
|
|||
|
"plt.title(\"Items Available vs Sales\")\n",
|
|||
|
"plt.xlabel(\"Items Available\")\n",
|
|||
|
"plt.ylabel('Sales')\n",
|
|||
|
"\n",
|
|||
|
"# График 3: Количество посетителей vs Продажи\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Daily_Customer_Count\"], y=df_cleaned[\"Store_Sales\"], alpha=0.6, color=\"red\"\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Daily_Customer_Count vs Sales\")\n",
|
|||
|
"plt.xlabel(\"Daily_Customer_Count\")\n",
|
|||
|
"plt.ylabel('Sales')\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Упорядочиваем графики\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Стандартизация данных для кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 36,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAAImCAYAAABATALrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9eZxkVXn//75rrV3V+0wvM8MszLANgqCIChLEJYpGJHGJMYnfGBM1m36TGL+Jv5jFqEk0iRhMTExcosENEdAEFxBQlFVwGGCYpWd6n15rr7r7749bVdNLdXdVd1V3dc95v168dLqq7j333nPP+Zznec7zSJ7neQgEAoFAIBAIBJsQeaMbIBAIBAKBQCAQrBYhZgUCgUAgEAgEmxYhZgUCgUAgEAgEmxYhZgUCgUAgEAgEmxYhZgUCgUAgEAgEmxYhZgUCgUAgEAgEmxYhZgUCgUAgEAgEmxYhZgUCgUAgEAgEmxYhZgUCgUCwbog6PQKBoN4IMSsQCLYEb33rWzlw4MC8/y666CKuueYa/uIv/oJkMrnoNwMDA3zwgx/kuuuu4+KLL+aaa67hve99L88888yS5/mHf/gHDhw4wF/91V+t2Ka3ve1tPP/5z8c0zSW/85rXvIa3vOUtABw4cICbbrqpiqutL3/yJ3/CtddeW/73tddey5/8yZ/U9Rzj4+O84x3vYGRkpKHnEQgEZx/qRjdAIBAI6sUFF1zAn//5n5f/bVkWhw8f5uMf/zhPP/00//3f/40kSQB85zvf4Y//+I8599xzeec730l/fz/j4+N87nOf4w1veAOf+tSneNGLXjTv+K7rctttt7F//36++c1v8od/+IeEQqEl23PjjTfywAMPcN9993Hdddct+vzw4cM8++yzfPSjHwXgy1/+Mtu3b6/HrVgTn/zkJ4lGo3U95gMPPMC9997b8PMIBIKzDyFmBQLBliEajXLJJZfM+9vznvc8stksn/jEJ3jiiSe45JJLGBwc5H3vex9XXXUV//iP/4iiKOXvv/zlL+fNb34z73vf+7j77rvRdb382Q9/+EPGx8f5+Mc/zq/8yq9w55138ku/9EtLtudlL3sZ8Xic22+/vaKY/cY3vkE0GuUVr3gFwKK2bxQXXHDBljqPQCDY2ogwA4FAsOW56KKLABgdHQXgC1/4AqZp8md/9mfzhCxAKBTife97HzfeeOOi0ISvf/3r7N+/n8suu4wrrriCL3/5y8ueNxAIcP311/ODH/yATCYz7zPLsvjWt77Fq1/96rJ1d2GYwec+9zle+cpXcvDgQa666io++MEPlo8zPDzMgQMHuPXWW+cdd2HIgOM4fPrTn+b666/n4osv5pJLLuFNb3oTP/nJT5Zs91z3/0033bQofKP0X6mtK53j1ltv5f3vfz8AL33pS8vHXhhmkE6n+fCHP8x1113HwYMHuf766/na1762qG2f+MQn+OhHP8oLX/hCLr74Yn7jN36DkydPLvssBALB1kWIWYFAsOUZGBgAYMeOHQDcf//9XHDBBWzbtq3i96+88kre85730NXVVf5bIpHg7rvv5nWvex0AN9xwA4cOHeLw4cPLnvvGG2/EMAzuuuuueX+/7777mJmZWdKye+edd/J3f/d3vOUtb+Ezn/kM7373u/nmN79ZVazuXP7+7/+em2++mTe+8Y38+7//O3/1V39FIpHg93//98nn8yv+/pd+6Zf48pe/PO+/yy67jEgkwqte9aqqznHNNdfwzne+E/BDC971rnctOk+hUOCXf/mXueOOO3j729/OzTffzGWXXcaf/umf8i//8i/zvvv5z3+eEydO8OEPf5i//uu/5sknn+R973tfTfdFIBBsHUSYgUAg2DJ4nodt2+V/J5NJHnroIT71qU9x6aWXli204+PjnH/++TUd+4477sB1XX7hF34B8MMR/vIv/5JbbrllWYF54YUXcv7553PHHXdw4403lv9+2223ceDAAQ4ePF
jxdw899BD9/f285S1vQZZlnv/85xMOhytuZFuOiYkJ3vOe9/DWt761/LdAIMDv/u7vcuTIkRVDG7Zv3z4vjvezn/0sjz32GJ/85CfZu3dv1efYuXMnAOeffz79/f2LznPrrbfy7LPPcsstt3DppZcCcNVVV2HbNjfffDNvetObaG1tBSAWi3HzzTeXreqDg4PcdNNNzM7O0tbWVtP9EQgEmx8hZgUCwZbh4Ycf5sILL5z3N1mWeeELX8hf/uVfljd/KYqC4zg1HfvrX/86V1xxBbquk0qlAN/lfeedd/K+971v2Y1MN954I3/zN3/D6dOn2bZtG4lEgnvuuYc//uM/XvI3L3jBC/jyl7/M61//eq677jpe8pKX8JrXvKZ8DdXysY99DICZmRlOnDjBqVOnuOeeewCWzbJQifvvv5+//du/5V3vete8GOB6nOOhhx6ir6+vLGRLvPa1r+VrX/saTzzxBC95yUsAOHjw4LzwkJLYzufzQswKBGchQswKBIItw4UXXshf/MVfACBJEoFAgJ6enkVCs7e3txw/WwnLskgmk3R2dgLw1FNP8fTTTwP+hrKF3H777fzyL//yksd7zWtew9/+7d/y7W9/m7e97W1861vfQpIkXvva1y75m1e96lW4rsuXvvQlbr75Zm666Sb6+vr4wz/8w7J7vxoOHTrEX/zFX3Do0CFCoRD79u2jt7cXqC3n64kTJ3jve9/L1Vdfze/+7u/W/RzJZHJeWEeJ0jMoLSCARRkkZNmPmHNdt+rrEQgEWwchZgUCwZYhEoks6bafy4tf/GI+97nPMTk5WVFA3Xvvvbz73e/mk5/8JC972cu49dZbCYfD3HzzzWXhVOL/+//+P7785S8vK2ZbW1u57rrruOOOO3jb297GN7/5TV72speV3eZLcf3113P99deTTqf54Q9/yL/927/xR3/0R1x22WVlC+1CC3Mulyv//0wmw9vf/nYOHDjAt771Lfbs2YMsy9x7772LYniXI5lM8s53vpPOzk7+/u//fp51uF7niMfjnDp1atHfJycnAYTFVSAQLInYACYQCM463vKWt6BpGh/60IcqisFPfOITtLW1cfXVV2OaJnfccQfXXnstV155JVdcccW8/173utfxzDPP8Pjjjy97zhtvvJHDhw/z0EMP8cQTT/CLv/iLy37/D/7gD3j3u98NQEtLCz//8z/Pu971LmzbZmJiomxtPn36dPk3lmXxs5/9rPzvEydOkEgk+NVf/VX27dtXFuL33XcfUJ0l07Zt/uAP/oCpqSn++Z//eZGVu9pzLFwELOR5z3seIyMj/PSnP53399tvvx1N07j44otXbKtAIDg7EZZZgUBw1tHf388HP/hB/vRP/5S3vOUtvOlNb6Knp4fBwUH+8z//k6GhIT7zmc8QCAT49re/TSKR4Prrr694rF/4hV/gn/7pn7jllluW3Uz1whe+kN7eXj7wgQ/Q39/PlVdeuWwbX/CCF/Dnf/7nfPSjH+Xqq68mlUrxyU9+knPOOYfzzjsPTdO49NJL+cIXvsCuXbuIx+N8/vOfp1AoEA6HAdi9ezfRaJR/+Zd/QVVVVFXlrrvuKqe7qiabwUc+8hEeeOAB/uRP/oRsNjtPtEej0arPEYvFAPjud7/L1VdfXd48VuL1r389X/rSl3j3u9/N7/3e79Hf38/dd9/N17/+dX7nd36n/HuBQCBYiBCzAoHgrOSGG25g165dfO5zn+Mf//EfmZ6epquri+c+97ncdNNNZbF16623Eo/HefGLX1zxOL29vTzvec/jf/7nf3j/+99PPB6v+D1Zlrnhhhv453/+Z37v935vxY1cb3rTm7Asi1tuuYUvfelLBINBrrzySv7oj/4ITdMAX2j+1V/9FX/2Z39GNBrlF3/xF7nsssv46le/CvgW3Ztvvpm//du/5fd///eJRCKcf/75/Nd//Re/+Zu/ySOPPDIvJ20l7r777vK5FvL85z+fL3zhC1Wd44orru
CFL3whH/vYx/jxj3/Mpz/96XnHCoVCfOELX+BjH/sY//RP/0Qmk2HPnj186EMfWtGKLRAIzm4kr5YdAAKBQCAQCAQ
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Нормализация данных\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(df_cleaned)\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование в DataFrame для удобства\n",
|
|||
|
"df_scaled = pd.DataFrame(data_scaled, columns=df_cleaned.columns)\n",
|
|||
|
"\n",
|
|||
|
"# Понижение размерности до 2 компонент\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"kc_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(kc_pca[:, 0], kc_pca[:, 1], alpha=0.6)\n",
|
|||
|
"plt.title(\"PCA Visualization\")\n",
|
|||
|
"plt.xlabel(\"Principal Component 1\")\n",
|
|||
|
"plt.ylabel(\"Principal Component 2\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Агломеративная (иерархическая) кластеризация"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1AAAAJxCAYAAABMnFMWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACzqUlEQVR4nOzdd3xUVf7/8fckM5PeKwkhoTfp0iw0+4oFdV3X7hZZBXUtX9ctlt1V1111FcXu2nGxYF072AURVAQbSodQQgJJSJ8k8/uD373OTCbJnckkMySv5+PhwzDl3jP3nnvu+Zx2bW632y0AAAAAQLuiwp0AAAAAADhQEEABAAAAgEUEUAAAAABgEQEUAAAAAFhEAAUAAAAAFhFAAQAAAIBFBFAAAAAAYBEBFAAAAABYRAAFAAAAABYRQAGw5JprrtHgwYP9/nfNNdeEO3kAPFRUVGjcuHFavXq1KioqdNFFF+mRRx4Jd7IAoFuwhzsBAA4cWVlZmj9/vtdrc+fODVNqALQmJSVFF1xwgU4//XS53W4NHjxY//znP8OdLADoFgigAFjS1NSk+Ph4jR492ut1p9MZngQBaNPcuXN1xhlnqLKyUoWFhYqOjg53kgCgW2AIHwBLGhsbFRsba+mzK1eu1Nlnn61Ro0ZpwoQJ+sMf/qA9e/aY77/wwgsaPHiwtm3b5vW9GTNmeA0HdLlcrQ4b9N3WV199pVmzZmnkyJE64YQT9Oabb3pte9++ffrHP/6hI488UiNGjNDMmTP1/PPPt9i/7362bdumc845R9dcc43uv/9+HXLIIRo3bpwuvvhiFRcXe31/8eLFOvPMMzVmzBgddNBBOvbYY7VgwQLz/eXLl5vb/fzzz72++9RTT2nw4MGaMWNGi/T85S9/8fpsRUWFDjroIA0ePFjLly+3vP/WPPfcczrllFM0evRojRw5UieddJLeeOONFsfY37DN1s7POeec47WP119/XaeccorGjBmjQw89VNddd50qKirM9++++24NHjxYY8aMUUNDg9d3L7300hZDRevr6/Wvf/1LU6dO1UEHHaQTTjhBr7/+utf3ZsyYoTvuuEM333yzxo8fr4kTJ+rqq69WeXm55d/f1tDVF154wTynnuehrKxMBx98sN9zOXjwYA0ZMkTjx4/XJZdcor1795qfGTx4sO6++26vtBnHJZhjKUmZmZnq16+fli5d2u5wW999vfbaaxo/frxuv/12Sd751/c/z3R///33mjt3riZNmqThw4fr8MMP14033qi6ujrzMw0NDbrzzjt1xBFHaOTIkZo5c6ZefPFFS8dckrZv364rrrhCEyZM0KhRo3Teeefp22+/Nbe/bds2DR48WK+99pp+97vfadSoUZo2bZruueceNTc3e50X32NyxRVXeJ1Tt9utefPm6fDDD9e4ceP0u9/9Tjt27DA/39TUpAcffFAzZ87UyJEjNXr0aJ1xxhn69NNP2zyPUstz7vtvt9utM844w6u8vOaaa7zyliQtXLjQb/4B0DnogQJgSW1trVJSUtr93IoVK3TBBRdo0qRJuvPOO1VRUaF58+bp3HPP1fPPP285CJP2V5Il6b777lN6erqk/ZVd38BHkmbPnq2zzz5bl19+uZ5//nn9/ve/1wMPPKCpU6eqrq5OZ555psrKynTppZcqPz9fixcv1p///GeVlpbqd7/7nbmdqVOn6uKLLzb/nZ2dLUlasmSJ0tLS9Je//EXNzc26/fbbdc455+i1115TXFyc3n//fc2ZM0fnnnuuLrnkEtXV1enpp5/W3/72Nx100EEaNWqUuc2EhAS9++67GjdunPna66+/rqiolm1aCQkJev/99+V2u2Wz2SRJb7/9tpqamrw+F8j+PS1YsEA33nijLrnkEo0bN04VFRV66KGHdNVVV2nMmDHKzc01Pzt//nxlZWVJknk+JOm0007Tz3/+c/Pff/3rX732ce+99+quu+7SmWeeqcsvv1
xbt27VvHnztGrVKj377LNeecJms2nZsmWaOnWqJKm6uloffPCB17Fxu92aM2eOvvjiC1166aXq37+/3nnnHV1++eVqaGjQySefbH726aefVmFhof7xj39oz549uv3227V582YtXLhQNput3d9/8cUX64wzzpC0v0dn2LBhZv7o06ePfvzxxxbH9Pbbb9e+ffuUnJzs9bqRt1wul9avX69//etfuummm3Tbbbf5PTf+BHIsDS6XSzfffLPlfUhSXV2d/va3v+k3v/mNTjjhBK/3rrvuOg0fPtz89y9+8Qvz75KSEp111lkaPXq0brnlFjmdTn344Yd69NFHlZ2drQsvvFCSdNVVV+mDDz7QRRddpFGjRumDDz7QNddcI4fD0e4x37Nnj8444wzFxcXp2muvVVxcnB5//HGdddZZev7559W/f38zPTfccIOmTp2qu+++W59//rnmz5+vmpoa/d///Z/f371y5Uq99tprXq899thjeuCBB3T11Verb9++uuWWW3TZZZfp2WeflSTddttt+u9//6srr7xSgwcP1q5du3TPPffosssu0/vvv6+4uLiAjr2nl19+WV9++WWbn6moqNCdd94Z9D4ABI4ACoAl5eXlZjDRlttvv119+/bVAw88YA4ZGjVqlI4//ngtWrRIZ511luV91tTUSJLGjBmjtLQ0SdJHH33k97PnnHOO5syZI0k6/PDDNWvWLN1zzz2aOnWqXnjhBf3www9auHChxowZY36msbFR9957r8444wylpqZK2h8Y+A5TlPYHkC+88IIKCgokSf369dOsWbP00ksv6Ze//KXWrVunWbNm6c9//rP5nTFjxmjixIlavny5VwAzZcoULVmyxKzE7dy5U19++aUOPvjgFr1akydP1gcffKCvvvrKTNcbb7yh8ePHe/V6BLJ/T1u3btWvf/1rr6AxPz9fp5xyij7//HMdf/zx5utDhw5V7969W2wjNzfX65glJiaaf1dUVOi+++7T6aefruuuu858fdCgQTrrrLNa5Anj2BgB1LvvvqusrCyvXoOlS5fqo48+0h133KGf/exnkvafz9raWt12222aOXOm7Pb9t7eoqCg9+uijSkpKkrT//M6ZM0cfffSRpkyZYun39+nTR9L+4aqt5Q/DmjVr9PLLL2vo0KGqrKz0es/zu+PHj9fSpUv1zTfftLotX4EeS8OTTz6pmpoaZWZmWt7X//73PzkcDv3mN79pMfRvwIABrR6DH374QUOHDtW8efPMfHDIIYfok08+0fLly3XhhRfqhx9+0FtvvaU//elPOu+88yTtz+fFxcVavny5Zs6c2eYxv+OOO1ReXq7//ve/ys/Pl7Q/3/zsZz/TvHnzdNddd5mfHT58uBmgTpkyRTU1NXr88cd10UUXeeVTSWpubtaNN96o4cOHe52XmpoaXXzxxTr//PMl7e/d+tvf/qbKykolJyerpKREl19+uVeva0xMjC655BKtXbu2zfzSlurqat12220t0uPrrrvuUl5enldvJoDOxRA+AJaUlJQoJyenzc/U1tbqq6++0tSpU+V2u9XY2KjGxkYVFBSof//++uSTT7w+39zcbH6msbGxxfZ27typqKioFhUdf2bNmmX+bbPZdNRRR2n16tWqq6vTZ599pvz8fDN4Mpx44omqr6/XV1991e72x44dawZPkjRs2DAVFBRoxYoVkqTf/OY3uuWWW1RdXa2vv/5ar7/+uh544AFJajEkbcaMGdq0aZM2bNggSXrzzTc1atQoszLoKSkpSRMmTNCSJUskSXv27NHy5cu9AptA9+/pmmuu0VVXXaXKykqtWrVKL7/8sjnsr63vWbVq1So1NDRo5syZXq8ffPDBys/P12effeb1+hFHHKF3331Xbrdb0v6eOSNIMixbtkw2m01Tp071yj8zZszQ7t27vXqFZsyYYQZPxr/tdrt53kL5+91ut2688UaddtppGjJkiN/3Gxsb1dDQoNWrV+vzzz/XQQ
cd5PUZ32vCM3AM9FhKUmlpqe655x794Q9/UExMjKXfsWvXLj300EM688wzA543ddhhh+mpp55STEyM1q1bpyVLlui
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(data_scaled, method='ward')\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title('Дендрограмма агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Индекс образца')\n",
|
|||
|
"plt.ylabel('Расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Cut the tree at 70% of the largest merge distance, the same default threshold\n",
|
|||
|
"# scipy's dendrogram uses to colour branches. The previous fixed t=60 put every\n",
|
|||
|
"# sample into a single cluster.\n",
|
|||
|
"cut_distance = 0.7 * linkage_matrix[:, 2].max()\n",
|
|||
|
"result = fcluster(linkage_matrix, t=cut_distance, criterion='distance')\n",
|
|||
|
"\n",
|
|||
|
"# Show the size of each cluster instead of dumping every individual label\n",
|
|||
|
"print(pd.Series(result).value_counts().sort_index())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ8AAAMQCAYAAACJzMTyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xUVf7/8fckMwkWkEiXEgQUdSnigoCurCAq1hXbqohg+QEKLlJUWHUFWREVEDA2ZFVAsCDYVle+giiroohlFwuydIIQQKQpZErO748ww0zmTjKTmZspeT0fDx8m5975zJlCyjvnfK7DGGMEAAAAAAAA2CAr2RMAAAAAAABA5iJ8AgAAAAAAgG0InwAAAAAAAGAbwicAAAAAAADYhvAJAAAAAAAAtiF8AgAAAAAAgG0InwAAAAAAAGAbwicAAAAAAADYhvAJAAAAAAAAtiF8AgCkpb59+6pv376Wx9588021bt1ao0aNquJZAaisQYMGad68ecmeBhLktdde04ABA5I9DQBAiiB8AgBklJ07d2r8+PHJngaAGCxYsEBFRUW64oorkj0VJMgVV1yhHTt26LXXXkv2VAAAKYDwCQCQUR544AH99ttvOvLII5M9FQBROHjwoCZOnKhBgwYpK4sfTTOFw+HQwIEDNXnyZB08eDDZ0wEAJBnf4QEAGeP//u//tHDhQg0ePFh5eXkhx0pKSjR9+nSde+65atOmjc4//3zNnj075Jy+fftq1KhRevrpp3XGGWfo97//vW677TZt2bIl5LxFixbpuuuuU4cOHdSmTRv16tVLc+bMCRz//PPP1bp1a3388cfq06eP2rVrp/POO09z584NnPPQQw+pdevW+uyzzwJjCxYsUOvWrfXGG28E5lN2a+GkSZPUunVrLViwQJLUunVrPf744yHnPP7442rdunXYnC+//HK1bdtWZ555pv7+97/rt99+Cznnm2++0U033aTTTjtNXbp00fDhw1VUVBTymD7//HNJ0urVq9WzZ09dc801UT8vkjR9+nSdc845OuWUU9S6devAf2UfQ7BRo0apR48egc+fe+45dejQQS+//HLI82b1n/95kqQvvvhCN998szp16qQ2bdqoR48eevzxx1VSUhI4Z//+/Ro3bpzOOussnXrqqbriiiv04YcfBl6PSPcT/LwMHDhQp512mk477TQNHjxYmzdvDtSP5r0hhb+uxhhdc801at26tQoLCyVJxcXFGjNmjLp27arOnTtr5MiR2rNnT+A2Bw8e1KRJk3TeeeepTZs2Ou2003TjjTfqhx9+iPjcSlJhYWHIc1f2c/99n3POOSHvs//973+B57fs81Oe+fPnq7i4WN27dw8Zf+yxxyyf67LvlXnz5umiiy5SmzZtdPbZZ+vxxx+Xz+eL6TFK0ieffGJ5f8H/BkeNGqW+ffvqtddeU/fu3dWhQwf169dPq1atCqm/YcMG/eUvf9GZZ56pU089VX379tWXX34Zdv/B//nn2Lp1a7344ou6++671aFDB51xxhl68MEHVVxcHLi9z+fT9OnTdfHFF6tdu3Y69dRTdc0114R8PfF/HejQoYPcbnfI/P7yl7+EbE0Ons9bb70Vcu6SJUvCXsdo7l+SunfvruLiYs2fP18AgOrNmewJAACQCLt379bYsWP1u9/9TrfccoteffXVkONjxozRggULNHDgQHXo0EFffPGFxo8fr71792rw4MGB8xYvXqy8vDzde++9Kikp0aRJk9S3b1+98847OuKII/Thhx9q8ODBuuGGG3T77bfr4MGDmjt3rh544AG1adNG7du3D9QaNmyYLrvsMg0aNEiLFy/W2LFjJUnXXXedhg0bpg8//FD333+/3n77be3cuVMPPvigLrjgAl122WWWj3HTpk164YUXYn5u3n77bY0cOVKXXHKJ7rjjDm3ZskWPPfaY1qxZo+eff14Oh0Pff/+9rr/+erVv316PPPKIfD6fJk
2apJtvvjkQhgV79NFH1aZNG916662SFNXz8sYbb2jSpEkaOHCgunbtqiOOOEKS9Oc//znqx1JUVKTJkyfrgQce0B//+MeQYwUFBapXr54kaceOHRoyZEjg2KpVq9S/f3/16tVLjz32mIwxevvtt1VQUKAWLVrooosuks/n00033RQIDlq0aKHXX39dgwcP1syZM3X//fdr//79gTlfeeWVuuqqqyRJrVq10vr163XNNdeoRYsWevjhh+X1evXUU0/p2muv1Ztvvqk6deoE5lPee8PKm2++qa+//jrsNXjjjTd03333qVatWho7dqzGjBmjxx57TJJ01113acWKFRo+fLiaNWumjRs3aurUqRoxYoTeeecdORyOqJ/3smbMmBEIwfxuvfVW5eTkaNy4capfv76ysrI0b968CrddvfXWWzr77LOVk5MTMn7w4EH16NFDAwcODIyVfa8888wzeuyxx3T99ddr9OjR+uGHH/T4449r69atMW+/PXjwoBo2bKipU6cGxvyvS7AffvhB69at0/Dhw3XMMcdo2rRpuv766/Xuu++qfv36WrNmja6++mo1b95c9957r1wul2bNmqV+/frpueee0+mnnx7ynJ199tmSFPL4p06dqvbt22vKlClau3atpkyZoh07dmjKlCmSpIkTJ+qll17SiBEj1Lp1axUVFemJJ57Q0KFD9eGHHwb+bUmlK5CWLVsW+Pfy66+/6qOPPrJcZXbUUUfpgw8+0KWXXhoYe/fdd5WVlRUS0kZ7/7m5uerevbvefvtt9enTJ5aXAwCQYQifAAAZYfz48dqzZ4/+8Y9/yOkM/fa2fv16vfrqqxo+fHigAe4f/vAHORwOPfPMM7ruuusCK6UOHDigBQsWqGnTppKkFi1aqHfv3nrjjTd07bXXas2aNerdu7fuueeeQP0OHTqoc+fO+vzzz0PCp3PPPTdw3llnnaXt27frySef1LXXXqsaNWpowoQJuu666zR9+nR99dVXOvrooy1/2Q1+jCeccIK+++67wFhWVpa8Xm/E2xhjNHHiRJ111lmaOHFiYLx58+bq37+/PvroI5199tl6+umnVbt2bT333HPKzc2VJNWvX18jRozQ//73v5CaGzdu1Mcff6y33npLJ5xwgiRF9bz897//Ve3atTV8+PCI863Iyy+/rJNOOkmXX3552LGTTz5ZTZo0kaSwYGTVqlU644wz9OijjwZ+6T7zzDP1wQcf6PPPP9dFF12kpUuX6j//+Y+eeOIJ9ezZU5LUpUsXbd68WZ999llImCVJDRs21Kmnnhr4/P7779cRRxyhF154QUcffbQkqWvXrurZs6dmzJihu+++O3Buee+NsqHQr7/+qokTJ+p3v/tdyGtvjNFdd90V6JP01VdfBRp2u91u/frrr7r33nt14YUXSpJOP/107d+/XxMmTNDOnTsDQV2stm7dqmeffTZkPrt27dLmzZt13333qVevXoFz//3vf5dba//+/Vq5cqUuuOCCsGMHDhzQcccdF/IcB9u3b5+efPJJ/fnPf9a9994rqfTfde3atXXvvffqxhtvDLw/o3HgwAHVqlUr5P78r2PZ+3366afVsWNHSVK7du3Us2dPzZo1SyNHjlRBQYFycnI0a9aswO3PPvtsXXzxxXrkkUdCwrhmzZpZPr5jjz1WTz/9tJxOp/74xz8qKytLDz30kG6//Xa1bNlS27dv17Bhw0JWZeXm5ur222/Xjz/+GFKzW7duWrx4cSB8+uCDD1SvXr2QMCn43H//+99yu93KyclRcXGxFi9erE6dOgVW90mK6f7btm2rd999V/v377d8PgEA1QPb7gAAae+jjz7Sm2++qQEDBuikk04KO/7ZZ5/JGKMePXrI6/UG/uvRo4eKi4tDtsOcdtppgeBJkk455RQ1bdpUX3zxhSTplltu0YQJE/Trr7/q22+/1bvvvqtnnnlGksK2tvTu3Tvk8/POO087duzQ+vXrJZWGM/3799cTTz
yhTz/9VBMmTNAxxxxj+RiXLl2qTz/9NCTAkKQ6deoEtsZZWbdunbZt2xb22Dt16qSjjz5an3zyiSTpyy+/VLdu3QL
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x800 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Выбираем подмножество данных для кластеризации\n",
|
|||
|
"features = df[[\"Store_Sales\", \"Store_Area\", \"Daily_Customer_Count\"]]\n",
|
|||
|
"\n",
|
|||
|
"scaled_features = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(scaled_features, method='ward') # Метод \"Ward\"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(12, 8))\n",
|
|||
|
"dendrogram(linkage_matrix, labels=df.index, leaf_rotation=90, leaf_font_size=10)\n",
|
|||
|
"plt.title('Иерархическая кластеризация (дендрограмма)')\n",
|
|||
|
"plt.xlabel('Индекс')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Визуализация распределения кластеров**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABWcAAASgCAYAAACUtPqkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wVVfo/8M+021JILyTUFEoSehVBQBRUbGDbFXTt7q7rru6KX7t+LYtfXXXVddf6U9eGiigqiiKK9CZIryEhBdJIv3Vmzu+PCxcuSQiBJDfA5/16+dLMmTnzzMkFZ5575jmSEEKAiIiIiIiIiIiIiNqVHOoAiIiIiIiIiIiIiM5ETM4SERERERERERERhQCTs0REREREREREREQhwOQsERERERERERERUQgwOUtEREREREREREQUAkzOEhEREREREREREYUAk7NEREREREREREREIcDkLBEREREREREREVEIMDlLRKcMIUSoQ6ATxN8dERERhRrvR6i18TNFRK2ByVkiCqkdO3bgrrvuwqhRo5CdnY2zzz4bf/nLX7Bt27ag/dauXYtbb701RFEen+effx69evXC448/HupQ2lxlZSX+/ve/Y8KECcjOzsawYcNw/fXX4/vvvw/ab//+/bj11ltRVFQUokib5nQ68dJLL+HCCy9Ev379MHjwYFxzzTX45JNPWnyjvXLlSvTq1QsrV65so2iJiIjoeL300kvo1atX4OeamhrMmDEDa9asCWFUJ+eaa65Br169MH/+/DY7x9HjNn78ePzP//xPi/o4nmOmT5+O6dOnn1CM7YHPJ0TU3picJaKQ2blzJ66++mpUVVXhwQcfxFtvvYUZM2aguLgYV111FdavXx/Y95NPPsHu3btDF2wzTNPE559/jszMTHzxxRdwuVyhDqnNuN1uXHvttfjpp59w66234s0338RTTz2FuLg43HHHHXjnnXcC+y5btgyLFi0KYbSNE0Lg9ttvx6xZs3DNNdfg1VdfxT/+8Q9kZ2fjoYcewt///vdQh0hEREStZOvWrfjiiy9gmmaoQzkhubm5WLduHTIzM/HRRx+12XmuvPJKzJo1q836PxXw+YSIQkENdQBEdOb6f//v/yE6Ohqvv/46VPXwX0cTJkzApEmT8Morr+C1114LYYTHb8mSJdi/fz+ee+45TJs2DV999RWuvPLKUIfVJr799lvs3r0b8+fPR/fu3QPbJ0yYALfbjRdffBHTpk2DoiihC7IZa9euxcqVK/HWW29h1KhRge1jx46FLMt47733cMsttyA+Pj6EURIREREBn332GVJSUnDbbbfhb3/7G/Lz89GtW7dWP09SUhKSkpJavd9TCZ9PiCgUOHOWiEKmvLwcQogGsxgcDgfuv/9+XHDBBQCA//mf/8GcOXNQVFSEXr164bPPPgMA1NbWBl6tz8nJweTJk/Hpp58G9TV+/Hg89dRTuP7669GvXz888MADAICqqio8/PDDOOuss5CTk4OrrroKy5cvP+FrmT17NjIzMzF48GAMHz680VkH//M//4Prr78ejzzyCAYNGoQLL7wQhmHANE289tprOO+885CdnY2JEyfiv//9b9CxhmHgtddew+TJk9GvXz8MGDAA11xzDVasWNFkTA899BBGjRoFwzCCtj/55JMYPnw4fD4f3G43Hn30UYwZMwbZ2dmYNGkS3nzzzWNea3l5OQA0Ovvktttuwx/+8Ad4vV589tlnuO+++wAA5557buAVN8Mw8P777+Piiy9Gv379MHbsWDz77LPweDytMlbHo6ysrMlr+O1vf4u77roLkiQFtq1evRo33XQThg4diuzsbIwfPx4vvfTSMWfg7NixA7fddhsGDRqEQYMG4Y9//CMKCgqC9nnnnXcwadIk5OTkYPTo0Xj00UdRV1fX4ushIiKixq1cuRLXXXcdAOC6664Lep1+wYIFmDJlCnJycjBq1Cg88cQTcDqdgfaXXnoJky
ZNwvfff4/JkycjJycHl156KdatW4f169fjyiuvRL9+/TB58uSg+8gTub9qimEY+PzzzzFu3DhMmDABDoejwX3mxIkTceeddzY49tJLL8Xvf//7QD/N3UseXdbgaIWFhZgxYwbOPvtsZGVlYeTIkZgxYwYqKyuD9vP5fHjiiScwdOhQDBkyBPfeey8OHDjQZL8ncn93PNe8d+9e3H777Rg+fDj69++Pq6++utk3uvh80rbPJ0TUBEFEFCLvv/++yMzMFJdffrl47733xK5du4Rpmg32y8/PF7fccosYNWqUWLdunaioqBAul0tMnjxZjBw5Unz44Yfi559/Fg8//LDIzMwU//73vwPHjhs3TvTt21c888wzYvHixeKXX34RbrdbXHLJJeKss84SH3/8sfjpp5/En/70J9G3b1+xbNmyFl9HZWWlyM7OFm+88YYQQojPP/9cZGZmik2bNgXtd++994q+ffuKW265RSxbtkwsWLBACCHEQw89JLKyssSLL74oFi9eLJ577jnRu3dv8fLLLweOnTlzpujfv7949913xcqVK8XcuXPFxIkTxbBhw4TT6Ww0rtWrV4vMzEyxdOnSwDbDMMSoUaPEY489Fjj3uHHjxFdffSVWrFgh/u///k9kZmaKTz/9tMnr3bZtm+jbt684++yzxUsvvSTWrVsnvF5vg/0qKirE888/LzIzM8V3330n8vPzhRBC3H///SIrK0u88MILYsmSJeK1114T/fv3FzfeeGPg938yY3U8ysvLxYABA8SQIUPE//3f/4kVK1YIl8vV6L5bt24Vffv2FXfffbdYvHix+Pnnn8U999wjMjMzxVdffSWEEGLFihUiMzNTrFixQgghRG5urhg4cKCYOnWq+O6778S8efPExRdfLEaNGiXKy8uFEEJ8+eWXIisrK/A7/fDDD8WAAQPEjBkzWnQtREREFOzFF18UmZmZQgghamtrxXvvvScyMzPFe++9J3bu3CmEEGLu3LkiMzNT/PWvfxWLFi0SH3zwgRg6dKi4/vrrA/cjL774oujfv78YP368+PLLL8UPP/wgxo4dK84++2wxbtw4MWvWLPHzzz+LCy+8UAwfPjxwL3Ei91dNWbhwocjMzBQbNmwQQvjvo0aMGCE8Hk9gn5dffln069dP1NbWBrbt2rVLZGZmim+++UYIcXz3kkeOmxD+++h7771XCCGE0+kU48aNE1OmTBHfffedWL58uXjllVdE3759xUMPPRR0TJ8+fcQ111wjFixYID7++GMxbNgwMWXKFKHruhBCiGnTpolp06YFjjmR+7vmrtkwDDFp0iRx3XXXiZ9++kksWbJE3HrrraJPnz4iLy+vyX75fNK2zydE1DgmZ4kopF544QWRk5MjMjMzRWZmphg+fLj461//Kn799deg/e69914xbty4wM+Hbpx++eWXoP3uv/9+kZOTIyorK4UQ/pufCRMmBO0za9YskZmZKdavXx/YZpqmuPbaa8WUKVNafA3vvvuu6Nu3rygrKxNC+G9eBw0aJB588MEG15CZmSn27dsX2Jabmyt69eolXn311aB9n3/+eZGTkyMOHDgghBDi7rvvFm+//XbQPvPnzxeZmZli3bp1jcZlmqYYN26cuO+++wLbli1bFnTtEydObBDnyy+/LH788cdjXvP8+fPFWWedFfi99evXT9x4441i3rx5QfvNnj1bZGZmioKCAiGEEDt37hSZmZkNrvfQDeNPP/0khDi5sTpeq1evFueee27gGrKyssS1114rZs2aFXh4EEKIOXPmiJtvvlkYhhHYZhiGGDx4cOBh5Ojk7N133y3OOuusoAeGyspKMXjwYDFz5kwhhP+md+LEiUH9fvHFF+Ldd99t0XUQERFRsKOTjEf/f9o0TTFmzBhx0003BR136D7p0H3QoX4WLVoU2OfVV18VmZmZ4pNPPgls+/bbb0VmZqbYsmWLEOLE768ac8cdd4jJkycHfl6zZo3IzMwUc+fODW
zbu3ev6NWrl5gzZ05g2wsvvCCGDBkSSOIez73ksZKzW7ZsEb/5zW/E3r17g/q47bbbxMSJE4OOOeuss0R9fX1g2/f
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1400x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"plt.figure(figsize=(14, 12))\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 1: Store_Area vs Store_Sales\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Store_Area\"],\n",
|
|||
|
" y=df_cleaned[\"Store_Sales\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Store_Area vs Store_Sales\")\n",
|
|||
|
"plt.xlabel('Store_Area')\n",
|
|||
|
"plt.ylabel(\"Store_Sales\")\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 2: Items_Available vs Store_Area\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Items_Available\"],\n",
|
|||
|
" y=df_cleaned[\"Store_Area\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Items_Available vs Store_Area\")\n",
|
|||
|
"plt.xlabel(\"Items_Available\")\n",
|
|||
|
"plt.ylabel(\"Store_Area\")\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 3: Daily_Customer_Count vs Store_Sales\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Daily_Customer_Count\"],\n",
|
|||
|
" y=df_cleaned[\"Store_Sales\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Daily_Customer_Count vs Store_Sales\")\n",
|
|||
|
"plt.xlabel(\"Daily_Customer_Count\")\n",
|
|||
|
"plt.ylabel(\"Store_Sales\")\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 4: Items_Available vs Daily_Customer_Count\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Items_Available\"],\n",
|
|||
|
" y=df_cleaned[\"Daily_Customer_Count\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Items_Available vs Daily_Customer_Count Clusters\")\n",
|
|||
|
"plt.xlabel(\"Items_Available\")\n",
|
|||
|
"plt.ylabel(\"Daily_Customer_Count\")\n",
|
|||
|
"\n",
|
|||
|
"# Настройка графиков\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## KMeans (неиерархическая кластеризация) для сравнения"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Центры кластеров:\n",
|
|||
|
" [[ 1745.60833333 2093.9625 640.70833333 65702.91666667]\n",
|
|||
|
" [ 1334.08658009 1600.46320346 639.17748918 44322.25108225]\n",
|
|||
|
" [ 1590.27536232 1908.01449275 1048.26086957 55292.41545894]\n",
|
|||
|
" [ 1259.72477064 1511.40825688 853.94495413 72138.0733945 ]]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAASgCAYAAABWngGUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wUZf4H8M9sb+m9J5QEElroiFQLiIo/xVPOiorlDs96it7Ze/dUTgXsivXAihXFTpGiIh1SSEJ63V5m5vdHzMKSTc8mm+Tzfr18CfPMzjy7M7s83/k+RZBlWQYREREREREREREREVEQUfR2BYiIiIiIiIiIiIiIiI7FBAYREREREREREREREQUdJjCIiIiIiIiIiIiIiCjoMIFBRERERERERERERERBhwkMIiIiIiIiIiIiIiIKOkxgEBERERERERERERFR0GECg4iIiIiIiIiIiIiIgg4TGEREREREREREREREFHSYwCAi6kNkWe7tKvSKgfq++zpeNyIiIqLA6M/trP783qj38L4i6ruYwCCiVl144YW48MILm223WCw455xzMGLECKxbt867b1ZWFhYuXNji8a6//npkZWXhlltuCVidA8XpdOKVV17BggULMG7cOEycOBELFy7EBx984NMYeuaZZ5CVldWt53a5XHjggQfw8ccfd8vxWrquPWnNmjVYuHAhxo4di9GjR+PUU0/FU089BYvF4rPfs88+ixdffLGXatk2SZIwc+ZMZGVl4Y8//ujt6gTc1q1bcdVVV2HSpEkYMWIEZs6ciX/9618oKiry2e/rr7/G0qVLe6mWrdu3bx+uv/56TJ06FSNGjMDxxx+P6667Dnv27OnwsW655RbMnj07ALUkIiIKLowLjmBc0H1mz57tcw9s3boVV1xxRa/Vp6sKCgqQlZWFSZMmweVyBew8R39umzZtQlZWFjZt2tTu17fnNcXFxcjKysKaNWu6XN9A6Q8x5TfffIOLL74Y48ePx8iRI3HSSSfh/vvvR3V1dYePdez3iai/UPV2BYio77FYLFi8eDH27NmD//73v5gxY4a3TKFQ4Ndff0VZWRni4+N9Xmez2bB+/fqerm63qKqqwuLFi1FaWooLL7wQo0aNgiRJWL9+PW655RZs2bIF9957LwRBCMj5Kyoq8Oqrr+LBBx/sluPdeeed3XKczlq2bBmef/55XHrppfjb3/4GtVqNP/74Ay+88AJ++OEHvPXWW1Cr1QCAp556CldffXWv1rc1P/30E6qqqjBo0CC8/fbbuO+++3q7SgGzYcMGLF682NuoDgkJwaFDh/DSSy/h7LPPxnvvvYfU1FQAwCuvvNK7lW3B/v37ce6552LMmDG47bbbEBUVhbKyMrzxxhs455xz8Nprr2HMmDG9XU0iIqI+gXEB44Lu9t577+HgwYO9XY1OW716NQYPHozCwkJ8/vnnmD9/fkDOs2zZMphMpoAcu6/oDzHl+++/j1tvvRULFy7EokWLoNfrceDAAaxYsQLr16/H6tWrERYW1tvVJOp1TGAQUYc0BSm7d+/Gc889h6lTp/qUZ2dn48CBA/j888+xaNEin7L169dDr9cjNDS0B2vcPZYuXYqysjK88847SE9P926fOXMmEhMT8cQTT2DWrFk44YQTeq+SHTBkyJBeO7fL5cLKlStx2WWX4frrr/duP+644zBo0CAsWbIE69atwymnnNJrdeyINWvWIDc3F9OmTcNzzz2HW265pd8GE88//zxGjRqF//znP95tkyZNwowZM3DSSSfh5ZdfDrog+Fgvv/wyIiIisHLlSqhUR5pBJ554IubOnYtnn30WK1as6MUaEhER9Q2MCxgXkC9RFPHBBx/g3HPPxfbt2/H2228HLIGRnZ0dkOP2Ff0lpvzvf/+LU089FXfddZd32+TJkzF+/HicccYZeO+997B48eLeqyBRkOAUUkTUblarFZdffjn27t2LFStWNAtSAMBgMG
DGjBn4/PPPm5V9+umnmDNnjs9DQ6BxCp4VK1bgpJNOwogRIzBnzhy8/vrrPvuIoogVK1bgtNNOw6hRozBmzBgsXLgQGzdu9O7zzDPP4KSTTsK3336L008/3XusDz74wOdYr776KubOnYuRI0di2rRpuOuuu5oNMT3a7t278eOPP+Kyyy7zCVKaLFq0COeffz4MBoPf1/sbxrlmzRpkZWWhuLgYAOBwOHDXXXdh+vTpGDFiBObOnesd4lpcXOwNgG699Vaf6Wq2bNmCCy64AKNHj8bEiROxdOlS1NTU+JwnOzsb7733HqZOnYqJEyfiwIEDzYaKZ2VlYdWqVfj3v/+NiRMnIjc3F9deey2qqqp86v3iiy/ihBNOwKhRo7Bw4UJ88803PkOPm4YZP/PMMy1+nhaLBQ6HA5IkNSubMWMGrr/+eqSkpHjrBTT2rjl6+P2OHTtw2WWXYdKkSRg7diyuuuoq7N+/31veNCT67bffxqxZszB27Fj89NNP7frMOqK+vh7r1q3DrFmzcNppp8Fut+PDDz9stl9WVhaWLVuGs846C6NGjcKyZcsAAIcPH8YNN9yAiRMnYvTo0bj44ouxa9cun9cWFxfj5ptvxvHHH4+cnBxMmTIFN998M2pra/3Wyel0Yty4cXj44Yd9tns8HkyePNk7QuSPP/7AxRdfjHHjxiE3NxeLFi3Cr7/+2ur7raqq8jt3bGxsLG677Tbvb8KFF16IzZs3Y/PmzT73R0VFBW699VbMmDEDo0aNwtlnn42vv/662z6r9mh6D8fefwaDAf/61798gpz2/O7489577+HUU0/1TrH1zDPPQBRFb3lNTQ1uvPFGTJ06FSNHjsQZZ5zR7HeKiIgomDEuYFzQHXHBsW655Ra8//77KCkp8Zm6yOl04pFHHsGMGTMwYsQInH766fj000+bfbbLli3DAw88gEmTJiE3Nxc33ngjrFYrVqxYgenTp2PcuHH4xz/+4dOO7kybuCU//vgjKioqMHPmTMyfPx9bt27FgQMHvOXbtm1DVlZWs9FHu3fvRlZWFr766isA7Wv/tzVV0Lp163DeeechNzfXex+tWrWq2X4HDhzAeeed55266Njv27E62iZv73v+5JNPMH/+fIwaNQqTJ0/GP//5T5SXl7d43P4SU7YUXw0bNgy33norRowY4d1WU1ODu+++G7NmzcKIESMwceJELFmyxPvb4U97vjvd+R0gChQmMIioXWw2G6644grs2rULK1euxKRJk1rcd968ed7h4k0sFgu+//57nHbaac32v+uuu/D0009j/vz5eP755zF37lw88MAD+O9//+vd57HHHsOzzz6Lc889Fy+88ALuvfde1NXV4dprr4XdbvfuV1lZiXvuuQcXXXQRVqxYgeTkZCxdutQ7DPmTTz7Bo48+ivPPPx8vvvgilixZgg8//BD33ntvi+/nhx9+AIAW57nXarW44447MGXKlBaP0ZYHHngA33//PZYuXeoNBh555BGsXr0asbGx3oe4f/vb37x//uWXX7Bo0SLodDr85z//wb/+9S9s3rwZF110ERwOh/fYoijipZdewv33349bb70VgwcP9luHJ598EpIk4YknnsDNN9+M9evX44EHHvCWL1u2DI899hhOOeUUPPvssxg9ejSuu+46n2PExsbinXfewV/+8pcW32tkZCRGjx6NF198EUuXLsW6deu8jT21Wo2rrrrK21B75513AABnn322988bN27EX//6V+/ndt9996G0tBQLFy5sNtx82bJlWLp0Ke644w7k5ua2+zNrr48//hiiKOL0009HYmIiJk+e7K3nsZ5//nmcfvrpePrppzFnzhzU1NRg4cKF2LlzJ26//XY8/vjjkCQJ559/vvd92O12XHTRRTh48CDuvPNOvPjii7jooouwdu1aPPnkk37Po9VqMWfOHHz22Wc+jeGffvoJtbW1OOOMM7w9JiMiIvDMM8/gySefhN1ux2WXXQaz2dzi+5
05cya2b9+OCy+8EP/73/981r34y1/+ghNPPBFA41QE2dnZyM7OxjvvvIOcnBxUVVXh7LPPxpYtW3D99dfjmWeeQVJ
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 3 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"features_used = [\n",
|
|||
|
" \"Store_Area\",\n",
|
|||
|
" \"Items_Available\",\n",
|
|||
|
" \"Daily_Customer_Count\",\n",
|
|||
|
" \"Store_Sales\"\n",
|
|||
|
"]\n",
|
|||
|
"data_to_scale = df_cleaned[features_used]\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(data_to_scale)\n",
|
|||
|
"\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"kmeans = KMeans(n_clusters=4, random_state=random_state)\n",
|
|||
|
"labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
"centers = kmeans.cluster_centers_\n",
|
|||
|
"\n",
|
|||
|
"# Отображение центроидов\n",
|
|||
|
"centers_original = scaler.inverse_transform(centers) # Обратная стандартизация\n",
|
|||
|
"print(\"Центры кластеров:\\n\", centers_original)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов кластеризации KMeans\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Store_Area\"],\n",
|
|||
|
" y=df_cleaned[\"Store_Sales\"],\n",
|
|||
|
" hue=labels,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.scatter(centers_original[:, 0], centers_original[:, 3], s=300, c='cyan', label='Centroids')\n",
|
|||
|
"plt.title(\"KMeans Clustering: Store_Area vs Store_Sales\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Items_Available\"],\n",
|
|||
|
" y=df_cleaned[\"Store_Sales\"],\n",
|
|||
|
" hue=labels,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.scatter(centers_original[:, 1], centers_original[:, 3], s=300, c='cyan', label='Centroids')\n",
|
|||
|
"plt.title(\"KMeans Clustering: Items_Available vs Store_Sales\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Daily_Customer_Count\"],\n",
|
|||
|
" y=df_cleaned[\"Store_Sales\"],\n",
|
|||
|
" hue=labels,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.scatter(centers_original[:, 2], centers_original[:, 3], s=300, c='cyan', label='Centroids')\n",
|
|||
|
"plt.title(\"KMeans Clustering: Daily_Customer_Count vs Store_Sales\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### PCA для визуализации сокращенной размерности"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAAJHCAYAAAA+Dx+UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5hU5dkG8Pu06dt7X9ouLL1IUbo11ogaNcYWCyqJXzQmaqwpRpPYazTRmAjGEruiCIqKgChKlbbANrb3NvWU7491B4adWRa2zJb7d11esuedOeeZmTO75znPWwTDMAwQERERERERERERERH1I2K4AyAiIiIiIiIiIiIiIjocCxhERERERERERERERNTvsIBBRERERERERERERET9DgsYRERERERERERERETU77CAQURERERERERERERE/Q4LGERERERERERERERE1O+wgEFERERERERERERERP0OCxhERERERERERERERNTvsIBBRDQIGIYR7hAohKH82Qzl105EREREgXhtODTwcyainsYCBtEgdumllyI3Nzfgv3HjxmH+/Pn4/e9/j8bGxg7PKSgowL333ouTTjoJEyZMwPz583HzzTdj165dIY/zyCOPIDc3F3/84x978+WE9MQTTyA3Nzcsxw7mzTffRG5uLg4cONDrz/N6vfjzn/+M995772jDPCoXXXQRcnNzsWLFil49Tn/7LLujqakJv/3tb7Fx40b/tksvvRSXXnppn8XQ1e/zwoULcdttt/XosfPz83HxxRf3yL4OHDiA3NxcvPnmmz2yPyIiIuo/mLOEx2DKWXJzc/HEE0902L5nzx7MmjUL8+bNQ2Fhof+xubm5ePjhh4PuS9d1zJkzZ8Bee1ZWVuKvf/0rTjvtNEycOBGzZ8/GddddF5CTAL2Tl1RUVODaa69FaWlpj+wv1OdKREMPCxhEg1xeXh5effVV/3//+te/cMUVV+CNN97A4sWLA3pHfPzxxzj33HPx/fff4/rrr8c//vEP3HTTTSgsLMRPfvITrF27tsP+dV3H22+/jZycHLzzzjtwuVx9+fKGvKqqKvz73/+Gqqq9doz9+/dj06ZNyMnJwSuvvNJrxxlsdu7ciXfeeQe6rvu33XPPPbjnnnv65PjH8n3uSR999BE2bdrUI/tKTEzEq6++ivnz5/fI/oiIiKh/Yc4yuPVFznK4/Px8XHHFFbBarVi6dCmys7P9baIo4qOPPgr6vG+++QZVVVV9FGXP+vbbb3HOOedg9erVuOyyy/D3v/8dd9xxB9xuNy699FK8/fbbvXr8devW4fPPP++x/b366qu44IILemx/RDRwyeEOgIh6l8PhwKRJkwK2HXfccWhtbcXjjz+OLVu2YNKkSSguLsatt96KOXPm4NFHH4UkSf7Hn3LKKbj44otx66234tNPP4XJZPK3ffnll6ioqMDDDz+Mn/3sZ3j//fd5kTHIvPnmm0hLS8PixYtxyy23oKioCFlZWeEOa0AaOXJknxznWL/P/ZXJZOrwe4yIiIgGD+Ys1JP27duHyy+/HHa7Hf/+97+Rmpoa0D5lyhRs3LgRO3bsQF5eXkDbBx98gDFjxmDnzp19GXK3NTQ04Fe/+hWys7Pxr3/9C1ar1d926qmn4tprr8Xdd9+N2bNnIz4+PoyRdh2v/4moHUdgEA1R48aNAwCUlZUBAF566SV4vV7ceeedAYkAAFitVtx6660477zzOgzhfuONN5CTk4OpU6dixowZePXVV4947IULF+LPf/4zLr/8ckyYMAF33HEHgLaLrrvvvhvHH388xo8fj5/85CdYv359wHM9Hg/uv/9+nHDCCZg8eTJuv/12eDyegMcEGw67YcMG5ObmYsOGDf5t+/fvxy9+8QtMnz4dxx13HBYvXox9+/YFHOuvf/0r5s2bh3HjxuGss87C8uXLA/ar6zqefvppzJ8/HxMnTsQNN9wQdJj74br6vFWrVuGnP/0pJk
+ejHHjxuG0007DsmXLALRNq3PiiScCAG6//XYsXLjQ/7zXX38dixYtwqRJkzBhwgScc845+PDDDwP2nZube8RpgzRNw9tvv40FCxbgpJNOgs1mC/oZ+3w+PPjgg5g7dy4mTJiAq666Cm+//XaH4eVvvfUWTj/9dIwfPx5nn3021q9fj7y8vE6HZy9fvhyLFi3C5MmTccIJJ+Duu+8OeK+eeOIJnHbaaVi5ciXOPPNMjB8/Hueccw42bdqEzZs344ILLsCECRNw5plndjif9uzZg8WLF2PKlCmYMmUKlixZgpKSEn97+3nzyiuvYMGCBZgyZYq/V19n7/GGDRtw2WWXAQAuu+wy//l46Ln585//HIsWLerwem+44QacffbZ/p83btyIn/3sZ5g4cSKmT5+OW2+9FXV1dSHfL+DYv8+HvuZDvyuHxw4A27dvx+WXX46pU6di8uTJuOKKK7B582YAbZ/Jk08+CSBw6Leu63juuedw8sknY9y4cTj11FPx0ksvdTjOLbfcghtvvBGTJk3ClVde2WEKqTfffBN5eXnYsmULLrzwQowfPx4LFizA888/H7Cvqqoq3HTTTf7v+N13341HHnkk4LtCRERE/RdzFuYsXclZDrVv3z5cdtlliIiIwNKlSzsUL4C24lh8fHyHURiqquLjjz/GGWec0eE5Xfnc6+rq8Pvf/x4LFizAuHHjMH36dCxZsiQgH7r00ktxxx134LnnnsP8+fMxfvx4XHTRRdi6dav/MW63G/feey/mzp3rfz8Pv8493Ntvv42qqir87ne/CyheAG0jTm655RZccsklaGlp6fDcUNO13nbbbQGfV3FxMa677jrMmDEDEydOxIUXXugfcfHmm2/i9ttvBwCceOKJAZ/Z66+/jjPOOMM/NdwTTzwBTdMCjnP55ZfjnnvuwZQpU3D66adD07SAPKL9u7F+/Xr8/Oc/x8SJE3HCCSfgb3/7W8C+WlpacPfdd2PWrFmYPHkybrrpJrz44ov9avo2Ijp6LGAQDVEFBQUAgIyMDADAmjVrkJeXh6SkpKCPnzVrFm666SYkJCT4tzU0NODTTz/Fj3/8YwDAueeei23btuH7778/4vGXLVuG8ePH4+mnn8b5558Pj8eDyy+/HJ988gluuukmPPnkk0hOTsbVV18dcGH4m9/8Bq+99hoWL16MRx99FI2NjXjxxReP+vVXVlbiwgsvRGFhIe6991787W9/Q01NDS6//HI0NDTAMAwsWbIEr7zyCq688ko888wz/gugQ4fe/u1vf8NTTz2F888/H08++SSio6Px0EMPHfH4XXneZ599hiVLlmDs2LF4+umn8cQTTyAjIwN/+MMfsGXLFiQmJvpvEl9//fX+fy9btgx33303TjrpJDz77LN48MEHYTKZcMstt6CiosK//1dffRU33HBDp3F+8cUXqK6uxo9//GNYLBb86Ec/wltvvQWv1xvwuLvvvhv//ve/8bOf/QxPPfUU4uPjcddddwU85u2338Ztt92GKVOm4Omnn8app56KG264IeCC83BPP/00br75ZkyaNAmPP/44lixZghUrVuDSSy+F2+32P66iogIPPPAArrvuOjz22GNoamrCjTfeiJtvvhkXXHABnnrqKRiGgZtuusn/vIKCAlx00UWora3FX/7yF9x3330oKSnBxRdfjNra2oA4nnzySdx66624++67MXny5CO+x2PHjsXdd9/tf2+CTRt19tln4/vvv0dRUZF/W1NTE7744gucc845ANqGsF9xxRWwWCx49NFH8bvf/Q5ff/01LrvssoDXf7hj+T4fjZaWFlx99dWIiYnBE088gUceeQQulwtXXXUVmpubccEFF+D8888HEDj0+95778Xjjz+Os88+G3//+99x2mmn4c9//jOeeuqpgP1/+OGHsNvteOaZZ3D11VcHjUHXdfzqV7/C6aefjueeew5TpkzBX//6V6xZswZA21zLl19+Ob777jv87ne/w/3334
9du3bhhRdeOKbXTERERH2POQtzlq7kLO3279+Pyy+/HA6HA0uXLg15nkiShFNPPbVDAWP9+vXweDwdOrt05XM3DAO
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"reduced_data = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация сокращенных данных\n",
|
|||
|
"plt.figure(figsize=(16, 6))\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: KMeans Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 42,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA14AAAImCAYAAABD3lvqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACDTUlEQVR4nOzdd3hUVf7H8c9MekghhSSEGgg9CRBKAGlGFhGwIHbFsiKIuP4EFUVZO4pIExAVUcGCFZZVUVkrSu81oQSS0FJIh/Qyvz9CRiMthGRuyvv1PHlC7j33zneGs2s+nHPPMVksFosAAAAAANXGbHQBAAAAAFDXEbwAAAAAoJoRvAAAAACgmhG8AAAAAKCaEbwAAAAAoJoRvAAAAACgmhG8AAAAAKCaEbwAAAAAoJoRvAAAAACgmhG8AAAAAKCaEbwAoAqMGjVK7dq102233XbeNhMmTFC7du301FNP2bAyAJV17NgxtWvXTsuXLze6FAB1AMELAKqI2WzWjh07lJiYeNa5nJwc/frrrwZUBQAAagKCFwBUkY4dO8rJyUk//PDDWed+/fVXubi4yN/f34DKAACA0QheAFBFXF1dNWDAgHMGr++++05XX3217O3tzzr3008/6cYbb1RoaKiuuOIKvfzyy8rJyZEkRUZGql27duf8OnbsmCRp7dq1uuOOO9StWzdFREToscceU0JCQrnXeOyxx855j4tNoSqbQnmur7/avXu37r//fkVERCg8PFwPPvigDh48aD2/ceNGtWvXThs3bpQkHThwQIMGDdJtt92mefPmnfc15s2bJ0n68ssvdc011ygkJKTc+YtN2/ziiy/Oed+/Xlc2nexi7SpbQ0U/mwu9/vnOl/09PPXUU4qMjCz3up999lm5z/Cvr7N169ZybT/++GO1a9eu3D3y8vI0c+ZMDR48WCEhIQoPD9d9992n6Ojocteer65Ro0aVa1NWx7n8vX+UGTVqVLn75Ofn680339SQIUMUGhqqwYMHa+HChSopKSl3zd9r2bhxY4WuvRiLxaLJkycrLCxMa9asqfB1ACBJZ/8GAACotKFDh+rRRx9VYmKiAgICJEmnT5/W77//rg8++EC///57ufbffPONHn/8cV177bV69NFHdfz4cc2ePVsxMTH64IMPNH/+fBUUFOjkyZN6+OGHNW7cOA0cOFCS5OfnpxUrVujJJ5/U8OHDNXbsWKWnp2vu3Lm69dZb9Z///Ec+Pj6SSn9hvfXWW3XjjTdKkvV+FdGxY0c999xz1p+//PJLffXVV9afN2zYoNGjRysiIkKvvPKK8vPz9c477+i2227TF198odatW591z9dff10hISEaN26cPD091a9fP0nSCy+8IEnW1wsICNDmzZs1ZcoU3XTTTZoyZYoaNGggSRWqPy8vT6GhoZoyZYr12Pmu++tn+/d2la3hUj6bZ599Vp06dTrn63/++eeSpL179+rFF188q+3fZWZmas6cOec816BBA/3yyy/q1q2b9dh3330ns7n8v8VOmjRJW7Zs0cSJE9W8eXPFx8frjTfe0GOPPaaVK1fKZDJZ29500026+eabrT+X/T1WJYvFogcffFA7duzQww8/rPbt22vjxo2aM2eOjh49qpdeesna9u99tnXr1hW+9kJefvllffvtt3rzzTfVt2/fKn+PAOo2ghcAVKGBAwfKxcVFP/zwg+69915J0o8//igfH59yv+hKpb9IzpgxQ/369dOMGTOsx1u2bKl7771Xq1evtgaBstGt5s2bq0uXLpKkkpISzZgxQ3379tXMmTOt14eHh2vo0KF67733NGnSJElSbm6uWrZsab227H4V4ebmZr1Okv74449y52fOnKkWLVpo4cKFsrOzkyT17dtX//jHPzR37ly98cYb5drHx8drzZo1+vrrr9WmTRtJsoZUNzc3SSr3eitXrpQkPf3009bAI0mOjo4XrT03N1e+vr7l7ne+6/762f693a
5duypVw6V8NsHBwed9/bLj+fn552z7d3PnzlVgYKDS09PPOte/f3/9/PPPeuKJJyRJiYmJ2r59u7p3767jx49LkgoKCpSdna0pU6Zo6NChkqSePXvq9OnTmjZtmlJSUtSoUSPrPQMCAsrVU/b3WJV+//13rVu3TrNmzdKwYcMkSVdccYWcnZ31xhtv6O6777b2p7/32dWrV1f42vOZOXOmPv/8c82fP1/9+/ev8vcHoO5jqiEAVCFnZ2dFRkaWm264cuVKXXPNNeVGCCTp8OHDSkxMVGRkpIqKiqxfPXr0kJubm9auXXvB14qNjdXJkyc1fPjwcsebN2+url27atOmTdZjCQkJcnd3r4J3WF5OTo52796ta665xhosJMnDw0NXXnlluRrK2s+ePVsREREX/UW3TFhYmCTp/fffV3JysgoKClRUVFSha6vqfVemhkv9bKrKgQMH9Pnnn+vf//73Oc9HRkYqLi5Ohw8fliT98MMP6ty5s5o0aWJt4+joqPfee09Dhw5VUlKSNmzYoM8++8y6QExBQcEl11VSUqKioiJZLJaLtin7+mvbTZs2yd7eXkOGDCl3zXXXXWc9fz6Xc60kffLJJ1q4cKGGDRtWblQUAC4FI14AUMWuueYaPfzww0pMTJSTk5PWr1+vRx999Kx2GRkZkkqnZZ1ralZycvIFX6fsel9f37PO+fr6KioqSlLpyNqJEyfUtGnTS3sjFXDq1ClZLJbz1nDq1Klyxx588EF5eHiUm6p4MT169NCUKVO0cOFCzZ8//5LqO378+AWn5FVnDZf62VSVl19+WcOGDVPXrl3Ped7f318hISH6+eef1apVK3333XcaPny4tb+U+eOPP/TKK6/o8OHDatCggdq3by9XV1dJumB4Op8FCxZowYIFsrOzk6+vr/r27av/+7//K7fgTNko8V/17NlTUun0SS8vr3IhVpJ15O1Cn+flXCtJ+/btU9++ffXtt9/qnnvuUceOHS/YHgDOheAFAFWsf//+atCggX744Qe5urqqadOmCgkJOaudh4eHpNJnacp+ufwrT0/PC75Ow4YNJUkpKSlnnTt58qS8vLwkSdHR0crLyztrQYyq4O7uLpPJdN4aymosM2nSJP3www965JFH9Mknn1R4Stott9yiNWvWqKioSM8++6yaNm2qcePGXfCakpIS7dy5UyNHjqzQa/x9RPJya7jUz6YqfP/999qzZ0+5qafnctVVV+nnn3/WNddcoz179mj+/PnlgteRI0c0fvx4DRo0SO+8846aNWsmk8mkTz755KypptLFPzup9PO75ZZbVFJSohMnTmj27Nl64IEH9PXXX1vbvPDCC+WC8l+f0/L09FR6erqKi4vLBaiyf6Ao6+/ncjnXStL//d//6e6779awYcM0ZcoUffnll2eFOAC4GKYaAkAVc3R01KBBg7Rq1Sp9//331mdK/q5Vq1by8fHRsWPHFBoaav3y9/fXzJkzzxqB+LugoCA1atRI3377bbnjR48e1Y4dOxQeHi5J+u2339ShQwd5e3tf8nspKSm54C+Yrq6uCgkJ0ffff6/i4mLr8VOnTum3334767m2kJAQzZ8/X8ePH9frr79e4TreeOMN/fbbb5o2bZquueYahYaGXvT5qm3btiknJ0cREREXbFc2evP3xSUut4ZL/WwuV0FBgaZPn67x48eXe/7qXAYNGqSdO3fq448/Vrdu3eTn51fu/J49e5Sfn68xY8aoefPm1mBVFrrKPrOyFQEv9tlJpYvBhIaGqnPnzrrmmmt05513av/+/crMzLS2CQoKKve/hb8+T9ezZ08VFRWdtWpoWXC70Od5OddKpSOUzs7OevbZZ7V371598MEHF32/APB3jHgBQDUYOnSoxo4dK7PZXG5Fvb+ys7PThAkT9Oyzz8rOzk5XXnmlsrKytGDBAiUlJV10ipzZbNbEiRM1efJkPfbYY7ruuuuUnp6u+fPny9PTU/fdd5/27t2rTz75RM
OGDdOOHTus1548eVJS6chGWlraWaEsLS1NMTExio+Ptwa483nsscd0//33a8yYMbrjjjtUWFiohQsXqqCgQOPHjz+
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"inertias = []\n",
|
|||
|
"clusters_range = range(1, 11)\n",
|
|||
|
"for i in clusters_range:\n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" kmeans.fit(data_scaled)\n",
|
|||
|
" inertias.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range, inertias, marker='o')\n",
|
|||
|
"plt.title('Метод локтя для оптимального k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Расчет коэффициентов силуэта"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACg6ElEQVR4nOzdeVhUZf8G8HtmYBj2fRXEBdkXlUVx37LFN3dtU9vUMsu3TYs3f7ZnuWRm4ZaWS2a5ZmpFWllqIYICyi6isg77vs/8/kAmCVQGZzwD3J/r4krOnBm+5xGDe87zfB+RUqlUgoiIiIiIiDRGLHQBREREREREXQ2DFhERERERkYYxaBEREREREWkYgxYREREREZGGMWgRERERERFpGIMWERERERGRhjFoERERERERaRiDFhERERERkYYxaBEREREREWkYgxYRdRuzZ8/G7NmzWxw7e/YsJk6cCG9vb+zZs0erX//111/HmDFj1H7emDFj8Prrr2uhIiLSFg8PD6xbt07oMohIQHpCF0BEJJTCwkI8++yz8PHxwZYtW+Dh4SF0SURERNRFMGgRUbf15ZdfoqamBitWrIC9vb3Q5RAREVEXwqmDRNQtFRcXY9euXXjwwQdbhayMjAwsWrQIQ4cORf/+/TF79mxER0e3OOf333/H1KlTERAQgCFDhuDNN99EeXl5i3O+/vprjB49GgEBAXjppZdQUVEBAFi/fj1CQ0MRFBSEN998E3V1darn1NXV4e2330ZwcDAGDRqkmnpUWVmJxYsXo3///hg5ciS+/vpr1XMyMzPh4eGB/fv3q47V1tZi7NixLe7StTV1MjIyEh4eHoiMjGzzc6Dpzl9QUFCraY979uzBhAkT4Ovri1GjRmHdunVobGxUPd7WVMkba23+Wm19NNd5u2mTbV3Tv8nlcrz22msIDQ3FgAEDMGvWLJw7d071+L+neCmVSjz88MPw8PBAZmZmi/NuVeuiRYswYsQIKBSKFl//jTfewL333gsAyM3Nxcsvv4zBgwcjICAAs2fPxvnz5wEA69atu+nXaK4vKSkJzz//PAYPHgwfHx8MHz4c7733Hmpqam45BqdOnbpl7e29RgA4fvw4pkyZgoCAgFu+1o32798PDw8PxMbGYsqUKfD398eDDz6In376qcV5mZmZWLJkCYYNGwYfHx+EhoZiyZIlKC4uVp2TmJiIxx57DAMGDMC4ceOwe/du1WNtff8Crb9Pbjet78bvu+3bt7f69/X333/D09MTn3/++U1f498+/fRTeHl54cCBA+1+DhF1bryjRUTdilKpRE5ODt577z00NDTgmWeeafF4WloaZs6ciV69emHp0qXQ19fH9u3b8fjjj2Pr1q0ICQlBVFQUFixYgIkTJ+KVV15BamoqPvnkE6SkpGDnzp2QSCQ4duwY3nnnHcyePRsjRozAt99+i2PHjgEAjh49ivfeew9ZWVlYtWoVZDIZwsLCAAArV67Evn37sGTJEjg4OGDNmjXIyspCVlYW7rvvPnz66af4448/8M4778DBwQFjx45t8zq/+OKLFiHhTqxevRrl5eUwMzNTHdu4cSPWrFmDWbNmISwsDImJiVi3bh1ycnLwwQcftOt1fXx88O233wJoCm179+5VfW5iYqKR2isrK/HII4+gsbERixcvhr29PbZu3YqnnnoKBw4cQK9evVo95/vvv28RxG40ffp0zJgxQ/X522+/3eKxn3/+GZGRkQgNDQUA1NTU4KeffsK8efNQV1eHuXPnor6+Hm+++Sb09fURHh6O2bNn47vvvsOMGTMwfPjwFq/75ptvAgAcHBwgl8vx2GOPoX///vjwww8hlUrxxx9/4Msvv4SdnR3mz59/03GoqamBg4MD1q5d22bt7b3Gq1ev4r///S+GDx+Ol156SfU9cbPX+rdnnnkGs2bNwksvvYS9e/fixRdfxMaNGzFy5EhUV1djzpw5sLS0xJtvvglTU1OcO3
cOn332GWQyGd555x1UV1dj3rx56NGjB9atW4eYmBi8+eabcHJywogRI9pVg7pmz56NiIgIfPTRRxg1ahSkUin+97//oX///nj22Wfb9RpbtmxBeHg43nvvPUyZMkUrdRKR7mHQIqJuJSoqCqNGjYK+vj42b97c6hftzz77DFKpFNu3b1f9sj9q1Cj85z//wYoVK7B3714cPHgQvXr1wvLlyyEWizF06FAYGhpi2bJlOHHiBMaMGYMNGzZg0KBBWLp0KQBg0KBBGDp0KMrLy7F8+XL4+voCAMrKyrB582Y899xzUCgU+PbbbzF//nzMmjULAGBjY4OHHnoIFhYWWLVqFfT19TFixAikpKRg48aNbQatnJwcbN68GT4+Prh48eIdjVd8fDy+//57eHl5oaysDABQXl6O8PBwPPTQQ6rrGzZsGCwsLLB06VI8+eST6Nev321f28TEBP379wcA/PnnnwCg+lxTDhw4gKysLBw4cABeXl4AgIEDB2Ly5MmIiopq9fdfWVmJVatW3XTsHBwcWtR4YyAcNmwYHBwccPDgQVXQ+uWXX1BVVYXJkyfj/PnzSE9Px9dff40BAwaoarnnnnsQHh6OdevWwcHBocXr3vi1Tp48CS8vL6xdu1b1+JAhQ3Dq1ClERkbeMmhVV1fDzMzsprW39xoTEhJQX1+Pl156Ce7u7rd9rX+bPXs2Fi5cCAAYPnw4pkyZgs8//xwjR45ERkYGHBwc8NFHH8HFxQUAMHjwYMTGxuLMmTMAgKysLPj5+eF///sfXFxcMGzYMOzatQt//vmn1oKWSCTC8uXLMXHiRKxcuRISiQQlJSXYtm0bJBLJbZ//zTffYOXKlXjnnXcwffp0rdRIRLqJUweJqFvx9vbGhx9+CHNzc4SFhbW663PmzBmMHj26xS+Oenp6mDBhAi5cuIDKykq8//77OHjwIMRiMRoaGtDQ0IB7770XYrEYUVFRaGhoQEJCAoYNG6Z6DQMDAwQEBMDQ0FAVsoCmX85ramqQnJyM5ORk1NbWqu5qAE2/aBsYGMDf3x/6+votnnfx4sUWU/WaffTRRwgKCsLo0aPvaKyUSiXee+89TJ8+HZ6enqrj586dQ01NDcaMGaO6/oaGBtU0wVOnTrV4nRvP+fe0uvbW0dHnRkdHw9nZWRWyAMDQ0BA///xzi7s2zcLDw2FpaYlHHnlE7a8lFosxZcoUREREoLq6GkBT0BsyZAgcHBwQEhKC8+fPo3///mhsbERDQwPMzMwwdOhQREVF3fb1hw0bhp07d8LAwABpaWk4fvw41q9fj6KiohbTT9uSk5MDU1NTta/p33x8fKCnp4edO3ciKysLdXV1aGhogFKpbNfzb7ybIxKJcM899yAuLg41NTXw8vLCrl270KNHD2RkZODEiRPYsmUL0tPTVdfn5uaG9evXw8XFBXV1dfjjjz9QWlqKvn37tvg6CoWixfddW/U1n9Oe2l1cXPDqq6/iwIED2LNnD5YuXaoKg7fy22+/4e2330ZQUBBmzpx52/OJqGvhHS0i6lZMTEwwZcoU9OnTB4888ghefPFFfPvtt6p3pktLS2FjY9PqeTY2NlAqlaioqICxsTEMDAwANP3ieaOysjIUFhaisbERlpaWLR6zsLCAubl5i2PNU68KCgpUoenfzzM3N4eFhUWr5zU0NLRYuwI0BcVjx47h0KFDOHLkSHuG5KYOHjyIjIwMbNiwAR999JHqeElJCQDc9A6KXC5X/TkrK6vVGHWkjoMHD0IkEsHa2hqBgYH473//2+qX67aUlJTA2tq6XV8nIyMD27ZtwxdffIHs7OwO1Tpt2jRs2LABERERGDx4MP766y+sWrVK9bhUKgXQtG7rxrU67bkzolAo8PHHH+Prr79GVVUVHB0d4e/vr/pevJWsrCz06NGjA1fUkouLC1auXImPP/5YNc2zWUhIyG2fb2dn1+Jza2trKJVKlJWVQSaT4csvv8SGDRtQUlICGxsb+Pr6wt
DQsNX6x7KyMgQHBwMAbG1tcf/997d4/Iknnmj1tf9dX3h4OMLDwyGRSGBjY4Nhw4bhv//9700b4zzwwAP48MMPAQB
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for i in clusters_range[1:]: \n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
" score = silhouette_score(data_scaled, labels)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Построение диаграммы значений силуэта\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициенты силуэта для разных k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта: 0.234\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAJzCAYAAAA4M0NGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gVVfrA8e+ZuTU9JCSUAKFJ70WwASpWrGtZXdvay+ra1l133Wr57brq7qpYd11XEXuvKIoNUUAQEQGl904g7baZ8/tjkptccm8Kuam8n+fh0cy5d+bcySSZd84576u01hohhBBCCCGEEAkZLd0BIYQQQgghhGjtJHASQgghhBBCiDpI4CSEEEIIIYQQdZDASQghhBBCCCHqIIGTEEIIIYQQQtRBAichhBBCCCGEqIMETkIIIYQQQghRBwmchBBCCCGEEKIOEjgJIYQQQgghRB0kcBJC7Lfzzz+ffv36xfwbPXo0F1xwAXPnzm3p7gkh2rl+/frxwAMP1Nj+ww8/MH78eCZMmMCaNWsSvv+BBx6gX79+DBkyhJKSkrivefbZZ+nXrx9HHnlksrothGijJHASQjTKwIEDef7553n++eeZPn06f/3rX3G73VxyySX8+OOPLd09IcQB5scff+Siiy7C7/czbdo0CgsL63xPJBLho48+itv2zjvvJLmHQoi2SgInIUSjpKWlMXz4cIYPH86oUaM4+uijeeCBBzAMg1deeaWluyeEOICsXLmSCy+8kNTUVKZNm0a3bt3q9b6RI0fy7rvv1ti+detW5s+fz4ABA5LdVSFEGySBkxAi6fx+P16vF6VUdNv555/P+eefH/O6e++9l379+sUEWNOmTeOoo45ixIgRnHfeefzwww8APPPMM/Tr14/Vq1fH7OP1119nwIABbN68GYCZM2dy7rnnMmLECAYPHsxxxx3HM888E/Oe3/zmNzWmGFb+27BhQ/Q1+07Nee6552pMDXrnnXc44YQTGD58OKeffjrz58+PeU9d/fnqq6/o168fX331Vcz79j1f9Tl/oVCIv/3tb0yYMIEBAwbEfK7agth9933nnXcyZMgQPv30U6BqOlO8f9X7XZ9zv23bNn79618zfvz46Pd44cKFABx55JF1fl/mz5/Peeedx7Bhwxg7diy//vWv2bVrV3T/r7zyCv369WPRokWcdtppDB06lJNOOon33nsvph/FxcX83//9H0cffTRDhgxhypQpvPTSSzGvqd6f/v37M2bMGK699lp2796d8FwCrFq1il/84heMHTuWMWPGcMUVV7By5cqEr6/t/Fb/vq1Zs4brrruOQw89lOHDh3P++efz9ddfR9s3bNgQfd8bb7wRc4xZs2ZF26p75513OP300xkxYgSHHnoof/jDH9izZ0+NvlUX71o88sgj+c1vfpPw631V9rX651uwYAFnn302Q4YM4dBDD+X2228nEAgk3Me+Vq5cyQUXXEB6ejrTpk2jS5cu9X7vCSecwOeff15jut57771Hz5496d+/f433zJw5k9NPPz3a3zvuuIOysrIar6nPz/+cOXO4+OKLGTZsGIceeih///vfsSwr+rrZs2dz1llnMWLECMaMGcNVV11V6zUlhGgaEjgJIRpFa00kEiESiRAOh9m+fTv33nsvoVCIn/zkJwnft27dOp588smYbe+//z633347J554IlOnTsWyLK688kpCoRAnnXQSXq+X119/PeY9r732GuPHj6dz5858/PHHXHPNNQwaNIiHHnqIBx54gG7duvGXv/yFRYsWxbyvY8eO0SmGzz//PFdddVWtn3PPnj3885//jNn27bffcvPNNzN8+HAefvhhOnfuzJVXXsmOHTsAGtSfhop3/h5//HH+97//ceGFF/K///2P559/ngcffLBB+/3222959tln+ec//8mIESNi2qqfrz/84Q8xbfX5rKWlpZxzzjl89dVX/OpXv+LBBx/E6/Vy8cUXs2
bNGh588MGYPl911VXR4+Xl5TFv3jwuuugifD4f//znP/ntb3/L3LlzueCCC2rcYF9xxRUcddRRPPjgg/Ts2ZPrr7+eTz75BIBAIMC5557Lm2++yaWXXspDDz3EqFGj+N3vfscjjzwSs58JEybw/PPP8/TTT3PTTTcxe/Zs7rzzzoTnb+vWrZx99tmsWbOGP/3pT/z9739nx44dXHjhhRQVFdV67quf332/bytWrOD0009nw4YN3Hbbbdxzzz0opbjwwgtrrCdMTU2tMe3snXfewTBi/+Q/9NBD3HjjjQwfPpz777+fa665hhkzZnD++ec3KGBJhs2bN3PJJZeQnZ3Ngw8+yHXXXcfrr7/OLbfcUq/3r1q1igsvvJC0tDSmTZtGfn5+g45/7LHHYllW3PN24okn1nj9m2++yTXXXEOvXr2YOnUqv/jFL3jjjTe4+uqr0VoDDfv5v/nmmxk1ahSPPPIIU6ZM4d///jcvvvgiAOvXr+fqq69m8ODBPPzww9x5552sXr2ayy+/HNu2G/Q5hRCN42rpDggh2rZ58+YxaNCgGttvvPFGevfunfB9d911F3379mXJkiXRbbt27eLcc8/lxhtvBJwRlMqn9QMGDGDy5Mm88cYb/PKXv0QpxZYtW/jyyy/5+9//Djg3l6eddhq/+93vovscMWIEBx98MF999RXDhg2Lbvd4PAwfPjz69apVq2r9nPfffz9dunSJGW3YsmULxx57LHfccQeGYZCbm8uUKVP45ptvOProoxvUn4aKd/6+/fZb+vfvz8UXXxzdVjlSU1+VI35HHXVUjbbq5ysYDMa01eezvvrqq2zcuJFXX301OvVp5MiRnHrqqcybN48zzzwzps/du3ePOea9995Lz549efTRRzFNE4Bhw4Zx4okn8vLLL/Ozn/0s+trzzz+fa665BoDDDz+c0047jalTpzJhwgReeeUVfvjhB5577rlocHj44YcTiUR46KGH+OlPf0pWVhYAHTp0iPZhzJgxfPHFFzHnfF9PPvkkoVCI//73v3Ts2BGA/v37c84557Bo0SImTJiQ8L3VP+u+37cHH3wQj8fDU089RVpaGgATJ05kypQp3H333TGjZUcccQSfffYZoVAIj8dDMBjkww8/ZMyYMdERwj179vDwww9z1llnxQTBBx10ED/72c9qnM+m9vjjj5Odnc3UqVOj31vDMLjttttYvnx5jVGv6tasWcMFF1zAjh07CIfD+xVM5ObmMmbMGN59911OPvlkADZu3MiiRYu4++67efjhh6Ov1Vpzzz33cPjhh3PPPfdEtxcWFnLRRRfxySefMHHixAb9/J955pnR63X8+PHMnDmTjz/+mJ/+9Kd8++23BAIBrrjiimhA2KlTJz788EPKysqi14MQoulJ4CSEaJRBgwbx5z//GXBuKPbu3cunn37KP/7xD8rKyrjhhhtqvOfTTz/liy++4PHHH+eCCy6Ibv/pT38KgG3blJWV8f777+Pz+ejatSsAZ5xxBm+99Rbz589nzJgxvPbaa6SmpjJ58mQALr30UsAZ2Vi9ejXr1q1j8eLFgBOE7a8ffvghOupQ2UeAY445hmOOOQatNWVlZbz77rsYhkHPnj2btD+Jzt+QIUN47LHHmDFjBuPGjSM1NbXeN5FaaxYuXMg777xTYySrPurzWb/++msKCgpi1ov4/X5mzJhR5/7Ly8tZtGgRl1xySXSUE6Bbt2707t2b2bNnx9zon3baadH/V0oxefJkHnjgAQKBAHPnzqVr1641RtROPvlkXnrppZgAp/JYtm2zbNkyvv76aw455JCE/fz6668ZPnx4NGgC5yZ31qxZdX7G2sydO5dJkybF3CS7XK7o6GxpaWl0+7hx4/j000/56quvOPzww/n0009JS0tj9OjR0cDpm2++IRQKMWXKlJjjjB49mq5duzJ37txGB06V584wjBqjXZVs2yYSiTB//nwOO+ywaNAETgAIzjmtLXB66623GDx4MP
/4xz+4+OKL+dWvfsWTTz4Zc0zLsqIjQeBcE9WPBc50vTvuuIOSkhLS0tJ4++23GTRoED169Ih53apVq9iyZQtXXHF
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Применение K-Means\n",
|
|||
|
"# ========================\n",
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
|
|||
|
"df_clusters = kmeans.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg = silhouette_score(df_scaled, df_clusters)\n",
|
|||
|
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью K-Means')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Средний коэффициент силуэта, равный 0.234, указывает на слабо выраженную кластерную структуру: значение заметно ближе к 0, чем к 1, то есть кластеры существенно перекрываются. \n",
|
|||
|
"\n",
|
|||
|
"Средний коэффициент силуэта (silhouette score) указывает на качество кластеризации, измеряя, насколько хорошо точки внутри одного кластера близки друг к другу по сравнению с точками из других кластеров. Значения коэффициента силуэта находятся в диапазоне от -1 до 1:\n",
|
|||
|
"\n",
|
|||
|
"1: Указывает на идеально плотные и четко разделенные кластеры. \n",
|
|||
|
"0: Указывает на перекрытие кластеров или слабую структуру кластеризации. \n",
|
|||
|
"Отрицательные значения: Указывают, что точки в кластере расположены ближе к другому кластеру, чем к своему."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта (агломеративная кластеризация): 0.199\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAJzCAYAAAA4M0NGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddXhcVfrA8e94Ju7uSRtpk7q7oC1S3LXI4raw7LIL+0MW2AK7QHFZKBSHUrS0FFpa6u6SNO5uk/HfHyHTTDNJkzbe9/M8fSD3zL333Dt3Zu57zznvUdjtdjtCCCGEEEIIIdqk7O0KCCGEEEIIIURfJ4GTEEIIIYQQQhyHBE5CCCGEEEIIcRwSOAkhhBBCCCHEcUjgJIQQQgghhBDHIYGTEEIIIYQQQhyHBE5CCCGEEEIIcRwSOAkhhBBCCCHEcUjgJIQQQgghhBDHoe7tCgjRHa6++mo2btzotMzLy4vU1FTuuOMOxo4d20s1E0II0V/961//or6+nvvvv58DBw5w1113sW7dOlQqVW9XTQjRAyRwEgNWamoqjz76KABWq5XKyko++ugjbrzxRr788ksGDRrUyzUUQgjRn1x33XVcddVVjB8/Ho1Gw9///ncJmoQ4hSjsdru9tyshRFe7+uqrAVi0aJHT8oaGBiZMmMAVV1zBQw891BtVE0II0Y+ZzWZycnIICAjA19e3t6sjhOhBMsZJnFL0ej06nQ6FQuFYdvXVVzsCrWbPPfccSUlJfPnll45lH3zwAbNmzWLEiBFcddVVHDx4EIAPP/yQpKQkjhw54rSNr7/+mpSUFAoLCwFYsWIFV1xxBSNGjGDo0KGceeaZfPjhh07r/OUvfyEpKcnlv7y8PMdrZs6c6bTexx9/TFJSEi+99JJj2ffff8/ZZ5/N8OHDueCCC9i8ebPTOserz4YNG0hKSmLDhg1O6x17vjpy/kwmE8888wzTpk0jJSXF6bhanuNjHbvtJ598krS0NFavXg3ASy+91Ob5alnvjpz7kpISHnroISZMmOB4j7dt2wbAzJkzj/u+bN68mauuuophw4YxduxYHnroISoqKhzb//LLL0lKSmLHjh3MmzeP9PR0zjnnHH788UenetTW1vKvf/2L2bNnk5aWxty5c/n888+dXtOyPsnJyYwZM4Y777yTysrKNs8lQGZmpqOr6pgxY7jlllvIyMho8/Xtnd+W71tWVhZ33XUXkyZNYvjw4Vx99dVs2bLFUZ6Xl+dYb+nSpU77+OWXXxxlLX3//fdccMEFjBgxgkmTJvGPf/yD6urqVnVrydW1OHPmTP7yl7+0+fexmuva8vi2bt3KpZdeSlpaGpMmTeLxxx+nsbGxzW00b+fBBx9k8uTJDBkyhAkTJvDggw86vUeurqu8vLwOX9clJSU8/PDDTJs2jfT0dC666CJ+/vlnp3o0r/fqq686LT948GCraxi67jpu7/hbXg/H/mv+buvI90pzXZr/DR06lDPOOMPpGnN1nTSfl5bflx09ly+99BIajYaEhAR8fHy47LLLWp3D9vZVX1/P1VdfTWpqKkaj0XGsbZ2PZlarlTfeeIO5c+eSnp7O8OHDueyyy1i/fr3TvrZv384NN9zAyJEjGT9+PPfddx/FxcUdOucAn332GXPmzGHo0KFMnz6dl156CavV6ij/y1/+wtVXX83nn3/OjBkzGDFiBNdeey379+93vKb5fWl5Tg4dOsSQIUOc3tN9+/Zx5ZVXMmLECGbPns3HH3/sdCz79+/njjvuYPz48QwZMoQpU6bwxBNPOH32jn0fofV77uoa+O2330hKSnJ8F7j63BuNRmbNmuXy+hGnLgmcxIBlt9uxWCxYLBbMZjOlpaU899xzmEwmLrzwwjbXy8nJ4X//+5/Tsp9++onHH3+cOXPmsHDhQqxWK7feeismk4lzzjkHnU7H119/7bTOkiVLmDBhAmFhYfz666/cfvvtDBkyhFdeeYWXXnqJqK
go/u///o8dO3Y4rRcUFMQnn3zi+PenP/2p3eOsrq7mP//5j9OynTt38sADDzB8+HBeffVVwsLCuPXWWykrKwPoVH06y9X5e/PNN3nvvfe49tpree+99/jkk094+eWXO7XdnTt38tFHH/Gf//yHESNGOJW1PF//+Mc/nMo6cqz19fVcfvnlbNiwgT//+c+8/PLL6HQ6brjhBrKysnj55Zed6vynP/3Jsb/g4GA2bdrEddddh5ubG//5z3/461//ysaNG7nmmmta3WDfcsstzJo1i5dffpm4uDjuueceVq1aBUBjYyNXXHEF33zzDfPnz+eVV15h1KhR/O1vf+O1115z2s60adP45JNPWLRoEffffz9r167lySefbPP8FRcXc+mll5KVlcVjjz3Gv//9b8rKyrj22mupqqpq99y3PL/Hvm+HDx/mggsuIC8vj0ceeYQFCxagUCi49tprW40z9PDwYOXKlU7Lvv/+e5RK55+iV155hfvuu4/hw4fz4osvcvvtt7Ns2TKuvvrq4wYsXa2wsJAbb7wRPz8/Xn75Ze666y6+/vprHnzwwTbXMRgMXHPNNWRkZPDoo4/y9ttvc8011/Ddd9/xwgsvOL22+X1seT01a++6Lisr46KLLmLz5s3ce++9vPTSS0RERHD77be3Ck47et676jo+3vEHBwe3+n479jN2LFffK82a1124cCHx8fE89NBDrR5ktacz57Klr7/+2vFwpaMWL15MWVkZ7733Hlqt1rE8NTXV6f2+6KKLnNZbsGABr7zyCpdeeilvvfUWjz/+OFVVVdx9990YDAYA9u7dy1VXXYXRaOTZZ5/ln//8J7t37+bGG2/s0Dl//fXX+fvf/86ECRN47bXXuPLKK3nzzTf5+9//7lSXffv28cILL3DHHXfw73//m8rKSq666ipKSkraPO4nn3wSi8Xi+NtgMHDTTTdhsVh46aWXOPfcc3n00UcdD8VKSkq48sorMRgMPP3007z55pvMmTOHRYsW8f7773fqnB/LbDbz1FNPHfd1b731VrsBsTg1yRgnMWBt2rSJIUOGtFp+3333kZCQ0OZ6Tz31FIMGDWLPnj2OZRUVFVxxxRXcd999QFMLSvPT+pSUFE477TSWLl3K3XffjUKhoKioiPXr1/Pvf/8baLq5nDdvHn/7298c2xwxYgTjxo1jw4YNDBs2zLFcq9UyfPhwx9+ZmZntHueLL75IeHi405PsoqIizjjjDJ544gmUSiWBgYHMnTuX7du3M3v27E7Vp7Ncnb+dO3eSnJzMDTfc4FjW2R+k5ha/WbNmtSpreb6an+I268ixfvXVV+Tn5/PVV1+RkpICwMiRIzn//PPZtGkTF198sVOdo6Ojnfb53HPPERcXx+uvv+4Y7zBs2DDmzJnDF198wZVXXul47dVXX83tt98OwJQpU5g3bx4LFy5k2rRpfPnllxw8eJCPP/7YERxOmTIFi8XCK6+8wmWXXeboGuTv7++ow5gxY/j999+dzvmx/ve//2EymXj33XcJCgoCIDk5mcsvv5wdO3Ywbdq0NtdteazHvm8vv/wyWq2W999/H09PTwCmT5/O3LlzefbZZ51ay6ZOncpvv/2GyWRCq9ViNBr5+eefGTNmjKMlpbq6mldffZVLLrnEKVgYPHgwV155Zavz2d3efPNN/Pz8WLhwoeO9VSqVPPLIIxw4cMDl0+isrCxCQ0N55plniIqKAmD8+PHs2LGjVTDZ8n08VnvX9bvvvktFRQXLli0jIiICaArCrrvuOp599lnmzp3rCIymTp3Kjz/+SElJiSMw++GHH5zOO3TddXy842/5Hdf8/ZaSkkJkZKTL8wCuv1eatVw3LCyMlStXsm/fPuLi4trc3omey2b19fUsWLCAIUOGtPu5a8lqtTrG2Y4ZM8apzNPT0+n9/u2335zKS0pKuPfee51abHQ6HXfeeScHDhxg+PDhvPbaa/j6+vLOO++g0+kACA4O5v777ycjI6Pdc1
5bW+sIzB555BEAJk+ejK+vL4888gjXX3+9Y1xwbW0tr732GqNHjwYgPT2d2bNn8/777/PAAw+0Ou5ly5axY8cOp/c
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Агломеративная кластеризация\n",
|
|||
|
"# ========================\n",
|
|||
|
"agg_cluster = AgglomerativeClustering(n_clusters=3) \n",
|
|||
|
"labels_agg = agg_cluster.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg_agg = silhouette_score(df_scaled, labels_agg)\n",
|
|||
|
"print(f'Средний коэффициент силуэта (агломеративная кластеризация): {silhouette_avg_agg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=labels_agg, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Значение коэффициента силуэта лежит в диапазоне от -1 до 1. Ближе к 1: Хорошо сформированные, плотные кластеры, четко отделенные друг от друга. \n",
|
|||
|
"\n",
|
|||
|
"Ближе к 0: Кластеры пересекаются или слабо разделены, не имеют четких границ. Точки расположены одинаково близко как к своему кластеру, так и к соседним. \n",
|
|||
|
"Ближе к -1 (отрицательные значения): некоторые точки в среднем расположены ближе к точкам другого кластера, чем к точкам своего, то есть, вероятно, отнесены не к тому кластеру. Очень плохая кластеризация. \n",
|
|||
|
"Иными словами, при значениях, близких к 1, все точки внутри каждого кластера плотно сгруппированы и значительно удалены от точек других кластеров, что свидетельствует о четкой и хорошо разделенной структуре данных. Значение, равное 1, соответствует идеальной кластеризации.\n",
|
|||
|
"\n",
|
|||
|
"Значение 0.199 указывает на то, что границы между кластерами нечеткие и кластеры частично перекрываются. Возможные причины — неоптимальный выбор числа кластеров или особенности данных, затрудняющие их разделение."
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aisenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|