1130 lines
992 KiB
Plaintext
1130 lines
992 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Анализ популяции с применением метода кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Country (or dependency)</th>\n",
|
|||
|
" <th>Population2020</th>\n",
|
|||
|
" <th>Yearly Change</th>\n",
|
|||
|
" <th>NetChange</th>\n",
|
|||
|
" <th>Density</th>\n",
|
|||
|
" <th>LandArea</th>\n",
|
|||
|
" <th>Migrants (net)</th>\n",
|
|||
|
" <th>Fert. Rate</th>\n",
|
|||
|
" <th>Med. Age</th>\n",
|
|||
|
" <th>Urban Pop %</th>\n",
|
|||
|
" <th>World Share</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>no</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>China</td>\n",
|
|||
|
" <td>1439323776</td>\n",
|
|||
|
" <td>0.39</td>\n",
|
|||
|
" <td>5540090</td>\n",
|
|||
|
" <td>153</td>\n",
|
|||
|
" <td>9388211</td>\n",
|
|||
|
" <td>-348,399</td>\n",
|
|||
|
" <td>1.7</td>\n",
|
|||
|
" <td>38</td>\n",
|
|||
|
" <td>61%</td>\n",
|
|||
|
" <td>18.47%</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>India</td>\n",
|
|||
|
" <td>1380004385</td>\n",
|
|||
|
" <td>0.99</td>\n",
|
|||
|
" <td>13586631</td>\n",
|
|||
|
" <td>464</td>\n",
|
|||
|
" <td>2973190</td>\n",
|
|||
|
" <td>-532,687</td>\n",
|
|||
|
" <td>2.2</td>\n",
|
|||
|
" <td>28</td>\n",
|
|||
|
" <td>35%</td>\n",
|
|||
|
" <td>17.70%</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>United States</td>\n",
|
|||
|
" <td>331002651</td>\n",
|
|||
|
" <td>0.59</td>\n",
|
|||
|
" <td>1937734</td>\n",
|
|||
|
" <td>36</td>\n",
|
|||
|
" <td>9147420</td>\n",
|
|||
|
" <td>954,806</td>\n",
|
|||
|
" <td>1.8</td>\n",
|
|||
|
" <td>38</td>\n",
|
|||
|
" <td>83%</td>\n",
|
|||
|
" <td>4.25%</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>Indonesia</td>\n",
|
|||
|
" <td>273523615</td>\n",
|
|||
|
" <td>1.07</td>\n",
|
|||
|
" <td>2898047</td>\n",
|
|||
|
" <td>151</td>\n",
|
|||
|
" <td>1811570</td>\n",
|
|||
|
" <td>-98,955</td>\n",
|
|||
|
" <td>2.3</td>\n",
|
|||
|
" <td>30</td>\n",
|
|||
|
" <td>56%</td>\n",
|
|||
|
" <td>3.51%</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>Pakistan</td>\n",
|
|||
|
" <td>220892340</td>\n",
|
|||
|
" <td>2.00</td>\n",
|
|||
|
" <td>4327022</td>\n",
|
|||
|
" <td>287</td>\n",
|
|||
|
" <td>770880</td>\n",
|
|||
|
" <td>-233,379</td>\n",
|
|||
|
" <td>3.6</td>\n",
|
|||
|
" <td>23</td>\n",
|
|||
|
" <td>35%</td>\n",
|
|||
|
" <td>2.83%</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Country (or dependency) Population2020 Yearly Change NetChange Density \\\n",
|
|||
|
"no \n",
|
|||
|
"1 China 1439323776 0.39 5540090 153 \n",
|
|||
|
"2 India 1380004385 0.99 13586631 464 \n",
|
|||
|
"3 United States 331002651 0.59 1937734 36 \n",
|
|||
|
"4 Indonesia 273523615 1.07 2898047 151 \n",
|
|||
|
"5 Pakistan 220892340 2.00 4327022 287 \n",
|
|||
|
"\n",
|
|||
|
" LandArea Migrants (net) Fert. Rate Med. Age Urban Pop % World Share \n",
|
|||
|
"no \n",
|
|||
|
"1 9388211 -348,399 1.7 38 61% 18.47% \n",
|
|||
|
"2 2973190 -532,687 2.2 28 35% 17.70% \n",
|
|||
|
"3 9147420 954,806 1.8 38 83% 4.25% \n",
|
|||
|
"4 1811570 -98,955 2.3 30 56% 3.51% \n",
|
|||
|
"5 770880 -233,379 3.6 23 35% 2.83% "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Population2020</th>\n",
|
|||
|
" <th>Yearly Change</th>\n",
|
|||
|
" <th>NetChange</th>\n",
|
|||
|
" <th>Density</th>\n",
|
|||
|
" <th>LandArea</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>2.350000e+02</td>\n",
|
|||
|
" <td>235.000000</td>\n",
|
|||
|
" <td>2.350000e+02</td>\n",
|
|||
|
" <td>235.000000</td>\n",
|
|||
|
" <td>2.350000e+02</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>3.317120e+07</td>\n",
|
|||
|
" <td>1.104298</td>\n",
|
|||
|
" <td>3.461058e+05</td>\n",
|
|||
|
" <td>478.408511</td>\n",
|
|||
|
" <td>5.535918e+05</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>1.351370e+08</td>\n",
|
|||
|
" <td>1.075665</td>\n",
|
|||
|
" <td>1.128255e+06</td>\n",
|
|||
|
" <td>2331.282424</td>\n",
|
|||
|
" <td>1.687796e+06</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>8.010000e+02</td>\n",
|
|||
|
" <td>-2.470000</td>\n",
|
|||
|
" <td>-3.838400e+05</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.000000e+00</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>4.188015e+05</td>\n",
|
|||
|
" <td>0.320000</td>\n",
|
|||
|
" <td>4.240000e+02</td>\n",
|
|||
|
" <td>37.000000</td>\n",
|
|||
|
" <td>2.545000e+03</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>5.459642e+06</td>\n",
|
|||
|
" <td>0.970000</td>\n",
|
|||
|
" <td>3.917000e+04</td>\n",
|
|||
|
" <td>95.000000</td>\n",
|
|||
|
" <td>7.724000e+04</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>2.057705e+07</td>\n",
|
|||
|
" <td>1.850000</td>\n",
|
|||
|
" <td>2.496600e+05</td>\n",
|
|||
|
" <td>240.000000</td>\n",
|
|||
|
" <td>4.038200e+05</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>1.439324e+09</td>\n",
|
|||
|
" <td>3.840000</td>\n",
|
|||
|
" <td>1.358663e+07</td>\n",
|
|||
|
" <td>26337.000000</td>\n",
|
|||
|
" <td>1.637687e+07</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Population2020 Yearly Change NetChange Density LandArea\n",
|
|||
|
"count 2.350000e+02 235.000000 2.350000e+02 235.000000 2.350000e+02\n",
|
|||
|
"mean 3.317120e+07 1.104298 3.461058e+05 478.408511 5.535918e+05\n",
|
|||
|
"std 1.351370e+08 1.075665 1.128255e+06 2331.282424 1.687796e+06\n",
|
|||
|
"min 8.010000e+02 -2.470000 -3.838400e+05 0.000000 0.000000e+00\n",
|
|||
|
"25% 4.188015e+05 0.320000 4.240000e+02 37.000000 2.545000e+03\n",
|
|||
|
"50% 5.459642e+06 0.970000 3.917000e+04 95.000000 7.724000e+04\n",
|
|||
|
"75% 2.057705e+07 1.850000 2.496600e+05 240.000000 4.038200e+05\n",
|
|||
|
"max 1.439324e+09 3.840000 1.358663e+07 26337.000000 1.637687e+07"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Migrants (net) Процент пустых значений: %14.47\n",
|
|||
|
"Country (or dependency) 0\n",
|
|||
|
"Population2020 0\n",
|
|||
|
"Yearly Change 0\n",
|
|||
|
"NetChange 0\n",
|
|||
|
"Density 0\n",
|
|||
|
"LandArea 0\n",
|
|||
|
"Migrants (net) 34\n",
|
|||
|
"Fert. Rate 0\n",
|
|||
|
"Med. Age 0\n",
|
|||
|
"Urban Pop % 0\n",
|
|||
|
"World Share 0\n",
|
|||
|
"dtype: int64\n",
|
|||
|
"Country (or dependency) False\n",
|
|||
|
"Population2020 False\n",
|
|||
|
"Yearly Change False\n",
|
|||
|
"NetChange False\n",
|
|||
|
"Density False\n",
|
|||
|
"LandArea False\n",
|
|||
|
"Migrants (net) True\n",
|
|||
|
"Fert. Rate False\n",
|
|||
|
"Med. Age False\n",
|
|||
|
"Urban Pop % False\n",
|
|||
|
"World Share False\n",
|
|||
|
"dtype: bool\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Процент пропущенных значений признаков\n",
|
|||
|
"for i in df.columns:\n",
|
|||
|
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
|
|||
|
" if null_rate > 0:\n",
|
|||
|
" print(f'{i} Процент пустых значений: %{null_rate:.2f}')\n",
|
|||
|
"\n",
|
|||
|
"print(df.isnull().sum())\n",
|
|||
|
"\n",
|
|||
|
"print(df.isnull().any())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Country (or dependency) object\n",
|
|||
|
"Population2020 int64\n",
|
|||
|
"Yearly Change float64\n",
|
|||
|
"NetChange int64\n",
|
|||
|
"Density int64\n",
|
|||
|
"LandArea int64\n",
|
|||
|
"Migrants (net) object\n",
|
|||
|
"Fert. Rate object\n",
|
|||
|
"Med. Age object\n",
|
|||
|
"Urban Pop % object\n",
|
|||
|
"World Share object\n",
|
|||
|
"dtype: object"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Проверка типов столбцов\n",
|
|||
|
"df.dtypes"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"**Цель:** Кластеризация популяций стран для определения схожих характеристик. \n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Очистка данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Population2020 Yearly Change NetChange Density LandArea\n",
|
|||
|
"no \n",
|
|||
|
"1 1439323776 0.39 5540090 153 9388211\n",
|
|||
|
"2 1380004385 0.99 13586631 464 2973190\n",
|
|||
|
"3 331002651 0.59 1937734 36 9147420\n",
|
|||
|
"4 273523615 1.07 2898047 151 1811570\n",
|
|||
|
"5 220892340 2.00 4327022 287 770880\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Удалим несущественные столбцы\n",
|
|||
|
"columns_to_drop = [\n",
|
|||
|
" \"Migrants (net)\",\n",
|
|||
|
" \"Fert. Rate\",\n",
|
|||
|
" \"Med. Age\",\n",
|
|||
|
" \"Urban Pop %\",\n",
|
|||
|
" \"Country (or dependency)\",\n",
|
|||
|
" \"World Share\",\n",
|
|||
|
"]\n",
|
|||
|
"df_cleaned = df.drop(columns=columns_to_drop)\n",
|
|||
|
"\n",
|
|||
|
"print(df_cleaned.head()) # Вывод очищенного DataFrame"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Визуализация парных взаимосвязей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABcwAAAPYCAYAAADn2rqgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xTdfv/8XfaJk1L0wECpRRaCrKh7CWKcjtQ3LdbigNQXIgKCC5Q3IoMEREU79siTpwMxYEDvwq4CsoQCmWVMjvSmY78/uDX3IQWSNKMtryej4cP7TknJ9e5SvHK1c+5jsFut9sFAAAAAAAAAMApLijQAQAAAAAAAAAAUBvQMAcAAAAAAAAAQDTMAQAAAAAAAACQRMMcAAAAAAAAAABJNMwBAAAAAAAAAJBEwxwAAAAAAAAAAEk0zAEAAAAAAAAAkETDHAAAAAAAAAAASTTMAdRhdrs90CGgjuHPDAAAAFC7UKMDqG1omAPwuZSUFHXs2FHr16+vdv/gwYM1ceJEt865ZcsWXX/99dXu+/nnn3X33XfrzDPPVHJysi644AI999xzOnTokNNx7dq108svv+zW+55KUlJS1K5dO6d/OnfurLPPPluPP/64cnNzAxLX7t271a5dO3300UduvW7OnDl64403HF+//PLLateunbfDkyQdOHBAjzzyiM455xx1795dV155pZYtW1bluCVLlmjo0KHq2rWrLrzwQn388cdVjlm+fLn+/e9/q3v37ho0aJAmTZqkgwcPOh1z8OBBPfDAA+rbt6969uyp+++/X/v37/fJtQEAgFNTSkqKUlJSAhrDxIkTNXjw4Gr3TZ8+Xe3atdPUqVP9HJV/VdawR//TsWNH9e3bV3fddZe2bNkSsNg8+Vz3zTff6MEHH3R8vXr1arVr106rV6/2dniy2WyaO3euhgwZom7duumCCy7Q7NmzZbPZnI5bv369UlJS1L17dw0cOFAvvfRSlWP+/vtvjRo1Sv369VPfvn1166236u+//3Y6pqysTDNmzNCgQYOUnJysG264QWlpaV6/LgDeFxLoAACcGsrLyzVp0iR99NFHMplMNT7fF198oT/++KPK9hdffFGvv/66hgwZoocffljR0dHavHmz5s+frxUrVmjhwoVq1qxZjd//VNGxY0dNnjzZ8XVpaan+/vtvvfTSS9q4caPeeecdGQyGAEboupkzZ+ruu+92fH311VfrzDPP9Pr72Gw2jRw5UlarVWPGjFGTJk305Zdf6r777pPNZtPll18uSfryyy81btw4DR8+XGeeeaa+/vprTZw4USaTSUOHDpUkLV26VPfff7+uvfZa3XfffTp48KBmzpypm266SR999JFCQ0NVVlamUaNGKT8/X1OmTFFZWZmmTZumESNG6KOPPpLRaPT6NQIAANQmFRUV+uSTT9S2bVt9+umnGjdunMLCwgIdlk+99957jv8uLy9XZmampk+frhtvvFFLly5V48aNAxid6/7zn/84fd2pUye99957atOmjdff68knn9Rnn32mO++8U126dNH69ev1yiuvKDMzU08//bQkadeuXbrlllvUrVs3zZgxQ+np6Zo+fbpycnL0xBNPSJJ27NihYcOGqXPnznrqqadkMBi0YMEC3XDDDfr444+VlJQkSXr22Wf14Ycf6oEHHlDz5s315ptv6uabb9Ynn3yihIQEr18fAO+hYQ7ALywWi7Zs2aJXXnlF9913n0/eY+nSpZo/f74mTZqkm2++2bG9X79+GjRokK644go99dRTmj17tk/evz6KiIhQt27dnLb17t1bBQUFmjVrltLS0qrsrytiY2MVGxvr9fN+99132rRpkz744AN17dpVknTGGWcoMzNTr7/+uqNh/tJLL2nIkCF66KGHJElnnnmmcnNzNXPmTEfDfO7cuRo0aJCjOJekVq1a6ZprrtHKlSs1ZMgQffHFF9qwYYOWLl3q+GDRoUMHXXzxxVq+fL
kuvfRSr18jAABAbbJq1SplZWXppZde0rBhw7RkyRJdffXVgQ7Lp46twXv27KlmzZrpxhtv1Mcff6zbbrstMIHVUHWfP7whOztb77//vsaNG6eRI0dKkvr37y9JmjZtmsaNG6eGDRtq/vz5atCggebMmSOTyaRBgwbJbDZr6tSpGj16tOLi4pSamqqwsDC99tprCg8Pl3TkM+fgwYO1cOFCPfbYY9q7d6/eeecdPfzww7rhhhskSQMHDtQFF1yg+fPn68knn/T6NQLwHkayAPCLDh066PLLL9frr7+uv/7666THf/DBBxo6dKhjBMjLL7+s8vJySUduQ6xseh89VmXevHlq06aNbrrppirnS0xM1Pjx49W9e3enGXn5+fl6+OGH1adPH3Xv3l1jxoxxGndRXl6uefPm6eKLL1bXrl3VrVs3XXfddfrll18cx7z88ss677zz9N133+mSSy5R586ddcEFF+iTTz5xiiE9PV2jRo1Sjx49NGDAAE2fPl2TJk1yurW1oqJC8+bN03nnnec4T2pq6glzdcEFF2jMmDFVtl922WW64447JEk7d+7U6NGj1bdvXyUnJ+vaa6/V999/f8Lznkjnzp0lSZmZmY5ty5Yt05VXXqnu3bvrjDPO0GOPPeY0tuXll1/W4MGDHY3e5ORkXXPNNU63W3700Udq166ddu/e7fR+J7u9c+3atRoxYoR69+6tzp07a/DgwXr55ZdVUVEhSY7RK7Nnz3b8d3UjWVy5hpN9ryMiInTttdeqS5cuTudOSkrSzp07JR0ZK5ORkaHzzjvP6ZgLLrhAO3bsUEZGhioqKnTGGWfommuuqXIeSY5zrVq1Sq1atXJahdOmTRu1bt26Rt9jAAAAT3zwwQe68sor1a1bN3Xt2lWXXXaZli9f7tj/0UcfqWPHjkpLS3PUTOecc47T6DxJys3N1aRJk9SnTx/17t1bL7zwgqO2O9bixYvVtm1b9ezZU3379nVafV1p4sSJuummmzR58mT16NFDF110kcrLy12qv135THCsRx99VGeccYbjM0ylp556Sn379lVpaamKi4s1ZcoUnXXWWercubOGDBlSJQ/uqKzR9+zZ49i2fv16jRgxQn379lWPHj00evRop7EtlSNQVq1apRtvvFFdu3bV+eefr0WLFjmOOd5IxBONyKl83YQJEzRw4EB16tRJ/fv314QJE5SdnS3pyJifNWvWaM2aNY4xLNWNZHH1Gn7++WfdeuutSk5O1hlnnKEXXnjBkf/8/Hxdd911VeKtrK137dol6UhtPWjQIKe7oocMGaKKigqtWrXK8Zpbb73V0SyXpPDwcMXGxjpq9J9//lllZWVO9b7JZNLZZ59NjQ7UATTMAfjNQw89pJiYGE2aNKnKDLijvfbaa3r00UfVv39/zZ07VzfeeKPmz5+vRx99VNKRURpXXXWVpCO3Il599dU6cOCANm3apLPPPvu4I0JuuOEGjRgxwmn/W2+9pdLSUs2cOVMPPPCAvv32W6fVvC+++KLmzJmja6+9Vq+//rqmTp2qnJwc3XvvvSoqKnIcd+DAAT3xxBMaPny45s2bp/j4eD344INKT0+XJB0+fFjDhg3T3r179cwzz+iRRx7RF198oSVLljjFOGXKFM2aNUuXXnqpY77e008/rVdeeeW4+br00kv1/fffKz8/37EtPT1dmzZt0mWXXaaKigrdfvvtKioq0vPPP685c+YoOjpad9xxh3bs2HHc857I9u3bJUktWrSQdGQ++P33369u3bpp1qxZuuuuu/Tll18qJSVFxcXFjtcdPnxYDz74oG644QbNnDlTZrNZI0aM0MaNGz2KQ5I2bdqkm2++WdHR0Zo+fbpeffVV9erVS7Nnz3Z8OKv80HTVVVdV+wHKnWs42fd6wIABeuKJJ5z+nJWWlur77793NLUrj01MTHSKofLWzO3btysoKEgTJ07Uueee63TM119/LUk6/fTTHec69jyS1LJlS8f3CQAAwB
/efvttPfbYYzr33HP12muv6cUXX5TJZNK4ceOUlZXlOK6iokJjx47VRRddpHnz5qlHjx56/vnn9eOPPzr2jxw5Ut9
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1500x1000 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Настройка стиля графиков\n",
|
|||
|
"sns.set(style=\"whitegrid\")\n",
|
|||
|
"\n",
|
|||
|
"# Создание фигуры\n",
|
|||
|
"plt.figure(figsize=(15, 10))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned[\"NetChange\"], y=df_cleaned[\"Population2020\"], alpha=0.6, color=\"purple\")\n",
|
|||
|
"plt.title(\"NetChange vs Population2020\")\n",
|
|||
|
"plt.xlabel('NetChange')\n",
|
|||
|
"plt.ylabel(\"Population2020\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"LandArea\"],\n",
|
|||
|
" y=df_cleaned[\"Population2020\"],\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
" color=\"green\",\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"LandArea vs Population2020\")\n",
|
|||
|
"plt.xlabel(\"LandArea\")\n",
|
|||
|
"plt.ylabel(\"Population2020\")\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"LandArea\"], y=df_cleaned[\"Yearly Change\"], alpha=0.6, color=\"red\"\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"LandArea vs Yearly Change\")\n",
|
|||
|
"plt.xlabel(\"LandArea\")\n",
|
|||
|
"plt.ylabel(\"Yearly Change\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned[\"LandArea\"], y=df_cleaned[\"Density\"], alpha=0.6, color=\"red\")\n",
|
|||
|
"plt.title(\"LandArea vs Density\")\n",
|
|||
|
"plt.xlabel(\"LandArea\")\n",
|
|||
|
"plt.ylabel(\"Density\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Стандартизация данных для кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArMAAAImCAYAAABATALrAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACPI0lEQVR4nOzdeXicVd0+8PtZZ00mybRJkybdEigUihQqe1laQEXAsrzK6osvKLIoy4ssLizyw4VNBAFFQaG+WESQVUVokU1Zylp2mrY006bZZ1+e9ffHJEPSLDPTJpkl9+e6kPZ5JjNnToLXnTPnfL+Cbds2iIiIiIhKkFjoARARERERbS+GWSIiIiIqWQyzRERERFSyGGaJiIiIqGQxzBIRERFRyWKYJSIiIqKSxTBLRERERCWLYZaIiIiIShbDLBERTRr26SGi8cYwS0Rl4fTTT8f8+fOH/LP77rvj0EMPxTXXXINQKDTsazZs2ICrr74ahx9+OPbYYw8ceuihuPjii/Hhhx+O+jq/+MUvMH/+fFx77bVZx/SNb3wD++yzDzRNG/UxxxxzDE499VQAwPz583Hbbbfl8G7H1+WXX46lS5dm/r506VJcfvnl4/oaW7duxbe+9S1s3rx5Ql+HiKYeudADICIaLwsWLMBVV12V+buu63jvvfdw880344MPPsCf/vQnCIIAAPjnP/+JSy+9FDvttBPOOeccNDY2YuvWrbj33nvx1a9+FXfeeScOPPDAIc9vWRYeeeQR7Lzzznj00UdxySWXwOVyjTqeE044Af/+97/x/PPP4/DDDx92/7333sPHH3+Mn//85wCABx54ADNmzBiPqdghv/rVr+D1esf1Of/973/jueeem/DXIaKph2GWiMqG1+vFnnvuOeTa5z//ecRiMdx66614++23seeee2LTpk247LLLsGTJEtxyyy2QJCnz+COPPBInn3wyLrvsMqxevRqqqmbuvfjii9i6dStuvvlmnHbaaXjiiSfwX//1X6OO54gjjoDP58Njjz02Ypj961//Cq/Xiy984QsAMGzshbJgwYKyeh0iKm/cZkBEZW/33XcHAGzZsgUAsGLFCmiahh/+8IdDgiwAuFwuXHbZZTjhhBOGbU146KGHsPPOO2PvvffGvvvuiwceeGDM13U4HDj66KPxr3/9C9FodMg9Xdfx5JNP4stf/nJmdXfbbQb33nsvvvjFL2LhwoVYsmQJrr766szzBAIBzJ8/Hw8//PCQ5912y4Bpmrjrrrtw9NFHY4899sCee+6Jk046CS+//PKo4x788f9tt902bPvGwD8DY832Gg8//DCuuOIKAMCyZcsyz73tNoNIJIKf/vSnOPzww7Fw4UIcffTR+Mtf/jJsbLfeeit+/vOf44ADDsAee+yBM888Exs3bhzze0FE5YthlojK3oYNGwAATU1NAIAXXngBCxYsQF1d3YiP33///XHRRRdh+vTpmWvBYBCrV6/G8uXLAQDHHXcc1q5di/fee2/M1z7hhBOQSqXw1FNPDbn+/PPPo7e3d9SV3SeeeAI33HADTj31VNx9990477zz8Oijj+a0V3ewG2+8EXfccQe+9rWv4Xe/+x2uvfZaBINBXHDBBUgkElm//r/+67/wwAMPDPln7733hsfjwVFHHZXTaxx66KE455xzAKS3Fpx77rnDXieZTOKUU07B448/jrPOOgt33HEH9t57b/zgBz/Ar3/96yGPve+++7B+/Xr89Kc/xf/7f/8P7777Li677LK85oWIyge3GRBR2bBtG4ZhZP4eCoXw6quv4s4778SiRYsyK7Rbt27FrrvumtdzP/7447AsC1/5ylcApLcj/PjHP8bKlSvHDJi77bYbdt11Vzz++OM44YQTMtcfeeQRzJ8/HwsXLhzx61599VU0Njbi1FNPhSiK2GeffeB2u0c8yDaWzs5OXHTRRTj99NMz1xwOB77zne/go48+yrq1YcaMGUP28f7hD3
/AG2+8gV/96ldobm7O+TVmzZoFANh1113R2Ng47HUefvhhfPzxx1i5ciUWLVoEAFiyZAkMw8Add9yBk046CVVVVQCAyspK3HHHHZlV9U2bNuG2225DX18fqqur85ofIip9DLNEVDZee+017LbbbkOuiaKIAw44AD/+8Y8zh78kSYJpmnk990MPPYR9990XqqoiHA4DSH/k/cQTT+Cyyy4b8yDTCSecgJ/85Cfo6OhAXV0dgsEgnn32WVx66aWjfs1+++2HBx54AMcffzwOP/xwHHLIITjmmGMy7yFXN910EwCgt7cX69evx6effopnn30WAMassjCSF154Addffz3OPffcIXuAx+M1Xn31VcycOTMTZAcce+yx+Mtf/oK3334bhxxyCABg4cKFQ7aHDITtRCLBMEs0BTHMElHZ2G233XDNNdcAAARBgMPhQH19/bCg2dDQkNk/OxJd1xEKhTBt2jQAwPvvv48PPvgAQPpA2bYee+wxnHLKKaM+3zHHHIPrr78ef/vb3/CNb3wDTz75JARBwLHHHjvq1xx11FGwLAv3338/7rjjDtx2222YOXMmLrnkkszH+7lYu3YtrrnmGqxduxYulwstLS1oaGgAkF/N1/Xr1+Piiy/GwQcfjO985zvj/hqhUGjIto4BA9+DgV8gAAyrICGK6R1zlmXl/H6IqHwwzBJR2fB4PKN+bD/YQQcdhHvvvRddXV0jBqjnnnsO5513Hn71q1/hiCOOwMMPPwy324077rgjE5wGXHnllXjggQfGDLNVVVU4/PDD8fjjj+Mb3/gGHn30URxxxBGZj81Hc/TRR+Poo49GJBLBiy++iN/+9rf43ve+h7333juzQrvtCnM8Hs/8ORqN4qyzzsL8+fPx5JNPYt68eRBFEc8999ywPbxjCYVCOOecczBt2jTceOONQ1aHx+s1fD4fPv3002HXu7q6AIArrkQ0Kh4AI6Ip59RTT4WiKLjuuutGDIO33norqqurcfDBB0PTNDz++ONYunQp9t9/f+y7775D/lm+fDk+/PBDvPXWW2O+5gknnID33nsPr776Kt5++22ceOKJYz7+wgsvxHnnnQcAqKiowJe+9CWce+65MAwDnZ2dmdXmjo6OzNfouo533nkn8/f169cjGAzi61//OlpaWjJB/PnnnweQ20qmYRi48MIL0d3djdtvv33YKneur7HtLwHb+vznP4/NmzfjzTffHHL9scceg6Io2GOPPbKOlYimJq7MEtGU09jYiKuvvho/+MEPcOqpp+Kkk05CfX09Nm3ahN///vdoa2vD3XffDYfDgb/97W8IBoM4+uijR3yur3zlK/jlL3+JlStXjnmY6oADDkBDQwN+9KMfobGxEfvvv/+YY9xvv/1w1VVX4ec//zkOPvhghMNh/OpXv8KcOXOwyy67QFEULFq0CCtWrMDs2bPh8/lw3333IZlMwu12AwDmzp0Lr9eLX//615BlGbIs46mnnsqUu8qlmsHPfvYz/Pvf/8bll1+OWCw2JLR7vd6cX6OyshIA8PTTT+Pggw/OHB4bcPzxx+P+++/Heeedh+9+97tobGzE6tWr8dBDD+H888/PfD0R0bYYZoloSjruuOMwe/Zs3HvvvbjlllvQ09OD6dOnY6+99sJtt92WCVsPP/wwfD4fDjrooBGfp6GhAZ///Ofx97//HVdccQV8Pt+IjxNFEccddxxuv/12fPe73816kOukk06CrutYuXIl7r//fjidTuy///743ve+B0VRAKSD5rXXXosf/vCH8Hq9OPHEE7H33nvjwQcfBJBe0b3jjjtw/fXX44ILLoDH48Guu+6KP/7xj/jmN7+JNWvWDKlJO5LVq1dnXmtb++yzD1asWJHTa+y777444IADcNNNN+E///kP7rrrriHP5XK5sGLFCtx000345S9/iWg0innz5uG6667LuopNRFObYOdzAoCIiIiIqIhwzywRERERlSyGWSIiIiIqWQyzRERERFSyGG
aJiIiIqGQxzBIRERFRyWKYJSIiIqKSNeXqzL755puwbTtTp5GIiIiIiouu6xAEAYsWLcr62Cm3MmvbNlhad2S2bUP
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Стандартизация данных\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(df_cleaned)\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование в DataFrame для удобства\n",
|
|||
|
"df_scaled = pd.DataFrame(data_scaled, columns=df_cleaned.columns)\n",
|
|||
|
"\n",
|
|||
|
"# Понижение размерности до 2 компонент\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"kc_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(kc_pca[:, 0], kc_pca[:, 1], alpha=0.6)\n",
|
|||
|
"plt.title(\"PCA Visualization\")\n",
|
|||
|
"plt.xlabel(\"Principal Component 1\")\n",
|
|||
|
"plt.ylabel(\"Principal Component 2\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Агломеративная (иерархическая) кластеризация"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA08AAAJwCAYAAABRZVrWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACuW0lEQVR4nOzdd3gU5drH8d+mJxAghB56C71XFZAi4qEcUUFUUFBEFFAEDiCCoiICgogEBOSIglhBxfaKAgoqClJDUUIRlCI9tFSSef/AnbObbJLJJmE3yfdzXVxMdp6ZvWd2dnbuecrYDMMwBAAAAADIlI+nAwAAAACA/IDkCQAAAAAsIHkCAAAAAAtIngAAAADAApInAAAAALCA5AkAAAAALCB5AgAAAAALSJ4AAAAAwAKSJwAAAACwgOQJgCXjx49XZGSky3/jx4/3dHgAHFy4cEHNmzdXdHS0Lly4oEcffVRvvvmmp8MCgHzPz9MBAMg/SpcuraioKKfXhg8f7qFoAGSkePHiGjRokPr27SvDMBQZGanp06d7OiwAyPdIngBYkpKSopCQEDVp0sTp9YCAAM8EBCBTw4cPV79+/XTx4kVVqVJFvr6+ng4JAPI9mu0BsOTq1asKCgqyVHbLli3q37+/GjdurFatWmncuHE6d+6cOf/jjz9WZGSkjh496rRcp06dnJoAJicnZ9hUMO26du7cqd69e6tRo0bq2bOnvv76a6d1X7p0SS+99JK6dOmihg0bqkePHlqxYkW690/7PkePHtWAAQM0fvx4LViwQDfccIOaN2+uxx57TMeOHXNafs2aNbr33nvVtGlTNWjQQN26ddPy5cvN+Zs2bTLXu3XrVqdl33nnHUVGRqpTp07p4pk4caJT2QsXLqhBgwaKjIzUpk2bLL9/Rj766CPdcccdatKkiRo1aqR///vf+r//+790+9hVU82MPp8BAwY4vcdXX32lO+64Q02bNtWNN96oZ555RhcuXDDnz507V5GRkWratKmSkpKcln388cfTNQ9NTEzUjBkz1KFDBzVo0EA9e/bUV1995bRcp06dNHv2bE2dOlUtW7ZU69atNXbsWMXGxlre/syaq3788cfmZ+r4OZw9e1YtWrRw+VlGRkaqTp06atmypUaMGKHz58+bZSIjIzV37lyn2Oz7xZ19KUmlSpVS9erVtXHjxiyb2KZ9ry+//FItW7bUrFmzJDkfv2n/Ocb9+++/a/jw4WrTpo3q16+vdu3aacqUKUpISDDLJCUl6dVXX1Xnzp3VqFEj9ejRQ5988omlfS5Jx48f16hRo9SqVSs1btxYDzzwgPbu3Wuu/+jRo4qMjNSXX36poUOHqnHjxrr55ps1b948paamOn0uaffJqFGjnD5TwzA0Z84ctWvXTs2bN9fQoUN14sQJs3xKSooWLVqkHj16qFGjRmrSpIn69eunX375JdPPUUr/maf92zAM9evXz+l8OX78eKdjS5Lef/99l8cPgNxHzRMAS+Lj41W8ePEsy/36668aNGiQ2rRpo1dffVUXLlzQnDlzdP/992vFihWWEzDp2gWyJL3++usqWbKkpGsXummTHkl65JFH1L9/fz355JNasWKFRo4cqYULF6pDhw5KSEjQvffeq7Nnz+rxxx9XRESE1qxZo6efflpnzpzR0KFDzfV06NBBjz32mPl3mTJlJElr165VWFiYJk6cqNTUVM2aNUsDBgzQl19+qeDgYH3//fcaNmyY7r//fo0YMUIJCQl699139fzzz6tBgwZq3Lixuc4iRYpo3bp1at68ufnaV199JR+f9PezihQpou+//16GYchms0mSvvnmG6WkpDiVy877O1q+fLmmTJmiESNGqHnz5rpw4YLeeOMNjRkzRk2bNlW5cuXMslFRUSpdurQkmZ+HJN11113q06eP+fdzzz3n9B7z58/Xa6+9pnvvvVdPPvmk/vrrL82ZM0c7duzQhx9+6HRM2Gw2/fzzz+
rQoYMk6cqVK1q/fr3TvjEMQ8OGDdO2bdv0+OOPq0aNGvr222/15JNPKikpSbfffrtZ9t1331WVKlX00ksv6dy5c5o1a5aOHDmi999/XzabLcvtf+yxx9SvXz9J12py6tWrZx4flStX1v79+9Pt01mzZunSpUsqVqyY0+v2Yys5OVkHDx7UjBkz9OKLL2rmzJkuPxtXsrMv7ZKTkzV16lTL7yFJCQkJev755zV48GD17NnTad4zzzyj+vXrm3/ffffd5vSpU6d03333qUmTJpo2bZoCAgK0YcMGLVmyRGXKlNGQIUMkSWPGjNH69ev16KOPqnHjxlq/fr3Gjx8vf3//LPf5uXPn1K9fPwUHB2vSpEkKDg7W22+/rfvuu08rVqxQjRo1zHgmT56sDh06aO7cudq6dauioqIUFxen//znPy63e8uWLfryyy+dXnvrrbe0cOFCjR07VtWqVdO0adP0xBNP6MMPP5QkzZw5U++9955Gjx6tyMhInTx5UvPmzdMTTzyh77//XsHBwdna945WrVql7du3Z1rmwoULevXVV91+DwDZQ/IEwJLY2FgzkcjMrFmzVK1aNS1cuNBsJtS4cWN1795dK1eu1H333Wf5PePi4iRJTZs2VVhYmCTphx9+cFl2wIABGjZsmCSpXbt26t27t+bNm6cOHTro448/VkxMjN5//301bdrULHP16lXNnz9f/fr1U4kSJSRdSwrSNk2UriWPH3/8sSpVqiRJql69unr37q1PP/1U99xzjw4cOKDevXvr6aefNpdp2rSpWrdurU2bNjklL+3bt9fatWvNC7i///5b27dvV4sWLdLVZrVt21br16/Xzp07zbj+7//+Ty1btnSq7cjO+zv666+/9NBDDzkljBEREbrjjju0detWde/e3Xy9bt26qlixYrp1lCtXzmmfFS1a1Jy+cOGCXn/9dfXt21fPPPOM+Xrt2rV13333pTsm7PvGnjytW7dOpUuXdqot2Lhxo3744QfNnj1b//rXvyRd+zzj4+M1c+ZM9ejRQ35+137efHx8tGTJEoWGhkq69vkOGzZMP/zwg9q3b29p+ytXrizpWhPVjI4Pu127dmnVqlWqW7euLl686DTPcdmWLVtq48aN2rNnT4brSiu7+9Ju2bJliouLU6lSpSy/1xdffCF/f38NHjw4XXO/mjVrZrgPYmJiVLduXc2ZM8c8Dm644Qb99NNP2rRpk4YMGaKYmBitXr1aEyZM0AMPPCDp2nF+7Ngxbdq0ST169Mh0n8+ePVuxsbF67733FBERIenacfOvf/1Lc+bM0WuvvWaWrV+/vpmctm/fXnFxcXr77bf16KOPOh2nkpSamqopU6aofv36Tp9LXFycHnvsMQ0cOFDStVqt559/XhcvXlSxYsV06tQpPfnkk061rYGBgRoxYoT27duX6fGSmStXrmjmzJnp4knrtddeU4UKFZxqMQHkHZrtAbDk1KlTKlu2bKZl4uPjtXPnTnXo0EGGYejq1au6evWqKlWqpBo1auinn35yKp+ammqWuXr1arr1/f333/Lx8Ul3keNK7969zWmbzaZbbrlF0dHRSkhI0ObNmxUREWEmTna9evVSYmKidu7cmeX6mzVrZiZOklSvXj1VqlRJv/76qyRp8ODBmjZtmq5cuaLdu3frq6++0sKFCyUpXTO0Tp066fDhwzp06JAk6euvv1bjxo3NC0FHoaGhatWqldauXStJOnfunDZt2uSU1GT3/R2NHz9eY8aM0cWLF7Vjxw6tWrXKbOqX2XJW7dixQ0lJSerRo4fT6y1atFBERIQ2b97s9Hrnzp21bt06GYYh6VqNnD1Bsvv5559ls9nUoUMHp+OnU6dOOn36tFNtUKdOnczEyf63n5+f+bnl5vYbhqEpU6borrvuUp06dVzOv3r1qpKSkhQdHa2tW7eqQYMGTmXSficck8bs7ktJOnPmjObNm6dx48YpMDDQ0nacPHlSb7zxhu69995s95O66a
ab9M477ygwMFAHDhzQ2rVr9frrr+vcuXPm/rQ3We3atavTsnPnztULL7yQ5Xv8/PPPqlu3rsqWLWvuJx8fH7Vv314
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1\n",
|
|||
|
" 1 1 1 1 1 1 1 1 1 1 1 1 1]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(data_scaled, method='ward')\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title('Дендрограмма агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Индекс образца')\n",
|
|||
|
"plt.ylabel('Расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Получение результатов кластеризации с заданным порогом\n",
|
|||
|
"result = fcluster(linkage_matrix, t=60, criterion='distance')\n",
|
|||
|
"print(result) # Вывод результатов кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ8AAAMQCAYAAACJzMTyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXwTdf7H8Xd6txxyKiunIBSwgLCgoKsiouK54okH4rWIgiKCCCoKsiIop1RFRORQVsXb1Z+6eMB6AoILKLQgoHLIISJHm6Qk8/ujJiZp2s6kmaZJX8/Hw4fkm5nJJ9Mck/d8v99xGIZhCAAAAAAAALBBUqwLAAAAAAAAQOIifAIAAAAAAIBtCJ8AAAAAAABgG8InAAAAAAAA2IbwCQAAAAAAALYhfAIAAAAAAIBtCJ8AAAAAAABgG8InAAAAAAAA2IbwCQAAAAAAALYhfAIAxKX+/furf//+Ye976623lJ2drVGjRlVyVQAiNWjQIC1evDjWZSBKXn31VQ0cODDWZQAAqgjCJwBAQtm7d68mTJgQ6zIAWPD6669r165duuyyy2JdCqLksssu0549e/Tqq6/GuhQAQBVA+AQASCgPP/ywCgoKlJWVFetSAJjgdDo1efJkDRo0SElJHJomCofDoVtvvVVTp06V0+mMdTkAgBjjGx4AkDA+/PBDffDBBxo8eLDq1q0bdJ/X69Xs2bN19tlnKycnR+eee64WLlwYtEz//v01atQozZo1S6eccor++te/6vbbb9f27duDlluyZImuueYade7cWTk5OerTp49efPFF//1ff/21srOz9dlnn+naa69Vx44ddc4552jRokX+ZR599FFlZ2frq6++8re9/vrrys7O1ptvvumvJ3Ro4ZQpU5Sdna3XX39dkpSdna2ZM2cGLTNz5kxlZ2eXqPnSSy9Vhw4ddOqpp+qf//ynCgoKgpb59ttvddNNN6lLly7q3r277r77bu3atSvoOX399deSpPz8fPXu3Vv9+vUzvV8kafbs2TrrrLPUvn17ZWdn+/8LfQ6BRo0apV69evlvz507V507d9ZLL70UtN/C/efbT5K0YsUK3XzzzerWrZtycnLUq1cvzZw5U16v17/MoUOHNH78eJ122mk68cQTddlll+nTTz/1/z1Ke5zA/XLrrbeqS5cu6tKliwYPHqyff/7Zv30zrw2p5N/VMAz169dP2dnZ2rZtmyTJ5XJp7Nix6tGjh04++WSNGDFCv//+u38dp9OpKVOm6JxzzlFOTo66dOmiG2+8UevXry9130rStm3bgvZd6G3fY5911llBr7ONGzf692/o/inLa6+9JpfLpTPPPDOofdq0aWH3dehrZfHixbrggguUk5Ojnj17aubMmfJ4PJaeoyR9/vnnYR8v8D04atQo9e/fX6+++qrOPPNMde7cWQMGDNCGDRuCtr9161bdeeedOvXUU3XiiSeqf//++uabb0o8fuB/vhqzs7P1wgsv6N5771Xnzp11yimn6JFHHpHL5fKv7/F4NHv2bF144YXq2LGjTjzxRPXr1y/o88T3OdC5c2e53e6g+u68886gocmB9bz99ttBy37yyScl/o5mHl+SzjzzTLlcLr322msCAFRvKbEuAACAaNi/f7/GjRunE044QbfccoteeeWVoPvHjh2r119/Xbfeeqs6d+6sFStWaMKECTpw4IAGDx7sX+6jjz5S3bp19cADD8jr9WrKlCnq37+/3n33XWVmZurTTz/V4MGDdf311+uOO+6Q0+nUokWL9PDDDysnJ0edOnXyb2vYsGG65JJLNGjQIH300UcaN26cJOmaa67RsGHD9Omnn+qhhx7SO++8o7179+qRRx7Reeedp0suuSTsc/zpp580b948y/vmnXfe0YgRI3TRRRfprrvu0vbt2zVt2jRt2rRJzz//vBwOh77//ntdd9116tSpkx577DF5PB5NmTJFN998sz8MC/T4448rJydHt9
12mySZ2i9vvvmmpkyZoltvvVU9evRQZmamJOmqq64y/Vx27dqlqVOn6uGHH9YZZ5wRdF9ubq4aNmwoSdqzZ4+GDBniv2/Dhg264YYb1KdPH02bNk2GYeidd95Rbm6uWrZsqQsuuEAej0c33XSTPzho2bKl3njjDQ0ePFjz58/XQw89pEOHDvlrvvzyy3XFFVdIko4//nht2bJF/fr1U8uWLTVp0iQdOXJETz/9tK6++mq99dZbql+/vr+esl4b4bz11ltavXp1ib/Bm2++qTFjxqh27doaN26cxo4dq2nTpkmSRo4cqZUrV+ruu+9Ws2bN9OOPP2rGjBkaPny43n33XTkcDtP7PdScOXP8IZjPbbfdprS0NI0fP15HH320kpKStHjx4nKHXb399tvq2bOn0tLSgtqdTqd69eqlW2+91d8W+lp55plnNG3aNF133XUaPXq01q9fr5kzZ2rnzp2Wh986nU41atRIM2bM8Lf5/i6B1q9fr82bN+vuu+/WUUcdpSeeeELXXXed3nvvPR199NHatGmTrrzySrVo0UIPPPCAUlNTtWDBAg0YMEBz587VSSedFLTPevbsKUlBz3/GjBnq1KmTpk+frh9++EHTp0/Xnj17NH36dEnS5MmT9a9//UvDhw9Xdna2du3apSeffFJDhw7Vp59+6n9vScU9kL788kv/++Xw4cNaunRp2F5mNWrU0Mcff6yLL77Y3/bee+8pKSkpKKQ1+/jp6ek688wz9c477+jaa6+18ucAACQYwicAQEKYMGGCfv/9dz333HNKSQn+etuyZYteeeUV3X333f4JcP/2t7/J4XDomWee0TXXXOPvKVVYWKjXX39dTZs2lSS1bNlSffv21Ztvvqmrr75amzZtUt++fXX//ff7t9+5c2edfPLJ+vrrr4PCp7PPPtu/3Gmnnabdu3frqaee0tVXX62MjAxNnDhR11xzjWbPnq1Vq1apZs2aYX/sBj7H1q1b67vvvvO3JSUl6ciRI6WuYxiGJk+erNNOO02TJ0/2t7do0UI33HCDli5dqp49e2rWrFmqU6eO5s6dq/T0dEnS0UcfreHDh2vjxo1B2/zxxx/12Wef6e2331br1q0lydR+WbNmjerUqaO777671HrL89JLL6lt27a69NJLS9zXrl07NWnSRJJKBCMbNmzQKaecoscff9z/o/vUU0/Vxx9/rK+//loXXHCBli1bpv/973968skn1bt3b0lS9+7d9fPPP+urr74KCrMkqVGjRjrxxBP9tx966CFlZmZq3rx5qlmzpiSpR48e6t27t+bMmaN7773Xv2xZr43QUOjw4cOaPHmyTjjhhKC/vWEYGjlypH+epFWrVvkn7Ha73Tp8+LAeeOABnX/++ZKkk046SYcOHdLEiRO1d+9ef1Bn1c6dO/Xss88G1bNv3z79/PPPGjNmjPr06eNf9r///W+Z2zp06JDWrl2r8847r8R9hYWFOvbYY4P2caCDBw/qqaee0lVXXaUHHnhAUvH7uk6dOnrggQd04403+l+fZhQWFqp27dpBj+f7O4Y+7qxZs9S1a1dJUseOHdW7d28tWLBAI0aMUG5urtLS0rRgwQL/+j179tSFF16oxx57LCiMa9asWdjnV69ePc2aNUspKSk644wzlJSUpEcffVR33HGHWrVqpd27d2vYsGFBvbLS09N1xx13KC8vL2ibp59+uj766CN/+PTxxx+rYcOGQWFS4LL//e9/5Xa7lZaWJpfLpY8++kjdunXz9+6TZOnxO3TooPfee0+HDh0Kuz8BANUDw+4AAHFv6dKleuuttzRw4EC1bdu2xP1fffWVDMNQr169dOTIEf9/vXr1ksvlChoO06VLF3/wJEnt27dX06ZNtWLFCknSLbfcookTJ+rw4cNat26d3nvvPT3zzDOSVGJoS9++fYNun3POOdqzZ4+2bNkiqTicueGGG/Tkk0/qiy++0MSJE3XUUUeFfY7Lli3TF198ERRgSFL9+v
X9Q+PC2bx5s3755ZcSz71bt26qWbOmPv/8c0nSN998o9NPP90fPPnq+/jjj9WuXTt/W0FBgaZNm6aTTz456Ie9mf3
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x800 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Выбираем подмножество данных для кластеризации\n",
|
|||
|
"features = df[[\"NetChange\", \"LandArea\", \"Density\", \"Yearly Change\", \"Population2020\"]]\n",
|
|||
|
"\n",
|
|||
|
"scaled_features = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(scaled_features, method='ward') # Метод \"Ward\"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(12, 8))\n",
|
|||
|
"dendrogram(linkage_matrix, labels=df.index, leaf_rotation=90, leaf_font_size=10)\n",
|
|||
|
"plt.title('Иерархическая кластеризация (дендрограмма)')\n",
|
|||
|
"plt.xlabel('Индекс')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Визуализация распределения кластеров**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABWYAAASgCAYAAAB7dpGaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wVVf7/8dfM3JJ7k5CEllClI71XaaIiimLXtevqWtb9rWv52tauqKvurru6uPaOZS3Yxa6oFGnSey9JgJCe3DIzvz8iV2ISCDHJJfB+Ph4+IHNmznzOJ2H33E/OnDFc13URERERERERERERkXpjxjsAERERERERERERkUONCrMiIiIiIiIiIiIi9UyFWREREREREREREZF6psKsiIiIiIiIiIiISD1TYVZERERERERERESknqkwKyIiIiIiIiIiIlLPVJgVERERERERERERqWcqzIqIiIiIiIiIiIjUMxVmRUREKuG6brxDEBERkQZEcwcREdlfKsyKyEHv/PPP5/zzz49rDDfddBNjx46ttO2f//wnXbt25Z577qnnqOrXo48+SteuXcv917t3b4499lj+9re/kZubG7fYxo4dy0033RT7evLkyTzzzDNxi0dERETqzvnnn0/37t1ZtGhRpe2/nhdUx6pVqzj77LMrbZsxYwZ/+tOfGDlyJH369InNfXbu3FnuvK5du/Loo4/u130PBaFQiOOOO47BgwezY8eOSs/5+9//TteuXfn222/rNJa9zen316JFi/i///s/xowZQ+/evTn66KO57bbb2LRpU7nzavLzKCINhwqzIiJx5DgOU6dOpUuXLrz77ruUlJTEO6Q69/rrr/P666/z2muv8fjjj3PKKafwxhtvcNZZZ5GTkxOXmB577DH++Mc/xr7+17/+dUh8L0RERA5Vtm1z8803Ew6Ha6W/Tz75hPnz51c4/vDDD3PxxRfj8Xj461//yhNPPME555zD+++/z5lnnsm2bdtq5f4HM7/fz3333UdBQQF33313hfYlS5bw7LPPctZZZzFq1Kg4RLj/XnnlFX73u9+xc+dOrrvuOp566ikuu+wyZs+ezemnn87y5cvjHaKI1BMVZkVE4ui7774jMzOTO++8k6KiIj744IN4h1Tn+vbtS9++fenXrx9HHHEEV1xxBc899xxbtmzhH//4R1xi6t69O23bto3LvUVERKT+JScns2rVKv7zn//U2T0+/PBDnnrqKW666SYeeeQRxo8fz9ChQ7nwwgt5+eWXycnJYdKkSXV2/4NJv379uPDCC5k2bRqff/557Hg0GuWWW26hZcuWDWZV6dy5c5k0aRLnnHMOzz77LCeeeCJDhgzhzDPP5NVXX8Xv93PLLbfEO0wRqScqzIqI/Ox///sfp556Kn379qV3796cdNJJfPzxx7H2t99+m+7du/PTTz9x1lln0atXL4488sgKj7zn5eVx8803M3jwYAYNGsRDDz2E4ziV3vOtt96iS5cuDBgwgCFDhvD6669XOOemm27iwgsv5I477qB///4cf/zx2LaN4zg8+eSTHHPMMfTs2ZNjjz2Wl156qdy1tm3z5JNPcsIJJ9C7d2/69u3L7373O2bOnFllHm677TaOOOIIbNsud3zSpEkMGTKESCRCaWkpd955J6NGjaJnz56MHz/+Nz3637t3b8aNG8fUqVPLrVSdM2cO5513Hn369GHw4MHceOON5VbVVvd78sEHHzBx4kR69+7N0KFDuf7668nKyoq17/mIWNeuXYGyVbRdu3Zl1apVdO3atcL3Ztu2bXTr1o333nuvxuMWERGR+OjWrRsnn3wyTz/9NIsXL97n+f/73/+YMGECPXv2ZMyYMTz66KOxudKjjz7KY489BpTfjuDJJ5+kU6dOXHjhhRX6a9euHf/3f/9Hv379yu1NW1hYyF//+lcGDx5Mv379+POf/1zu8f3qzO0effRRjjnmGL7++mtOPPHE2Dxx6t
Sp5WJYs2YNf/jDH+jfvz/Dhw/nn//8JzfffHO5LcCqM9/8tWOPPZY///nPFY6fdNJJXHnllQBs3LiRK664giFDhtCnTx/OOussvvnmm732+5e//IV27dpx1113UVhYCMAzzzzDypUreeCBBwgGg8C+548AP/74I5dccgmDBg2iZ8+ejB07lkcffTQ2Z9+8eTNdu3blueeeY/z48fTp04e33nqrXB9/+9vf6N27NwUFBeWOT548mQEDBlT59NUzzzxDcnIy1157bYW2xo0bc9NNN3HUUUdRXFwcOx6JRHjwwQc54ogj6Nu3L7///e/ZsGFDuWtr67NMdnY211xzTeyzzO23384///nPCls47O3fhIhUnwqzIiKUPU50++23c/TRR/PEE0/w8MMP4/P5uP7668nMzIyd5zgOf/nLXzj++ON58skn6d+/Pw8++CDTp0+PtV966aV888033HjjjTzwwAPMmzePjz76qMI9c3Nz+fLLLzn55JMBOOWUU1i0aBFLliypcO6cOXPYtm0b//nPf7juuuuwLIs777yTf//730ycOJH//ve/jB8/nvvuu6/cyo+HH36YyZMnc9ZZZ/H0009zzz33kJuby9VXX13lZPGkk05ix44dzJo1q9y4P/74YyZMmIDX6+W+++7j22+/5cYbb+SZZ57hqKOO4sEHH6wwYd0fRxxxBJFIJLbf248//shFF11EQkICjzzyCLfccguzZ8/mggsuoLS0tFxse/uezJ07lxtuuIFx48bx1FNPcfPNNzNz5kyuu+66SuPYXYA9/fTTef311+ncuTN9+vTh3XffLXfe1KlTCQaDjBs3rsZjFhERkfi55ZZbSEtL2+eWBk888QS33XYbw4YN47///S/nnnsuTz31FLfddhsAZ5xxBqeffjpQNo8444wz2L59O8uXL2fMmDEYhlFpv+eccw6XXHJJufYXX3yRSCTCv/71L6677jq+/PLLco/vV3dut337du6++24uuOACnnzySVq3bs2NN97ImjVrAMjJyeG8885j27Zt3H///dx666188sknFZ7eqs5889cmTpzIN998EyueQlkRePny5Zx00kk4jsPll19OSUkJDz74IJMnTyY1NZUrr7yyQrFxTwkJCUyaNInt27fz2GOPsXnzZiZPnszvf/97BgwYAFRv/rh8+XIuuugiUlNT+ec//8njjz/OwIEDeeyxx8oVMqGsyP2HP/whVhTd0+mnn04oFOKTTz4pd/zdd9/l+OOPJxAIVBiD67p89913DBs2rNJ2gOOPP56rrroqVmgG+Oijj1i1ahUPPPAAd9xxB4sXL+aaa66JtdfWZ5lwOMyFF17IvHnzuOWWW7j//vtZvnw5zz77bLkY9/VvQkSqzxPvAEREDgSbNm3ikksuKbfPaKtWrTj11FOZO3cuEyZMAMomU3/84x8544wzABgwYACfffYZX3/9NSNHjuTbb79l4cKFPPXUU7E9roYNG1bpSwLef/99HMfhpJNOAmDcuHHcfffdvPbaaxVeBBaNRrn77rvJyMgAYN26dbzxxhtce+21XHbZZQCMGDECwzBie5elpaXFfuO958oHv9/P//t//48VK1bQt2/fCnENGDCAVq1a8cEHHzB8+HAAZs2axfbt22Oxzp49myOOOCKWlyFDhhAMBmnSpMl+Zv4XTZs2BYitCvn73/9O+/bteeKJJ7AsC4A+ffowYcIE3nrrLc4991xg39+TuXPnkpCQwGWXXYbP5wMgNTWVRYsW4bpuhQ9Lu3OSkZER+/tpp53GHXfcwaZNm2jTpg1QVpidMGECCQkJNR6ziIiIxE9KSgp33303V155Jf/5z3/KFbp2KygoiBVCb731VqBszpWamsqtt97KxRdfTOfOnWNztN1zh4ULFwLQunXr/YqpV69ePPjgg0DZHPKnn34qt5K0unO7kpISJk2axLBhw4CyFbpHHnkk33zzDR07duSll16iqKiIqVOnkp6eDhB7Mdlu1Z
1v/trEiRN59NFH+fzzz2MLED744AMaNWrE2LFj2blzJ2vXruWPf/wjo0ePBsqennrsscf2uefvwIEDOe+883j55Zd
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1400x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"plt.figure(figsize=(14, 12))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"LandArea\"],\n",
|
|||
|
" y=df_cleaned[\"Density\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"LandArea vs Density\")\n",
|
|||
|
"plt.xlabel(\"LandArea\")\n",
|
|||
|
"plt.ylabel(\"Density\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"NetChange\"],\n",
|
|||
|
" y=df_cleaned[\"Yearly Change\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"NetChange vs Yearly Change\")\n",
|
|||
|
"plt.xlabel(\"NetChange\")\n",
|
|||
|
"plt.ylabel(\"Yearly Change\")\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"NetChange\"],\n",
|
|||
|
" y=df_cleaned[\"Population2020\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"NetChange vs Population2020\")\n",
|
|||
|
"plt.xlabel(\"NetChange\")\n",
|
|||
|
"plt.ylabel(\"Population2020\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Density\"],\n",
|
|||
|
" y=df_cleaned[\"Yearly Change\"],\n",
|
|||
|
" hue=result,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Density vs Yearly Change\")\n",
|
|||
|
"plt.xlabel(\"Density\")\n",
|
|||
|
"plt.ylabel(\"Yearly Change\")\n",
|
|||
|
"\n",
|
|||
|
"# Настройка графиков\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## KMeans (неиерархическая кластеризация) для сравнения"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\joblib\\externals\\loky\\backend\\context.py:136: UserWarning: Could not find the number of physical cores for the following reason:\n",
|
|||
|
"[WinError 2] Не удается найти указанный файл\n",
|
|||
|
"Returning the number of logical cores instead. You can silence this warning by setting LOKY_MAX_CPU_COUNT to the number of cores you want to use.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
" File \"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\joblib\\externals\\loky\\backend\\context.py\", line 257, in _count_physical_cores\n",
|
|||
|
" cpu_info = subprocess.run(\n",
|
|||
|
" ^^^^^^^^^^^^^^^\n",
|
|||
|
" File \"C:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\subprocess.py\", line 548, in run\n",
|
|||
|
" with Popen(*popenargs, **kwargs) as process:\n",
|
|||
|
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n",
|
|||
|
" File \"C:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\subprocess.py\", line 1026, in __init__\n",
|
|||
|
" self._execute_child(args, executable, preexec_fn, close_fds,\n",
|
|||
|
" File \"C:\\Users\\User\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\subprocess.py\", line 1538, in _execute_child\n",
|
|||
|
" hp, ht, pid, tid = _winapi.CreateProcess(executable, args,\n",
|
|||
|
" ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Центры кластеров:\n",
|
|||
|
" [[2.65582879e+04 1.01425144e+05 3.61537879e+02 3.76969697e-01\n",
|
|||
|
" 1.00728718e+07]\n",
|
|||
|
" [5.92861374e+05 1.05398517e+06 1.62666667e+02 2.08353535e+00\n",
|
|||
|
" 3.68242102e+07]\n",
|
|||
|
" [9.56336050e+06 6.18070050e+06 3.08500000e+02 6.90000000e-01\n",
|
|||
|
" 1.40966408e+09]\n",
|
|||
|
" [4.58400000e+03 1.55000000e+01 2.39910000e+04 1.05000000e+00\n",
|
|||
|
" 3.44288500e+05]]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAASgCAYAAABWngGUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wUdf7H8femVwgtBAgQitJBpPdyKlVRzhNOEVBA6dKLBVBEkSJSpIOnUlWwUQQVDn9ySrVSPOkECAFCCOnJZn9/xOyxpLAJm+wk+3o+HjwgM7Mzn9lPlswnn/l+x2SxWCwCAAAAAAAAAAAwEDdnBwAAAAAAAAAAAHA7GhgAAAAAAAAAAMBwaGAAAAAAAAAAAADDoYEBAAAAAAAAAAAMhwYGAAAAAAAAAAAwHBoYAAAAAAAAAADAcGhgAAAAAAAAAAAAw6GBAQAAAAAAAAAADIcGBgDkA4vF4uwQAEPjMwIAAOA4XFsVPN7zgsN7Dbg2GhhAEfH000/r6aefzrQ8NjZWTzzxhOrWratvvvnGum2NGjXUu3fvbPc3evRo1ahRQ5MmTcq3mPNLUlKS/vWvf+nvf/+7GjVqpKZNm6p379767LPPbC58Fi5cqBo1ajj02MnJyXrjjTf05ZdfOmR/2eW1IISHh6tGjRravHmzU46foUaNGlq4cGGm5WlpaWrfvr1q1Kih33//3QmRFZyOHTuqRo0aNn/q1aunBx98UHPnzlVSUpJT4tq3b59q1Kihffv22f2arD4jkyZNUseOHfMjRJ09e1YvvPCCWrdurUaNGumf//ynfvjhh0zbvf/++3rwwQdVv359PfbYY9qzZ4/N+rS0NK1fv14PP/ywGjZsqL/97W964403FBsbm+l4gwcPVuPGjdWsWTNNnTo10zYAABQF1B//Q/3hGBn1x61/6tatq5YtW2rIkCE6fPiwU+KSMufu0KFDeu6555wWT04yPm+3v4/t27fXq6++qhs3bjglrrzWl4sXL9aqVausX+fH5yjDlStX9PLLL6tDhw5q2LChevbsqW3btmXabsuWLerWrZvq16+vLl266NNPP820zfbt2/X3v/9dDRs2VLt27TR58mRdvXrVZpurV69q7NixatasmRo1aqQxY8YoMjIyX84NKMw8nB0AgPwTGxurgQMH6vjx43r33XfVrl076zo3Nzf9/PPPioiIUEhIiM3r4uPjtXv37oIO1yGuXr2qgQMH6tKlS3r66adVv359paWlaffu3Zo0aZIOHjyo6dOny2Qy5cvxIyMj9f777+vNN990yP6mTp3qkP0URXv37tXVq1dVtWpVbdiwQa+//rqzQ8pX7dq109ChQ61fJyUlad++fVq8eLEuXLigt99+24nR2S+rz8jQoUPVt29fhx/r+vXr6tOnj4KCgvTiiy8qICBAH3/8sZ599lm9//77atq0qSTpvffe0+zZszVs2DDVrVtXmzZt0pAhQ/TBBx+ocePGkqSVK1fqnXfe0YABA9SiRQudPn1aCxYs0J9//qnVq1fLZDIpJiZG/fr1U+nSpTVz5kxFRUVp9uzZCg8Ptym6AAAoqqg/qD8cYciQIWrfvr2k9GveiIgIffjhh3rqqae0cOFCPfDAAwUe0z/+8Q+1adPG+vXHH3+skydPFngc9qpdu7ZNLlNSUnTkyBG9/fbbOnbsmNavX59v35OONn/+fA0fPtz69e25cJTk5GQNHDhQN2/e1MiRIxUcHKwdO3Zo9OjRSk5O1qOPPipJ2rFjh8aNG6e+ffuqTZs2+uabbzRp0iR5eXmpW7dukqStW7dqzJgx6tWrl0aPHq2rV69q/vz56tevnzZv3ixvb2+lpqZq0KBBio2N1bRp05Samqq5c+dqwIAB2rx5szw9PR1+jkBhRQMDKKIyiodjx45pyZIlatWqlc362rVr68SJE/rqq6/Uv39/m3W7d++Wr6+vihUrVoARO8bEiRMVERGhjRs3KiwszL
q8ffv2Kl++vN5++2116NBBf/vb35wXZC5Ur17d2SEY1ubNm9WwYUO1adNGS5Ys0aRJkxQQEODssPJNyZIldd9999ksa9asmSIiIrR582ZNmjRJwcHBzgnuLlWqVClf9vvZZ5/p+vXr+uSTT1S2bFlJUqtWrdSjRw+tWrVKTZs2VWJiohYvXqxnnnlGw4YNkyS1bdtWvXv31rvvvqv33ntPaWlpWrFihXr16qWxY8dKklq2bKkSJUpo9OjR+v3331WvXj2tX79e0dHR2rx5s0qWLClJKlu2rJ577jkdOnRIjRo1ypfzBADACKg/qD8cpVKlSpmue7t06aI+ffropZdeUvPmzQv8uj8kJCRT483IAgICMr2HTZo0UVxcnBYsWKBffvkl0/rCIr9y8e9//1vHjx/Xxx9/rPr160tKrx0uXryolStXWhsYb7/9tjp37qwXX3xRktSmTRvduHFD8+fPtzYwli5dqnbt2um1116z7r9KlSp64okntHv3bnXu3FlfffWVjh49qq1bt1o/d7Vq1VL37t21fft2PfLIIw4/R6CwYgopoAiKi4vToEGD9Mcff2j58uWZigdJ8vPzU7t27fTVV19lWrdt2zZ16tRJHh62Pc60tDQtX75cDz74oOrWratOnTrpww8/tNnGbDZr+fLl6t69u+rXr6/77rtPvXv31o8//mjdZuHChXrwwQf173//Ww8//LB1X5999pnNvt5//3117txZ9erVU5s2bTRt2rQcp2I5duyYvv/+ew0YMMCmeMjQv39/PfXUU/Lz88vy9R07dsw0ZH3z5s2qUaOGwsPDJUmJiYmaNm2a2rZtq7p166pz587WO6vDw8OthcnkyZNtpsQ5ePCg+vTpowYNGqhp06aaOHGioqKibI5Tu3Ztffzxx2rVqpWaNm2qEydOZBrCXaNGDa1du1YvvfSSmjZtqoYNG+qFF17INBR11apV+tvf/qb69eurd+/e2rVrl81UPxnDd7Oamim3oqKi9Oqrr6pDhw6qW7eumjZtqmHDhlnfMyl9GPNLL72k5cuXq3379qpXr5569+6tX3/91WZf+/fvV69evdSgQQN16tRJ//nPf7I85o0bN/TNN9+oQ4cO6t69uxISEvT5559n2q5GjRpatGiRevbsqfr162vRokWSpIsXL2rMmDFq2rSpGjRooH79+uno0aM2rw0PD9eECRPUunVr1alTRy1atNCECRN0/fr1LGNKSkpSo0aN9NZbb9ksT01NVfPmza0jRH7//Xf169dPjRo1UsOGDdW/f3/9/PPPOb/JOahbt64sFosuXbokKf0zuHbtWj388MOqX7++2rdvrzlz5thMMzVp0iQ9/fTT+uSTT6zDo/v166fjx49bt8luaPadvm+++eYbPfnkk2rYsKH1M7J27VpJ2X9Gbp9Cyt5z6N+/vzZt2qROnTqpbt266tGjh7777jvrNmXLllX//v2tzQtJcnd3V+XKlXXu3DlJ0i+//KKYmBg9+OCD1m1MJpMefPBB7du3T4mJiYqNjVWPHj3UvXt3m3OtWrWqJOn8+fOSpO+//16NGjWyNi8kqXXr1vL397eJCwCAoob6g/ojv+sPLy8vjRgxQtHR0dq+fbt1eXR0tKZMmaKWLVuqXr16euKJJzJNF2rPOZw7d06DBw9Ws2bN1KBBA/Xq1ctmStFbr40nTZqkTz/9VBcuXLBOifT3v/89yynS+vfvr2eeeSbLc+rUqZNGjhyZaXmPHj00ZMgQu+LKrbp160pKr4cybNu2TT179lTDhg3VqlUrTZkyxWaaqYULF6pjx47WX7w3aNBATzzxhM00srd/32bI6nv8VgcOHNCAAQPUpEkT1a1bVx07dtTChQuVlpYmSdb3fNGiRdZ/Z1Wn2HMOd/o/ICAgQL169VK9evVs9l21alVr7RAeHq4zZ87Y1A5Sei7Pnj2rM2fOKC0tTa1atdITTzyRaT+SrPv6/vvvVaVKFZumYfXq1VWtWrW7yj
FQFNHAAIqY+Ph4Pffcczp69KhWrFihZs2aZbtt165drcO4M8TGxuq7777L9Is6SZo2bZoWLFigRx55REuXLlXnzp3
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 3 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"features_used = [\n",
|
|||
|
" 'NetChange','LandArea','Density','Yearly Change','Population2020'\n",
|
|||
|
"]\n",
|
|||
|
"data_to_scale = df_cleaned[features_used]\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(data_to_scale)\n",
|
|||
|
"\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"kmeans = KMeans(n_clusters=4, random_state=random_state)\n",
|
|||
|
"labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
"centers = kmeans.cluster_centers_\n",
|
|||
|
"\n",
|
|||
|
"# Отображение центроидов\n",
|
|||
|
"centers_original = scaler.inverse_transform(centers) # Обратная стандартизация\n",
|
|||
|
"print(\"Центры кластеров:\\n\", centers_original)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов кластеризации KMeans\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"LandArea\"],\n",
|
|||
|
" y=df_cleaned[\"Population2020\"],\n",
|
|||
|
" hue=labels,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.scatter(centers[:, 0], centers[:, 1], s=300, c='cyan', label='Centroids')\n",
|
|||
|
"plt.title(\"KMeans Clustering: LandArea vs Population2020\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"Density\"],\n",
|
|||
|
" y=df_cleaned[\"Population2020\"],\n",
|
|||
|
" hue=labels,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.scatter(centers[:, 2], centers[:, 3], s=300, c='cyan', label='Centroids')\n",
|
|||
|
"plt.title(\"KMeans Clustering: Density vs Population2020\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=df_cleaned[\"NetChange\"],\n",
|
|||
|
" y=df_cleaned[\"Yearly Change\"],\n",
|
|||
|
" hue=labels,\n",
|
|||
|
" palette=\"Set1\",\n",
|
|||
|
" alpha=0.6,\n",
|
|||
|
")\n",
|
|||
|
"plt.scatter(centers[:, 1], centers[:, 3], s=300, c='cyan', label='Centroids')\n",
|
|||
|
"plt.title(\"KMeans Clustering: NetChange vs Yearly Change\")\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### PCA для визуализации сокращенной размерности"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAAJHCAYAAAA+Dx+UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD+xElEQVR4nOzdd5xcVf3/8dctU7f3kt57D6G3UKUKgoCIgII0RUEUkKp+UVSaNAUFUQg/UEF6lxYgBAIhJCEhPdlks73v9Jn7+2PJJJPdDQlsTd7PxyOPsPfcuefcM7Phnvmccz6G4zgOIiIiIiIiIiIiIiIifYjZ2w0QERERERERERERERHZngIYIiIiIiIiIiIiIiLS5yiAISIiIiIiIiIiIiIifY4CGCIiIiIiIiIiIiIi0ucogCEiIiIiIiIiIiIiIn2OAhgiIiIiIiIiIiIiItLnKIAhIiIiIiIiIiIiIiJ9jgIYIiIiIiIiIiIiIiLS5yiAISKyG3Acp7ebIJ3Yk9+bPfneRURERCSVng33DHqfRaSrKYAhshs766yzGDNmTMqfiRMncsghh/CrX/2KxsbGdq9Zu3YtN954I4cffjiTJ0/mkEMO4fLLL2f58uWd1nP77bczZswYfvOb33Tn7XTqrrvuYsyYMb1Sd0eefPJJxowZw8aNG7v9dZFIhN/+9rc8++yzu9rMXXL66aczZswYXn755W6tp6+9l19HU1MTv/jFL1iwYEHy2FlnncVZZ53VY23Y2d/n2bNnc9VVV3Vp3StXruSMM87okmtt3LiRMWPG8OSTT3bJ9URERKTv0Jild+xOY5YxY8Zw1113tTu+YsUK9t13Xw4++GDWrVuXPHfMmDHcdtttHV4rkUhw4IEH9ttnz8rKSv7whz9w9NFHM2XKFA444AAuvPDClDEJdM+4pKKigh/+8Ids2rSpS67X2fsqInseBTBEdnPjx4/n8ccfT/75+9//zjnnnMMTTzzBBRdckDI74pVXXuGkk05i6dKlXHTRRfz1r3/lsssuY926dXz729/m3XffbXf9RCLBU089xejRo3n66acJBoM9eXt7vKqqKv7xj38Qi8W6rY41a9awcOFCRo8ezWOPPdZt9exuli1bxtNPP00ikUgeu+GGG7jhhht6pP6v8vvclV566SUWLlzYJdcqLCzk8ccf55BDDumS64mIiEjfojHL7q0nxizbW7lyJeeccw4+n49HHnmEoUOHJstM0+Sll17q8HUffvghVVVVPdTKrvXRRx9x4okn8sYbb/C9732Pv/zlL1xzzTWEQiHOOussnnrqqW6t/7333uOtt97qsus9/vjjnHrqqV12PRHpv+zeboCIdK/09HSmTp2acmyvvfaitbWVO++8k0WLFjF16lQ2bNjAlVdeyYEHHsgdd9yBZVnJ84888kjOOOMMrrzySl5//XXcbney7J133qGiooLbbruN7373uzz33HN6yNjNPPnkkwwYMIALLriAK664gvXr1zNkyJDebla/NHLkyB6p56v+PvdVbre73b9jIiIisvvQmEW60urVqzn77LNJS0vjH//4B6WlpSnl06dPZ8GCBXz22WeMHz8+pez5559n3LhxLFu2rCeb/LU1NDTw05/+lKFDh/L3v/8dn8+XLDvqqKP44Q9/yPXXX88BBxxAfn5+L7Z05+n5X0S20AoMkT3UxIkTASgvLwfg4YcfJhKJcO2116YMBAB8Ph9XXnkl3/rWt9ot4X7iiScYPXo0M2bMYO+99+bxxx//0rpnz57Nb3/7W84++2wmT57MNddcA7Q9dF1//fXst99+TJo0iW9/+9vMmzcv5bXhcJjf/e537L///kybNo2rr76acDicck5Hy2Hnz5/PmDFjmD9/fvLYmjVr+NGPfsSsWbPYa6+9uOCCC1i9enVKXX/4wx84+OCDmThxIscffzwvvPBCynUTiQT33nsvhxxyCFOmTOHiiy/ucJn79nb2da+99h
rf+c53mDZtGhMnTuToo49mzpw5QNu2OocddhgAV199NbNnz06+7t///jcnn3wyU6dOZfLkyZx44om8+OKLKdceM2bMl24bFI/Heeqppzj00EM5/PDD8fv9Hb7H0WiUW265hYMOOojJkyfzgx/8gKeeeqrd8vL//ve/HHPMMUyaNIkTTjiBefPmMX78+B0uz37hhRc4+eSTmTZtGvvvvz/XX399Sl/dddddHH300bz66qscd9xxTJo0iRNPPJGFCxfyySefcOqppzJ58mSOO+64dp+nFStWcMEFFzB9+nSmT5/OJZdcQllZWbJ8y+fmscce49BDD2X69OnJWX076uP58+fzve99D4Dvfe97yc/jtp/N73//+5x88snt7vfiiy/mhBNOSP68YMECvvvd7zJlyhRmzZrFlVdeSV1dXaf9BV/993nbe972d2X7tgMsWbKEs88+mxkzZjBt2jTOOeccPvnkE6DtPbn77ruB1KXfiUSC+++/nyOOOIKJEydy1FFH8fDDD7er54orruDSSy9l6tSpnHvuue22kHryyScZP348ixYt4rTTTmPSpEkceuihPPDAAynXqqqq4rLLLkv+jl9//fXcfvvtKb8rIiIi0ndpzKIxy86MWba1evVqvve975GRkcEjjzzSLngBbcGx/Pz8dqswYrEYr7zyCscee2y71+zM+15XV8evfvUrDj30UCZOnMisWbO45JJLUsZDZ511Ftdccw33338/hxxyCJMmTeL000/n008/TZ4TCoW48cYbOeigg5L9uf1z7vaeeuopqqqq+OUvf5kSvIC2FSdXXHEFZ555Ji0tLe1e29l2rVdddVXK+7VhwwYuvPBC9t57b6ZMmcJpp52WXHHx5JNPcvXVVwNw2GGHpbxn//73vzn22GOTW8PdddddxOPxlHrOPvtsbrjhBqZPn84xxxxDPB5PGUds+d2YN28e3//+95kyZQr7778/f/zjH1Ou1dLSwvXXX8++++7LtGnTuOyyy3jooYf61PZtIrLrFMAQ2UOtXbsWgEGDBgEwd+5cxo8fT1FRUYfn77vvvlx22WUUFBQkjzU0NPD666/zzW9+E4CTTjqJxYsXs3Tp0i+tf86cOUyaNIl7772XU045hXA4zNlnn83//vc/LrvsMu6++26Ki4s577zzUh4Mf/7zn/Ovf/2LCy64gDvuuIPGxkYeeuihXb7/yspKTjvtNNatW8eNN97IH//4R2pqajj77LNpaGjAcRwuueQSHnvsMc4991z+/Oc/Jx+Atl16+8c//pF77rmHU045hbvvvpvs7GxuvfXWL61/Z1735ptvcskllzBhwgTuvfde7rrrLgYNGsSvf/1rFi1aRGFhYfJL4osuuij533PmzOH666/n8MMP57777uOWW27B7XZzxRVXUFFRkbz+448/zsUXX7zDdr799ttUV1fzzW9+E6/Xyze+8Q3++9//EolEUs67/vrr+cc//sF3v/td7rnnHvLz87nuuutSznnqqae46qqrmD59Ovfeey9HHXUUF198ccoD5/buvfdeLr/8cqZOncqdd97JJZdcwssvv8xZZ51FKBRKnldRUcHNN9/MhRdeyJ/+9Ceampq49NJLufzyyzn11FO55557cByHyy67LPm6tWvXcvrpp1NbW8vvf/97brrpJsrKyjjjjDOora1Nacfdd9/NlVdeyfXXX8+0adO+tI8nTJjA9ddfn+ybjraNOuGEE1i6dCnr169PHmtqauLtt9/mxBNPBNqWsJ9zzjl4vV7uuOMOfvnLX/LBBx/wve99L+X+t/dVfp93RUtLC+eddx45OTncdddd3H777QSDQX7wgx/Q3NzMqaeeyimnnAKkLv2+8cYbufPOOznhhBP4y1/+wtFHH81vf/tb7rnnnpTrv/jii6SlpfHnP/+Z8847r8M2JBIJfvrTn3LMMcdw//33M336dP7whz8wd+5coG2v5bPPPpuPP/6YX/7yl/zud79j+fLlPPjgg1
/pnkVERKTnacyiMcvOjFm2WLNmDWeffTbp6ek88sgjnX5OLMviqKOOahfAmDdvHuFwuN1kl5153x3H4YILLuDdd9/
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"reduced_data = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация сокращенных данных\n",
|
|||
|
"plt.figure(figsize=(16, 6))\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: KMeans Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA14AAAImCAYAAABD3lvqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB8fUlEQVR4nO3dd1yV5f/H8fc57K2gggv3Btya5iSzUivNdlmZK7OstK9l2jCzpebM1DRHWVppNizbabn3AgfuBYgskQ3n9wdxfh0HIgL3AV7Px4OHnvu+7vt8zuGKeHvd9+eYLBaLRQAAAACAImM2ugAAAAAAKO0IXgAAAABQxAheAAAAAFDECF4AAAAAUMQIXgAAAABQxAheAAAAAFDECF4AAAAAUMQIXgAAAABQxAheAAAAAFDECF4AAAAAUMQIXgBQCPr166cGDRrowQcfvOqYF154QQ0aNNDLL79cjJUBKKhTp06pQYMGWrFihdGlACgFCF4AUEjMZrN27typyMjIy/YlJyfrzz//NKAqAABgDwheAFBIGjduLBcXF61evfqyfX/++afc3Nzk7+9vQGUAAMBoBC8AKCTu7u7q3LnzFYPXjz/+qNtuu02Ojo6X7fvtt990zz33KDg4WDfffLPeeustJScnS5JCQ0PVoEGDK36dOnVKkrRu3To9/PDDatmypdq2bauRI0fq7NmzNs8xcuTIK57jWpdQ5V5CeaWv/9qzZ48GDBigtm3bqkWLFnrqqad06NAh6/5NmzapQYMG2rRpkyTp4MGD6tatmx588EHNmDHjqs8xY8YMSdJXX32lO+64Q0FBQTb7r3XZ5pdffnnF8/73uNzLya41rqA15Pe9yev5r7Y/9/vw8ssvKzQ01OZ5ly5davMe/vd5tm3bZjP2s88+U4MGDWzOkZqaqsmTJ6t79+4KCgpSixYt1L9/f4WHh9sce7W6+vXrZzMmt44ruXR+5OrXr5/NedLS0vThhx/q9ttvV3BwsLp37665c+cqOzvb5phLa9m0aVO+jr0Wi8Wi0aNHKyQkRP/880++jwMASbr8NwAAQIH16NFDzz//vCIjIxUQECBJSkpK0tq1a7VgwQKtXbvWZvz333+vF198UXfeeaeef/55nT59WlOmTFFERIQWLFigmTNnKj09XefOndMzzzyjoUOHqkuXLpKkSpUqaeXKlXrppZfUq1cvDRkyRHFxcZo+fboeeOABffPNN/Lz85OU8wvrAw88oHvuuUeSrOfLj8aNG+v111+3Pv7qq6/09ddfWx9v3LhRAwcOVNu2bfX2228rLS1Nc+bM0YMPPqgvv/xSderUueycEydOVFBQkIYOHSofHx917NhRkjRu3DhJsj5fQECAtmzZorFjx+ree+/V2LFj5eHhIUn5qj81NVXBwcEaO3asddvVjvvve3vpuILWcD3vzWuvvaYmTZpc8fmXLVsmSdq3b5/efPPNy8ZeKiEhQVOnTr3iPg8PD/3xxx9q2bKldduPP/4os9n232JHjRqlrVu3asSIEQoMDNTx48c1bdo0jRw5UqtWrZLJZLKOvffee3XfffdZH+d+HwuTxWLRU089pZ07d+qZZ55Rw4YNtWnTJk2dOlUnT57U+PHjrWMvnbN16tTJ97F5eeutt/TDDz/oww8/VIcOHQr9NQIo3QheAFCIunTpIjc3N61evVpPPPGEJOnXX3+Vn5+fzS+6Us4vkpMmTVLHjh01adIk6/aaNWvqiSee0Jo1a6xBIHd1KzAwUM2aNZMkZWdna9KkSerQoYMmT55sPb5Fixbq0aOH5s+fr1GjRkmSUlJSVLNmTeuxuefLD09PT+txkvT333/b7J88ebJq1KihuXPnysHBQZLUoUMH3XrrrZo+fbqmTZtmM/748eP6559/9N1336levXqSZA2pnp6ekmTzfKtWrZIkvfLKK9bAI0nOzs7XrD0lJUUVKlSwOd/Vjvvve3vpuN27dxeohut5b+rWrX
vV58/dnpaWdsWxl5o+fbqqVKmiuLi4y/Z16tRJv//+u/73v/9JkiIjI7Vjxw61atVKp0+fliSlp6fr4sWLGjt2rHr06CFJatOmjZKSkvTuu+8qJiZGFStWtJ4zICDApp7c72NhWrt2rdavX68PPvhAPXv2lCTdfPPNcnV11bRp0/TYY49Z59Olc3bNmjX5PvZqJk+erGXLlmnmzJnq1KlTob8+AKUflxoCQCFydXVVaGiozeWGq1at0h133GGzQiBJR44cUWRkpEJDQ5WZmWn9at26tTw9PbVu3bo8n+vo0aM6d+6cevXqZbM9MDBQzZs31+bNm63bzp49Ky8vr0J4hbaSk5O1Z88e3XHHHdZgIUne3t7q2rWrTQ2546dMmaK2bdte8xfdXCEhIZKkTz75RNHR0UpPT1dmZma+ji2s112QGq73vSksBw8e1LJly/Tqq69ecX9oaKiOHTumI0eOSJJWr16tpk2bqmrVqtYxzs7Omj9/vnr06KGoqCht3LhRS5cutTaISU9Pv+66srOzlZmZKYvFcs0xuV//Hbt582Y5Ojrq9ttvtznmrrvusu6/mhs5VpKWLFmiuXPnqmfPnjarogBwPVjxAoBCdscdd+iZZ55RZGSkXFxctGHDBj3//POXjYuPj5eUc1nWlS7Nio6OzvN5co+vUKHCZfsqVKigsLAwSTkra2fOnFG1atWu74Xkw4ULF2SxWK5aw4ULF2y2PfXUU/L29ra5VPFaWrdurbFjx2ru3LmaOXPmddV3+vTpPC/JK8oarve9KSxvvfWWevbsqebNm19xv7+/v4KCgvT777+rdu3a+vHHH9WrVy/rfMn1999/6+2339aRI0fk4eGhhg0byt3dXZLyDE9XM2vWLM2aNUsODg6qUKGCOnTooOeee86m4UzuKvF/tWnTRlLO5ZPly5e3CbGSrCtveb2fN3KsJO3fv18dOnTQDz/8oMcff1yNGzfOczwAXAnBCwAKWadOneTh4aHVq1fL3d1d1apVU1BQ0GXjvL29JeXcS5P7y+V/+fj45Pk85cqVkyTFxMRctu/cuXMqX768JCk8PFypqamXNcQoDF5eXjKZTFetIbfGXKNGjdLq1as1fPhwLVmyJN+XpN1///36559/lJmZqddee03VqlXT0KFD8zwmOztbu3btUt++ffP1HJeuSN5oDdf73hSGn376SXv37rW59PRKbrnlFv3++++64447tHfvXs2cOdMmeJ04cULDhg1Tt27dNGfOHFWvXl0mk0lLliy57FJT6drvnZTz/t1///3Kzs7WmTNnNGXKFA0aNEjfffeddcy4ceNsgvJ/79Py8fFRXFycsrKybAJU7j9Q5M73K7mRYyXpueee02OPPaaePXtq7Nix+uqrry4LcQBwLVxqCACFzNnZWd26ddPPP/+sn376yXpPyaVq164tPz8/nTp1SsHBwdYvf39/TZ48+bIViEvVqlVLFStW1A8//GCz/eTJk9q5c6datGghSfrrr7/UqFEj+fr6Xvdryc7OzvMXTHd3dwUFBemnn35SVlaWdfuFCxf0119/XXZfW1BQkGbOnKnTp09r4sSJ+a5j2rRp+uuvv/Tuu+/qjjvuUHBw8DXvr9q+fbuSk5PVtm3bPMflrt5c2lziRmu43vfmRqWnp+v999/XsGHDbO6/upJu3bpp165d+uyzz9SyZUtVqlTJZv/evXuVlpamwYMHKzAw0BqsckNX7nuW2xHwWu+dlNMMJjg4WE2bNtUdd9yhRx55RAcOHFBCQoJ1TK1atWz+W/jv/XRt2rRRZmbmZV1Dc4NbXu/njRwr5axQurq66rXXXtO+ffu0YMGCa75eALgUK14AUAR69OihIUOGyGw223TU+y8HBwe98MILeu211+Tg4KCuXbsqMTFRs2bNUlRU1DUvkTObzRoxYoRGjx6tkSNH6q677lJcXJxmzpwpHx8f9e/fX/v27dOSJUvUs2dP7dy503rsuXPnJOWsbMTGxl
4WymJjYxUREaHjx49bA9zVjBw5UgMGDNDgwYP18MMPKyMjQ3PnzlV6erqGDRt22Xh/f389//zzmjBhgvr27Wu9f+p
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"inertias = []\n",
|
|||
|
"clusters_range = range(1, 11)\n",
|
|||
|
"for i in clusters_range:\n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" kmeans.fit(data_scaled)\n",
|
|||
|
" inertias.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range, inertias, marker='o')\n",
|
|||
|
"plt.title('Метод локтя для оптимального k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Расчет коэффициентов силуэта"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 30,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACrwklEQVR4nOzdeVhUZRsG8HtmmGHfERBQQZFFEBBw35es1EzNNbcW1yzbpLLMyiwtNUvLNf1S0yyXyBbLtNx3UFFZFBAFZN/3ZWa+P5BJRJHBGc4M3L/r8lLOnHPmntcR5+G853lFSqVSCSIiIiIiItIYsdABiIiIiIiImhoWWkRERERERBrGQouIiIiIiEjDWGgRERERERFpGAstIiIiIiIiDWOhRUREREREpGEstIiIiIiIiDSMhRYREREREZGGsdAiIiIiIiLSMBZaRNRsTJ48GZMnT66x7fz58xg+fDg6dOiAXbt2afX533nnHQwYMEDt4wYMGIB33nlHC4mISFs8PT2xevVqoWMQkYAMhA5ARCSUrKwszJo1Cz4+Pti0aRM8PT2FjkRERERNBAstImq2/ve//6G0tBSff/45HBwchI5DRERETQinDhJRs5STk4MdO3bgqaeeqlVkJSQkYO7cuejZsycCAgIwefJkhIWF1djn8OHDGDVqFPz9/dGjRw988MEHKCgoqLHP9u3b0b9/f/j7++P1119HYWEhAGDt2rXo3r07goOD8cEHH6C8vFx1THl5OT766CN07twZXbt2VU09KioqQkhICAICAtC3b19s375ddUxSUhI8PT2xd+9e1baysjIMHDiwxlW6+02dPHPmDDw9PXHmzJn7fg1UXfkLDg6uNe1x165dGDp0KHx9fdGvXz+sXr0acrlc9fj9pkrenbX6ue73qzrnw6ZN3u813Ss9PR1vv/02unfvjk6dOmHSpEm4cOGC6vF7p3gplUqMHz8enp6eSEpKqrFfXVnnzp2LPn36QKFQ1Hj+9957D48//jgAIDU1FW+88Qa6desGf39/TJ48GRcvXgQArF69+oHPUZ0vOjoaL7/8Mrp16wYfHx/07t0bixcvRmlpaZ1jcOLEiTqz1/c1AsChQ4cwcuRI+Pv713muu+3duxeenp64dOkSRo4cCT8/Pzz11FP4888/a+yXlJSEt956C7169YKPjw+6d++Ot956Czk5Oap9oqKiMHHiRHTq1AmDBg3Czp07VY/d7/0L1H6fPGxa393vu61bt9b693X69Gl4eXnhm2++eeA57rVq1Sp4e3vj559/rvcxRKTfeEWLiJoVpVKJlJQULF68GJWVlZg5c2aNx2NjYzF27Fi4urpiwYIFkEql2Lp1K6ZOnYrNmzejS5cuOHfuHGbPno3hw4fjzTffxPXr1/Hll1/i2rVr+P777yGRSHDw4EEsWrQIkydPRp8+ffDjjz/i4MGDAIA//vgDixcvRnJyMpYvXw4jIyPMnz8fALBs2TLs2bMHb731FhwdHbFy5UokJycjOTkZTzzxBFatWoWjR49i0aJFcHR0xMCBA+/7Or/99tsaRcKjWLFiBQoKCmBhYaHatn79eqxcuRKTJk3C/PnzERUVhdWrVyMlJQWffvppvc7r4+ODH3/8EUBV0bZ7927V12ZmZhrJXlRUhAkTJkAulyMkJAQODg7YvHkzXnjhBfz8889wdXWtdcwvv/xSoxC72+jRozFmzBjV1x999FGNx/766y+cOXMG3bt3BwCUlpbizz//xPTp01FeXo5p06ahoqICH3zwAaRSKdasWYPJkyfjp59+wpgxY9C7d+8a5/3ggw8AAI6OjkhPT8fEiRMREBCApUuXQiaT4ejRo/jf//4He3t7zJgx44HjUFpaCkdHR3z11Vf3zV7f13jr1i28+uqr6N27N15//XXVe+JB57rXzJkzMWnSJLz++uvYvXs3XnvtNaxfvx59+/ZFSUkJpkyZAmtra3zwwQcwNzfHhQsX8P
XXX8PIyAiLFi1CSUkJpk+fDmdnZ6xevRrh4eH44IMP4OTkhD59+tQrg7omT56MAwcO4LPPPkO/fv0gk8nw7rvvIiAgALNmzarXOTZt2oQ1a9Zg8eLFGDlypFZyEpHuYaFFRM3KuXPn0K9fP0ilUmzcuLHWB+2vv/4aMpkMW7duVX3Y79evH4YNG4bPP/8cu3fvRmhoKFxdXbFkyRKIxWL07NkTxsbGWLhwIY4cOYIBAwZg3bp16Nq1KxYsWAAA6Nq1K3r27ImCggIsWbIEvr6+AID8/Hxs3LgRL730EhQKBX788UfMmDEDkyZNAgDY2dlh3LhxsLKywvLlyyGVStGnTx9cu3YN69evv2+hlZKSgo0bN8LHxwdXr159pPG6fPkyfvnlF3h7eyM/Px8AUFBQgDVr1mDcuHGq19erVy9YWVlhwYIFeP7559G+ffuHntvMzAwBAQEAgGPHjgGA6mtN+fnnn5GcnIyff/4Z3t7eAIDAwECMGDEC586dq/X3X1RUhOXLlz9w7BwdHWtkvLsg7NWrFxwdHREaGqoqtP7++28UFxdjxIgRuHjxIuLj47F9+3Z06tRJleWxxx7DmjVrsHr1ajg6OtY4793Pdfz4cXh7e+Orr75SPd6jRw+cOHECZ86cqbPQKikpgYWFxQOz1/c1RkZGoqKiAq+//jo8PDweeq57TZ48GXPmzAEA9O7dGyNHjsQ333yDvn37IiEhAY6Ojvjss8/QqlUrAEC3bt1w6dIlnD17FgCQnJyMjh074t1330WrVq3Qq1cv7NixA8eOHdNaoSUSibBkyRIMHz4cy5Ytg0QiQW5uLrZs2QKJRPLQ43/44QcsW7YMixYtwujRo7WSkYh0E6cOElGz0qFDByxduhSWlpaYP39+ras+Z8+eRf/+/Wt8cDQwMMDQoUNx5coVFBUV4ZNPPkFoaCjEYjEqKytRWVmJxx9/HGKxGOfOnUNlZSUiIyPRq1cv1TkMDQ3h7+8PY2NjVZEFVH04Ly0tRUxMDGJiYlBWVqa6qgFUfdA2NDSEn58fpFJpjeOuXr1aY6petc8++wzBwcHo37//I42VUqnE4sWLMXr0aHh5eam2X7hwAaWlpRgwYIDq9VdWVqqmCZ44caLGee7e595pdfXN0dBjw8LC4OLioiqyAMDY2Bh//fVXjas21dasWQNra2tMmDBB7ecSi8UYOXIkDhw4gJKSEgBVhV6PHj3g6OiILl264OLFiwgICIBcLkdlZSUsLCzQs2dPnDt37qHn79WrF77//nsYGhoiNjYWhw4dwtq1a5GdnV1j+un9pKSkwNzcXO3XdC8fHx8YGBjg+++/R3JyMsrLy1FZWQmlUlmv4+++miMSifDYY48hIiICpaWl8Pb2xo4dO+Ds7IyEhAQcOXIEmzZtQnx8vOr1ubu7Y+3atWjVqhXKy8tx9OhR5OXloV27djWeR6FQ1Hjf3S9f9T71yd6qVSvMmzcPP//8M3bt2oUFCxaoisG6/Pvvv/joo48QHByMsWPHPnR/ImpaeEWLiJoVMzMzjBw5Em3btsWECRPw2muv4ccff1T9ZDovLw92dna1jrOzs4NSqURhYSFMTU1haGgIoOqD593y8/ORlZUFuVwOa2vrGo9ZWVnB0tKyxrbqqVeZmZmqoune4ywtLWFlZVXruMrKyhr3rgBVheLBgwexb98+/P777/UZkgcKDQ1FQkIC1q1bh88++0y1PTc3FwAeeAUlPT1d9efk5ORaY9SQHKGhoRCJRLC1tUVQUBBeffXVWh+u7yc3Nxe2trb1ep6EhARs2bIF3377LW7fvt2grM888wzWrVuHAwcOoFu3bjh16hSWL1+uelwmkwGoum/r7nt16nNlRKFQ4IsvvsD27dtRXFyMli1bws/PT/VerEtycjKcnZ0b8IpqatWqFZYtW4YvvvhCNc2zWpcuXR56vL29fY2vbW1toVQqkZ+fDyMjI/zvf//DunXrkJubCzs7O/j6+sLY2LjW/Y
/5+fno3LkzAKBFixZ48sknazz+3HPP1Xrue/OtWbMGa9asgUQigZ2dHXr16oVXX331gY1xhgwZgqVLlwIAevbs+dD
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for i in clusters_range[1:]: \n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
" score = silhouette_score(data_scaled, labels)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Построение диаграммы значений силуэта\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициенты силуэта для разных k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 31,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта: 0.408\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAJzCAYAAAA4M0NGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADNJUlEQVR4nOzdd3hb1fnA8a+2ZA3Le+8kdvZehJDB3gVK2YGWXUZZpaWlpS2FH20ZLYRNKYVASZll7xHCyCJ7D++9JVlbur8/nAgrthM78Uji9/M8eYjvubrnvSdyq1fn3PeoFEVREEIIIYQQQgjRLfVgByCEEEIIIYQQhzpJnIQQQgghhBBiPyRxEkIIIYQQQoj9kMRJCCGEEEIIIfZDEichhBBCCCGE2A9JnIQQQgghhBBiPyRxEkIIIYQQQoj9kMRJCCGEEEIIIfZDEichhBBCCCGE2A9JnIQQB+ySSy6hsLAw6s+UKVNYsGABy5cvH+zwhBBHuMLCQh555JFOx7dt28bMmTOZM2cOJSUl3b7+kUceobCwkLFjx+Jyubo85z//+Q+FhYXMnz+/r8IWQhymJHESQhyUUaNGsXjxYhYvXsxLL73Efffdh06n4/LLL2f79u2DHZ4QYojZvn07l112GSaTiUWLFpGbm7vf1wSDQT777LMu2957770+jlAIcbiSxEkIcVAsFgsTJkxgwoQJTJ48meOOO45HHnkEtVrN66+/PtjhCSGGkJ07d3LppZdiNptZtGgRWVlZPXrdpEmTeP/99zsdr62tZeXKlYwcObKvQxVCHIYkcRJC9DmTyYTBYEClUkWOXXLJJVxyySVR5z3wwAMUFhZGJViLFi3i2GOPZeLEiVx88cVs27YNgBdffJHCwkKKi4ujrvG///2PkSNHUl1dDcAnn3zChRdeyMSJExkzZgwnnXQSL774YtRrfv3rX3daYrjnT0VFReScvZfmvPzyy52WBr333nuccsopTJgwgbPPPpuVK1dGvWZ/8SxbtozCwkKWLVsW9bq9x6sn4+f3+/nLX/7CnDlzGDlyZNR97SuJ3fva99xzD2PHjmXJkiXAD8uZuvrTMe6ejH1dXR2/+tWvmDlzZuTfePXq1QDMnz9/v/8uK1eu5OKLL2b8+PFMmzaNX/3qVzQ1NUWu//rrr1NYWMjatWs566yzGDduHKeffjoffPBBVBxOp5P/+7//47jjjmPs2LGcdtppvPrqq1HndIynqKiIqVOncsMNN9Dc3NztWALs2rWL66+/nmnTpjF16lSuvvpqdu7c2e35+xrfjv9uJSUl3HjjjcyaNYsJEyZwySWXsGrVqkh7RUVF5HVvvfVWVB+ff/55pK2j9957j7PPPpuJEycya9Ysfv/739Pa2topto66ei/Onz+fX//6193+vLc9sXa8v++//57zzjuPsWPHMmvWLO6++268Xm+319jbzp07WbBgAVarlUWLFpGent7j155yyiksXbq003K9Dz74gLy8PIqKijq95pNPPuHss8+OxPvnP/8Zt9vd6Zye/P5/++23/OxnP2P8+PHMmjWLv/3tb4RCoch5X3/9NT/5yU+YOHEiU6dO5dprr93ne0oI0T8kcRJCHBRFUQgGgwSDQQKBAPX19TzwwAP4/X7OOeecbl9XVlbGc889F3Xso48+4u677+bUU0/l0UcfJRQKcc011+D3+zn99NMxGAz873//i3rNm2++ycyZM0lLS+OLL77guuuuY/To0Tz22GM88sgjZGVl8ac//Ym1a9dGvS4pKSmyxHDx4sVce+21+7zP1tZW/v73v0cdW7duHbfddhsTJkzg8ccfJy0tjWuuuYaGhgaAXsXTW12N39NPP82///1vLr30Uv7973+zePFiFi5c2Kvrrlu3jv/85z/8/e9/Z+LEiVFtHcfr97//fVRbT+61ra2NCy64gGXLlvHLX/6ShQsXYjAY+NnPfkZJSQkLFy6Mivnaa6+N9JecnMyKFS
u47LLLMBqN/P3vf+c3v/kNy5cvZ8GCBZ0+YF999dUce+yxLFy4kLy8PG666Sa+/PJLALxeLxdeeCFvv/02V1xxBY899hiTJ0/mt7/9LU888UTUdebMmcPixYt54YUXuPXWW/n666+55557uh2/2tpazjvvPEpKSvjDH/7A3/72NxoaGrj00ktpaWnZ59h3HN+9/9127NjB2WefTUVFBXfeeSf3338/KpWKSy+9tNPzhGazudOys/feew+1Ovr/8h977DFuueUWJkyYwMMPP8x1113Hhx9+yCWXXNKrhKUvVFdXc/nllxMXF8fChQu58cYb+d///sftt9/eo9fv2rWLSy+9FIvFwqJFi0hJSelV/yeeeCKhUKjLcTv11FM7nf/2229z3XXXkZ+fz6OPPsr111/PW2+9xc9//nMURQF69/t/2223MXnyZJ544glOO+00nnnmGV555RUAysvL+fnPf86YMWN4/PHHueeeeyguLuaqq64iHA736j6FEAdHO9gBCCEObytWrGD06NGdjt9yyy0UFBR0+7p7772X4cOHs3HjxsixpqYmLrzwQm655RagfQZlz7f1I0eO5Pjjj+ett97iF7/4BSqVipqaGr777jv+9re/Ae0fLs866yx++9vfRq45ceJEpk+fzrJlyxg/fnzkuF6vZ8KECZGfd+3atc/7fPjhh0lPT4+abaipqeHEE0/kz3/+M2q1msTERE477TTWrFnDcccd16t4equr8Vu3bh1FRUX87Gc/ixzbM1PTU3tm/I499thObR3Hy+fzRbX15F7feOMNKisreeONNyJLnyZNmsSPfvQjVqxYwbnnnhsVc3Z2dlSfDzzwAHl5eTz55JNoNBoAxo8fz6mnnsprr73GRRddFDn3kksu4brrrgNg9uzZnHXWWTz66KPMmTOH119/nW3btvHyyy9HksPZs2cTDAZ57LHHOP/887Hb7QDEx8dHYpg6dSrffPNN1Jjv7bnnnsPv9/Ovf/2LpKQkAIqKirjgggtYu3Ytc+bM6fa1He9173+3hQsXotfref7557FYLADMnTuX0047jb/+9a9Rs2XHHHMMX331FX6/H71ej8/n49NPP2Xq1KmRGcLW1lYef/xxfvKTn0QlwSNGjOCiiy7qNJ797emnnyYuLo5HH3008m+rVqu588472bp1a6dZr45KSkpYsGABDQ0NBAKBA0omEhMTmTp1Ku+//z5nnHEGAJWVlaxdu5a//vWvPP7445FzFUXh/vvvZ/bs2dx///2R47m5uVx22WV8+eWXzJ07t1e//+eee27k/Tpz5kw++eQTvvjiC84//3zWrVuH1+vl6quvjiSEqampfPrpp7jd7sj7QQjR/yRxEkIclNGjR/PHP/4RaP9A4XA4WLJkCQ899BBut5ubb76502uWLFnCN998w9NPP82CBQsix88//3wAwuEwbrebjz76CKPRSEZGBgA//vGPeeedd1i5ciVTp07lzTffxGw2c/zxxwNwxRVXAO0zG8XFxZSVlbF+/XqgPQk7UNu2bYvMOuyJEeCEE07ghBNOQFEU3G4377//Pmq1mry8vH6Np7vxGzt2LE899RQffvghM2bMwGw29/hDpKIorF69mvfee6/TTFZP9OReV61aRWZmZtTzIiaTiQ8//HC/1/d4PKxdu5bLL788MssJkJWVRUFBAV9//XXUB/2zzjor8neVSsXxxx/PI488gtfrZfny5WRkZHSaUTvjjDN49dVXoxKcPX2Fw2G2bNnCqlWrOOqoo7qNc9WqVUyYMCGSNEH7h9zPP/98v/e4L8uXL2fevHlRH5K1Wm1kdratrS1yfMaMGSxZsoRly5Yxe/ZslixZgsViYcqUKZHEac2aNfj9fk477bSofqZMmUJGRgbLly8/6MRpz9ip1epOs117hMNhgsEgK1eu5Oijj44kTdCeAEL7mO4rcXrnnXcYM2YMDz30ED/72c/45S9/yXPPPRfVZygUiswEQft7omNf0L
5c789//jMulwuLxcK7777L6NGjycnJiTpv165d1NTUcPXVV0feh9CeWFssFr7++mvmzp3bq9//vd+LqampkWV/48e
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Применение K-Means\n",
|
|||
|
"# ========================\n",
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
|
|||
|
"df_clusters = kmeans.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg = silhouette_score(df_scaled, df_clusters)\n",
|
|||
|
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью K-Means')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Средний коэффициент силуэта, равный 0.408, указывает на умеренно хорошую кластеризацию. \n",
|
|||
|
"\n",
|
|||
|
"Средний коэффициент силуэта (silhouette score) указывает на качество кластеризации, измеряя, насколько хорошо точки внутри одного кластера близки друг к другу по сравнению с точками из других кластеров. Значения коэффициента силуэта находятся в диапазоне от -1 до 1:\n",
|
|||
|
"\n",
|
|||
|
"1: Указывает на идеально плотные и четко разделенные кластеры. \n",
|
|||
|
"0: Указывает на перекрытие кластеров или слабую структуру кластеризации. \n",
|
|||
|
"Отрицательные значения: Указывают, что точки в кластере расположены ближе к другому кластеру, чем к своему."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 32,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта (агломеративная кластеризация): 0.832\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAJzCAYAAAA4M0NGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADXQ0lEQVR4nOzdd3xUVdrA8d/0THrvlQRCAgmE3jtWFMGCSLFhW8vaV3d11dey6mIHewexIIioKE26dOmEGtJ7b5Pp7x/ZjIxJIEgKkOf7+bBr7rn3nueeuZnMM+fccxR2u92OEEIIIYQQQohmKTs6ACGEEEIIIYQ410niJIQQQgghhBCnIYmTEEIIIYQQQpyGJE5CCCGEEEIIcRqSOAkhhBBCCCHEaUjiJIQQQgghhBCnIYmTEEIIIYQQQpyGJE5CCCGEEEIIcRqSOAkhhBBCCCHEaag7OgAh2sKMGTPYtm2b0zYPDw8SExO55557GDBgQAdFJoQQ4nz1n//8h5qaGh566CEOHz7Mfffdx+bNm1GpVB0dmhCiHUjiJC5YiYmJPPXUUwBYrVbKysr48ssvufXWW1m8eDFdu3bt4AiFEEKcT2666SamT5/OoEGD0Gg0PPnkk5I0CdGJKOx2u72jgxCitc2YMQOAefPmOW2vra1l8ODB3HDDDfzjH//oiNCEEEKcx8xmM5mZmfj5+eHt7d3R4Qgh2pE84yQ6Fb1ej06nQ6FQOLbNmDHDkWg1eOWVV4iPj2fx4sWObfPnz2fs2LGkpKQwffp0jhw5AsAXX3xBfHw8J06ccDrH999/T0JCAnl5eQCsWrWKG264gZSUFHr27Mkll1zCF1984XTMY489Rnx8fJP/srOzHfuMGTPG6bivvvqK+Ph43nrrLce2ZcuWcdlll9G7d28mT57Mjh07nI45XTxbt24lPj6erVu3Oh335/ZqSfuZTCZeeuklRo4cSUJCgtN1ndzGf/bncz///PMkJSWxfv16AN56661m2+vkuFvS9oWFhfzjH/9g8ODBjtd4165dAIwZM+a0r8uOHTuYPn06vXr1YsCAAfzjH/+gtLTUcf7FixcTHx/Pnj17mDRpEsnJyVxxxRX88ssvTnFUVVXxn//8h3HjxpGUlMSECRP49ttvnfY5OZ7u3bvTv39/7r33XsrKypptS4C0tDTHUNX+/ftzxx13cPz48Wb3P1X7nvy6paenc9999zF06FB69+7NjBkz2Llzp6M8OzvbcdzSpUud6lizZo2j7GTLli1j8uTJpKSkMHToUP79739TUVHRKLaTNXUvjhkzhscee6zZn/+sIdaTr+/3339nypQpJCUlMXToUJ599lnq6uqaPUfDeR599FGGDRtGjx49GDx4MI8++qjTa9TUfZWdnd3i+7qwsJDHH3+ckSNHkpyczDXXXMPq1aud4mg47p133nHafuTIkUb3MLTefXyq6z/5fvjzv4b3tpa8rzTE0vCvZ8+eXHzxxU73WFP3SUO7nPx+2dK2fOutt9BoNMTGxuLl5cX111/fqA1PVVdNTQ0zZswgMTERo9HouNbm2qOB1Wrl/fffZ8KECSQnJ9O7d2+uv/56tmzZ4lTX7t27ueWWW+jTpw+DBg3iwQcfpKCgoEVtDrBw4UIuv/xyevbsyahRo3jrrbewWq2O8scee4wZM2bw7bffMnr0aFJSUrjxxhs5dOiQY5+G1+XkNjl69Cg9evRwek1TU1OZNm0aKSkpjBs3jq+++srpWg4dOsQ999zDoEGD6NGjB8OHD+e5555z+t378+sIjV/zpu6BDRs2EB8f73gvaOr33mg0Mnbs2CbvH9F5SeIkLlh2ux2LxYLFYsFsNlNUVMQrr7yCyWTi6quvbva4zMxMPv30U6dtK1as4Nlnn+Xyyy9n7ty5WK1W7rzzTkwmE1dccQU6nY7vv//e6ZglS5YwePBgQkJCWLt2LXfffTc9evTg7bff5q233iIiIoL/+7//Y8+ePU
7HBQQE8PXXXzv+3XXXXae8zoqKCl5//XWnbXv37uXhhx+md+/evPPOO4SEhHDnnXdSXFwMcEbxnKmm2u+DDz7gs88+48Ybb+Szzz7j66+/Zs6cOWd03r179/Lll1/y+uuvk5KS4lR2cnv9+9//diprybXW1NQwdepUtm7dyiOPPMKcOXPQ6XTccsstpKenM2fOHKeY77rrLkd9gYGBbN++nZtuugkXFxdef/11/vnPf7Jt2zZmzpzZ6AP2HXfcwdixY5kzZw4xMTHcf//9rFu3DoC6ujpuuOEGfvjhB2bNmsXbb79N3759+de//sW7777rdJ6RI0fy9ddfM2/ePB566CE2bdrE888/32z7FRQUMGXKFNLT03n66af573//S3FxMTfeeCPl5eWnbPuT2/fPr9uxY8eYPHky2dnZPPHEE8yePRuFQsGNN97Y6DlDNzc3fv31V6dty5YtQ6l0/lP09ttv8+CDD9K7d2/efPNN7r77bpYvX86MGTNOm7C0try8PG699VZ8fHyYM2cO9913H99//z2PPvpos8cYDAZmzpzJ8ePHeeqpp/joo4+YOXMmP/30E6+99prTvg2v48n3U4NT3dfFxcVcc8017NixgwceeIC33nqLsLAw7r777kbJaUvbvbXu49Ndf2BgYKP3tz//jv1ZU+8rDRqOnTt3Ll26dOEf//hHoy+yTuVM2vJk33//vePLlZZasGABxcXFfPbZZ2i1Wsf2xMREp9f7mmuucTpu9uzZvP3220yZMoUPP/yQZ599lvLycv7+979jMBgAOHjwINOnT8doNPLyyy/zzDPPsH//fm699dYWtfl7773Hk08+yeDBg3n33XeZNm0aH3zwAU8++aRTLKmpqbz22mvcc889/Pe//6WsrIzp06dTWFjY7HU///zzWCwWx88Gg4HbbrsNi8XCW2+9xZVXXslTTz3l+FKssLCQadOmYTAYePHFF/nggw+4/PLLmTdvHp9//vkZtfmfmc1mXnjhhdPu9+GHH54yIRadkzzjJC5Y27dvp0ePHo22P/jgg8TGxjZ73AsvvEDXrl05cOCAY1tpaSk33HADDz74IFDfg9LwbX1CQgLjx49n6dKl/P3vf0ehUJCfn8+WLVv473//C9R/uJw0aRL/+te/HOdMSUlh4MCBbN26lV69ejm2a7Vaevfu7fg5LS3tlNf55ptvEhoa6vRNdn5+PhdffDHPPfccSqUSf39/JkyYwO7duxk3btwZxXOmmmq/vXv30r17d2655RbHtjP9g9TQ4zd27NhGZSe3V8O3uA1acq3fffcdOTk5fPfddyQkJADQp08frrrqKrZv3861117rFHNkZKRTna+88goxMTG89957jucdevXqxeWXX86iRYuYNm2aY98ZM2Zw9913AzB8+HAmTZrE3LlzGTlyJIsXL+bIkSN89dVXjuRw+PDhWCwW3n77ba6//nrH0CBfX19HDP379+e3335zavM/+/TTTzGZTHzyyScEBAQA0L17d6ZOncqePXsYOXJks8eefK1/ft3mzJmDVqvl888/x93dHYBRo0YxYcIEXn75ZafeshEjRrBhwwZMJhNarRaj0cjq1avp37+/oyeloqKCd955h+uuu84pWejWrRvTpk1r1J5t7YMPPsDHx4e5c+c6XlulUskTTzzB4cOHm/w2Oj09neDgYF566SUiIiIAGDRoEHv27GmUTJ78Ov7Zqe7rTz75hNLSUpYvX05YWBhQn4TddNNNvPzyy0yYMMGRGI0YMYJffvmFwsJCR2L2888/O7U7tN59fLrrP/k9ruH9LSEhgfDw8CbbAZp+X2lw8rEhISH8+uuvpKamEhMT0+z5/mpbNqipqWH27Nn06NHjlL93J7NarY7nbPv37+9U5u7u7vR6b9iwwam8sLCQBx54wKnHRqfTce+993L48GF69+7Nu+++i7e3Nx9//DE6nQ6AwMBAHnroIY4fP37KNq+qqnIkZk888QQAw4YNw9vbmy
eeeIKbb77Z8VxwVVUV7777Lv369QMgOTmZcePG8fnnn/Pwww83uu7ly5ezZ88ep9cjJyeHpKQk/vnPfxIREcGwYcN
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Агломеративная кластеризация\n",
|
|||
|
"# ========================\n",
|
|||
|
"agg_cluster = AgglomerativeClustering(n_clusters=3) \n",
|
|||
|
"labels_agg = agg_cluster.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg_agg = silhouette_score(df_scaled, labels_agg)\n",
|
|||
|
"print(f'Средний коэффициент силуэта (агломеративная кластеризация): {silhouette_avg_agg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=labels_agg, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Значение коэффициента силуэта лежит в диапазоне от -1 до 1. Ближе к 1: Хорошо сформированные, плотные кластеры, четко отделенные друг от друга. \n",
|
|||
|
"\n",
|
|||
|
"Ближе к 0: Кластеры пересекаются или слабо разделены, не имеют четких границ. Точки расположены одинаково близко как к своему кластеру, так и к соседним. \n",
|
|||
|
"Ближе к -1 (Отрицательные значения): Некоторые точки скорее относятся к другим кластерам, чем к текущему (ближе к центрам других кластеров). Очень плохая кластеризация. \n",
|
|||
|
"Ближе к 1: Все точки внутри каждого кластера плотно сгруппированы и значительно удалены от точек других кластеров. Свидетельствует о четкой и хорошо разделенной структуре данных. Единица говорит об идеальной кластеризации.\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aisenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|