1070 lines
1.9 MiB
Plaintext
1070 lines
1.9 MiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Вариант 19. Данные о миллиардерах: https://www.kaggle.com/datasets/surajjha101/forbes-billionaires-data-preprocessed"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Rank ', 'Name', 'Networth', 'Age', 'Country', 'Source', 'Industry'], dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//Forbes Billionaires.csv\")\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 56,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Rank</th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Networth</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>Country</th>\n",
|
|||
|
" <th>Source</th>\n",
|
|||
|
" <th>Industry</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Elon Musk</td>\n",
|
|||
|
" <td>144.0</td>\n",
|
|||
|
" <td>50</td>\n",
|
|||
|
" <td>United States</td>\n",
|
|||
|
" <td>Tesla, SpaceX</td>\n",
|
|||
|
" <td>Automotive</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Jeff Bezos</td>\n",
|
|||
|
" <td>138.0</td>\n",
|
|||
|
" <td>58</td>\n",
|
|||
|
" <td>United States</td>\n",
|
|||
|
" <td>Amazon</td>\n",
|
|||
|
" <td>Technology</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Bernard Arnault & family</td>\n",
|
|||
|
" <td>133.0</td>\n",
|
|||
|
" <td>73</td>\n",
|
|||
|
" <td>France</td>\n",
|
|||
|
" <td>LVMH</td>\n",
|
|||
|
" <td>Fashion & Retail</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>Bill Gates</td>\n",
|
|||
|
" <td>129.0</td>\n",
|
|||
|
" <td>66</td>\n",
|
|||
|
" <td>United States</td>\n",
|
|||
|
" <td>Microsoft</td>\n",
|
|||
|
" <td>Technology</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>Warren Buffett</td>\n",
|
|||
|
" <td>118.0</td>\n",
|
|||
|
" <td>91</td>\n",
|
|||
|
" <td>United States</td>\n",
|
|||
|
" <td>Berkshire Hathaway</td>\n",
|
|||
|
" <td>Finance & Investments</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Rank Name Networth Age Country \\\n",
|
|||
|
"0 1 Elon Musk 144.0 50 United States \n",
|
|||
|
"1 2 Jeff Bezos 138.0 58 United States \n",
|
|||
|
"2 3 Bernard Arnault & family 133.0 73 France \n",
|
|||
|
"3 4 Bill Gates 129.0 66 United States \n",
|
|||
|
"4 5 Warren Buffett 118.0 91 United States \n",
|
|||
|
"\n",
|
|||
|
" Source Industry \n",
|
|||
|
"0 Tesla, SpaceX Automotive \n",
|
|||
|
"1 Amazon Technology \n",
|
|||
|
"2 LVMH Fashion & Retail \n",
|
|||
|
"3 Microsoft Technology \n",
|
|||
|
"4 Berkshire Hathaway Finance & Investments "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 56,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 57,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Rank</th>\n",
|
|||
|
" <th>Networth</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>2600.000000</td>\n",
|
|||
|
" <td>2600.000000</td>\n",
|
|||
|
" <td>2600.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>1269.570769</td>\n",
|
|||
|
" <td>4.809596</td>\n",
|
|||
|
" <td>64.271923</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>728.146364</td>\n",
|
|||
|
" <td>9.845084</td>\n",
|
|||
|
" <td>13.220607</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>19.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>637.000000</td>\n",
|
|||
|
" <td>1.500000</td>\n",
|
|||
|
" <td>55.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>1292.000000</td>\n",
|
|||
|
" <td>2.400000</td>\n",
|
|||
|
" <td>64.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>1929.000000</td>\n",
|
|||
|
" <td>4.500000</td>\n",
|
|||
|
" <td>74.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>2578.000000</td>\n",
|
|||
|
" <td>144.000000</td>\n",
|
|||
|
" <td>100.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Rank Networth Age\n",
|
|||
|
"count 2600.000000 2600.000000 2600.000000\n",
|
|||
|
"mean 1269.570769 4.809596 64.271923\n",
|
|||
|
"std 728.146364 9.845084 13.220607\n",
|
|||
|
"min 1.000000 1.000000 19.000000\n",
|
|||
|
"25% 637.000000 1.500000 55.000000\n",
|
|||
|
"50% 1292.000000 2.400000 64.000000\n",
|
|||
|
"75% 1929.000000 4.500000 74.000000\n",
|
|||
|
"max 2578.000000 144.000000 100.000000"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 57,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 58,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Rank 0\n",
|
|||
|
"Name 0\n",
|
|||
|
"Networth 0\n",
|
|||
|
"Age 0\n",
|
|||
|
"Country 0\n",
|
|||
|
"Source 0\n",
|
|||
|
"Industry 0\n",
|
|||
|
"dtype: int64\n",
|
|||
|
"Rank False\n",
|
|||
|
"Name False\n",
|
|||
|
"Networth False\n",
|
|||
|
"Age False\n",
|
|||
|
"Country False\n",
|
|||
|
"Source False\n",
|
|||
|
"Industry False\n",
|
|||
|
"dtype: bool\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Процент пропущенных значений признаков\n",
|
|||
|
"for i in df.columns:\n",
|
|||
|
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
|
|||
|
" if null_rate > 0:\n",
|
|||
|
" print(f'{i} Процент пустых значений: %{null_rate:.2f}')\n",
|
|||
|
"\n",
|
|||
|
"print(df.isnull().sum())\n",
|
|||
|
"\n",
|
|||
|
"print(df.isnull().any())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 59,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Rank int64\n",
|
|||
|
"Name object\n",
|
|||
|
"Networth float64\n",
|
|||
|
"Age int64\n",
|
|||
|
"Country object\n",
|
|||
|
"Source object\n",
|
|||
|
"Industry object\n",
|
|||
|
"dtype: object"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 59,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Проверка типов столбцов\n",
|
|||
|
"df.dtypes"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Атрибуты \n",
|
|||
|
"\n",
|
|||
|
"Rank: Рейтинг миллиардера в списке Forbes.\n",
|
|||
|
"\n",
|
|||
|
"Name: Имя миллиардера.\n",
|
|||
|
"\n",
|
|||
|
"Networth: Состояние (чистая стоимость активов) в миллиардах долларов США.\n",
|
|||
|
"\n",
|
|||
|
"Age: Возраст миллиардера.\n",
|
|||
|
"\n",
|
|||
|
"Country: Страна, в которой проживает миллиардер.\n",
|
|||
|
"\n",
|
|||
|
"Source: Основной источник богатства.\n",
|
|||
|
"\n",
|
|||
|
"Industry: Индустрия, в которой миллиардер заработал свое состояние.\n",
|
|||
|
"\n",
|
|||
|
"# Цель:\n",
|
|||
|
"Оптимизация стратегий инвестирования и маркетинга для финансовых учреждений и компаний, стремящихся привлечь миллиардеров как клиентов или партнеров.\n",
|
|||
|
"Кластеризация миллиардеров на основе их характеристик (возраст, страна проживания, источник богатства, индустрия) для выявления групп с похожими профилями."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Очистка данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Цель: Упростить набор данных, удалив несущественные столбцы, чтобы сосредоточиться на ключевых атрибутах, которые будут использоваться для кластеризации и анализа.\n",
|
|||
|
"- Rank - этот столбец можно удалить, так как он лишь отражает порядок по Networth и не добавляет новой информации о миллиардере\n",
|
|||
|
"- Name - этот столбец можно удалить, так как это идентификатор записи, который не несёт информации для кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 60,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" Networth Age Country Source Industry\n",
|
|||
|
"0 144.0 50 United States Tesla, SpaceX Automotive \n",
|
|||
|
"1 138.0 58 United States Amazon Technology \n",
|
|||
|
"2 133.0 73 France LVMH Fashion & Retail \n",
|
|||
|
"3 129.0 66 United States Microsoft Technology \n",
|
|||
|
"4 118.0 91 United States Berkshire Hathaway Finance & Investments \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Удаление несущественных столбцов\n",
|
|||
|
"columns_to_drop = ['Rank ', 'Name']\n",
|
|||
|
"df_cleaned = df.drop(columns=columns_to_drop)\n",
|
|||
|
"\n",
|
|||
|
"print(df_cleaned.head()) # Вывод очищенного DataFrame"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Визуализация парных взаимосвязей\n",
|
|||
|
"Визуализировать ключевые атрибуты миллиардеров для выявления закономерностей и связей между ними."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 64,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAA3QCAYAAADIJEmGAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXidZZk/8G+6QQtJZC0EpCFFKEhbFillRgSqgyK4IDqikCIjyFLAqmwjKCDKuKCAFHBhkwiyiAOoiAqKqFOgdRT4SRFqCYuhFBRDgEJbmt8fZ5oSkkLSJD1vcj6f6+JK3vd5zzl3Tm6S0/PN8zxV7e3t7QEAAAAAACiQYeUuAAAAAAAA4NUEGAAAAAAAQOEIMAAAAAAAgMIRYAAAAAAAAIUjwAAAAAAAAApHgAEAAAAAABSOAAMAAAAAACgcAQYAAAAAAFA4AgwAAID/097eXpGPDQAARSTAAACACvCZz3wm22yzTS699NJyl9Jnd911V7bZZpscffTR3Y7/6Ec/yjbbbJPHH3+8V/d74YUX5pJLLumPEnvtoYceykc+8pFO57bZZpucf/75ZakHAACKQIABAABDXFtbW2699dZsvfXWueaaa4bMX/rfdtttuemmm/rt/s4777wsXry43+6vN2655Zb88Y9/LMtjAwBAUQkwAABgiPvJT36SJDnllFPS3NycO++8s8wV9Y+ampp86UtfytNPP13uUgAAgAEgwAAAgCHu+uuvz2677ZapU6dm3Lhxufrqq7tcc8kll+Ttb397Jk2alAMPPDC/+tWvss022+Suu+7quObBBx/MEUcckZ122ik77bRTZsyYkccee2yVj/vjH/8422yzTR588MFO52+99dZss802uf/++5Mk3/ve9/Kud70rEydOzO67757TTz89zz333Ot+XZ/61Kfywgsv5PTTT3/da1taWvLpT386U6ZMyeTJk3PIIYd0PH5SWq4pSWbNmpVtttkmV1xxRSZMmJBnnnmm45oLLrgg22yzTWbPnt3pa5kwYUKefPLJJMl9992Xj3/849l1112z00475cgjj8xDDz3Ucf2K5a+uvvrq7LXXXtlpp51y4IEHZtasWR11vHLZqOeeey6nnHJKpkyZkh133DHHHXecwAYAgIohwAAAgCHsoYceyn333Zf3v//9SZL3v//9ue222zq9CT5r1qycffbZ2WeffXLhhRdm8uTJmTlzZqf7efjhh3PggQfm73//e77yla/kS1/6Uh577LF85CMfyd///vduH/sd73hHxowZk5/+9Kedzv/kJz/Jm970pmy33Xb5yU9+kq997Ws56KCDcskll2TGjBm58cYbc+aZZ77u1zZ+/Pgce+yx+eUvf9kxy6Q7//jHP3LggQfmz3/+cz73uc/l61//epYvX56DDjoof/3rX5Mk11xzTZLkgx/8YK655prsueeeaW9v7zRbZcXnc+bM6Th3xx13ZLvttsvYsWNz5513duxjcdZZZ+WLX/xinnjiiRx44IEdj7PCrFmzctJJJ+Xzn/98vv71r+eDH/xgRx0f+tCHOq674oorsnTp0px33nn5zGc+k1/96lf5whe+8LrPDQAADAUjyl0AAAAwcK6//vq84Q1vyLRp05Ik+++/f84///z88Ic/zJFHHpkXXngh3/3ud3PQQQfl+OOPT5K89a1vzeLFizve1E9Kb7iPHj06l19+edZdd90kyW677ZZ3vOMdufjii3PSSSd1eezRo0fnne98Z26++eZ86lOfSpI8//zz+fWvf50ZM2YkSe6+++5svvnmOeiggzJs2LBMmTIlY8aMSWtra4++vo9//OP55S9/mTPPPDNTp07Nhhtu2OWa733ve/nnP/+ZH/zgB9lss82SJG9729vy7ne/O+edd16++c1vZocddkiSbLLJJh2fb7nllpk9e3b22WefLF68OH/84x/z5je/uVOA8dvf/jYf+MAHkiRf//rXM27cuHznO9
/J8OHDO57Lf/u3f8s3v/nNnHfeeR23++hHP5p3vetdHcebbLJJknQ89goTJ07MV7/61Y7n+5577slvfvObHj03AAAw2JmBAQAAQ9TSpUtz00035R3veEdefPHFPPvss1lnnXWy884759prr83y5cvzpz/9KS+++GKnN9OTZL/99ut0fOedd2bKlClZe+21s2zZsixbtizrrrtu3vKWt+R//ud/VlnD+973vjz66KO59957k5Q23l6yZEne+973JkmmTp2ahx9+OB/4wAcya9as3HfffXnPe96TxsbGHn2Nw4cPz3/913/lhRdeyBlnnNHtNbNnz862226bsWPHdtQ+bNiwvO1tb3vN2vfcc8+O8T/84Q8ZOXJkpk+fnnvuuSdLlizJ/Pnz09LSkj333DMvvPBC7rvvvuyzzz4d4UVS2qdjr732yt13393pvrfddtsefX0777xzp+PNN988zz77bI9uCwAAg50ZGAAAMETdfvvt+fvf/54f/vCH+eEPf9hl/Le//W3a2tqSJOuvv36nsQ022KDT8T//+c/cfPPNufnmm7vcz6tv+0q77rprxo4dm5/+9KeZNGlSfvrTn2bKlCkdMw7e/e53Z/ny5bnqqqty4YUX5vzzz89mm22W448/Pu9+97t79HVutdVWOeaYY/KNb3yjy3JVK2p/5JFH8uY3v7nb2y9evDijR4/ucn6PPfbIZZddlscffzyzZ8/OTjvtlN122y0vvfRS7rnnnvy///f/stFGG2X77bfPokWL0t7e3u0MkA033LDjeV5hzJgxPfraXn3dsGHD0t7e3qPbAgDAYCfAAACAIer666/PG9/4xnzpS1/qdL69vT3HHHNMrr766nz84x9Pkvz9739PQ0NDxzX/+Mc/Ot2muro6//Iv/5JDDz20y+OMGLHqf1YMGzYs73nPe/KTn/wkRx55ZH7/+9932cNhv/32y3777Ze2trb87ne/y3e/+92ccMIJ2XnnnTN27Ngefa2HHXZYfvGLX+TMM8/s+JpeWfuUKVNy4okndnvbUaNGdXv+LW95S9Zdd93Mnj07d955Z975zndm7Nixqa+vz1133ZU//OEP2XPPPVNVVZXq6upUVVV1u8H2U089lTe84Q09+joAAICVLCEFAABD0FNPPZXf/va32XfffbPrrrt2+m/q1Kl517veld/85jfZdNNNU11dnV/+8pedbv+LX/yi0/GUKVMyf/78bLvttpk4cWImTpyY7bffPpdffnmX277a+973vixcuDAXXHBBhg8fnr333rtjbObMmR37YVRXV2efffbJ0UcfnWXLlmXRokU9/nqHDx+eL3/5y3nuuefy7W9/u0vtDz/8cLbccsuO2idOnJgbb7wxP/zhDzuWfBo2rPM/j0aOHJl//dd/zW233ZZ58+ZlypQpSUrLXt1+++2ZO3du9tprrySlmRLbb799fvazn+Xll1/uuI+2trbcfvvtXZaCerVXPzYAACDAAACAIemGG27IsmXLsu+++3Y7/v73vz8vv/xyfvSjH+Wwww7L97///Zxzzjn5/e9/n3POOSc/+MEPkqx8Y/3oo4/Oo48+miOOOCK33nprfvvb3+bYY4/NT3/600yYMOE1a9l6662z7bbb5qqrrso73vGOjk3Ak1IYcOutt+YrX/lKZs+enZ///Oc577zzUl9f/7r3+2pvetObMmPGjC7LNX3sYx/L8uXL87GPfSw333xzZs+enc997nNpamrKlltu2XFdTU1N/vd//zdz5szpWKZpjz32yK9//eustdZa2X777ZOUlsW67777UlVVlX/5l3/puP1nPvOZPPzww/nEJz6R2267LbfccksOOeSQLFmypCOkWZWampokyU9+8pM89thjvfq6AQBgqBJgAADAEPSjH/0ob3rTm7L11lt3O77zzjtn8803z3XXXZfDDz88xx57bG688cYcccQRmTt3bo4//vgkK/dgmDBhQq688spUVVXlxBNPzHHHHZennn
oqF1xwQacZFavyvve9Ly+//HLH5t0rHHjggTn11FNzxx135Mgjj8znP//5jB8/PpdeemlGjhzZ66/78MMP77LXxdi
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x4500 with 3 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Настройка стиля графиков\n",
|
|||
|
"sns.set(style=\"whitegrid\")\n",
|
|||
|
"\n",
|
|||
|
"# Создание фигуры\n",
|
|||
|
"plt.figure(figsize=(16, 45))\n",
|
|||
|
"\n",
|
|||
|
"# График 1: Возраст vs Чистый доход\n",
|
|||
|
"plt.subplot(4, 1, 1)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['Age'], y=df_cleaned['Networth'], alpha=0.6, color='blue')\n",
|
|||
|
"plt.title('Age vs Networth')\n",
|
|||
|
"plt.xlabel('Age')\n",
|
|||
|
"plt.ylabel('Networth (in billions USD)')\n",
|
|||
|
"\n",
|
|||
|
"# График 2: Страна проживания vs Чистый доход\n",
|
|||
|
"plt.subplot(4, 1, 2)\n",
|
|||
|
"sns.boxplot(x=df_cleaned['Country'], y=df_cleaned['Networth'], color='green')\n",
|
|||
|
"plt.title('Country vs Networth')\n",
|
|||
|
"plt.xlabel('Country')\n",
|
|||
|
"plt.ylabel('Networth (in billions USD)')\n",
|
|||
|
"plt.xticks(rotation=90)\n",
|
|||
|
"\n",
|
|||
|
"# График 3: Индустрия vs Чистый доход\n",
|
|||
|
"plt.subplot(4, 1, 3)\n",
|
|||
|
"sns.boxplot(x=df_cleaned['Industry'], y=df_cleaned['Networth'], color='purple')\n",
|
|||
|
"plt.title('Industry vs Networth')\n",
|
|||
|
"plt.xlabel('Industry')\n",
|
|||
|
"plt.ylabel('Networth (in billions USD)')\n",
|
|||
|
"plt.xticks(rotation=90)\n",
|
|||
|
"\n",
|
|||
|
"# Упорядочиваем графики\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Стандартизация данных для кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 71,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Выделяем числовые и категориальные признаки\n",
|
|||
|
"numerical_cols = ['Networth', 'Age']\n",
|
|||
|
"categorical_cols = ['Country', 'Source', 'Industry']\n",
|
|||
|
"\n",
|
|||
|
"# Масштабирование числовых признаков\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"df_numerical_scaled = scaler.fit_transform(df_cleaned[numerical_cols])\n",
|
|||
|
"\n",
|
|||
|
"# Кодирование категориальных признаков с помощью OneHotEncoder\n",
|
|||
|
"encoder = OneHotEncoder(handle_unknown='ignore', sparse_output=False) # sparse_output=False: плотный массив, удобный для построения DataFrame\n",
|
|||
|
"encoded_data = encoder.fit_transform(df_cleaned[categorical_cols])\n",
|
|||
|
"\n",
|
|||
|
"# Создаем новые столбцы для закодированных категориальных признаков\n",
|
|||
|
"encoded_df = pd.DataFrame(encoded_data, columns=encoder.get_feature_names_out(categorical_cols))\n",
|
|||
|
"\n",
|
|||
|
"# Объединяем числовые и закодированные категориальные данные\n",
|
|||
|
"df_encoded = pd.concat([df_cleaned[numerical_cols], encoded_df], axis=1)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 73,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABR8AAAP0CAYAAAAjkkunAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU9b3/8fc5zExmMkkmOEJMmCCY1LiBUqO4LyDWWrW4tVSr1atXq9halytaW9e61CpFwaXWnbYXa1XEWvUqWGm1LtGrtS74M4LMaEzCkHUmk5lhzu8Pb1JCAmRgTmbh9Xw8eADnnJz5JPkmkPf5fr5fw7IsSwAAAAAAAACQYWa2CwAAAAAAAABQmAgfAQAAAAAAANiC8BEAAAAAAACALQgfAQAAAAAAANiC8BEAAAAAAACALQgfAQAAAAAAANiC8BEAAAAAAACALQgfAQAAAAAAANiC8BEAAGAbYVlWtkvANohxBwDAto3wEQAAbLXTTjtNdXV1A37tscceOuyww3Tttdeqo6Nj0NusXLlS11xzjY444ghNnjxZhx12mC6++GJ99NFHG32dX//616qrq9P111+/2ZrOPPNM7bvvvorH4xu95thjj9Wpp54qSaqrq9P8+fOH8d5m1uWXX65p06b1/33atGm6/PLLM/oaX375pc455xx9/vnntr7O1uju7tYPf/hD7bnnntpnn320atWqQdc88cQTqqur0y9+8Ysh7zF//nzV1dXZXOnW63s/QqFQxu75+uuvq66uTq+//vpGr7n88ssHfZ2u/+u5557LWD2SFI/HdeONN+rpp5/O6H0BAEB+cWS7AAAAUBh22203XX311f1/TyQSev/99zV37lx9+OGH+u///m8ZhiFJ+p//+R9ddtll+trXvqbzzjtPgUBAX375pR5++GF95zvf0d13360DDzxwwP1TqZQWL16snXfeWU899ZQuvfRSeTyejdZz4okn6tVXX9Xy5ct1xBFHDDr//vvv6+OPP9Yvf/lLSdKjjz6qHXbYIRMfiq2yYMEClZSUZPSer776ql5++WXbX2drLF68WC+99JKuuuoqfe1rX1MgENjotb///e911FFHqb6+fgQrzJzDDjtMjz76qMaOHTvirz1mzBgtWLBgyHMTJkzI6Gu1tLTo4Ycf1k033ZTR+wIAgPxC+AgAADKipKREe+2114Bj++yzjyKRiO644w69++672muvvbR69WrNmTNHBx98sObNm6dRo0b1X3/kkUfqe9/7nubMmaNly5bJ5XL1n/v73/+uL7/8UnPnztX3v/99/fnPf9bJJ5+80XpmzJghn8+nJUuWDBk+PvnkkyopKdE3vvENSRpUe7bstttuBfU6w9Xe3i5JOuWUU/pD6o0pKSnRT3/6Uy1ZskRut3sEqsus7bbbTtttt11WXtvlcuXMWAcAANsG2q4BAICt9thjD0nSF198IUlauHCh4vG4fvaznw0IHiXJ4/Fozpw5OvHEEwe1aj/++OPaeeedtffee2vq1Kl69NFHN/m6RUVFOuaYY/TXv/5V3d3dA84lEgk988wz+ta3vtU/e3LDtuuHH35YRx11lCZNmqSDDz5Y11xzTf99QqGQ6urq9MQTTwy474Yt1OvWrdO9996rY445RpMnT9Zee+2lWbNm6bXXXtto3eu3Q/e1EQ/1q6/Wzb3GE088oSuuuEKSNH369P57b9h23dXVpZtuuklHHHGEJk2apGOOOUZ/+tOfBtV2xx136Je//KUOOOAATZ48WWedddaQLdLr6+3t1Z133tn/8TzyyCN17733KpVKSfqqbb/v/dlll1022w4+Z84crV69WnPnzt3kdRt+PqTBn7u+duV//OMfOu200/qXAHjsscfU0tKiCy64QFOmTNGhhx6qhx56aMC92tvbddVVV+mAAw7QpEmT9J3vfEf/+Mc/BlxTV1enBQsW6IQTTtDkyZO1YMGCIduuX375Zc2aNUt77b
WXDjroIF111VXq7OzsP//mm2/qrLPO0j777KM99thD06ZN0/z58/s/hpn24osv6oQTTtCkSZN04IEH6he/+IWi0eiga0455RRNmTJFe+yxh4466ij9/ve/l/TVx3n69OmSpCuuuKL/83DaaafptNNOG3CfDVvGn3jiCe2222567LHHdOCBB2rffffVJ598Mqy6YrGYrrnmGh1yyCH9Nd1///22fIwAAMDwED4CAABbrVy5UpJUXV0tSfrb3/6m3XbbTRUVFUNev//+++uiiy7SmDFj+o+1t7dr2bJlmjlzpiTp+OOP13vvvaf3339/k6994oknqre3V88///yA48uXL9fatWs3OnPyz3/+s371q1/p1FNP1f3336/Zs2frqaeeGtZak+u79dZbddddd+m73/2u7rvvPl1//fVqb2/XhRdeqJ6ens2+/cknn6xHH310wK+9995bXq9XRx999LBe47DDDtN5550n6atW6/PPP3/Q68RiMZ1yyil6+umndfbZZ+uuu+7S3nvvrSuvvFL33HPPgGsfeeQRffrpp7rpppv0i1/8Qv/61780Z86cjb4PlmXphz/8oe677z6dfPLJuueee3TUUUdp3rx5/W36V199tU466SRJX7W/D1Xj+vbbbz9997vf1cKFC/XWW29t9uM4HBdffLGmTZum3/zmN5o4caKuvvpqnX766fra176mu+66S5MnT9ZNN92kf/7zn5K+ClR/8IMfaOnSpbrooou0YMEC7bDDDjr77LMHBZD33HOPjj32WN1xxx39M23X99JLL+ncc8+V3+/XvHnzdOmll+rFF1/URRddJEn66KOPdMYZZ6i8vFy//vWvdffdd6u+vl4LFizQs88+m/b7mkwmB/1af1OYp59+WrNnz9ZOO+2kO++8UxdccIGWLFmi888/v/+6v/71r5o9e7Z233133XXXXZo/f76qq6t13XXX6d1339XYsWP727vPO++8jbZ6b8y6dev0wAMP6IYbbtAVV1yhmpqaYdV14403avny5ZozZ47uv/9+TZ8+Xbfccosef/zxtD9OAAAgM2i7BgAAGWFZlpLJZP/fOzo69MYbb+juu+/unxklfbX5ya677prWvZ9++mmlUil9+9vflvRVe/Z1112nRYsWbTIQ3H333bXrrrvq6aef1oknnth/fPHixaqrq9OkSZOGfLs33nhDgUBAp556qkzT1L777qvi4uIhN87ZlJaWFl100UUDZnoVFRXpRz/6kVasWLHZ9tcddthhwDqUDz30kN5++20tWLBANTU1w36N8ePHS5J23XXXIddSfOKJJ/Txxx9r0aJFmjJliiTp4IMPVjKZ1F133aVZs2apvLxcklRWVqa77rqrf9bq6tWrNX/+fLW1tWn06NGD7r18+XK9+uqrmjt3rr71rW9Jkg488EC53W7dfvvt/QFf3/s53Jbgyy67TH/729/005/+VE899dRWt1+feOKJOvPMMyVJxcXF+s53vqPJkyfrwgsvlPTVjMz/+Z//0dtvv63Jkyfrqaee0kcffaQ//vGP2nPPPSVJhxxyiE477TTdeuutA8Ku+vr6/ntL0nvvvTfgtefPn69dd91VCxYs6G85d7lcuv3227VmzRp99NFHOuCAA/SrX/1KpvnV3IEDDzxQy5Yt0+uvv97/cR2Ozz//XLvvvvug45dcconOOeccWZalW2+9VQcffLBuvfXW/vMTJkzQGWecoZdfflmHHXaYPvnkEx1//PG68sor+6+ZMmWKpk6dqtdff1177rln/9f5+PHjt6jN/4c//KEOO+wwSRp2XW+88YYOPPDA/o/J1KlTVVxcLL/fn/brAwCAzCB8BAAAGfHmm28OCjVM09QBBxyg6667rj9UGTVqlNatW5fWvR9//HFNnTpVLpervxV12rRp+vOf/6w5c+ZscuOUE088UTfeeKOam5tVUVGh9vZ2vfTSS7rssss2+jb77befHn30UZ1wwgk64ogjdOihh+rYY4
/d7FqEG7rtttskSWvXrtWnn36qzz77TC+99JIkbXIX7qH87W9/0y233KLzzz9/wBqWmXiNN954Q+PGjesPHvscd9x
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Применение PCA ТОЛЬКО к числовым данным\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"kc_pca = pca.fit_transform(df_numerical_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.scatter(kc_pca[:, 0], kc_pca[:, 1], alpha=0.6)\n",
|
|||
|
"plt.title(\"PCA Visualization of Numerical Features\")\n",
|
|||
|
"plt.xlabel(\"Principal Component 1\")\n",
|
|||
|
"plt.ylabel(\"Principal Component 2\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Агломеративная (иерархическая) кластеризация"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 79,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABSEAAAP1CAYAAACe9CqJAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADd+klEQVR4nOzdeXiV5Z344e+BJICKgqigggpYcQURRZ1WUbDqVLsgrVoVt/oTFRUV61L33bGAsriAjAu4K1bt1NbWpdgZFYuK1NYpdS0oqIjgCgnh/f3B5JRAgCTkycly39fFdZ2c9cnZwvmc533eXJZlWQAAAAAAJNKi0AMAAAAAAJo2ERIAAAAASEqEBAAAAACSEiEBAAAAgKRESAAAAAAgKRESAAAAAEhKhAQAAAAAkhIhAQAAAICkREgAAAAAICkREoCCueCCC6JHjx5V/rvgggsKPTxgBYsWLYo+ffrEzJkzY9GiRXHqqafGHXfcUehh0QDMnDkzDjrooCgtLS30UGjC3nnnnejfv398/vnnhR4KALVUVOgBANC8bbrppjFu3LhKx51++ukFGg2wOhtttFGccMIJcfjhh0eWZdGjR4/4j//4j0IPiwJbsmRJnH/++fHzn/88SkpKCj0cmrBu3brFgAED4uqrr44bbrih0MMBoBZESAAKpry8PNZbb73YddddKx3vgyw0TKeffnoceeSR8fnnn8fWW28dLVu2LPSQKLD77rsvioqK4oADDij0UGgGTj755Nhvv/3iuOOOi5122qnQwwGghmyODUDBLF26NFq3bl2t806fPj2OOeaY6NWrV/Tt2zfOP//8WLBgQf70Rx99NHr06BFz5sypdLn+/ftX2rS7rKxstZuAr3xdr7/+egwcODB69uwZ3//+9+N3v/tdpev+4osv4rrrrosDDjggdtlllzj00EPjkUceWeX2V76dOXPmxODBg+OCCy6I2267Lf7t3/4t+vTpE6eddlp88MEHlS7/9NNPx1FHHRW9e/eOnXfeOQ4++OC4995786dPmzYtf72vvPJKpcvec8890aNHj+jfv/8q47n44osrnXfRokWx8847R48ePWLatGnVvv3Vefjhh+Owww6LXXfdNXr27Bk//OEP47e//e0q93FVm+Cv7vEZPHhwpdt48skn47DDDovevXvHt7/97bj00ktj0aJF+dPHjh0bPXr0iN69e6+ymeiZZ565ymb/S5YsiRtuuCH69esXO++8c3z/+9+PJ598stLl+vfvHzfeeGNce+21sccee8See+4Z5513XixcuLDav/+aliF49NFH84/pio/Dp59+GrvvvnuVj2WPHj1i++23jz322CPOOOOM+Oyzz/Ln6dGjR4wdO7bS2Crul9rclxERm2yySXTr1i1eeOGFtS6dsPJt/eY3v4k99tgjRo4cGRGVn78r/1tx3P/7v/8bp59+euy1116x0047xT777BNXX311LF68OH+e0tLSuOmmm2LAgAHRs2fPOPTQQ+NXv/pVte7ziIgPP/wwzjnnnOjbt2/06tUrjjvuuPjb3/6Wv/45c+ZEjx494je/+U2ccsop0atXr9hvv/3i5ptvjmXLllV6XFa+T84555xKj2mWZTF69OjYZ599ok+fPnHKKafE3Llz8+cvLy+PCRMmxKGHHho9e/aMXXfdNY488sh46aWX1vg4Rqz6mK/8c5ZlceSRR1Z6v7zgggsqPbciIh544IEqnz8rKi0tjTvvvDMOPfTQ/HGDBw9e5bW68nO6qrH/6U9/WuX59OWXX8ZVV10V++yzT+y6664xaNCg+OMf/7jK9a7t+bNkyZK4+eab4+CDD45ddtklDjzwwJgwYUKlx23w4MGVLr/bbrvFiSeeGLNnz86fp6rHdkVV/e4PP/xwHHLIIbHzzjvHfvvtF2PHjo3y8vLVXkfE6t8DV379r+29qLqP65NPPhnf+973Ytddd43DDjsspk+fnj+tus+z6rxGV7
zMV199FYMHD44dd9wxlixZUu2xbrrpprHXXnvF+PHj13gfAtAwmQkJQMF88803sdFGG631fH/+85/jhBNOiL322ituuummWLRoUYwePTqOPfbYeOSRR6odMiOWfxiNiLj11ltj4403jojlHxJXjocREUOGDIljjjkmzj777HjkkUfirLPOivHjx0e/fv1i8eLFcdRRR8Wnn34aZ555Zmy55Zbx9NNPx0UXXRTz58+PU045JX89/fr1i9NOOy3/82abbRYREc8880y0b98+Lr744li2bFmMHDkyBg8eHL/5zW+iTZs28cc//jGGDh0axx57bJxxxhmxePHiuO++++LKK6+MnXfeOXr16pW/zvXXXz+effbZ6NOnT/64J598Mlq0WPX7xvXXXz/++Mc/RpZlkcvlIiLi97///SofjGty+yu699574+qrr44zzjgj+vTpE4sWLYrbb789zj333Ojdu3d06tQpf95x48bFpptuGhGRfzwiIn784x/HT37yk/zPV1xxRaXbuOWWW2LMmDFx1FFHxdlnnx2zZ8+O0aNHx4wZM+Khhx6q9JzI5XLx4osvRr9+/SJi+YffqVOnVrpvsiyLoUOHxquvvhpnnnlmdO/ePf7whz/E2WefHaWlpfGjH/0of9777rsvtt5667juuutiwYIFMXLkyHj//ffjgQceiFwut9bf/7TTTosjjzwyIpbPLNxxxx3zz4+tttoq/vGPf6xyn44cOTK++OKL2HDDDSsdX/HcKisri7fffjtuuOGGuOaaa2LEiBFVPjZVqcl9WaGsrCyuvfbaat9GRMTixYvjyiuvjJNOOim+//3vVzrt0ksvrTSr6Ygjjsgf/vjjj+Poo4+OXXfdNa6//vooKSmJ559/Pu68887YbLPN4uSTT46IiHPPPTemTp0ap556avTq1SumTp0aF1xwQRQXF6/1Pl+wYEEceeSR0aZNm7jkkkuiTZs2cffdd8fRRx8djzzySHTv3j0/nssvvzz69esXY8eOjVdeeSXGjRsXX3/9dfz85z+v8veePn16/OY3v6l03F133RXjx4+P8847L7p27RrXX399DBs2LB566KGIiBgxYkTcf//9MXz48OjRo0d89NFHcfPNN8ewYcPij3/8Y7Rp06ZG9/2KHn/88XjttdfWeJ5FixbFTTfdtNbrmjZtWnz00Udx4IEH1no8EVU/n8rLy+PEE0+M9957L84888zo1q1b/OpXv4qhQ4fG3XffHbvvvnv+vGt6/mRZFqecckrMmDEjTj/99Nh+++1j2rRpcdNNN8Xs2bPjqquuyp93xx13jMsuuyyWLl0ac+bMiZEjR8Z5550X999/f61+r/Hjx8eNN94YxxxzTFx44YXx5ptvxtixY2Pu3Llrff2s/B54yy23xFtvvVXpPGt7L1pZVY/rzJkz49xzz40f/ehHcckll8Q999wTp5xySvzud7+LTTbZpFq/Z3VfoyuPff78+XH33XdXufXDmp6DBx98cFxxxRXx1Vdfxfrrr1+tMQLQMIiQABTMwoUL80FuTUaOHBldu3aN8ePH5zf/7NWrVxxyyCExZcqUOProo6t9m19//XVERPTu3Tvat28fEctn4FRl8ODBMXTo0IiI2GeffWLgwIFx8803R79+/eLRRx+NWbNmxQMPPBC9e/fOn2fp0qVxyy23xJFHHhnt2rWLiOVxbeVNziOWR9hHH300unTpEhHL17saOHBgPPbYY/HTn/403nrrrRg4cGBcdNFF+cv07t079txzz5g2bVqlCLjvvvvGM888kw8h8+bNi9deey123333VWZX7r333jF16tR4/fXX8+P67W9/G3vssUel2Xc1uf0VzZ49O372s59VCq9bbrllHHbYYfHKK6/EIYcckj9+hx12iM6dO69yHZ06dap0n22wwQb5w4sWLYpbb701Dj/88Lj00kvzx2+33XZx9NFHr/KcqLhvKiLks88+G5tuummlWVAvvPBC/OlPf4obb7
wxvve970XE8sfzm2++iREjRsShhx4aRUXL/9vUokWLuPPOO6Nt27YRsfzxHTp0aPzpT3+Kfffdt1q//1ZbbRURy5c
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[1 1 1 ... 1 1 1]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Построение дендрограммы (только для числовых данных)\n",
|
|||
|
"linkage_matrix = linkage(df_numerical_scaled, method='ward')\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title('Дендрограмма агломеративной кластеризации (числовые признаки)')\n",
|
|||
|
"plt.xlabel('Индекс образца')\n",
|
|||
|
"plt.ylabel('Расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Получение результатов кластеризации (только для числовых данных)\n",
|
|||
|
"result = fcluster(linkage_matrix, t=0.7 * linkage_matrix[:, 2].max(), criterion='distance')  # cut at 70% of the tallest merge (the dendrogram's default colour threshold); the fixed t=100 exceeded every merge height and put all samples in one cluster\n",
|
|||
|
"print(result) # Вывод результатов кластеризации (номера кластеров для каждого образца)\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 82,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ8AAAMQCAYAAACJzMTyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADBOUlEQVR4nOzdeXwTdf7H8XfaJOUqh1wFigcooHIvyOEPl2tdPFfEAxQEXH+AIiKgKL/1ANlFXDkEqgiLcgmCCIrXqquiqNyXJ6goupQbodxt0nZ+f5SEpDmapJmmSV/Px4MH7Uw6+WQymcy85/v9jsUwDEMAAAAAAACACZJiXQAAAAAAAAASF+ETAAAAAAAATEP4BAAAAAAAANMQPgEAAAAAAMA0hE8AAAAAAAAwDeETAAAAAAAATEP4BAAAAAAAANMQPgEAAAAAAMA0hE8AAAAAAAAwDeETACAu9evXT/369fM7b+XKlWrcuLEeffTREq4KQKSGDBmiZcuWxboMRMnrr7+uQYMGxboMAEApQfgEAEgohw8f1oQJE2JdBoAwrFixQgcOHFCvXr1iXQqipFevXjp06JBef/31WJcCACgFCJ8AAAnlqaee0unTp1WhQoVYlwIgBNnZ2Zo0aZKGDBmipCQOTROFxWLR4MGDNWXKFGVnZ8e6HABAjPENDwBIGB9++KE++OADDR06VNWqVfOal5+fr9mzZ+tPf/qTmjZtqj//+c9auHCh12P69eunRx99VC+++KI6duyoP/zhD7rvvvu0Z88er8d99NFHuuOOO9SqVSs1bdpUPXr00KJFi9zz169fr8aNG+uLL77QnXfeqebNm+vqq6/W4sWL3Y95+umn1bhxY61bt849bcWKFWrcuLHefPNNdz2FuxZOnjxZjRs31ooVKyRJjRs31owZM7weM2PGDDVu3Nin5ptvvlnNmjXTlVdeqb///e86ffq012O2bdumu+++W61bt1b79u01cuRIHThwwOs1rV+/XpL0448/qnv37urdu3fI60WSZs+erW7duumyyy5T48aN3f8KvwZPjz76qLp27er+/eWXX1arVq20ZMkSr/Xm759rPUnSxo0b9de//lVt27ZV06ZN1bVrV82YMUP5+fnux5w8eVLjx49Xp06d1LJlS/Xq1Uuffvqp+/0I9Dye62Xw4MFq3bq1WrduraFDh2r37t3u5YeybUi+76thGOrdu7caN26szMxMSVJOTo7Gjh2rDh06qF27dnrooYd07Ngx999kZ2dr8uTJuvrqq9W0aVO1bt1aAwcO1Pbt2wOuW0nKzMz0WneFf3c9d7du3by2s59++sm9fguvn2CWL1+unJwcdenSxWv61KlT/a7rwtvKsmXLdN1116lp06bq3LmzZsyYoby8vLBeoyR9+eWXfp/P8zP46KOPql+/fnr99dfVpUsXtWrVSv3799eOHTu8lv/rr7/qgQce0JVXXqmWLVuqX79+2rx5s8/ze/5z1di4cWO98soreuSRR9SqVSt17NhR//jHP5STk+P++7y8PM2ePVvXX3+9mjdvrpYtW6p3795e+xPXfqBVq1ZyOBxe9T3wwANeXZM963nrrbe8Hrtq1Sqf9zGU55ekLl26KCcnR8uXLxcAoGyzxroAAACiISsrS+PGjdPll1+ue+65R6+99prX/LFjx2rFihUaPHiwWrVqpY0bN2rChAk6fvy4hg4d6n7cxx9/rGrVqumxxx5Tfn6+Jk+erH79+undd99V+fLl9emnn2ro0KG66667NGzYMGVnZ2vx4sV66qmn1LRpU7Vo0cK9rBEjRuimm27SkCFD9PHHH2vcuHGSpDvuuEMjRozQp59+qieffFJvv/22Dh8+rH/84x+65pprdNNNN/l9jf/97381b968sNfN22+/rYceekg33HCDHnzwQe3Zs0dTp07Vzp07NXfuXFksFn3//ffq27evWrRooX/+85/Ky8vT5MmT9de//tUdhnl69t
ln1bRpU917772SFNJ6efPNNzV58mQNHjxYHTp0UPny5SVJt99+e8iv5cCBA5oyZYqeeuop/fGPf/Sal5GRoZo1a0qSDh06pPvvv989b8eOHRowYIB69OihqVOnyjAMvf3228rIyFCDBg103XXXKS8vT3fffbc7OGjQoIHeeOMNDR06VPPnz9eTTz6pkydPumu+5ZZbdOutt0qSLr74Yu3atUu9e/dWgwYN9Mwzzyg3N1czZ85Unz59tHLlSlWvXt1dT7Btw5+VK1dq69atPu/Bm2++qccff1yVK1fWuHHjNHbsWE2dOlWSNHr0aG3atEkjR47U+eefr99++03Tpk3TqFGj9O6778pisYS83gubM2eOOwRzuffee2W32zV+/HjVqlVLSUlJWrZsWZHdrt566y117txZdrvda3p2dra6du2qwYMHu6cV3lZmzZqlqVOnqm/fvhozZoy2b9+uGTNmaN++fWF3v83OzlZaWpqmTZvmnuZ6Xzxt375dv/zyi0aOHKkqVapo+vTp6tu3r9577z3VqlVLO3fu1G233aYLL7xQjz32mGw2mxYsWKD+/fvr5Zdf1hVXXOG1zjp37ixJXq9/2rRpatGihZ577jn9/PPPeu6553To0CE999xzkqRJkybp1Vdf1ahRo9S4cWMdOHBAzz//vIYPH65PP/3U/dmSClogrV271v15OXXqlD777DO/rcwqVqyoTz75RDfeeKN72nvvvaekpCSvkDbU509JSVGXLl309ttv68477wzn7QAAJBjCJwBAQpgwYYKOHTuml156SVar99fbrl279Nprr2nkyJHuAXD/53/+RxaLRbNmzdIdd9zhbil15swZrVixQvXr15ckNWjQQD179tSbb76pPn36aOfOnerZs6f+9re/uZffqlUrtWvXTuvXr/cKn/70pz+5H9epUycdPHhQL7zwgvr06aNy5cpp4sSJuuOOOzR79mxt2bJFlSpV8nuy6/kaL7nkEn333XfuaUlJScrNzQ34N4ZhaNKkSerUqZMmTZrknn7hhRdqwIAB+uyzz9S5c2e9+OKLqlq1ql5++WWlpKRIkmrVqqVRo0bpp59+8lrmb7/9pi+++EJvvfWWLrnkEkkKab18/fXXqlq1qkaOHBmw3qIsWbJETZo00c033+wz79JLL1V6erok+QQjO3bsUMeOHfXss8+6T7qvvPJKffLJJ1q/fr2uu+46rV69Wl999ZWef/55de/eXZLUvn177d69W+vWrfMKsyQpLS1NLVu2dP/+5JNPqnz58po3b54qVaokSerQoYO6d++uOXPm6JFHHnE/Nti2UTgUOnXqlCZNmqTLL7/c6703DEOjR492j5O0ZcsW94DdDodDp06d0mOPPaZrr71WknTFFVfo5MmTmjhxog4fPuwO6sK1b98+/etf//Kq58iRI9q9e7cef/xx9ejRw/3Yzz//POiyTp48qW+++UbXXHONz7wzZ86obt26XuvY04kTJ/TCCy/o9ttv12OPPSap4HNdtWpVPfbYYxo4cKB7+wzFmTNnVLlyZa/nc72PhZ/3xRdfVJs2bSRJzZs3V/fu3bVgwQI99NBDysjIkN1u14IFC9x/37lzZ11//fX65z//6RXGnX/++X5f33nnnacXX3xRVqtVf/zjH5WUlKSnn35aw4YNU8OGDXXw4EGNGDHCq1VWSkqKhg0bph9++MFrmVdddZU+/vhjd/j0ySefqGbNml5hkudjP//8czkcDtntduXk5Ojjjz9W27Zt3a37JIX1/M2aNdN7772nkydP+l2fAICygW53AIC499lnn2nlypUaNGiQmjRp4jN/3bp1MgxDXbt2VW5urvtf165dlZOT49UdpnXr1u7gSZIuu+wy1a9fXxs3bpQk3XPPPZo4caJOnTqlb7/9Vu+9955mzZolST5dW3r27On1+9VXX61Dhw5p165dkgrCmQEDBuj555/XmjVrNHHiRFWpUsXva1y9erXWrFnjFWBIUv
Xq1d1d4/z55ZdftH//fp/X3rZtW1WqVElffvmlJGnz5s266qqr3MGTq75PPvlEl156qXva6dOnNXXqVLVr187rxD6
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x800 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Выбираем подмножество данных для кластеризации\n",
|
|||
|
"# В данных всего два числовых признака (Networth и Age), поэтому кластеризация строится только по ним\n",
|
|||
|
"features = df_encoded[['Networth', 'Age']]\n",
|
|||
|
"\n",
|
|||
|
"scaled_features = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(scaled_features, method='ward') \n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(12, 8))\n",
|
|||
|
"dendrogram(linkage_matrix, labels=df.index, leaf_rotation=90, leaf_font_size=10)\n",
|
|||
|
"plt.title('Иерархическая кластеризация (дендрограмма)')\n",
|
|||
|
"plt.xlabel('Индекс миллиардера')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Визуализация распределения кластеров**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 103,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAACbMAAAuoCAYAAADST7XUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yfdb3//8c1PjM7adM23XtPOqHQYRkCsl0MEQVE8IiiHtGDyu/I8ciRLw7QowiuA4pAEdnILpRCoZsOukfa7DTN+Mxr/P5ImzZNUrqSNO3zfrvlRvJ+X9f7/bquz+dzlZIn77fh+76PiIiIiIiIiIiIiIiIiIiIiIiISCcyO7sAEREREREREREREREREREREREREYXZREREREREREREREREREREREREpNMpzCYiIiIiIiIiIiIiIiIiIiIiIiKdTmE2ERERERERERERERERERERERER6XQKs4mIiIiIiIiIiIiIiIiIiIiIiEinU5hNREREREREREREREREREREREREOp3CbCIiIiIiIiIiIiIiIiIiIiIiItLpFGYTERERERERERERERERERERERGRTqcwm4iIiIiIiIiInJB83z+l55fDo9dJREREREREROTkoTCbiIiIiIiIiIgck2uuuYZRo0axatWqVvvnzp3L7bfffkRjbtiwgc9//vPHo7yj8pvf/IaHHnqo6ef77ruP4cOHd1o9ramvr2f8+PGMHj2aioqKTqtj0aJFfO1rX+PMM89k/PjxnHvuudx9991UVVU1HVNcXMzw4cN58sknj+vcr776Kt/97neP65giIiIiIiIiItJ5FGYTEREREREREZFj5rou3/ve90ilUsdlvBdffJFly5Ydl7GOxi9/+Uvi8XinzX84nn32WbKyssjOzuaJJ57olBruuecerrvuOmzb5j/+4z/43e9+x5VXXskzzzzDZz7zGUpKStp1/j/96U/tPoeIiIiIiIiIiHQchdlEREREREREROSYZWVlsWHDBn796193dimnjCeffJIzzzyTs88+m8cffxzP8zp0/ueee47f//733H777fziF7/gvPPOY/r06Vx77bU8/PDDVFdX81//9V8dWpOIiIiIiIiIiHRtCrOJiIiIiIiIiMgxGzlyJJdccgkPPvggH3744cce//jjj3PBBRcwZswYZs+ezX333YfrukDjlp73338/AMOHD+eXv/wl06dP56677mo6P5VKMX78eK688spm41588cX88Ic/BCCZTPLrX/+a8847j7Fjx3LOOefwwAMPNAt9XXPNNXz729/m61//OhMmTOC6665r2k70/vvvb7G16BtvvMFFF13E2LFjOffcc3nqqafavMZnnnmG4cOHs379+mbtr7zyCsOHD2fNmjUA/PnPf26q8cwzz+TOO++kvr7+kPdv48aNrFixgtmzZ3PRRRexc+dO3nrrrRbHlZeX881vfpOpU6cyZcoUfvjDH/Lzn/+cuXPnNjvuUK9HWx544AGGDBnCtdde26JvwIABfOc732HixIn4vt+iv61tW4cPH859993X9POzzz7LRRddxLhx45g+fTrf/va3KSsrAxpfu8WLF7N48WKGDx/Oe++9B0BNTQ0//OEPOf300xk7diyf+cxnWLRoUYt57r//fi677DLGjRvH/fffj+d5TfdmzJgxzJ07l//3//4f6XT6kPdBRERERERERESOH7uzCxARERERERERkZPD97//fRYuXMj3vvc95s+fTzAYbPW43/3ud/z85z/n6quv5nvf+x5r167lvvvuo6SkhJ/85Cd8+tOfprS0lCeeeIK///3v9OzZk+Li4maBpGXLlpFIJFi1ahXJZJJQKER5eTnr1q3j1ltvxfd9brrpJpYvX87XvvY1RowYwXvvvccvfvELduzYwY9//OOmsV544QUuuugi/vd//xfP87j11lv57Gc/yxVXXMGnP/3pZrX/8Ic/5Bvf+AaFhYVNq5
KNGDGCESNGtLjOefPmEY1Gee655xg2bFhT+7PPPsvQoUMZNWoUzz77LD/72c/47ne/y/Dhw9m8eTN333038Xicu+++u817PX/+fHJzc5kzZw7BYJD+/fvzt7/9jVmzZjUdk0qluPbaa4nFYnz/+98nMzOTBx54gLVr19K9e/fDfj1aU1FRwbp167j++usxDKPVYw4OGh6pJUuW8O///u/cfPPNTJkyhdLSUn72s5/xrW99i4cffpgf/ehHfOc73wHgRz/6EUOGDCGZTHLttddSWVnJN7/5TQoLC5k/fz7XX389Dz74IDNmzGga/7e//S3f+ta3GDhwIL179+b3v/89f/vb3/jud79L3759WbFiBT//+c8JBAJ8/etfP6ZrERERERERERGRw6Mwm4iIiIiIiIiIHBc5OTn853/+J1/96lf59a9/zTe/+c0Wx9TV1fGb3/yGz372s9xxxx0AzJw5k9zcXO644w6uu+46hg4dSs+ePQGYMGECALNnz+bpp5+mvLycwsJCFi1axOjRo1m9ejXLly9n2rRpvPXWW4TDYU4//XQWLFjAO++8w7333ssFF1wAwBlnnEE4HOaXv/wlX/jCFxg6dCgAgUCA/+//+/9ahO969uzZNP8+d911F2eddRYA/fr14+yzz2bx4sWthtkikQjnnnsuzz//fNO9aGho4PXXX+eWW24BYPHixfTp04errroK0zSZOnUq0WiUPXv2tHmfHcfh6aef5sILL2yq+dJLL20KoPXq1QuAp59+ms2bNzN//nzGjBkDwPTp05k3b94Rvx4HKykpAaBPnz5t1nmslixZQjgc5sYbb2y6ztzcXFatWoXv+wwZMoTMzExg//vkscceY926dTz22GOMHz8egLPOOotrrrmGe+65h/nz5zeNP3nyZK677rqmn3/6058yZswYLr/8cgCmTp1KJBIhKyur3a5RRERERERERESa0zajIiIiIiIiIiJy3MydO5eLLrqIBx98kNWrV7fo37ei2ty5c3Ecp+lr37aXCxcubHXcmTNnYlkW77zzDgDvvvsu5513HgMGDOD9998HYMGCBUyfPp1wOMzixYuxbZvzzjuv2TgXXXQR0Bgi22fQoEFtriJ3sMmTJzd9vy/IVVtb2+bxF198Mdu3b2flypUAvPrqq6RSqaY6pk+fzpYtW7jsssu4//77WbVqFZ/61Ke45ppr2hzzjTfeoLKyknnz5lFbW0ttbS1z587F8zwef/zxpuPeffdd+vbt2xRkA8jMzGTOnDlNPx/t62Hbjf+P7IFbth5vU6ZMIR6Pc+GFF/L//t//44MPPmDmzJl87Wtfa3M1uEWLFtG9e3dGjx7ddC2u6zJnzhw+/PDDZiHBkSNHNjt32rRpLFy4kCuvvJIHH3yQjRs3cvXVV3PxxRe32zWKiIiIiIiIiEhzWplNRERERERERESOqzvuuINFixY1bTd6oJqaGgBuvPHGVs8tLy9vtT0nJ4eJEyeyaNEi5s2bx6pVq7j99tvZsWMHixcvxnVdFi1axG233QbAnj17yMvLw7KsZuPs216zrq6uqS0jI+Owry0ajTZ9b5qN/5+o7/ttHj9t2jR69OjBc889x7hx43juueeYOnVq08pz559/Pp7n8de//pXf/OY33HffffTu3Ztvf/vbnH/++a2Oue+efvGLX2zR98QTT3DzzTdj2za7d++moKCgxTEHth3t69GrVy8Mw2Dnzp1tXvuePXuwbfuI7u+BJk6cyAMPPMCf/vQn/vjHP/LAAw/QrVs3brrppjbDfjU1NVRUVDB69OhW+ysqKsjJyQGav5YA119/PRkZGcyfP5977rmHn/3sZwwdOpQ77riD6dOnH9U1iIiIiIiIiIjIkVGYTUREREREREREjqucnBzuvPNObrnlFn7zm98068vOzgbgnnvuYcCAAS3O7datW5vjzpo1i4cffpgPPviAYDDImDFjKC4u5umnn2bx4sXs2bOnadWxnJwcdu/eje
u6zQJt+8JZeXl5x3qZh8U0TT71qU/x7LPPctNNN7Fw4UL+8z//s9kxF154IRdeeCF1dXW8/fbb/P73v+c73/kOp51
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 2500x3000 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"# Кодирование категориальных переменных\n",
|
|||
|
"df_encoded = pd.get_dummies(df_cleaned, drop_first=True)\n",
|
|||
|
"\n",
|
|||
|
"# Выбор подмножества данных для кластеризации\n",
|
|||
|
"features = df_encoded[['Networth', 'Age']]\n",
|
|||
|
"\n",
|
|||
|
"# Масштабирование данных\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"scaled_features = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Кластеризация данных\n",
|
|||
|
"kmeans = KMeans(n_clusters=3)\n",
|
|||
|
"df_encoded['Cluster'] = kmeans.fit_predict(scaled_features)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"plt.figure(figsize=(25, 30))\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 1: Networth vs Age\n",
|
|||
|
"plt.subplot(4, 1, 1)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['Networth'], y=df_encoded['Age'], hue=df_encoded['Cluster'], palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Networth vs Age Clusters')\n",
|
|||
|
"plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
"plt.ylabel('Age')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 2: Networth vs Country\n",
|
|||
|
"plt.subplot(4, 1, 2)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['Networth'], y=df_encoded['Country_United States'], hue=df_encoded['Cluster'], palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Networth vs Country Clusters')\n",
|
|||
|
"plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
"plt.ylabel('Country (United States = 1, Others = 0)')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 3: Age vs Industry\n",
|
|||
|
"plt.subplot(4, 1, 3)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['Age'], y=df_encoded['Industry_Technology '], hue=df_encoded['Cluster'], palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Age vs Industry Clusters')\n",
|
|||
|
"plt.xlabel('Age')\n",
|
|||
|
"plt.ylabel('Industry (Technology = 1, Others = 0)')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 4: Networth vs Source\n",
|
|||
|
"plt.subplot(4, 1, 4)\n",
|
|||
|
"sns.scatterplot(x=df_encoded['Networth'], y=df_encoded['Source_Amazon'], hue=df_encoded['Cluster'], palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Networth vs Source Clusters')\n",
|
|||
|
"plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
"plt.ylabel('Source (Amazon = 1, Others = 0)')\n",
|
|||
|
"\n",
|
|||
|
"# Настройка графиков\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## KMeans (неиерархическая кластеризация) для сравнения"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 107,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Центры кластеров:\n",
|
|||
|
" [[ 4.6469914 80.60315186]\n",
|
|||
|
" [ 3.49202201 48.5914718 ]\n",
|
|||
|
" [80.24333333 65.36666667]\n",
|
|||
|
" [ 3.76886463 64.24366812]]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAASgCAYAAABWngGUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5wdVfn48c+027f3ks2mbnrvQEhCD5EuolSRjl/5ogL6QykqflUQlUQQKRYERYwivYcSCAkJCaSTnmzv9dYpvz8uuclmd9O3BJ7365VXsnPmznnm3Nmbe+aZc47iOI6DEEIIIYQQQgghhBBCCCFEH6L2dgBCCCGEEEIIIYQQQgghhBD7kgSGEEIIIYQQQgghhBBCCCH6HElgCCGEEEIIIYQQQgghhBCiz5EEhhBCCCGEEEIIIYQQQggh+hxJYAghhBBCCCGEEEIIIYQQos+RBIYQQgghhBBCCCGEEEIIIfocSWAIIYQQQgghhBBCCCGEEKLPkQSGEEIIIYQQQgghhBBCCCH6HElgCCGEaMdxnN4OoU/r7fbp7fqFEEIIIcSXm3wf3b/ebp/erv9YJm0nRN8kCQwhRK+79NJLufTSSztsb21t5cILL2TUqFG88cYbiX1LSkq46KKLujzezTffTElJCT/4wQ+6LebuEolE+POf/8z555/PxIkTmTJlChdddBHPPvtsuy9T8+fPp6Sk5KjWHY1G+fnPf87zzz9/VI7X1fvaE0pLSykpKeG8887DNM0O5UuXLqWkpISlS5ce0nGfeeYZfvnLXx6tMA9JZWUl11xzDWVlZYltc+bM6XPX+T//+U9KSkq47rrrejsUIYQQQnxBSH9hD+kvHB3SX+g927Zt46677uLkk09mzJgxzJo1i+9+97ts2LChV+PqzfdOCLF/ksAQQvRJra2tXHXVVWzYsIHf//73nHzyyYkyVVVZtWoVlZWVHV4XDAZZtGhRT4Z61NTW1vK1r32Nhx56iNmzZ/Ob3/yGX/3qV4nO1Y9//ONufSKkurqav/zlL51+gT8cd955J3feeedROdbhWrt2LY888shRO95DDz1EY2PjUTveofjggw945513eqXuQ7Fw4UKGDh3Ku+++S0VFRW+HI4QQQogvKOkvSH/haJD+Qs967bXXOPfcc1m7di3XX389jzzyCDfffDPbt2/nwgsv5P333++12HrzvRNC7J/e2wEIIcS+dndG1q9fz0MPPcRxxx3XrnzEiBFs3ryZV155hSuuuKJd2aJFi/B6vSQnJ/dgxEfHbbfdRmVlJU8//TTFxcWJ7bNmzSI/P5/777+f2bNnc9JJJ/VekIdg8ODBvR0CycnJiQ7tkCFDejucL7wtW7awatUqHn30UW6++Waefvpp/vd//7e3wxJCCCHEF4z0F6S/cLRIf6Hn7Ny5k9tuu40TTjiB3/72t2ialig79dRT+frXv85tt93GW2+9hcvl6sVIhRB9jYzAEEL0KW1tbVx99dVs3LiRP/7xjx06IwA+n48TTzyRV155pUPZSy+9xGmnnYaut8/P2rbNH//4R0455RRGjRrFaaedxhNPPNFuH8uy+OMf/8i8efMYM2YM48aN46KLLuLDDz9M7DN//nxOOeUU3n77bb7yla8kjvXss8+2O9Zf/vIXTj/9dEaPHs0JJ5zAXXfdRWtra5fnvX79ehYvXsy3vvWtdp2R3a644gouvvhifD5fp6/vbGjwv//9b0pKSigtLQUgHA5z1113MXPmTEaNGsXpp5/OY489BsSHUO/u6Pzwhz9kzpw5ieMsX76cSy65hLFjxzJlyhRuu+026uvr29UzYsQInnnmGY477jimTJnC5s2bOwwJLykp4cknn+T2229nypQpjB8/nptuuona2tp2cT/22GOcdNJJjBkzhosuuoi33nqr3fDt3cO958+f32V77nbttdcSCAT4wQ9+gGVZ+923sbGRO+64gxkzZjB69GguvPBClixZ0q
6Ny8rK+M9//kNJSQl//etfKSkpYd26dYl9nn32WUpKSnjmmWcS29avX09JSQkrV64EYPv27XznO9/huOOOY9y4cVx66aWsWLEisf/u8/vTn/7E6aefztixY1m4cCE//OEPATjppJPavdexWIxf/epXieNdeeWV7Nixo8vzvPLKKznvvPM6bL/hhhs466yzAKivr+d73/sexx13HKNHj+bss8/ucI13ZuHChaSkpDBt2jROO+00/vWvf3X6hN7bb7/Neeedx5gxYzjttNN44YUXOOWUU9q9pwd6P4QQQgjx5ST9BekvSH/h2OwvPPHEE0SjUX70ox+1S14AeL1ebrvtNs4//3yampoS21966SXOO+88xo8fz3HHHccdd9zRrvwHP/hBu2tx7/b597//DeyZDmzJkiVceeWVjB07luOOO45777038Z7v+96VlpZ2et0++eSTlJSUsG3btnZ1/ve//2X48OEyAl2IbiIJDCFEnxEMBrnmmmtYt24djzzyCFOnTu1y37lz53YYFt7a2sq7777LvHnzOux/11138cADD3DWWWfxhz/8gdNPP52f//zn/P73v0/sc9999/Hggw/yta99jUcffZSf/vSnNDY2ctNNNxEKhRL71dTU8JOf/ITLLruMP/7xjxQWFnLbbbexZcsWAF544QXuvfdeLr74Yh577DFuvPFG/vvf//LTn/60y/N57733ADp8+drN7XZzxx13MH369C6PcSA///nPeffdd7ntttsSX/p/9atfsXDhQrKzs1mwYAEA119/feLfH330EVdccQUej4ff/va3/L//9/9YtmwZl112GeFwOHFsy7J4/PHHueeee/jhD3/IoEGDOo3hN7/5DbZtc//993PrrbeyaNEifv7znyfKFyxYwH333ccZZ5zBgw8+yNixYzs8wZ+dnc3TTz/NV7/61QOec3p6OnfccQdr1qzh0Ucf7XK/SCTC5ZdfzptvvsnNN9/MggULyM3N5aqrrkp0ShYsWEBWVhYnnngiTz/9NOeeey4ul4sPPvggcZzdndfly5cntr377rukp6czduxYNm/ezHnnnUdpaSk/+tGPuO+++1AUhcsvv5xly5a1i2n+/PlcffXV/OpXv2LGjBlcf/31iThuuOGGxH4vvfQSmzZt4he/+AV33nkna9as4eabb+7yXM866yzWrl3brtPS3NzMu+++y9lnnw3ALbfcwpYtW7j77rt55JFHGDFiBLfddlu7zvm+TNPkueeeY968eRiGwbnnnktNTQ1vvfVWu/0+/PBDbrjhBvLy8pg/fz4XX3wxd955Z7sv+wfzfgghhBDiy0f6C9JfkP7CHsdaf+G9995jxIgR5OTkdFo+ffp0br75ZrKysgB48MEH+e53v8u4ceN44IEHuPHGG3n11Ve59NJL211bB+v73/8+EydO5A9/+APz5s3j0UcfTSSS9n3vsrOzgY7X7bx583C73fz3v/9td+xnn32W6dOnk5eXd8hxCSEOTKaQEkL0Cbs7I7ufLAkGg/vdf9asWXi93nbDwl9//XUyMjKYOHFiu323bdvGP//5T7773e9yzTXXAHD88cejKAoPP/ww3/jGN0hLS6O6upqbb7653VNAbreb//mf/2Hjxo2MGzcOgFAoxD333JPoHBQXFzN79mzeeecdBg0axLJlyygsLOTiiy9GVVWmTJmCz+dr96TIvnbfvC0sLDz4RjtEy5Yt47jjjuPMM88EYOrUqfh8PjIyMnC5XAwfPhyAoqIiRowYAcCvf/1rBgwYwMMPP5x4Smbs2LGceeaZLFy4kIsvvjhx/Ouuu45Zs2btN4ahQ4fyf//3f4mfP/3008STccFgkEceeYSLL76Y73//+0D8fQqFQjz99NOJ17hcrsR7cTDmzp3Lyy+/zIIFC5gzZ06nQ8P/+9//smHDBv75z38yduxYAGbOnMmll17Kfffdx8KFCxkxYgQul4v09PRE/VOmTGHJkiVcddVVACxZsoSRI0
fy0UcfJY793nvvceKJJ6KqKgsWLMDlcvHXv/6VQCAAxK/lefPm8atf/Yp//etfidedccYZnH/++Ymfi4qKABg+fHi
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df = pd.read_csv(\"..//static//csv//Forbes Billionaires.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Удаление несущественных столбцов\n",
|
|||
|
"columns_to_drop = ['Rank ', 'Name']\n",
|
|||
|
"df_cleaned = df.drop(columns=columns_to_drop)\n",
|
|||
|
"\n",
|
|||
|
"# Кодирование категориальных переменных\n",
|
|||
|
"df_encoded = pd.get_dummies(df_cleaned, drop_first=True)\n",
|
|||
|
"\n",
|
|||
|
"# Выбор подмножества данных для кластеризации\n",
|
|||
|
"features_used = ['Networth', 'Age']\n",
|
|||
|
"data_to_scale = df_encoded[features_used]\n",
|
|||
|
"\n",
|
|||
|
"# Масштабирование данных\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(data_to_scale)\n",
|
|||
|
"\n",
|
|||
|
"# Кластеризация данных\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"kmeans = KMeans(n_clusters=4, random_state=random_state)\n",
|
|||
|
"labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
"centers = kmeans.cluster_centers_\n",
|
|||
|
"\n",
|
|||
|
"# Отображение центроидов\n",
|
|||
|
"centers_original = scaler.inverse_transform(centers) # Обратная стандартизация\n",
|
|||
|
"print(\"Центры кластеров:\\n\", centers_original)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов кластеризации KMeans\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 1: Networth vs Age\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['Networth'], y=df_cleaned['Age'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers_original[:, 0], centers_original[:, 1], s=300, c='red', label='Centroids')\n",
|
|||
|
"plt.title('KMeans Clustering: Networth vs Age')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 2: Networth vs Country\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"if 'Country_United States' in df_encoded.columns:\n",
|
|||
|
" sns.scatterplot(x=df_cleaned['Networth'], y=df_encoded['Country_United States'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
" plt.title('KMeans Clustering: Networth vs Country')\n",
|
|||
|
" plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
" plt.ylabel('Country (United States = 1, Others = 0)')\n",
|
|||
|
"else:\n",
|
|||
|
" plt.title('KMeans Clustering: Networth vs Country (No Data)')\n",
|
|||
|
" plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
" plt.ylabel('Country')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 3: Age vs Industry\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"if 'Industry_Technology' in df_encoded.columns:\n",
|
|||
|
" sns.scatterplot(x=df_cleaned['Age'], y=df_encoded['Industry_Technology'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
" plt.title('KMeans Clustering: Age vs Industry')\n",
|
|||
|
" plt.xlabel('Age')\n",
|
|||
|
" plt.ylabel('Industry (Technology = 1, Others = 0)')\n",
|
|||
|
"else:\n",
|
|||
|
" plt.title('KMeans Clustering: Age vs Industry (No Data)')\n",
|
|||
|
" plt.xlabel('Age')\n",
|
|||
|
" plt.ylabel('Industry')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 4: Networth vs Source\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"if 'Source_Amazon' in df_encoded.columns:\n",
|
|||
|
" sns.scatterplot(x=df_cleaned['Networth'], y=df_encoded['Source_Amazon'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
" plt.title('KMeans Clustering: Networth vs Source')\n",
|
|||
|
" plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
" plt.ylabel('Source (Amazon = 1, Others = 0)')\n",
|
|||
|
"else:\n",
|
|||
|
" plt.title('KMeans Clustering: Networth vs Source (No Data)')\n",
|
|||
|
" plt.xlabel('Networth (in billions USD)')\n",
|
|||
|
" plt.ylabel('Source')\n",
|
|||
|
"\n",
|
|||
|
"# Настройка графиков\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### PCA для визуализации сокращенной размерности"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 108,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAAJHCAYAAADoqsXxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5xU1f3/8fedO3V7LywsHaR3EBUV7D22b0zssaCYmGiMJdYkvyQmsddoojFREjFR0dhL7CKKAiq9921s35mdnXJ/fxAmLLsLC8xO2X09H488DPfM3vueOTswZz73nGNYlmUJAAAAAAAAAAAgQdjiHQAAAAAAAAAAAGBXFC8AAAAAAAAAAEBCoXgBAAAAAAAAAAASCsULAAAAAAAAAACQUCheAAAAAAAAAACAhELxAgAAAAAAAAAAJBSKFwAAAAAAAAAAIKFQvAAAAAAAAAAAAAmF4gUAdAOWZcU7AjrQk/umJz93AAAAtMZnw56BfgYQTRQvgG7s/PPP19ChQ1v9b+TIkTryyCP1i1/8QnV1dW1+Zt26dbrjjjt09NFHa/To0TryyCN17bXXavny5R1e595779XQoUP1q1/9qiufTocefPBBDR06NC7Xbs8LL7ygoUOHavPmzV3+cy0tLfrNb36jf//73/sac5+cc845Gjp0qN58880uvU6i9eWBqK+v1/XXX68FCxZEjp1//vk6//zzY5ahs+/nGTNm6MYbb4zqtVetWqXvfe97UTnX5s2bNXToUL3wwgtROR8AAEgcjFniozuNWYYOHaoHH3ywzfGVK1dq6tSpOuKII7R+/frIY4cOHap77rmn3XOFw2FNmzYtaT97lpeX6/e//72OP/54jRkzRocddpiuuOKKVmMSqWvGJWVlZbr88su1ZcuWqJyvo34F0LNQvAC6ueHDh2vOnDmR//3lL3/RRRddpOeff14zZ85sdVfEW2+9pdNPP11LlizRlVdeqT/96U+65pprtH79ev3f//2fPvnkkzbnD4fDmjt3roYMGaKXXnpJPp8vlk+vx6uoqNBf//pXBYPBLrvG2rVrtXDhQg0ZMkTPPvtsl12nu1m2bJleeuklhcPhyLHbb79dt99+e0yuvz/v52h64403tHDhwqicq6CgQHPmzNGRRx4ZlfMBAIDEwpile4vFmGV3q1at0kUXXSSPx6NnnnlG/fr1i7TZbDa98cYb7f7cF198oYqKihiljK4vv/xSp512mt577z1dcMEF+uMf/6ibb75Zzc3NOv/88zV37twuvf6nn36qDz74IGrnmzNnjs4+++yonQ9AcrLHOwCArpWWlqaxY8e2OjZp0iQ1NTXpgQce0OLFizV27Fht3LhRN9xwg6ZNm6b77rtPpmlGHn/sscfqe9/7nm644Qb95z//kdPpjLR9/PHHKisr0z333KPzzjtPr7zyCh8wupkXXnhBJSUlmjlzpq677jpt2LBBffv2jXespDRo0KCYXGd/38+Jyul0tvl7DAAAdB+MWRBNa9as0YUXXqjU1FT99a9/Va9evVq1jx8/XgsWLNDSpUs1fPjwVm2vvvqqhg0bpmXLlsUy8gGrra3VT37yE/Xr109/+ctf5PF4Im3HHXecLr/8ct1222067LDDlJeXF8ekncfnfwASMy+AHmvkyJGSpK1bt0qSnn76abW0tOiWW25pNQiQJI/HoxtuuEFnnnlmm2nbzz//vIYMGaIJEyZoypQpmjNnzl6vPWPGDP3mN7/RhRdeqNGjR+vmm2+WtOMD12233aZDDjlEo0aN0v/93/9p3rx5rX7W7/frt7/9rQ499FCNGzdON910k/x+f6vHtDcFdv78+Ro6dKjmz58fObZ27Vr98Ic/1OTJkzVp0iTNnDlTa9asaXWt3//+9zriiCM0cuRInXLKKXrttddanTccDuuRRx7RkUceqTFjxmjWrFntTm3fXWd/7p133tH3v/99jRs3TiNHjtTxxx+v2bNnS9
qxlM5RRx0lSbrppps0Y8aMyM/985//1BlnnKGxY8dq9OjROu200/T666+3OvfQoUP3ulRQKBTS3LlzNX36dB199NFKSUlpt48DgYDuuusuHX744Ro9erQuueQSzZ07t82U8hdffFEnnniiRo0apVNPPVXz5s3T8OHD9zgl+7XXXtMZZ5yhcePG6dBDD9Vtt93W6rV68MEHdfzxx+vtt9/WySefrFGjRum0007TwoULtWjRIp199tkaPXq0Tj755Da/TytXrtTMmTM1fvx4jR8/XldddZU2bdoUad/5e/Pss89q+vTpGj9+fORuvj29xvPnz9cFF1wgSbrgggsiv4+7/m7+4Ac/0BlnnNHm+c6aNUunnnpq5M8LFizQeeedpzFjxmjy5Mm64YYbVF1d3eHrJe3/+3nX57zre2X37JL07bff6sILL9SECRM0btw4XXTRRVq0aJGkHX3y0EMPSWo93TscDuvxxx/XMccco5EjR+q4447T008/3eY61113na6++mqNHTtWF198cZtlo1544QUNHz5cixcv1ne/+12NGjVK06dP1xNPPNHqXBUVFbrmmmsi7/HbbrtN9957b6v3CgAASFyMWRizdGbMsqs1a9boggsuUHp6up555pk2hQtpR2EsLy+vzeyLYDCot956SyeddFKbn+lMv1dXV+sXv/iFpk+frpEjR2ry5Mm66qqrWo2Hzj//fN188816/PHHdeSRR2rUqFE655xz9PXXX0ce09zcrDvuuEOHH3545PXc/XPu7ubOnauKigr9/Oc/b1W4kHbMNLnuuut07rnnqrGxsc3PdrRE64033tiqvzZu3KgrrrhCU6ZM0ZgxY/Td7343MtPihRde0E033SRJOuqoo1r12T//+U+ddNJJkeXgHnzwQYVCoVbXufDCC3X77bdr/PjxOvHEExUKhVqNI3a+N+bNm6cf/OAHGjNmjA499FD94Q9/aHWuxsZG3XbbbZo6darGjRuna665Rk899VRCLdkGYN9QvAB6qHXr1kmS+vTpI0n66KOPNHz4cBUWFrb7+KlTp+qaa65Rfn5+5Fhtba3+85//6Dvf+Y4k6fTTT9c333yjJUuW7PX6s2fP1qhRo/TII4/orLPOkt/v14UXXqh3331X11xzjR566CEVFRXp0ksvbfWh8Gc/+5mee+45zZw5U/fdd5/q6ur01FNP7fPzLy8v13e/+12tX79ed9xxh/7whz+oqqpKF154oWpra2VZlq666io9++yzuvjii/Xoo49GPvzsOt32D3/4gx5++GGdddZZeuihh5SVlaW77757r9fvzM+9//77uuqqqzRixAg98sgjevDBB9WnTx/98pe/1OLFi1VQUBD5gvjKK6+M/P/Zs2frtttu09FHH63HHntMd911l5xOp6677jqVlZVFzj9nzhzNmjVrjzk//PBDVVZW6jvf+Y7cbrdOOOEEvfjii2ppaWn1uNtuu01//etfdd555+nhhx9WXl6ebr311laPmTt3rm688UaNHz9ejzzyiI477jjNmjWr1YfN3T3yyCO69tprNXbsWD3wwAO66qqr9Oabb+r8889Xc3Nz5HFlZWW68847dcUVV+j+++9XfX29rr76al177bU6++yz9fDDD8uyLF1zzTWRn1u3bp3OOeccbd++Xb/73e/061//Wps2bdL3vvc9bd++vVWOhx56SDfccINuu+02jRs3bq+v8YgRI3TbbbdFXpv2loo69dRTtWTJEm3YsCFyrL6+Xh9++KFOO+00STumrV900UVyu92677779POf/1yff/65LrjgglbPf3f7837eF42Njbr00kuVnZ2tBx98UPfee698Pp8uueQSNTQ06Oyzz9ZZZ50lqfV07zvuuEMPPPCATj31VP3xj3/U8ccfr9/85jd6+OGHW53/9ddfV2pqqh599FFdeuml7WYIh8P6yU9+ohNPPFGPP/64xo8fr9///vf66KOPJO1YW/nCCy/UV199pZ///O
f67W9/q+XLl+vJJ5/cr+cMAABijzELY5bOjFl2Wrt2rS688EKlpaXpmWee6fD3xDRNHXfccW2KF/PmzZPf729zo0t
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"reduced_data = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация сокращенных данных\n",
|
|||
|
"plt.figure(figsize=(16, 6))\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: KMeans Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 109,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA14AAAImCAYAAABD3lvqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB04UlEQVR4nO3dd3xUVf7/8fdMeocUUmgJBEJJQg0I0kUERJeiYgHLV2yLy0/RRVlZd1FRV0EEEVcWGwoCCmJBKSIqIiBVWugEAySEJKRAejK/P0JGhoQkhCQ35fV8PPIIuffcM58Zzq55c+4512SxWCwCAAAAAFQZs9EFAAAAAEBdR/ACAAAAgCpG8AIAAACAKkbwAgAAAIAqRvACAAAAgCpG8AIAAACAKkbwAgAAAIAqRvACAAAAgCpG8AIAAACAKkbwAgAAAIAqRvACgEowduxYhYWF6c4777ximyeffFJhYWF69tlnq7EyABV18uRJhYWFafny5UaXAqAOIHgBQCUxm83atWuX4uPji53LyMjQ+vXrDagKAADUBAQvAKgk7dq1k5OTk1atWlXs3Pr16+Xi4iJ/f38DKgMAAEYjeAFAJXF1dVXfvn1LDF7ffvutbrrpJtnb2xc79/3332vkyJGKiIjQ9ddfr5deekkZGRmSpAEDBigsLKzEr5MnT0qSNm7cqLvvvltdunRR9+7d9dRTTykuLs7mNZ566qkS+yjrFqqiWyhL+rrUnj179OCDD6p79+7q3LmzHn30UR0+fNh6fsuWLQoLC9OWLVskSYcOHdLAgQN155136q233rria7z11luSpM8++0xDhgxReHi4zfmybttcunRpif1eel3R7WRltatoDeX9bEp7/SudL/p7ePbZZzVgwACb1128eLHNZ3jp62zfvt2m7SeffKKwsDCbPrKysjRjxgwNGjRI4eHh6ty5sx544AFFR0fbXHulusaOHWvTpqiOklw+PoqMHTvWpp/s7Gy9/fbbGjx4sCIiIjRo0CDNmzdPBQUFNtdcXsuWLVvKdW1ZLBaLJk+erMjISP3yyy/lvg4AJKn4bwAAgAobOnSonnjiCcXHxysgIECSdP78ef3888/64IMP9PPPP9u0//rrr/X000/rlltu0RNPPKFTp05p5syZOnLkiD744APNmTNHOTk5Onv2rB5//HE99thj6tevnySpUaNGWrFihZ555hkNGzZMjzzyiM6dO6fZs2dr9OjR+uKLL+Tj4yOp8BfW0aNHa+TIkZJk7a882rVrp3/961/Wnz/77DN9/vnn1p83b96scePGqXv37nr55ZeVnZ2td999V3feeaeWLl2qli1bFuvz9ddfV3h4uB577DF5eXmpd+/ekqSpU6dKkvX1AgICtHXrVk2ZMkW33XabpkyZIjc3N0kqV/1ZWVmKiIjQlClTrMeudN2ln+3l7Spaw9V8Ns8//7zat29f4usvWbJEkrRv3z698MILxdpeLjU1VW+++WaJ59zc3PTDDz+oS5cu1mPffvutzGbbf4udNGmStm3bpokTJ6pZs2Y6ceKEZs2apaeeekorV66UyWSytr3tttt0++23W38u+nusTBaLRY8++qh27dqlxx9/XG3atNGWLVv05ptvKjY2Vi+++KK17eVjtmXLluW+tjQvvfSSvvnmG7399tvq1atXpb9HAHUbwQsAKlG/fv3k4uKiVatW6f7775ckrV27Vj4+Pja/6EqFv0hOnz5dvXv31vTp063Hg4ODdf/99+unn36yBoGi2a1mzZqpY8eOkqSCggJNnz5dvXr10owZM6zXd+7cWUOHDtV7772nSZMmSZIyMzMVHBxsvbaov/Jwd3e3XidJGzZssDk/Y8YMNW/eXPPmzZOdnZ0kqVevXrrxxhs1e/ZszZo1y6b9iRMn9Msvv+irr75Sq1atJMkaUt3d3SXJ5vVWrlwpSfrHP/5hDTyS5OjoWGbtmZmZ8vX1tenvStdd+tle3m737t
0VquFqPpvQ0NArvn7R8ezs7BLbXm727NkKCgrSuXPnip3r06eP1q1bp7///e+SpPj4eO3cuVNdu3bVqVOnJEk5OTm6cOGCpkyZoqFDh0qSunXrpvPnz+vVV19VYmKi/Pz8rH0GBATY1FP091iZfv75Z/3666964403dPPNN0uSrr/+ejk7O2vWrFm69957rePp8jH7008/lfvaK5kxY4aWLFmiOXPmqE+fPpX+/gDUfdxqCACVyNnZWQMGDLC53XDlypUaMmSIzQyBJB07dkzx8fEaMGCA8vLyrF9RUVFyd3fXxo0bS32t48eP6+zZsxo2bJjN8WbNmqlTp0767bffrMfi4uLk4eFRCe/QVkZGhvbs2aMhQ4ZYg4UkeXp6qn///jY1FLWfOXOmunfvXuYvukUiIyMlSe+//74SEhKUk5OjvLy8cl1bWe+7IjVc7WdTWQ4dOqQlS5bon//8Z4nnBwwYoJiYGB07dkyStGrVKnXo0EGNGze2tnF0dNR7772noUOH6syZM9q8ebMWL15s3SAmJyfnqusqKChQXl6eLBZLmW2Kvi5t+9tvv8ne3l6DBw+2uebWW2+1nr+Sa7lWkhYuXKh58+bp5ptvtpkVBYCrwYwXAFSyIUOG6PHHH1d8fLycnJy0adMmPfHEE8XapaSkSCq8LaukW7MSEhJKfZ2i6319fYud8/X11f79+yUVzqydPn1aTZo0ubo3Ug7p6emyWCxXrCE9Pd3m2KOPPipPT0+bWxXLEhUVpSlTpmjevHmaM2fOVdV36tSpUm/Jq8oarvazqSwvvfSSbr75ZnXq1KnE8/7+/goPD9e6devUokULffvttxo2bJh1vBTZsGGDXn75ZR07dkxubm5q06aNXF1dJanU8HQlc+fO1dy5c2VnZydfX1/16tVL/+///T+bDWeKZokv1a1bN0mFt082bNjQJsRKss68lfZ5Xsu1knTgwAH16tVL33zzje677z61a9eu1PYAUBKCFwBUsj59+sjNzU2rVq2Sq6urmjRpovDw8GLtPD09JRWupSn65fJSXl5epb5OgwYNJEmJiYnFzp09e1YNGzaUJEVHRysrK6vYhhiVwcPDQyaT6Yo1FNVYZNKkSVq1apUmTJighQsXlvuWtDvuuEO//PKL8vLy9Pzzz6tJkyZ67LHHSr2moKBAv//+u0aNGlWu17h8RvJaa7jaz6YyfPfdd9q7d6/NraclueGGG7Ru3ToNGTJEe/fu1Zw5c2yC1x9//KHx48dr4MCBevfdd9W0aVOZTCYtXLiw2K2mUtmfnVT4+d1xxx0qKCjQ6dOnNXPmTD300EP66quvrG2mTp1qE5QvXafl5eWlc+fOKT8/3yZAFf0DRdF4L8m1XCtJ/+///T/de++9uvnmmzVlyhR99tlnxUIcAJSFWw0BoJI5Ojpq4MCBWr16tb777jvrmpLLtWjRQj4+Pjp58qQiIiKsX/7+/poxY0axGYjLhYSEyM/PT998843N8djYWO3atUudO3eWJP34449q27atvL29r/q9FBQUlPoLpqurq8LDw/Xdd98pPz/fejw9PV0//vhjsXVt4eHhmjNnjk6dOqXXX3+93HXMmjVLP/74o1599VUNGTJEERERZa6v2rFjhzIyMtS9e/dS2xXN3ly+ucS11nC1n821ysnJ0Wuvvabx48fbrL8qycCBA/X777/rk08+UZcuXdSoUSOb83v37lV2drYefvhhNWvWzBqsikJX0WdWtCNgWZ+dVLgZTEREhDp06KAhQ4bonnvu0cGDB5WammptExISYvO/hUvX03Xr1k15eXnFdg0tCm6lfZ7Xcq1UOEPp7Oys559/Xvv27dMHH3xQ5vsFgMsx4wUAVWDo0KF65JFHZDabbXbUu5SdnZ2efPJJPf/887Kzs1P//v2VlpamuXPn6syZM2XeImc2mzVx4kRNnjxZTz31lG699VadO3dOc+bMkZeXlx544AHt27dPCxcu1M0336xdu3ZZrz
179qykwpmN5OTkYqEsOTlZR44c0YkTJ6wB7kqeeuopPfjgg3r44Yd19913Kzc3V/PmzVNOTo7Gjx9frL2/v7+eeOI
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"inertias = []\n",
|
|||
|
"clusters_range = range(1, 11)\n",
|
|||
|
"for i in clusters_range:\n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" kmeans.fit(data_scaled)\n",
|
|||
|
" inertias.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range, inertias, marker='o')\n",
|
|||
|
"plt.title('Метод локтя для оптимального k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Расчет коэффициентов силуэта"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 110,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAC2wklEQVR4nOzdeVTVdf7H8eflsm+yiCJuuAFugOa+pKlZqW22j2abluWMzZTUNDWtNjVpNVljamplZTVpmaWV2Z57mqDihooLgmwKssO99/cHwS/CBfBevhd4Pc7xqN/tvu5HVN73s5lsNpsNERERERERsRsXowOIiIiIiIg0Niq0RERERERE7EyFloiIiIiIiJ2p0BIREREREbEzFVoiIiIiIiJ2pkJLRERERETEzlRoiYiIiIiI2JkKLRERERERETtToSUiIiIiImJnKrREpMm49dZbufXWW6sc++WXX7jqqqvo1q0bH330kUNf/+9//zsjRoyo9X0jRozg73//uwMSiYijREZG8uqrrxodQ0QM5Gp0ABERo2RlZTF16lS6d+/OokWLiIyMNDqSiIiINBIqtESkyXrzzTcpKirihRdeoGXLlkbHERERkUZEQwdFpEk6efIkS5cu5corr6xWZCUnJzN9+nQGDx5MbGwst956K1u3bq1yzffff8/48eOJiYlh0KBBPPHEE5w+fbrKNe+99x6XXHIJMTEx/O1vfyMvLw+A119/nYEDB9KnTx+eeOIJSkpKKu8pKSnhqaeeom/fvvTv379y6FF+fj5xcXHExsYybNgw3nvvvcp7jh07RmRkJB9//HHlseLiYkaOHFmll+5MQyc3bdpEZGQkmzZtOuPvobznr0+fPtWGPX700UeMHTuWHj16MHz4cF599VUsFkvl+TMNlfx91orXOtOPipznGzZ5pvf0R+np6Tz88MMMHDiQXr16MXHiRH799dfK838c4mWz2bj55puJjIzk2LFjVa47V9bp06dz8cUXY7Vaq7z+o48+ymWXXQZAWloaDzzwAAMGDCAmJoZbb72V7du3A/Dqq6+e9TUq8u3Zs4c///nPDBgwgO7duzN06FBmzpxJUVHROdtg3bp158xe0/cI8M0333DttdcSExNzzmf93scff0xkZCTx8fFce+21REdHc+WVV/Lll19Wue7YsWM89NBDDBkyhO7duzNw4EAeeughTp48WXnN7t27mTBhAr169WLUqFF88MEHlefO9PUL1b9Ozjes7/dfd0uWLKn292vjxo1ERUXx3//+96zP+KM5c+bQtWtXPvnkkxrfIyINm3q0RKRJsdlspKamMnPmTMrKyrjnnnuqnE9KSuLGG28kPDycxx57DDc3N5YsWcJtt93G4sWL6devH1u2bOHee+/lqquu4sEHH2T//v385z//Yd++fbz77ruYzWbWrl3L008/za233srFF1/Mhx9+yNq1awFYvXo1M2fOJCUlhdmzZ+Pp6ckjjzwCwKxZs1i+fDkPPfQQoaGhvPzyy6SkpJCSksLll1/OnDlz+PHHH3n66acJDQ1l5MiRZ3yfCxcurFIkXIgXX3yR06dP4+/vX3ls/vz5vPzyy0ycOJFHHnmE3bt38+qrr5Kamsq//vWvGj23e/fufPjhh0B50bZs2bLK3/v6+tole35+PrfccgsWi4W4uDhatmzJ4sWLufPOO/nkk08IDw+vds+nn35apRD7veuvv54bbrih8vdPPfVUlXNfffUVmzZtYuDAgQAUFRXx5ZdfMmXKFEpKSpg8eTKlpaU88cQTuLm5MXfuXG699Vb+97//ccMNNzB06NAqz33iiScACA0NJT09nQkTJhAbG8vzzz+Pu7s7P/74I2+++SYtWrTg7rvvPms7FBUVERoayiuvvHLG7DV9j0eOHOH+++9n6NCh/O1vf6v8mjjbs/7onnvuYeLEifztb39j2bJl/PWvf2X+/PkMGzaMwsJCJk2aRGBgIE888QR+fn
78+uuvvPbaa3h6evL0009TWFjIlClTaN26Na+++irbtm3jiSeeICwsjIsvvrhGGWrr1ltvZc2aNfz73/9m+PDhuLu7849//IPY2FimTp1ao2csWrSIuXPnMnPmTK699lqH5BQR56NCS0SalC1btjB8+HDc3Nx44403qn2j/dprr+Hu7s6SJUsqv9kfPnw448aN44UXXmDZsmWsWLGC8PBwnnvuOVxcXBg8eDBeXl48/vjj/PDDD4wYMYJ58+bRv39/HnvsMQD69+/P4MGDOX36NM899xw9evQAIDc3lzfeeIP77rsPq9XKhx9+yN13383EiRMBaN68OTfddBMBAQHMnj0bNzc3Lr74Yvbt28f8+fPPWGilpqbyxhtv0L17d3bt2nVB7bVjxw4+/fRTunbtSm5uLgCnT59m7ty53HTTTZXvb8iQIQQEBPDYY49xxx130KVLl/M+29fXl9jYWAB++ukngMrf28snn3xCSkoKn3zyCV27dgWgd+/eXHPNNWzZsqXan39+fj6zZ88+a9uFhoZWyfj7gnDIkCGEhoayYsWKykLr66+/pqCggGuuuYbt27dz8OBB3nvvPXr16lWZ5dJLL2Xu3Lm8+uqrhIaGVnnu71/r559/pmvXrrzyyiuV5wcNGsS6devYtGnTOQutwsJC/P39z5q9pu8xMTGR0tJS/va3vxEREXHeZ/3RrbfeyrRp0wAYOnQo1157Lf/9738ZNmwYycnJhIaG8u9//5u2bdsCMGDAAOLj49m8eTMAKSkp9OzZk3/84x+0bduWIUOGsHTpUn766SeHFVomk4nnnnuOq666ilmzZmE2mzl16hRvv/02ZrP5vPe///77zJo1i6effprrr7/eIRlFxDlp6KCINCndunXj+eefp1mzZjzyyCPVen02b97MJZdcUuUbR1dXV8aOHcvOnTvJz8/n2WefZcWKFbi4uFBWVkZZWRmXXXYZLi4ubNmyhbKyMhITExkyZEjlMzw8PIiJicHLy6uyyILyb86LiorYu3cve/fupbi4uLJXA8q/0fbw8CA6Oho3N7cq9+3atavKUL0K//73v+nTpw+XXHLJBbWVzWZj5syZXH/99URFRVUe//XXXykqKmLEiBGV77+srKxymOC6deuqPOf31/xxWF1Nc9T13q1bt9KmTZvKIgvAy8uLr776qkqvTYW5c+cSGBjILbfcUuvXcnFx4dprr2XNmjUUFhYC5YXeoEGDCA0NpV+/fmzfvp3Y2FgsFgtlZWX4+/szePBgtmzZct7nDxkyhHfffRcPDw+SkpL45ptveP3118nOzq4y/PRMUlNT8fPzq/V7+qPu3bvj6urKu+++S0pKCiUlJZSVlWGz2Wp0/+97c0wmE5deeikJCQkUFRXRtWtXli5dSuvWrUlOTuaHH35g0aJFHDx4sPL9de7cmddff522bdtSUlLCjz/+SE5ODp06daryOlartcrX3ZnyVVxTk+xt27ZlxowZfPLJJ3z00Uc89thjlcXguXz33Xc89dRT9OnThxtvvPG814tI46IeLRFpUnx9fbn22mvp2LEjt9xyC3/961/58MMPKz+ZzsnJoXnz5tXua968OTabjby8PHx8fPDw8ADKv/H8vdzcXLKysrBYLAQGBlY5FxAQQLNmzaocqxh6lZmZWVk0/fG+Zs2aERAQUO2+srKyKnNXoLxQXLt2LStXrmTVqlU1aZKzWrFiBcnJycybN49///vflcdPnToFcNYelPT09Mpfp6SkVGujuuRYsWIFJpOJ4OBgLrroIu6///5q31yfyalTpwgODq7R6yQnJ/P222+zcOFCjh8/Xqes1113HfPmzWPNmjUMGDCADRs2MHv27Mrz7u7uQPm8rd/P1alJz4jVauWll17ivffeo6CggFatWhEdHV35tXguKSkptG7dug7vqKq2bdsya9YsXnrppcphnhX69et33vtbtGhR5ffBwcHYbDZyc3Px9PTkzTffZN68eZw6dYrmzZ
vTo0cPvLy8qs1/zM3NpW/fvgCEhIRwxRVXVDl/++23V3vtP+abO3cuc+fOxWw207x5c4YMGcL9999/1oVxxowZw/P
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for i in clusters_range[1:]: \n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
" score = silhouette_score(data_scaled, labels)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Построение диаграммы значений силуэта\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициенты силуэта для разных k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта: 0.478\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA00AAAJzCAYAAADTBPhFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xUVf7G8c+dmt5JQguh11Ckq6iAuq51Qf1ZsRcUu66rq7uuurquixVQLKuorCv2tWDvld6kSQudkBDSM5l2f38ERoYU0icZnrcvXpJ7Zu79zr1JmGfOuecYpmmaiIiIiIiISLUsoS5ARERERESkNVNoEhERERERqYVCk4iIiIiISC0UmkRERERERGqh0CQiIiIiIlILhSYREREREZFaKDSJiIiIiIjUQqFJRERERESkFgpNIiIiIiIitVBoEpEGmzRpEr179w76M2zYMC666CLmz58f6vJEJMz17t2badOmVdn+66+/Mnr0aI499liys7NrfP60adPo3bs3WVlZlJSUVPuY//73v/Tu3Ztx48Y1Vdki0gYpNIlIo/Tr1485c+YwZ84cXn31VR566CHsdjuXX34569atC3V5InKYWbduHZdccgmRkZHMnj2bzMzMQz7H6/Xy5ZdfVts2d+7cJq5QRNoihSYRaZSYmBgGDx7M4MGDGTp0KMcffzzTpk3DYrHw9ttvh7o8ETmMbNiwgYsvvpjo6Ghmz55N586d6/S8I444go8++qjK9pycHBYuXEjfvn2bulQRaWMUmkSkyUVGRuJ0OjEMI7Bt0qRJTJo0KehxjzzyCL179w4KV7Nnz2b8+PEMGTKECy+8kF9//RWA//znP/Tu3ZtNmzYF7eN///sfffv2ZefOnQB8/vnnnH/++QwZMoQBAwZw0kkn8Z///CfoOXfccUeVYYX7/2zbti3wmIOH47z22mtVhgPNnTuXk08+mcGDBzNx4kQWLlwY9JxD1TNv3jx69+7NvHnzgp538Pmqy/lzu93885//5Nhjj6Vv375Br6u2AHvwvh944AGysrL49ttvgd+GMFX358C663Lud+/ezZ/+9CdGjx4duMZLliwBYNy4cYe8LgsXLuTCCy9k0KBBjBgxgj/96U/k5+cH9v/222/Tu3dvli1bxoQJExg4cCCnnXYaH3/8cVAdxcXF/OMf/+D4448nKyuLU089lTfffDPoMQfW06dPH4YPH87111/P3r17azyXABs3buS6665jxIgRDB8+nKuvvpoNGzbU+Pjazu+B1y07O5sbbriBo446isGDBzNp0iQWLVoUaN+2bVvgee+9917QMb766qtA24Hmzp3LxIkTGTJkCEcddRR//etfKSwsrFLbgar7Xhw3bhx33HFHjV8fbH+tB76+xYsXc84555CVlcVRRx3F/fffj8vlqnEfB9uwYQMXXXQRsbGxzJ49mw4dOtT5uSeffDLff/99lSF6H3/8MV27dqVPnz5VnvP5558zceLEQL1///vfKSsrq/KYuvz8//TTT1x22WUMGjSIo446in/961/4fL7A43744Qf+7//+jyFDhjB8+HCuueaaWr+nRKTpKTSJSKOYponX68Xr9eLxeMjNzeWRRx7B7XZz5pln1vi8LVu2MGvWrKBtn376Kffffz+nnHIKM2bMwOfzMXnyZNxuN6eddhpOp5P//e9/Qc959913GT16NO3bt+frr79mypQp9O/fn6eeeopp06bRuXNn7rvvPpYtWxb0vHbt2gWGFc6ZM4drrrmm1tdZWFjI448/HrRt+fLl3HbbbQwePJinn36a9u3bM3nyZPLy8gDqVU99VXf+nnvuOV566SUuvvhiXnrpJebMmcP06dPrtd/ly5fz3//+l8cff5whQ4YEtR14vv76178GtdXltZaWlnLeeecxb948/vjHPzJ9+nScTieXXXYZ2dnZTJ8+Pajma665JnC81NRUFixYwCWXXEJERASPP/44f/7zn5k/fz
4XXXRRlTfXV199NePHj2f69Ol07dqVm266iW+++QYAl8vF+eefz/vvv88VV1zBU089xdChQ7nrrruYOXNm0H6OPfZY5syZwyuvvMKtt97KDz/8wAMPPFDj+cvJyeGcc84hOzubv/3tb/zrX/8iLy+Piy++mIKCglrP/YHn9+Drtn79eiZOnMi2bdu4++67mTp1KoZhcPHFF1e5fzA6OrrKULO5c+disQT/k//UU09xyy23MHjwYJ588kmmTJnCJ598wqRJk+oVVprCzp07ufzyy0lMTGT69OnccMMN/O9//+P222+v0/M3btzIxRdfTExMDLNnzyYtLa1ex//d736Hz+er9rydcsopVR7//vvvM2XKFLp168aMGTO47rrreO+997j22msxTROo38//bbfdxtChQ5k5cyannnoqzz//PG+88QYAW7du5dprr2XAgAE8/fTTPPDAA2zatImrrroKv99fr9cpIg1nC3UBItK2LViwgP79+1fZfsstt9C9e/can/fggw/Ss2dPVq5cGdiWn5/P+eefzy233AJU9pzs/5S+b9++nHDCCbz33nvceOONGIbBrl27+Pnnn/nXv/4FVL6xnDBhAnfddVdgn0OGDGHkyJHMmzePQYMGBbY7HA4GDx4c+Hrjxo21vs4nn3ySDh06BPUy7Nq1i9/97nf8/e9/x2KxkJKSwqmnnsrSpUs5/vjj61VPfVV3/pYvX06fPn247LLLAtv299DU1f6evvHjx1dpO/B8VVRUBLXV5bW+8847bN++nXfeeScw3OmII47gD3/4AwsWLODss88OqjkjIyPomI888ghdu3blmWeewWq1AjBo0CBOOeUU3nrrLS644ILAYydNmsSUKVMAGDNmDBMmTGDGjBkce+yxvP322/z666+89tprgWA4ZswYvF4vTz31FOeeey4JCQkAJCUlBWoYPnw4P/74Y9A5P9isWbNwu928+OKLtGvXDoA+ffpw3nnnsWzZMo499tgan3vgaz34uk2fPh2Hw8HLL79MTEwMAMcddxynnnoqDz/8cFAv2THHHMN3332H2+3G4XBQUVHBF198wfDhwwM9g4WFhTz99NP83//9X1AA7tWrFxdccEGV89ncnnvuORITE5kxY0bg2losFu6++27Wrl1bpbfrQNnZ2Vx00UXk5eXh8XgaFCRSUlIYPnw4H330EaeffjoA27dvZ9myZTz88MM8/fTTgceapsnUqVMZM2YMU6dODWzPzMzkkksu4ZtvvuG4446r18//2WefHfh+HT16NJ9//jlff/015557LsuXL8flcnH11VcHwmB6ejpffPEFZWVlge8HEWleCk0i0ij9+/fn3nvvBSrfTBQVFfHtt9/y2GOPUVZWxs0331zlOd9++y0//vgjzz33HBdddFFg+7nnnguA3++nrKyMTz/9lIiICDp27AjAWWedxQcffMDChQsZPnw47777LtHR0ZxwwgkAXHHFFUBlj8amTZvYsmULK1asACoDWEP9+uuvgd6G/TUCnHjiiZx44omYpklZWRkfffQRFouFrl27Nms9NZ2/rKwsnn32WT755BNGjRpFdHR0nd9AmqbJkiVLmDt3bpUerLqoy2tdtGgRnTp1Cro/JDIykk8++eSQ+y8vL2fZsmVcfvnlgd5NgM6dO9O9e3d++OGHoDf5EyZMCPzdMAxOOOEEpk2bhsvlYv78+XTs2LFKT9rpp5/Om2++GRRu9h/L7/ezZs0aFi1axJFHHlljnYsWLWLw4MGBwASVb3C/+uqrQ77G2syfP5+xY8cGvUG22WyBXtnS0tLA9lGjRvHtt98yb948xowZw7fffktMTAzDhg0LhKalS5fidrs59dRTg44zbNgwOnbsyPz58xsdmvafO4vFUqWXaz+/34/X62XhwoUcffTRgcAEleEPKs9pbaHpgw8+YMCAATz22GNcdtll/PGPf2TWrFlBx/T5fIEeIKj8njjwWFA5RO/vf/87JSUlxMTE8O
GHH9K/f3+6dOkS9LiNGzeya9curr766sD3IVSG6piYGH744QeOO+64ev38H/y9mJ6eHhjqN2jQIJxOJ2eddRYnnXQ
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Применение K-Means\n",
|
|||
|
"# ========================\n",
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
|
|||
|
"df_clusters = kmeans.fit_predict(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg = silhouette_score(data_scaled, df_clusters)\n",
|
|||
|
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью K-Means')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Средний коэффициент силуэта, равный 0.478, указывает на хорошую кластеризацию. \n",
|
|||
|
"\n",
|
|||
|
"Средний коэффициент силуэта (silhouette score) указывает на качество кластеризации, измеряя, насколько хорошо точки внутри одного кластера близки друг к другу по сравнению с точками из других кластеров. Значения коэффициента силуэта находятся в диапазоне от -1 до 1:\n",
|
|||
|
"\n",
|
|||
|
"1: Указывает на идеально плотные и четко разделенные кластеры. \n",
|
|||
|
"0: Указывает на перекрытие кластеров или слабую структуру кластеризации. \n",
|
|||
|
"Отрицательные значения: Указывают, что точки в кластере расположены ближе к другому кластеру, чем к своему."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 112,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта (агломеративная кластеризация): 0.409\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA00AAAJzCAYAAADTBPhFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hTZf8G8Ptkdjfdk26gBVpmGcpG0VcUBQciggiIKCgOHLzqDyeiL7jYQ0VAFBFEEBAFlSV7j7JpoUD3btPM8/ujNjY0LU2bNm16f66LS3ue5OSb86Rp7jzPeY4giqIIIiIiIiIiskhi7wKIiIiIiIgaM4YmIiIiIiKiajA0ERERERERVYOhiYiIiIiIqBoMTURERERERNVgaCIiIiIiIqoGQxMREREREVE1GJqIiIiIiIiqwdBERERERERUDZm9CyCqDyNHjsT+/fvNtrm7u6NNmzaYNGkSunbtaqfKiIioqfrwww9RXFyMl19+GWfPnsXzzz+PPXv2QCqV2rs0IqpnDE3ksNq0aYNp06YBAAwGA3Jzc/Hdd99h7NixWLt2LVq2bGnnComIqCkZPXo0Hn/8cXTv3h1yuRxvvfUWAxNRMyGIoijauwgiWxs5ciQAYPny5WbbS0pK0KNHDzz22GN47bXX7FEaERE1YTqdDleuXIGPjw9UKpW9yyGiBsJzmqhZcXZ2hlKphCAIpm0jR440haxys2bNQuvWrbF27VrTthUrVmDAgAHo2LEjHn/8cZw7dw4A8O2336J169a4fPmy2T5+/vlnxMXF4caNGwCArVu34rHHHkPHjh3Rrl073H333fj222/N7vP666+jdevWFv+lpqaabtO/f3+z+33//fdo3bo1Zs+ebdq2adMm3HPPPejQoQOGDh2KgwcPmt3nVvXs27cPrVu3xr59+8zud/Pxqsnx02q1+Oijj9CnTx/ExcWZPa+Kx/hmN+/7gw8+QHx8PHbs2AEAmD17dpXHq2LdNTn2GRkZeO2119CjRw9THx85cgQA0L9//1v2y8GDB/H444+jffv26Nq1K1577TXk5OSY9r927Vq0bt0ax44dw5AhQ5CQkID77rsPv/76q1kdhYWF+PDDD3HHHXcgPj4e9957L3788Uez21SsJzY2FomJiXjuueeQm5tb5bEEgEuXLpmmpyYmJuLpp5/GxYsXq7x9dce3Yr8lJyfj+eefx+23344OHTpg5MiROHTokKk9NTXVdL/169ebPcaff/5paqto06ZNGDp0KDp27Ijbb78d//d//4f8/PxKtVVk6bXYv39/vP7661X+fLPyWis+v8OHD2PYsGGIj4/H7bffjvfeew+lpaVV7qN8P6+++ip69uyJtm3bokePHnj11VfN+sjS6yo1NbXGr+uMjAxMnToVffr0QUJCAh566CFs27bNrI7y+82fP99s+7lz5yq9hgHbvY6re/4VXw83/yt/b6vJ+0p5LeX/2rVrh7vuusvsNWbpdVJ+XCq+X9b0WM6ePRtyuRzR0dHw9PTEo48+WukYVvdYxcXFGDlyJNq0aQONRmN6rlUdj3IGgwGLFi3Cvffei4SEBHTo0AGPPvoo9u7da/ZYR48exZgxY9CpUyd0794dL730EtLT02t0zAFg9erVGDRoENq1a4e+ffti9uzZMBgMpvbXX38dI0eOxI8//oh+/fqhY8eOeOKJJ3DmzBnTbcr7peIxOX/+PNq2bWvWp0lJSRgxYgQ6duyIO+64A99//73Zczlz5gwmTZqE7t27o23btujVqxfef/99s9+9m/sRqNznll4DO3fuROvWrU3vBZZ+7zUaDQYMGGDx9UPNE0MTOSxRFKHX66HX66HT6ZCZmYlZs2ZBq9XiwQcfrPJ+V65cwdKlS822/fbbb3jvvfcwaNAgzJ07FwaDARMmTIBWq8V9990HpVKJn3/+2ew+69atQ48ePRAUFIS//voLEydORNu2bTFv3jzMnj0bLVq0wL
vvvotjx46Z3c/Pzw+rVq0y/XvmmWeqfZ75+fn47LPPzLYdP34cU6ZMQYcOHTB//nwEBQVhwoQJyMrKAgCr6rGWpeO3ePFifPPNN3jiiSfwzTffYNWqVZgzZ45V+z1+/Di+++47fPbZZ+jYsaNZW8Xj9X//939mbTV5rsXFxRg+fDj27duHV155BXPmzIFSqcSYMWOQnJyMOXPmmNX8zDPPmB7P398fBw4cwOjRo+Hk5ITPPvsM//3vf7F//36MGjWq0ofrp59+GgMGDMCcOXMQGRmJF154Adu3bwcAlJaW4rHHHsOGDRswbtw4zJs3D507d8Ybb7yBBQsWmO2nT58+WLVqFZYvX46XX34Zu3fvxgcffFDl8UtPT8ewYcOQnJyMt99+G//73/+QlZWFJ554Anl5edUe+4rH9+Z+u3DhAoYOHYrU1FS8+eabmDlzJgRBwBNPPFHpvEJXV1f88ccfZts2bdoEicT8T9G8efPw0ksvoUOHDvjiiy8wceJEbNmyBSNHjrxlWLG1GzduYOzYsfDy8sKcOXPw/PPP4+eff8arr75a5X3UajVGjRqFixcvYtq0afjyyy8xatQobNy4EZ9++qnZbcv7seLrqVx1r+usrCw89NBDOHjwIF588UXMnj0bISEhmDhxYqVgWtPjbqvX8a2ev7+/f6X3t5t/x25m6X2lXPl9586di6ioKLz22muVvsSqjjXHsqKff/7Z9MVKTa1cuRJZWVn45ptvoFAoTNvbtGlj1t8PPfSQ2f1mzpyJefPmYdiwYViyZAnee+895OXlYfLkyVCr1QCA06dP4/HHH4dGo8HHH3+Md955BydPnsTYsWNrdMwXLlyIt956Cz169MCCBQswYsQILF68GG+99ZZZLUlJSfj0008xadIk/O9//0Nubi4ef/xxZGRkVPm8P/jgA+j1etPParUaTz31FPR6PWbPno3Bgwdj2rRppi/EMjIyMGLECKjVasyYMQOLFy/GoEGDsHz5cixbtsyqY34znU6H6dOn3/J2S5YsqTYMU/PDc5rIYR04cABt27attP2ll15CdHR0lfebPn06WrZsiVOnTpm25eTk4LHHHsNLL70EoGzkpPxb+ri4ONx5551Yv349Jk+eDEEQkJaWhr179+J///sfgLIPlkOGDMEbb7xh2mfHjh3RrVs37Nu3D+3btzdtVygU6NChg+nnS5cuVfs8v/jiCwQHB5t9g52Wloa77roL77//PiQSCXx9fXHvvffi6NGjuOOOO6yqx1qWjt/x48cRGxuLMWPGmLZZ+8eofKRvwIABldoqHq/yb2/L1eS5/vTTT7h27Rp++uknxMXFAQA6deqEBx54AAcOHMDDDz9sVnNYWJjZY86aNQuRkZFYuHCh6fyG9u3bY9CgQVizZg1GjBhhuu3IkSMxceJEAECvXr0wZMgQzJ07F3369MHatWtx7tw5fP/996Zg2KtXL+j1esybNw+PPvqoaTqQt7e3qYbExET8/fffZsf8ZkuXLoVWq8XXX38NPz8/AEBsbCyGDx+OY8eOoU+fPlXet+Jzvbnf5syZA4VCgWXLlsHNzQ0A0LdvX9x77734+OOPzUbJevfujZ07d0Kr1UKhUECj0WDbtm1ITEw0jaDk5+dj/vz5eOSRR8yCQqtWrTBixIhKx7O+LV68GF5eXpg7d66pbyUSCd58802cPXvW4rfQycnJCAwMxEcffYQWLVoAALp3745jx45VCpIV+/Fm1b2uv/76a+Tk5GDLli0ICQkBUBbARo8ejY8//hj33nuvKRT17t0bv/76KzIyMkyhbPPmzWbHHbDd6/hWz7/ie1z5+1tcXBxCQ0MtHgfA8vtKuYr3DQoKwh9//IGkpCRERkZWub/aHstyxcXFmDlzJtq2bVvt711FBoPBdF5tYmKiWZubm5tZf+/cudOsPSMjAy+++KLZSI1SqcRzzz2Hs2fPokOHDliwYAFUKhW++uorKJVKAIC/vz9efvllXLx4sd
pjXlhYaAplb775JgCgZ8+eUKlUePPNN/Hkk0+azgMuLCzEggUL0KVLFwBAQkIC7rjjDixbtgxTpkyp9Ly3bNmCY8e
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Агломеративная кластеризация\n",
|
|||
|
"# ========================\n",
|
|||
|
"agg_cluster = AgglomerativeClustering(n_clusters=3) \n",
|
|||
|
"labels_agg = agg_cluster.fit_predict(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg_agg = silhouette_score(data_scaled, labels_agg)\n",
|
|||
|
"print(f'Средний коэффициент силуэта (агломеративная кластеризация): {silhouette_avg_agg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=labels_agg, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Значение коэффициента силуэта лежит в диапазоне от -1 до 1. Ближе к 1: Хорошо сформированные, плотные кластеры, четко отделенные друг от друга. \n",
|
|||
|
"\n",
|
|||
|
"Ближе к 0: Кластеры пересекаются или слабо разделены, не имеют четких границ. Точки расположены одинаково близко как к своему кластеру, так и к соседним. \n",
|
|||
|
"Ближе к -1 (Отрицательные значения): Некоторые точки скорее относятся к другим кластерам, чем к текущему (ближе к центрам других кластеров). Очень плохая кластеризация. \n",
|
|||
|
"Ближе к 1: Все точки внутри каждого кластера плотно сгруппированы и значительно удалены от точек других кластеров. Свидетельствует о четкой и хорошо разделенной структуре данных. Единица говорит об идеальной кластеризации.\n",
|
|||
|
"\n",
|
|||
|
"Средний коэффициент силуэта, равный 0.409, указывает на то, что кластеры имеют умеренно хорошее разделение, но могут иметь нечеткие границы и неоптимальный выбор числа кластеров. Это может быть связано с особенностями данных, затрудняющими их разделение."
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aimenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|