AIM-PIbd-31-Belianin-N-N/laboratory_5/lab5.ipynb

1378 lines
2.5 MiB
Plaintext
Raw Normal View History

2024-11-16 03:23:52 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Начинаем работу ... \n",
"\n",
"Датафрейм: Продажа домов в округе Кинг (вариант-6) \n",
"https://www.kaggle.com/datasets/harlfoxem/housesalesprediction"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n",
" 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n",
" 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n",
" 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n",
" dtype='object')\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import silhouette_score\n",
"\n",
"# Подключим датафрейм и выгрузим данные\n",
"df = pd.read_csv(\".//static//csv//kc_house_data.csv\")\n",
"\n",
"print(df.columns)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>date</th>\n",
" <th>price</th>\n",
" <th>bedrooms</th>\n",
" <th>bathrooms</th>\n",
" <th>sqft_living</th>\n",
" <th>sqft_lot</th>\n",
" <th>floors</th>\n",
" <th>waterfront</th>\n",
" <th>view</th>\n",
" <th>...</th>\n",
" <th>grade</th>\n",
" <th>sqft_above</th>\n",
" <th>sqft_basement</th>\n",
" <th>yr_built</th>\n",
" <th>yr_renovated</th>\n",
" <th>zipcode</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" <th>sqft_living15</th>\n",
" <th>sqft_lot15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>7129300520</td>\n",
" <td>20141013T000000</td>\n",
" <td>221900.0</td>\n",
" <td>3</td>\n",
" <td>1.00</td>\n",
" <td>1180</td>\n",
" <td>5650</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>1180</td>\n",
" <td>0</td>\n",
" <td>1955</td>\n",
" <td>0</td>\n",
" <td>98178</td>\n",
" <td>47.5112</td>\n",
" <td>-122.257</td>\n",
" <td>1340</td>\n",
" <td>5650</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>6414100192</td>\n",
" <td>20141209T000000</td>\n",
" <td>538000.0</td>\n",
" <td>3</td>\n",
" <td>2.25</td>\n",
" <td>2570</td>\n",
" <td>7242</td>\n",
" <td>2.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>2170</td>\n",
" <td>400</td>\n",
" <td>1951</td>\n",
" <td>1991</td>\n",
" <td>98125</td>\n",
" <td>47.7210</td>\n",
" <td>-122.319</td>\n",
" <td>1690</td>\n",
" <td>7639</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>5631500400</td>\n",
" <td>20150225T000000</td>\n",
" <td>180000.0</td>\n",
" <td>2</td>\n",
" <td>1.00</td>\n",
" <td>770</td>\n",
" <td>10000</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>6</td>\n",
" <td>770</td>\n",
" <td>0</td>\n",
" <td>1933</td>\n",
" <td>0</td>\n",
" <td>98028</td>\n",
" <td>47.7379</td>\n",
" <td>-122.233</td>\n",
" <td>2720</td>\n",
" <td>8062</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2487200875</td>\n",
" <td>20141209T000000</td>\n",
" <td>604000.0</td>\n",
" <td>4</td>\n",
" <td>3.00</td>\n",
" <td>1960</td>\n",
" <td>5000</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>7</td>\n",
" <td>1050</td>\n",
" <td>910</td>\n",
" <td>1965</td>\n",
" <td>0</td>\n",
" <td>98136</td>\n",
" <td>47.5208</td>\n",
" <td>-122.393</td>\n",
" <td>1360</td>\n",
" <td>5000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1954400510</td>\n",
" <td>20150218T000000</td>\n",
" <td>510000.0</td>\n",
" <td>3</td>\n",
" <td>2.00</td>\n",
" <td>1680</td>\n",
" <td>8080</td>\n",
" <td>1.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>...</td>\n",
" <td>8</td>\n",
" <td>1680</td>\n",
" <td>0</td>\n",
" <td>1987</td>\n",
" <td>0</td>\n",
" <td>98074</td>\n",
" <td>47.6168</td>\n",
" <td>-122.045</td>\n",
" <td>1800</td>\n",
" <td>7503</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" id date price bedrooms bathrooms sqft_living \\\n",
"0 7129300520 20141013T000000 221900.0 3 1.00 1180 \n",
"1 6414100192 20141209T000000 538000.0 3 2.25 2570 \n",
"2 5631500400 20150225T000000 180000.0 2 1.00 770 \n",
"3 2487200875 20141209T000000 604000.0 4 3.00 1960 \n",
"4 1954400510 20150218T000000 510000.0 3 2.00 1680 \n",
"\n",
" sqft_lot floors waterfront view ... grade sqft_above sqft_basement \\\n",
"0 5650 1.0 0 0 ... 7 1180 0 \n",
"1 7242 2.0 0 0 ... 7 2170 400 \n",
"2 10000 1.0 0 0 ... 6 770 0 \n",
"3 5000 1.0 0 0 ... 7 1050 910 \n",
"4 8080 1.0 0 0 ... 8 1680 0 \n",
"\n",
" yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
"0 1955 0 98178 47.5112 -122.257 1340 \n",
"1 1951 1991 98125 47.7210 -122.319 1690 \n",
"2 1933 0 98028 47.7379 -122.233 2720 \n",
"3 1965 0 98136 47.5208 -122.393 1360 \n",
"4 1987 0 98074 47.6168 -122.045 1800 \n",
"\n",
" sqft_lot15 \n",
"0 5650 \n",
"1 7639 \n",
"2 8062 \n",
"3 5000 \n",
"4 7503 \n",
"\n",
"[5 rows x 21 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>price</th>\n",
" <th>bedrooms</th>\n",
" <th>bathrooms</th>\n",
" <th>sqft_living</th>\n",
" <th>sqft_lot</th>\n",
" <th>floors</th>\n",
" <th>waterfront</th>\n",
" <th>view</th>\n",
" <th>condition</th>\n",
" <th>grade</th>\n",
" <th>sqft_above</th>\n",
" <th>sqft_basement</th>\n",
" <th>yr_built</th>\n",
" <th>yr_renovated</th>\n",
" <th>zipcode</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" <th>sqft_living15</th>\n",
" <th>sqft_lot15</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>count</th>\n",
" <td>2.161300e+04</td>\n",
" <td>2.161300e+04</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>2.161300e+04</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" <td>21613.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>mean</th>\n",
" <td>4.580302e+09</td>\n",
" <td>5.400881e+05</td>\n",
" <td>3.370842</td>\n",
" <td>2.114757</td>\n",
" <td>2079.899736</td>\n",
" <td>1.510697e+04</td>\n",
" <td>1.494309</td>\n",
" <td>0.007542</td>\n",
" <td>0.234303</td>\n",
" <td>3.409430</td>\n",
" <td>7.656873</td>\n",
" <td>1788.390691</td>\n",
" <td>291.509045</td>\n",
" <td>1971.005136</td>\n",
" <td>84.402258</td>\n",
" <td>98077.939805</td>\n",
" <td>47.560053</td>\n",
" <td>-122.213896</td>\n",
" <td>1986.552492</td>\n",
" <td>12768.455652</td>\n",
" </tr>\n",
" <tr>\n",
" <th>std</th>\n",
" <td>2.876566e+09</td>\n",
" <td>3.671272e+05</td>\n",
" <td>0.930062</td>\n",
" <td>0.770163</td>\n",
" <td>918.440897</td>\n",
" <td>4.142051e+04</td>\n",
" <td>0.539989</td>\n",
" <td>0.086517</td>\n",
" <td>0.766318</td>\n",
" <td>0.650743</td>\n",
" <td>1.175459</td>\n",
" <td>828.090978</td>\n",
" <td>442.575043</td>\n",
" <td>29.373411</td>\n",
" <td>401.679240</td>\n",
" <td>53.505026</td>\n",
" <td>0.138564</td>\n",
" <td>0.140828</td>\n",
" <td>685.391304</td>\n",
" <td>27304.179631</td>\n",
" </tr>\n",
" <tr>\n",
" <th>min</th>\n",
" <td>1.000102e+06</td>\n",
" <td>7.500000e+04</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>290.000000</td>\n",
" <td>5.200000e+02</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1.000000</td>\n",
" <td>1.000000</td>\n",
" <td>290.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1900.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98001.000000</td>\n",
" <td>47.155900</td>\n",
" <td>-122.519000</td>\n",
" <td>399.000000</td>\n",
" <td>651.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>25%</th>\n",
" <td>2.123049e+09</td>\n",
" <td>3.219500e+05</td>\n",
" <td>3.000000</td>\n",
" <td>1.750000</td>\n",
" <td>1427.000000</td>\n",
" <td>5.040000e+03</td>\n",
" <td>1.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>7.000000</td>\n",
" <td>1190.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1951.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98033.000000</td>\n",
" <td>47.471000</td>\n",
" <td>-122.328000</td>\n",
" <td>1490.000000</td>\n",
" <td>5100.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>50%</th>\n",
" <td>3.904930e+09</td>\n",
" <td>4.500000e+05</td>\n",
" <td>3.000000</td>\n",
" <td>2.250000</td>\n",
" <td>1910.000000</td>\n",
" <td>7.618000e+03</td>\n",
" <td>1.500000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>3.000000</td>\n",
" <td>7.000000</td>\n",
" <td>1560.000000</td>\n",
" <td>0.000000</td>\n",
" <td>1975.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98065.000000</td>\n",
" <td>47.571800</td>\n",
" <td>-122.230000</td>\n",
" <td>1840.000000</td>\n",
" <td>7620.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>75%</th>\n",
" <td>7.308900e+09</td>\n",
" <td>6.450000e+05</td>\n",
" <td>4.000000</td>\n",
" <td>2.500000</td>\n",
" <td>2550.000000</td>\n",
" <td>1.068800e+04</td>\n",
" <td>2.000000</td>\n",
" <td>0.000000</td>\n",
" <td>0.000000</td>\n",
" <td>4.000000</td>\n",
" <td>8.000000</td>\n",
" <td>2210.000000</td>\n",
" <td>560.000000</td>\n",
" <td>1997.000000</td>\n",
" <td>0.000000</td>\n",
" <td>98118.000000</td>\n",
" <td>47.678000</td>\n",
" <td>-122.125000</td>\n",
" <td>2360.000000</td>\n",
" <td>10083.000000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>max</th>\n",
" <td>9.900000e+09</td>\n",
" <td>7.700000e+06</td>\n",
" <td>33.000000</td>\n",
" <td>8.000000</td>\n",
" <td>13540.000000</td>\n",
" <td>1.651359e+06</td>\n",
" <td>3.500000</td>\n",
" <td>1.000000</td>\n",
" <td>4.000000</td>\n",
" <td>5.000000</td>\n",
" <td>13.000000</td>\n",
" <td>9410.000000</td>\n",
" <td>4820.000000</td>\n",
" <td>2015.000000</td>\n",
" <td>2015.000000</td>\n",
" <td>98199.000000</td>\n",
" <td>47.777600</td>\n",
" <td>-121.315000</td>\n",
" <td>6210.000000</td>\n",
" <td>871200.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" id price bedrooms bathrooms sqft_living \\\n",
"count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
"mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
"std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
"min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
"25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
"50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
"75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
"max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
"\n",
" sqft_lot floors waterfront view condition \\\n",
"count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
"mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
"std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
"min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
"25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
"50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
"75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
"max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
"\n",
" grade sqft_above sqft_basement yr_built yr_renovated \\\n",
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
"mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n",
"std 1.175459 828.090978 442.575043 29.373411 401.679240 \n",
"min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n",
"25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n",
"50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n",
"75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n",
"max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n",
"\n",
" zipcode lat long sqft_living15 sqft_lot15 \n",
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
"mean 98077.939805 47.560053 -122.213896 1986.552492 12768.455652 \n",
"std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n",
"min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n",
"25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n",
"50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n",
"75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n",
"max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 "
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.describe()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"id 0\n",
"date 0\n",
"price 0\n",
"bedrooms 0\n",
"bathrooms 0\n",
"sqft_living 0\n",
"sqft_lot 0\n",
"floors 0\n",
"waterfront 0\n",
"view 0\n",
"condition 0\n",
"grade 0\n",
"sqft_above 0\n",
"sqft_basement 0\n",
"yr_built 0\n",
"yr_renovated 0\n",
"zipcode 0\n",
"lat 0\n",
"long 0\n",
"sqft_living15 0\n",
"sqft_lot15 0\n",
"dtype: int64\n",
"id False\n",
"date False\n",
"price False\n",
"bedrooms False\n",
"bathrooms False\n",
"sqft_living False\n",
"sqft_lot False\n",
"floors False\n",
"waterfront False\n",
"view False\n",
"condition False\n",
"grade False\n",
"sqft_above False\n",
"sqft_basement False\n",
"yr_built False\n",
"yr_renovated False\n",
"zipcode False\n",
"lat False\n",
"long False\n",
"sqft_living15 False\n",
"sqft_lot15 False\n",
"dtype: bool\n"
]
}
],
"source": [
"# Процент пропущенных значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
" print(f'{i} Процент пустых значений: %{null_rate:.2f}')\n",
"\n",
"print(df.isnull().sum())\n",
"\n",
"print(df.isnull().any())"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"id int64\n",
"date object\n",
"price float64\n",
"bedrooms int64\n",
"bathrooms float64\n",
"sqft_living int64\n",
"sqft_lot int64\n",
"floors float64\n",
"waterfront int64\n",
"view int64\n",
"condition int64\n",
"grade int64\n",
"sqft_above int64\n",
"sqft_basement int64\n",
"yr_built int64\n",
"yr_renovated int64\n",
"zipcode int64\n",
"lat float64\n",
"long float64\n",
"sqft_living15 int64\n",
"sqft_lot15 int64\n",
"dtype: object"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Проверка типов столбцов\n",
"df.dtypes"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Атрибуты \n",
"\n",
"id - уникальный идентификатор дома \n",
"\n",
"date - Дата продажи дома \n",
"\n",
"price - Цена дома в долларах США \n",
"\n",
"bedrooms - Количество спален в доме \n",
"\n",
"bathrooms - Количество ванных комнат, включая дробные значения (например, 2.5 означает 2 ванные комнаты и 1 туалет) \n",
"\n",
"sqft_living - Жилая площадь дома (в кв. футах) \n",
"\n",
"sqft_lot - Общая площадь участка \n",
"\n",
"floors - Количество этажей в доме \n",
"\n",
"waterfront - Есть ли вид на воду (1 - да, 0 - нет) \n",
"\n",
"view - Оценка вида дома (0-4) \n",
"\n",
"condition - Оценка состояния дома (1 - плохое, 5 - отличное) \n",
"\n",
"grade - Оценка качества дома по архитектурным и строительным стандартам (1-13) \n",
"\n",
"sqft_basement - Площадь подвала дома. \n",
"\n",
"yr_built - Год постройки дома \n",
"\n",
"yr_renovated - Год последней реновации дома (0, если реновация не проводилась) \n",
"\n",
"zipcode - Почтовый индекс местоположения дома \n",
"\n",
"sqft_living15 - Средняя жилая площадь 15 ближайших соседних домов \n",
"\n",
"price_category - Категория цены дома (low, medium, high) \n",
"\n",
"**Цель:** Кластеризация домов на группы для определения схожих ценовых категорий и характеристик. \n",
"\n",
"К примеру, группировка домов для анализа рыночных трендов или определение похожих групп домов для маркетинговых или инвестиционных целей."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Очистка данных"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" price bedrooms bathrooms sqft_living sqft_lot floors waterfront \\\n",
"0 221900.0 3 1.00 1180 5650 1.0 0 \n",
"1 538000.0 3 2.25 2570 7242 2.0 0 \n",
"2 180000.0 2 1.00 770 10000 1.0 0 \n",
"3 604000.0 4 3.00 1960 5000 1.0 0 \n",
"4 510000.0 3 2.00 1680 8080 1.0 0 \n",
"\n",
" view condition sqft_basement yr_built \n",
"0 0 3 0 1955 \n",
"1 0 3 400 1951 \n",
"2 0 3 0 1933 \n",
"3 0 5 910 1965 \n",
"4 0 3 0 1987 \n"
]
}
],
"source": [
"# Удалим несущественные столбцы\n",
"columns_to_drop = ['id', 'date', 'grade', 'yr_renovated', 'sqft_living15', 'lat', 'long', 'sqft_lot15', 'sqft_above', 'zipcode']\n",
"df_cleaned = df.drop(columns=columns_to_drop)\n",
"\n",
"print(df_cleaned.head()) # Вывод очищенного DataFrame"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Визуализация парных взаимосвязей"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAASgCAYAAACAO9vxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU9b3/8fdMkslkmUyGsIQRw5iEYNgCoogLKta6VNHaxVa9wVqtrahtXG619dalrbU/q62Ke61Y02ptq5dK0Xqva92tCwErCCEMEENYwmQyWSfb749zJ8lk3+dM5vV8PHhAzpk588n5noTz/X7O9/uxtLe3twsAAAAAAAAAAMAkrJEOAAAAAAAAAAAAoCuSFwAAAAAAAAAAwFRIXgAAAAAAAAAAAFMheQEAAAAAAAAAAEyF5AUAAAAAAAAAADAVkhcAAAAAAAAAAMBUSF4AAAAAAAAAAABTIXkBAAAAAAAAAABMheQFAGDCaG9vj3QIAAAAAAAAGAUkLwDEtMLCQhUWFvb7mhtuuEEnn3zyoI+5evVqzZ49e6ShDclbb72l2bNna8WKFeP6uYOxceNGnXbaaQoGg6N63F/96ldasmSJFi5cqLVr1+rll1/W9ddf37G/rKxMJ598smpqakb1cwersLBQs2fPDvszb948nXTSSbr11lvl9/v7fX95eblmz56tZ599dpwiBgAAQCy64YYbety3dv3zj3/8Q5J08skn64YbbohwtOOP+3oAiJz4SAcAAGa3atUqrVy5ctCv//rXv65ly5aNYUQ9PfPMM8rLy9PWrVv14YcfavHixeP6+X1pamrS9ddfr//8z/+UzWYbteNu3bpVjz76qM477zydc845ys7O1g9+8IOw12RnZ+sLX/iCfv7zn+uOO+4Ytc8eijlz5ujmm2/u+Lq5uVn//ve/9etf/1qbN2/WU089JYvF0ut7p06dqqefflpZWVnjFS4AAABi1JQpU3Tffff1us/j8YxvMCbEfT0ARAbJCwAYwFBvMjMzM5WZmTlG0fRUU1Ojl156Sbfeeqsefvhh/elPfzJN8uLJJ59UfHy8TjnllFE9bnV1tSTpzDPP1JFHHtnn6y677DKddNJJuuiiizR37txRjWEwUlNTtXDhwrBtRx11lOrq6nTvvfeqpKSkx/4Qm83W5z4AAABgNHHv2T/u6wEgMlg2CgAG0HXZqJ/85Cc67rjj1NraGvaa2267TUcffbSam5t7LBtVWFioG2+8UY888ohOOukkzZ8/X9/85je1cePGsGO89tpr+spXvqIFCxbotNNO09///nd98Ytf1OrVq/uNb926dWppadGyZct09tln68UXX+wY3A959tlnNWfOHP3lL3/RcccdpyVLlqi0tFSS9NJLL+krX/mK5s+fr+OOO04///nPVV9fH/b+l156SRdccIEWLVqkefPm6fTTT9cf//jHfuMKBoNas2aNzjrrrLDtf//733X22WdrwYIFWrp0qa677jrt3bu3Y39bW5vuv/9+nXTSSSooKND3vvc9vfDCC5o9e7bKy8u1evXqjqW+LrroIp188skqLCzU+++/r/fff1+zZ8/We++9J8l4gmzp0qV6+OGH+4zztNNO0/e///0e28855xxdfvnlkqRdu3bpe9/7no4++mgVFBToG9/4hl5//fV+v//+zJs3T5JUUVEhybhGrrvuOn3/+9/XwoULdfHFF/c6vbysrExXXnmllixZoqOOOkrf/e53tX379o79TU1NuuOOO3TiiSdq3rx5WrFihZ5//vlhxwkAAAD0JRAI6Pbbb9cpp5yi+fPn66yzztJf//rXsNe0trbqj3/8o1asWKEFCxbopJNO0p133qmmpqaO19xwww266KKLdPPNN+uII47Ql770JbW2tuqtt97Seeedp0WLFumoo47S5ZdfHnbv2x339QAw8ZC8AIAhOOecc3TgwIGOwXHJGGx/4YUXdOaZZyohIaHX97
344ot6+eWX9V//9V/69a9/rQMHDuiqq67qSIK8++67WrVqlaZPn67Vq1frwgsv1M0336w9e/YMGNMzzzyjZcuWafLkyfryl7+s5uZm/fd//3eP17W2tuqxxx7Tbbfdph/96EfKycnRunXrdMUVVyg7O1v333+/rrzySj333HNatWpVR/Hr1157TVdccYXmzp2rBx54QKtXr9ahhx6qn/70pyopKekzrvfee0979+7Vqaee2rHtww8/1A9/+EOdeuqp+u1vf6sf/ehHevfdd3Xttdd2vOaOO+7QAw88oK9+9atavXq10tLSwqZof/3rX9dNN90kSbrpppt033336eabb9acOXM0Z84cPf3002GzLE4//XS98sorqqur6zXOs88+W6+//rpqa2s7tm3fvl1btmzROeeco7a2Nn33u99VQ0NDR2zp6em6/PLLtXPnzoGap1c7duyQJB166KEd21544QWlpKTowQcf1KWXXtrjPXv37tU3vvENeb1e3XLLLfrVr36lAwcO6KKLLlJ1dbXa29t1xRVX6E9/+pMuvvhiPfjgg1q0aJGuvvpqrV27dlhxAgAAIHa0tLT0+BPqE3TX2NioCy64QOvWrdOll16qBx54QIsXL9aNN96ohx56qON1N910U0eC48EHH9SFF16oP/zhD2H9DUn64IMPtGfPHt1///269tprVVFRoVWrVmnevHl68MEHddttt2nHjh267LLL1NbW1mtM3NcDwMQTE8tGPfzww3rzzTdVXFw8pPetXbtWjzzyiHbv3q2srCxdeeWVOuOMM8YoSgDRYPHixTrkkEP097//Xccee6wkY5B+//79Ouecc/p8X0tLi373u98pNTVVklRXV6frr79emzdv1rx587R69WrNmjVL9913X8daqRkZGbrmmmv6jeezzz7Tv//9b917772SJLfbraVLl+rpp5/WxRdf3OP13/ve93TSSSdJktrb23XnnXdq2bJluvPOOzte4/F49K1vfUuvv/66TjrpJJWWlurcc8/VjTfe2PGaRYsW6eijj9Z7772ngoKCXmN79913lZaWpsMOO6xj24cffii73a7LLrusowZGenq6Nm3apPb2dtXU1OgPf/iDVq5cqauuukqSdMIJJ+jiiy/W22+/LclYlis3N1eSlJubqzlz5khSx7ntPiV7/vz5am5u1gcffKATTzyxR5xnn322Vq9erZdeeklf/vKXJRmzQ9LS0nTyySerqqpKZWVlWrVqVcf7FyxYoPvuu2/AIuTt7e1qaWnp+Nrv9+v999/v6ICEntSSpISEBN16660d56W8vDzsWI8//njHbJYpU6ZIkg4//HCdf/75KikpUXx8vN544w395je/0Ze+9CVJ0rJly9TQ0KA777xTZ511luLjY+K/fQCIGPodAKLV559/3usyq9dee60uu+yyHtufffZZbd26VX/605+0aNEiSca9Z0tLix544AF985vf1IEDB/TXv/417BjHHXecpk6dqh/+8If65z//2XF/3dLSop/+9Kcdy++uX79ejY2N+u53v6tp06ZJMvoBL7/8surr6zvu/bvivh4AJp4JP/Pij3/8o+6+++4hv+9vf/ubbrzxRl144YVav369zjrrLF1zzTX6+OOPRz9IAFHDYrHo7LPP1ksvvdRxg7t+/Xp5PJ4+B/ElY5C96w126Aa8oaFBwWBQH3/8sU499dSwIm+nn376gDelzzzzjNLS0nTkkUeqpqZGNTU1Ou2007Rjxw69++67PV6fn5/f8e+ysjJVVlbq5JNPDnu66qijjlJqaqreeustSdKll16qX/7yl6qrq9Mnn3yi559/vmMZpv5u8nfv3q1DDjkkbNtRRx2lhoYGnXXWWbrrrrv0wQcf6Pjjj9eVV14pi8WiDRs2qLm5WV/4whfC3nf22Wf3ex76E4qhe6ch5NBDD9URRxwRNg17/fr1Ov3002Wz2TR58mTl5ubqJz/5ia6//nqtW7dObW1t+tGPfq
RZs2b1+9n/+te/NHfu3I4/xx57rK655hrNmzdPd911V1h7Z2dn91vU/MMPP9TChQs7OjiS0YF79dVXdeKJJ+qdd96
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Настройка стиля графиков\n",
"sns.set(style=\"whitegrid\")\n",
"\n",
"# Создание фигуры\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"# График 1: Площадь vs Цена\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], alpha=0.6, color='blue')\n",
"plt.title('Living Area (sqft) vs Price')\n",
"plt.xlabel('Living Area (sqft)')\n",
"plt.ylabel('Price')\n",
"\n",
"# График 2: Количество этажей vs Цена\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['floors'], y=df_cleaned['price'], alpha=0.6, color='green')\n",
"plt.title('Floors vs Price')\n",
"plt.xlabel('Floors')\n",
"plt.ylabel('Price')\n",
"\n",
"# График 3: Количество ванных комнат vs Цена\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['bathrooms'], y=df_cleaned['price'], alpha=0.6, color='red')\n",
"plt.title('Bathrooms vs Price')\n",
"plt.xlabel('Bathrooms')\n",
"plt.ylabel('Price')\n",
"\n",
"# График 4: Площадь участка vs Цена\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['sqft_lot'], y=df_cleaned['price'], alpha=0.6, color='purple')\n",
"plt.title('Lot Area (sqft) vs Price')\n",
"plt.xlabel('Lot Area (sqft)')\n",
"plt.ylabel('Price')\n",
"\n",
"# Упорядочиваем графики\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Стандартизация данных для кластеризации"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"# Стандартизация данных (StandardScaler: нулевое среднее, единичная дисперсия)\n",
"scaler = StandardScaler()\n",
"data_scaled = scaler.fit_transform(df_cleaned)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArsAAAImCAYAAABTm0IfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d5xddZ3///ycduvc6amThJCE0HvHgqjoKupaF0X9rqtfG7q6rvWnu7a1LcqqIPJlRV1RFlxBRHStgGCjCAihhfSZTDL99nvq5/P749x7M5OZJJNkkkkmn+fjkTK3nPO5dV7nfV7v11sopRQajUaj0Wg0Gs0cxJjtBWg0Go1Go9FoNAcKLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go3mEETP+9HsDv3+0Gimjxa7Gs0Rxpve9CZWr1494c+JJ57IhRdeyKc//WkKhcKk+2zcuJFPfepTvOAFL+Dkk0/mwgsv5AMf+ABPPfXULvfzH//xH6xevZrPfvaze1zTW97yFs4++2x839/lbV72spdx2WWXAbB69WquuuqqaTzameWjH/0oF110UfPniy66iI9+9KMzuo/t27fz9re/na1btx7Q/ewP5XKZd77znZxyyimcddZZbNq0adJt7rvvPlavXs1999035TauuuoqVq9efYBXOjVvetObeNOb3nTQ9jX+s3bsscdy2mmn8apXvYrvfe97hGG419t85plneP3rX38AVqvRzE2s2V6ARqM5+Bx//PF88pOfbP4cBAGPP/44V155JU8++ST//d//jRACgF/96ld8+MMfZtWqVbzrXe+ip6eH7du381//9V+87nWv45vf/CYXXHDBhO1LKbnttts45phj+MlPfsIHP/hBUqnULtfz6le/mj/+8Y/cc889vOAFL5h0/eOPP87atWv50pe+BMDNN9/MggULZuKp2C+uvvpqstnsjG7zj3/8I7/73e8O+H72h9tuu4277rqLf/3Xf2XVqlX09PTM9pL2ivHv/YPB+M9bFEUUCgXuuecevvCFL/Dggw/y1a9+FcOYfu3pF7/4BQ8//PCBWq5GM+fQYlejOQLJZrOceuqpEy4766yzqFQqfP3rX+evf/0rp556Klu2bOEjH/kIz372s/nqV7+KaZrN21988cW8/vWv5yMf+Qh33nknjuM0r/v973/P9u3bufLKK3njG9/IHXfcwWtf+9pdrueFL3whra2t3H777VOK3R//+Mdks1le9KIXAUxa+2xx/PHHz6n9TJd8Pg/AG97whuZB0eHEypUrD+r+pvq8XXTRRRx99NF87nOf44477uDlL3/5QV2TRnMkoW0MGo2myYknnghAf38/ADfccAO+7/OJT3xigtAFSKVSfOQjH+HVr371JOvDLbfcwjHHHMMZZ5zBOeecw80337zb/SYSCS655BLuvvtuyuXyhOuCIOBnP/sZL33pS5vV4Z1tDP/1X//Fi1/8Yk466SSe/exn86lPfaq5nb6+PlavXs2tt946Ybs7WxKiKOK6667jkksu4eSTT+bUU0/l0ksv5c9//vMu1z3eXtA4LT/Vn8Za97SPW2+9lY997GMAPP/5z29ue2cbQ6lU4gtf+AIveMELOOmkk7jkkkv40Y9+NGltX//61/nSl77E+eefz8knn8xb3/rWKS0H4/E8j2984xvN5/Piiy/muuuuQ0oJxKflG4/n2GOPnVF7xWOPPcZb3/pWzjnnHE4//XTe+c538swzzzSvv/XWW1m9ejV9fX2THuv4dfzhD3/gda97HaeddhpnnXUW73rXu1i/fn3z+p1tDKtXr+YHP/gBH//4xzn77LM57bTTeN/73sfw8PCE/Vx//fU8//nP5+STT+bSSy/lzjvv3K1VY0+88Y1vZP78+dx0003Ny1
zX5Stf+QoXX3wxJ554IqeffjpvectbePLJJ4H4fXb11Vc31914LUZHR/n0pz/N8573PE488UTOPvtsLr/88knPlUZzJKLFrkajabJx40YAlixZAsC9997L8ccfz/z586e8/Xnnncc//dM/0d3d3bwsn89z55138rd/+7cAvPKVr+Sxxx7j8ccf3+2+X/3qV+N5Hr/85S8nXH7PPfcwOjq6y8rwHXfcwRVXXMFll13G9ddfz+WXX85PfvKTaXmFx/PlL3+Za665hr/7u7/jW9/6Fp/97GfJ5/O8733vo1ar7fH+r33ta7n55psn/DnjjDPIZDK85CUvmdY+LrzwQt71rncBsXXh3e9+96T9uK7LG97wBn7605/ytre9jWuuuYYzzjiDj3/841x77bUTbvu9732PDRs28IUvfIF/+7d/Y82aNXzkIx/Z5WNQSvHOd76Tb33rW7z2ta/l2muv5cUvfjFf/epXm6fhP/nJT/Ka17wGiO0kU61xPFJKwjCc9Kchnhv8+c9/bvpQP//5z/Nv//ZvbNu2jUsvvXSCUN0Tvb29vPvd7+bEE0/km9/8Jp/73OfYuHEjb3/72yftczz/8R//gZSSK6+8kg9/+MPcddddfP7zn29ef/XVV/PlL3+Zv/mbv+Gaa67hlFNO4f3vf/+01zUVhmFw3nnn8eijjza9ux/+8Ie55ZZbePvb3863v/1tPvaxj/HMM8/wz//8zyileO1rXzvh+X/ta1+LUop3vOMd/OEPf+CDH/wg119/Pe95z3v405/+dNAtGxrNoYi2MWg0RyBKqQmNMYVCgfvvv59vfvObnHbaac0K7/bt2znuuOP2ats//elPkVLyile8AojtDp/5zGe46aabditATzjhBI477jh++tOf8upXv7p5+W233cbq1as56aSTprzf/fffT09PD5dddhmGYXD22WeTTqenbLTbHYODg/zTP/3ThIpfIpHgve99L08//fQerRMLFiyY4CP+7ne/y0MPPcTVV1/NihUrpr2PpUuXAnDcccdN6YW99dZbWbt2LTfddBOnnXYaAM9+9rMJw5BrrrmGSy+9lLa2NgByuRzXXHNNsyq/ZcsWrrrqKsbGxmhvb5+07XvuuYc//vGPXHnllbz0pS8F4IILLiCZTPK1r32NN7/5zaxatar5OKdjJ/n7v//7Pd4G4Ctf+QrLli3juuuua673Wc96Fi984Qv5+te/zte+9rVpbefRRx/FdV3e8Y53NA/SFixYwG9/+1uq1eouvc/HHHMMX/jCFyZs5xe/+AUA1WqV//zP/+Syyy7jgx/8YHNttVptj2ct9kRXVxdBEJDP58nlclQqFT7xiU80D5DOPvtsyuUyX/ziFxkeHp7wPms8/wMDA80zLWeeeSYA55xzDlu2bNnv9Wk0cwEtdjWaI5AHHniAE044YcJlhmFw/vnn85nPfKbpwzRNkyiK9mrbt9xyC+eccw6O41AsFoH4NPMdd9zBRz7ykd02Wr361a/m85//PAMDA8yfP598Ps9dd93Fhz/84V3e59xzz+Xmm2/mVa96FS94wQt47nOfy8te9rK99pJ+5StfAeLTwRs2bGDz5s3cddddALtNiZiKe++9l3//93/n3e9+9wQP8kzs4/7772fx4sVNodvg5S9/OT/60Y/461//ynOf+1wATjrppAn2k4ZIqtVqU4rd+++/H8uyePGLXzxp21/72te4//77WbVq1bTW2eDTn/70pPcawA9/+EN++MMfArGYfOyxx3jPe94zYb25XI7nPe95kxr2dscpp5xCIpHgNa95DS9+8Yt5znOewznnnMPJJ5+82/vtLNwXLFjQrOg/8sgjuK476Xm55JJL9ltMNiLEhBA4jsP1118PxAJ248aNbNq0aY/vkfnz5/O9730PpRR9fX1s3ryZDRs28NBDD+31e1ejmYtosavRHIGccMIJfPrTnwbiX7KJRIKFCxdOEqKLFi1q+nenIggCCoUCXV1dADzxxB
NNb+FZZ5016fa33347b3jDG3a5vZe97GX8+7//Oz//+c95y1vews9+9jOEELtt3nnJS16ClJIbb7yRa665hquuuor
"text/plain": [
"<Figure size 800x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Преобразование в DataFrame для удобства\n",
"df_scaled = pd.DataFrame(data_scaled, columns=df_cleaned.columns)\n",
"\n",
"# Понижение размерности до 2 компонент\n",
"pca = PCA(n_components=2)\n",
"kc_pca = pca.fit_transform(df_scaled)\n",
"\n",
"# Визуализация\n",
"plt.figure(figsize=(8, 6))\n",
"plt.scatter(kc_pca[:, 0], kc_pca[:, 1], alpha=0.6)\n",
"plt.title(\"PCA Visualization of Housing Data\")\n",
"plt.xlabel(\"Principal Component 1\")\n",
"plt.ylabel(\"Principal Component 2\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Агломеративная (иерархическая) кластеризация"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1kAAAJ4CAYAAACXhikUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACBs0lEQVR4nO3dd3xUVf7/8fckM0kghCodlCYgIEWKsruAArKusrqgX1ZRbOuCSLFggV17QVYpUqTZaIKisJaf7qqgomsBQRQ7Koj0ACEhQJKZTO7vjzjD9JY7mZnwej4ePB7JzJ07Z0ou933POZ9jMQzDEAAAAADAFGmJbgAAAAAAVCWELAAAAAAwESELAAAAAExEyAIAAAAAExGyAAAAAMBEhCwAAAAAMBEhCwAAAABMRMgCAAAAABMRsgAAAADARIQsAKaZOHGi2rVrF/DfxIkTE908AB4KCgrUvXt3bdmyRQUFBRo9erSeffbZRDcLAKoEa6IbAKBqqV+/vubMmeN129ixYxPUGgDB1KpVS9ddd52GDRsmwzDUrl07/etf/0p0swCgSiBkATCN0+lU9erV1bVrV6/bMzIyEtMgACGNHTtWl19+uY4cOaLTTjtN6enpiW4SAFQJDBcEYJrS0lJlZWVFtO3GjRt11VVXqUuXLurVq5fuuusu5eXlue9fvXq12rVrp127dnk9rn///l5DDx0OR9Ahir77+vLLLzVkyBB17txZf/7zn/Xf//7Xa9+FhYV69NFHNXDgQJ155pkaPHiwXn75Zb/n932eXbt2acSIEZo4caLmz5+v3/3ud+revbtuuukm7d692+vxa9as0fDhw9WtWzd16tRJF1xwgZ5//nn3/evXr3fvd9OmTV6PXbZsmdq1a6f+/fv7tefuu+/22ragoECdOnVSu3bttH79+oifP5iXXnpJQ4cOVdeuXdW5c2ddcskl+s9//uP3HgcaIhrs8xkxYoTXc7z55psaOnSounXrpt///ve69957VVBQ4L5/9uzZateunbp16ya73e712PHjx/sNSy0pKdFjjz2mfv36qVOnTvrzn/+sN9980+tx/fv314wZMzR58mT17NlTZ599tu68807l5+dH/PpDDZNdvXq1+zP1/BwOHTqkHj16BPws27Vrp/bt26tnz54aN26cDh8+7N6mXbt2mj17tlfbXO9LLO+lJJ1yyilq1aqVPv7447BDe32f64033lDPnj01bdo0Sd7fX99/nu3+/vvvNXbsWJ1zzjnq2LGj+vTpo4cffljFxcXubex2u5544gkNGDBAnTt31uDBg/Xvf/87ovdckvbs2aPbbrtNvXr1UpcuXXTNNdfo22+/de9/165dateund544w3deOON6tKli84991w9+eSTKisr8/pcfN+T2267zeszNQxDM2fOVJ8+fdS9e3fdeOON2rt3r3t7p9OphQsXavDgwercubO6du2qyy+/XJ9++mnIz1Hy/8x9fzcMQ5dffrnX8XLixIle3y1JeuGFFwJ+fwDEBz1ZAExTVFSkWrVqhd3us88+03XXXadzzjlHTzzxhAoKCjRz5kxdffXVevnllyMOalL5ibQkzZs3T3Xr1pVUfkLsG44kadSoUbrqqqt066236uWXX9Ytt9yiBQsWqF+/fiouLtbw4cN16NAhjR8/Xk2bNtWaNWv0z3/+UwcPHtSNN97o3k+/fv100003uX9v0KCBJGnt2rWqU6eO7r77bpWVlWnatGkaMWKE3njjDVWrVk3vv/++xowZo6uvvlrjxo1TcXGxli9frgcffFCdOnVSly5d3PvMzs7Wu+++q+7du7tve/PNN5WW5n9tLDs7W++//74Mw5DFYpEkvf3223I6nV7bRfP8np5//nk9/PDDGjdunLp3766CggI99dRTuv3229WtWzc1atTIve2cOXNUv359SXJ/HpJ02WWX6f/+7//cvz/wwANezzF37lzNmjVLw4cP16233qqdO3
dq5syZ+uKLL7Ry5Uqv74TFYtEnn3yifv36SZKOHTumdevWeb03hmFozJgx+vzzzzV+/Hi1bt1a77zzjm699VbZ7Xb95S9/cW+7fPlynXbaaXr00UeVl5enadOmaceOHXrhhRdksVjCvv6bbrpJl19+uaTynqEOHTq4vx+nnnqqfvzxR7/3dNq0aSosLFTNmjW9bnd9txwOh37++Wc99thjeuSRRzR16tSAn00g0byXLg6HQ5MnT474OSSpuLhYDz74oG644Qb9+c9/9rrv3nvvVceOHd2///Wvf3X/nJubqyuvvFJdu3bVlClTlJGRoQ8++EDPPfecGjRooJEjR0qSbr/9dq1bt06jR49Wly5dtG7dOk2cOFE2my3se56Xl6fLL79c1apV0z333KNq1app8eLFuvLKK/Xyyy+rdevW7vbcf//96tevn2bPnq1NmzZpzpw5On78uO64446Ar3vjxo164403vG5btGiRFixYoDvvvFMtW7bUlClTdPPNN2vlypWSpKlTp2rFihWaMGGC2rVrp/379+vJJ5/UzTffrPfff1/VqlWL6r339Oqrr2rz5s0htykoKNATTzwR83MAiB4hC4Bp8vPz3YEjlGnTpqlly5ZasGCBe3hSly5ddNFFF2nVqlW68sorI37O48ePS5K6deumOnXqSJI+/PDDgNuOGDFCY8aMkST16dNHQ4YM0ZNPPql+/fpp9erV2rp1q1544QV169bNvU1paanmzp2ryy+/XLVr15ZUHh58h0RK5SFz9erVat68uSSpVatWGjJkiF555RVdccUV+umnnzRkyBD985//dD+mW7duOvvss7V+/XqvkNO3b1+tXbvWfaK3b98+bd68WT169PDrHevdu7fWrVunL7/80t2u//znP+rZs6dX70k0z+9p586d+tvf/uYVLJs2baqhQ4dq06ZNuuiii9y3n3HGGWrWrJnfPho1auT1ntWoUcP9c0FBgebNm6dhw4bp3nvvdd/etm1bXXnllX7fCdd74wpZ7777rurXr+/V+/Dxxx/rww8/1IwZM3ThhRdKKv88i4qKNHXqVA0ePFhWa/l/gWlpaXruueeUk5MjqfzzHTNmjD788EP17ds3otd/6qmnSiofGhvs++Hy1Vdf6dVXX9UZZ5yhI0eOeN3n+diePXvq448/1jfffBN0X76ifS9dli5dquPHj+uUU06J+Ln+3//7f7LZbLrhhhv8hhm2adMm6HuwdetWnXHGGZo5c6b7e/C73/1OH330kdavX6+RI0dq69ateuutt/SPf/xD11xzjaTy7/nu3bu1fv16DR48OOR7PmPGDOXn52vFihVq2rSppPLvzYUXXqiZM2dq1qxZ7m07duzoDrF9+/bV8ePHtXjxYo0ePdrreypJZWVlevjhh9WxY0evz+X48eO66aabdO2110oq7yV78MEHdeTIEdWsWVO5ubm69dZbvXpvMzMzNW7cOP3www8hvy+hHDt2TFOnTvVrj69Zs2apSZMmXr2iAOKL4YIATJObm6uGDRuG3KaoqEhffvml+vXrJ8MwVFpaqtLSUjVv3lytW7fWRx995LV9WVmZe5vS0lK//e3bt09paWl+J0OBDBkyxP2zxWLR+eefry1btqi4uFgbNmxQ06ZN3QHL5eKLL1ZJSYm+/PLLsPs/66yz3AFLkjp06KDmzZvrs88+kyTdcMMNmjJlio4dO6avv/5ab775phYsWCBJfsPf+vfvr19++UXbtm2TJP33v/9Vly5d3CeMnnJyctSrVy+tXbtWkpSXl6f169d7hZ9on9/TxIkTdfvtt+vIkSP64osv9Oqrr7qHGIZ6XKS++OIL2e12DR482Ov2Hj16qGnTptqwYYPX7QMGDNC7774rwzAklffwuYKUyyeffCKLxaJ+/fp5fX/69++vAwcOePUu9e/f3x2wXL9brVb352bm6zcMQw8//LAuu+wytW/fPuD9paWlstvt2rJlizZt2qROnTp5beP7N+
EZLqN9LyXp4MGDevLJJ3XXXXcpMzMzotexf/9+PfXUUxo+fHjU87j+8Ic/aNmyZcrMzNRPP/2ktWvXat68ecrLy3O
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[ 2 7 2 ... 14 14 14]\n"
]
}
],
"source": [
"# Построение дендрограммы\n",
"linkage_matrix = linkage(data_scaled, method='ward')\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(linkage_matrix)\n",
"plt.title('Дендрограмма агломеративной кластеризации')\n",
"plt.xlabel('Индекс образца')\n",
"plt.ylabel('Расстояние')\n",
"plt.show()\n",
"\n",
"# Получение результатов кластеризации с заданным порогом\n",
"result = fcluster(linkage_matrix, t=60, criterion='distance')\n",
"print(result) # Вывод результатов кластеризации"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ8AAAMQCAYAAACJzMTyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACgjElEQVR4nOzdd3RUdf7/8dckkwSUUKRLEUEFXbqAoIsCIqKiX1kb1b6AgigoKLuoID8RlU5UYFEpwqKIjdXVVURZG1VWLKgoqAQIIIYi6bm/P+IMU+6UO5mbKTwf53BIptz53Dt3JnNf8/68r8MwDEMAAAAAAACADVJiPQAAAAAAAAAkL8InAAAAAAAA2IbwCQAAAAAAALYhfAIAAAAAAIBtCJ8AAAAAAABgG8InAAAAAAAA2IbwCQAAAAAAALYhfAIAAAAAAIBtCJ8AAAAAAABgG8InAEBSGDx4sAYPHmx63euvv67mzZvrgQceqOBRAYjUsGHDtGLFilgPA1Hy8ssva8iQIbEeBgAgRgifAABJ7cCBA5o8eXKshwHAgldeeUU5OTm65pprYj0URMk111yj/fv36+WXX471UAAAMUD4BABIao888oiOHTumk046KdZDARCG/Px8TZ06VcOGDVNKCh9Vk4XD4dDQoUM1ffp05efnx3o4AIAKxl90AEDS+s9//qN33nlHw4cPV40aNbyuKy0t1fz583XJJZeoZcuWuvTSS7VkyRKv2wwePFgPPPCA5s6dq/PPP1/nnnuu7rzzTmVnZ3vd7r333tOAAQPUrl07tWzZUr1799bSpUvd169bt07NmzfXRx99pIEDB6p169bq1auXli1b5r7NY489pubNm+uzzz5zX/bKK6+oefPmeu2119zj8Z1aOG3aNDVv3lyvvPKKJKl58+aaM2eO123mzJmj5s2b+435L3/5i1q1aqULLrhA/+///T8dO3bM6zZbtmzRrbfeqvbt26tz584aPXq0cnJyvNZp3bp1kqTvvvtOPXv2VL9+/cLeLpI0f/58XXzxxTrnnHPUvHlz9z/fdfD0wAMPqEePHu7fn3vuObVr107Lly/32m5m/1zbSZI2bNig2267TR07dlTLli3Vo0cPzZkzR6Wlpe7bHD16VJMmTVLXrl3Vtm1bXXPNNfrggw/cz0egx/HcLkOHDlX79u3Vvn17DR8+XL/88ot7+eHsG5L/82oYhvr166fmzZtr165dkqSCggJNmDBBXbp00Xnnnaf77rtPhw4dct8nPz9f06ZNU69evdSyZUu1b99et9xyi7755puA21aSdu3a5bXtfH93PfbFF1/stZ99//337u3ru32CWblypQoKCtS9e3evy2fMmGG6rX33lRUrVuiKK65Qy5Yt1a1bN82ZM0clJSWW1lGSPv74Y9PH83wNPvDAAxo8eLBefvllde/eXe3atdNNN92kbdu2eS1/586dGjlypC644AK1bdtWgwcP1qZNm/we3/Ofa4zNmzfXCy+8oPvvv1/t2rXT+eefr0cffVQFBQXu+5eUlGj+/Pnq06ePWrdurbZt26pfv35e7yeu94F27dqpsLDQa3wjR470mprsOZ433njD67Zr1qzxex7DeXxJ6t69uwoKCrRy5UoBAE4szlgPAAAAO+Tm5mrixIn605/+pNtvv10vvfSS1/UTJkzQK6+8oqFDh6pdu3basGGDJk+erMOHD2v48OHu261evVo1atTQ+PHjVVpaqmnTpmnw4MF68803VblyZX3wwQcaPny4brzxRt11113Kz8/XsmXL9Mgjj6hly5Zq06aNe1mjRo3S1VdfrWHDhmn16tWaOHGiJGnAgAEaNWqUPvjgAz388MNatWqVDhw4oEcffVSXXXaZrr76atN1/Pnnn7Vw4ULL22bVqlW67777dOWVV+qee+5Rdna2ZsyYoe3bt+v555+Xw+HQ119/rUGDBqlNmzZ64oknVFJSomnTpum2225zh2GennzySb
Vs2VJ33HGHJIW1XV577TVNmzZNQ4cOVZcuXVS5cmVJ0g033BD2uuTk5Gj69Ol65JFHdNFFF3ldl5WVpdq1a0uS9u/frxEjRriv27Ztm26++Wb17t1bM2bMkGEYWrVqlbKystS0aVNdccUVKikp0a233uoODpo2bapXX31Vw4cP16JFi/Twww/r6NGj7jFfe+21uu666yRJZ5xxhnbs2KF+/fqpadOmevzxx1VcXKxnnnlG/fv31+uvv66aNWu6xxNs3zDz+uuv6/PPP/d7Dl577TU9+OCDqlq1qiZOnKgJEyZoxowZkqSxY8dq48aNGj16tBo3bqyffvpJs2bN0r333qs333xTDocj7O3ua8GCBe4QzOWOO+5Qenq6Jk2apDp16iglJUUrVqwIOe3qjTfeULdu3ZSenu51eX5+vnr06KGhQ4e6L/PdV+bNm6cZM2Zo0KBBGjdunL755hvNmTNHe/bssTz9Nj8/X/Xq1dOsWbPcl7meF0/ffPONfvzxR40ePVrVqlXT7NmzNWjQIL311luqU6eOtm/fruuvv15NmjTR+PHjlZaWpsWLF+umm27Sc889p06dOnlts27dukmS1/rPmjVLbdq00cyZM/XDDz9o5syZ2r9/v2bOnClJmjp1qv75z3/q3nvvVfPmzZWTk6OnnnpKd999tz744AP3a0sqq0D69NNP3a+X33//XR9++KFpldnJJ5+s999/X1dddZX7srfeekspKSleIW24j5+RkaHu3btr1apVGjhwoJWnAwCQ4AifAABJafLkyTp06JCeffZZOZ3ef+527Nihl156SaNHj3Y3wP3zn/8sh8OhefPmacCAAe5Kqby8PL3yyitq1KiRJKlp06bq27evXnvtNfXv31/bt29X37599fe//929/Hbt2um8887TunXrvMKnSy65xH27rl27at++fXr66afVv39/VapUSVOmTNGAAQM0f/58bd68WVWqVDE92PVcxzPPPFNfffWV+7KUlBQVFxcHvI9hGJo6daq6du2qqVOnui9v0qSJbr75Zn344Yfq1q2b5s6dq+rVq+u5555TRkaGJKlOnTq699579f3333st86efftJHH32kN954Q2eeeaYkhbVdvvjiC1WvXl2jR48OON5Qli9frhYtWugvf/mL33Vnn322GjZsKEl+wci2bdt0/vnn68knn3QfdF9wwQV6//33tW7dOl1xxRVau3at/ve//+mpp55Sz549JUmdO3fWL7/8os8++8wrzJKkevXqqW3btu7fH374YVWuXFkLFy5UlSpVJEldunRRz549tWDBAt1///3u2wbbN3xDod9//11Tp07Vn/70J6/n3jAMjR071t0nafPmze6G3YWFhfr99981fvx4XX755ZKkTp066ejRo5oyZYoOHDjgDuqs2rNnj/7xj394jefgwYP65Zdf9OCDD6p3797u2/73v/8NuqyjR49q69atuuyyy/yuy8vL06mnnuq1jT0dOXJETz/9tG644QaNHz9eUtnrunr16ho/frxuueUW9/4Zjry8PFWtWtXr8VzPo+/jzp07Vx06dJAktW7dWj179tTixYt13333KSsrS+np6Vq8eLH7/t26dVOfPn30xBNPeIVxjRs3Nl2/U045RXPnzpXT6dRFF12klJQUPfbYY7rrrrvUrFkz7du3T6NGjfKqysrIyNBdd92lb7/91muZF154oVavXu0On95//33Vrl3bK0zyvO1///tfFRYWKj09XQUFBVq9erU6duzoru6TZOnxW7VqpbfeektHjx413Z4AgOTEtDsAQNL58MMP9frrr2vIkCFq0aKF3/WfffaZDMNQjx49VFxc7P7Xo0cPFRQUeE2Had++vTt4kqRzzjlHjRo10oYNGyRJt99+u6ZMmaLff/9dX375pd566y3NmzdPkvymtvTt29fr9169emn//v3asWOHpLJw5uabb9ZTTz2lTz75RFOmTFG1atVM13Ht2rX65JNPvAIMSa
pZs6Z7apyZH3/8UXv37vVb944dO6pKlSr6+OOPJUmbNm3ShRde6A6eXON7//33dfbZZ7svO3bsmGbMmKHzzjvP68A
"text/plain": [
"<Figure size 1200x800 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Выбираем подмножество данных для кластеризации\n",
"features = df[['price', 'sqft_living', 'floors', 'bathrooms']]\n",
"\n",
"scaled_features = scaler.fit_transform(features)\n",
"\n",
"# Построение дендрограммы\n",
"linkage_matrix = linkage(scaled_features, method='ward') # Метод \"Ward\"\n",
"\n",
"plt.figure(figsize=(12, 8))\n",
"dendrogram(linkage_matrix, labels=df.index, leaf_rotation=90, leaf_font_size=10)\n",
"plt.title('Иерархическая кластеризация (дендрограмма)')\n",
"plt.xlabel('Индекс дома')\n",
"plt.ylabel('Евклидово расстояние')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Визуализация распределения кластеров**"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi0AAASgCAYAAACEzgvMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVf7H8c+dljqQEEhC71VakKpS7UpZLLsq9oZix4KuugFdF1dFRUAEGzYUFcUCrj+x6yqKChZQeicJkJ5MJlPu749sIiEFSDIlyfv1PDyQcyb3fGfOTbjnfu85xzBN0xQAAAAAAAAAAECIWUIdAAAAAAAAAAAAgETSAgAAAAAAAAAAhAmSFgAAAAAAAAAAICyQtAAAAAAAAAAAAGGBpAUAAAAAAAAAAAgLJC0AAAAAAAAAAEBYIGkBAAAAAAAAAADCAkkLAAAAAAAAAAAQFkhaAAAQ5kzTDHUIAAAAAEKIMQGAxoSkBYB676KLLtJFF11U7WvuvPNOjRkz5oiPOWfOHHXv3r22oR2Vr7/+Wt27d9e4ceOC2m6wrVq1St27dy/3p0ePHhowYIDOO+88ffLJJ4c9xtH2Z21t3bpV06dP10knnaS+fftq1KhRmjp1qn7//fdyrxszZozuvPPOOm1748aNOv/88+v0mAAAAAisO++8s8I178F//vOf/0gKzPVjfXDRRRdV+Ex69+6tUaNGacaMGcrJyan2+3ft2qXu3bvrrbfeCkq8fr9fb7zxhiZNmqQhQ4ZowIABmjhxol566SUVFxeXve6tt95S9+7dtWvXrjpt/8knn9Szzz5bp8cEgHBmC3UAABAMU6ZM0cUXX3zErz/33HM1fPjwAEZU0dKlS9WtWzdt2LBBP/zwg4499tigth9s//jHP3TMMcdIKnlqKCcnR88995ymTJmiBQsWaOTIkVV+79H2Z2383//9n+644w517dpV1157rdq0aaO0tDS98MIL+utf/6r58+fr+OOPD1j7//nPf/TTTz8F7PgAAAAIjBYtWmju3LmV1nXo0CG4wYShXr16KTU1texrj8ej3377TY8++qjWr1+vV199VYZhVPq9iYmJWrJkidq1axfwOF0ul6655hqtXbtW559/vq688krZ7XZ9++23euihh/TFF19o3rx5cjgcAYth9uzZuv766wN2fAAINyQtADQKR3sxm5ycrOTk5ABFU1Fubq5WrlypGTNmaMGCBXrttdcafNKiS5cu6t+/f7mygQMHatSoUXrxxRerTVoEY3AiSTt27NC0adM0fPhwPf7447JarWV1p5xyis4//3xNmzZNn3zySUAHKQAAAKh/HA5Hhetd/Ck2NrbC5zNo0CAVFBToiSee0Nq1a6v8/IL52c6cOVM//vijXnrppXJtnnDCCerRo4duvfVWvfbaa0F7qAoAGgOWhwLQKBy8nNC9996r448/Xj6fr9xrHnjgAQ0ZMkQej6fC8lAXXXSR7r77bi1cuFCjRo1Snz59dN555+nnn38ud4zPPvtMZ511lvr27atTTz1V77//vk4++WTNmTOn2vjee+89eb1eDR8+XOPHj9eHH36o7Ozscq9566231KtXL73xxhs6/vjjNXjwYG3atEmStHLlSp111lnq06ePjj/+eP3zn/9UYWFhue9fuXKlLrjgAqWkpKh379467bTT9Morr1QbU/fu3bVhw4YKx+nevbvWrVsnSXrhhRd02mmnqU+fPho+fLimT5+u/Pz8at9vVWJjY9WxY0ft2bNH0p9LSb322msaPXq0BgwYoK+//rrC8lCmaWrRokU6/fTT1bdvX5188sl69tlny637unr1al144YXq16+fBg8erGnTpikzM7PaeEqne99zzz3lEhaSFBUVpWnTpunss8+udPp6aeyrVq0qV37ocma//vqrLrnkEh177LFKSUnRpZdeqjVr1kgqWaas9Om87t27l51Hfr9fCxcu1Mknn6zevXvr1F
NP1UsvvVShndtuu0033nij+vfvr8suu0yS9P7772v8+PHq27evhg4dqttuu03p6enVfg4AAAAIjry8PM2cOVMnnXSS+vTpo7Fjx+rNN98s9xqfz6dXXnlF48aNK1u69JFHHpHb7S57zZ133qlLLrlEqampGjBggM444wz5fD59/fXX+utf/6qUlBQNGjRI1157rTZv3lxlPKeeeqpuvPHGCuUTJkzQtddeK6nkQZ9rrrlGQ4YMUb9+/fS3v/1Nn3/+eY0/g969e0tS2ZigsuvaypaH2rJli66//noNHjxYgwYN0uTJk8u9N7fbrYceekgjR45U7969NW7cOK1YsaLaWDIzM7V06VKdffbZlSZJxo4dq8svv1xJSUmVfn9lSxkfOk7w+/167LHHNGbMGPXu3VtjxozRrFmz5PF4JKlsXDp37txyY9QNGzZo8uTJGjBggAYMGKDrrrtOO3furNDOoWOpzMxM3XrrrTr++OPVp08fTZgwQcuWLav2cwCAYGOmBYBGZ8KECXr99de1atUqHXfccZJKLhQ/+OADnXnmmbLb7ZV+34cffqjOnTvrnnvukWma+ve//60bbrhBn3zyiaxWq7799ltNmTJFo0eP1k033aTt27crNTW13OChKkuXLtXw4cPVvHlz/eUvf9GcOXP09ttvl91oLuXz+fTcc8/pgQceUFZWljp37qz33ntPt912m8aNG6ebb75Zu3fv1mOPPaZNmzbp+eefl2EY+uyzz3Tdddfp4osv1g033KCioiItXrxY9913n3r37q1+/fpViOmkk05SdHS0li9frm7dupWVv//+++ratat69eql999/Xw8//LCmTZum7t27a8uWLfr3v/8tl8ulf//730fTLZKk4uJi7dq1S3379i1XPnfuXN1zzz0qKipSSkqK3nvvvXL1Dz30kF544QVddtllOv744/XLL7/okUcekdfr1eTJk/X999/rsssu09ChQ/X4448rJydHs2fP1sUXX6w333xTkZGRlcbz5ZdfqlevXlUOQoYNG6Zhw4Yd9fsslZ+fryuvvFJDhw7VnDlzVFxcrPnz5+uKK67QZ599pnPPPVdpaWl68803tWTJkrLZP9OnT9dbb72lyZMnKyUlRd9//73+9a9/KTc3V9ddd13Z8T/44AONHz9e8+fPl9/v1w8//KA77rhDU6ZM0aBBg5SWlqaHH35Yt956q15++eUavw8AAABUzuv1ViizWq2VLntUVFSkCy64QAcOHNCNN96o1q1ba+XKlbr77ru1f/9+XXPNNZJKlll95513dNVVV2ngwIFat26d5s2bp/Xr1+uZZ54pO/bq1asVERGhefPmqbCwUHv27NGUKVN09tlna+rUqcrNzdWjjz6qq6++Wh999JEslorPtY4fP14LFy5Ufn6+YmNjJUmbN2/W77//rmuvvVZ+v1+TJ09WYmKiHnroIdlsNr344ou69tpr9cEHH6h9+/ZH/Zlt3bpVktS2bduyskOvaw+Vnp6uv/3tb0pKStL06dMVHR2tOXPm6JJLLtH777+vpk2b6rrrrtOPP/6oG2+8UZ07d9ZHH32kW265RcXFxfrLX/5SaSzffPONvF6vRo8eXWW806ZNO+r3eLCnn35ar776qqZNm6a2bdtq7dq1euyxx2S323XjjTdqyZIl+tvf/qZzzjlH5557rqSSz+i8885Tp06d9O9//1ter1fz58/X+eefr3feeUcJCQllxz90LHXDDTfowIEDmjFjhmJjY/XOO+9o2rRpSk5O1tChQ2v1XgCgrjSKpMWCBQv01VdfVXgK9XCWLVumhQsXaufOnWrXrp2uv/56nX766QGKEkCwHHvssWrdurXef//9sqTFqlWrtG/fPk2YMKHK7/N6vXr22WfLLtYLCgo0bdo0rV+/Xr1799acOXPUtWtXzZ07t2ygkJCQoKlTp1Ybzx9//KHffvtNTzzxhCSpVatWGjp0qJYsWVIhaSFJ11xzjUaNGiWpZI
bBI488ouHDh+uRRx4pe02HDh106aWX6vPPP9eoUaO0adMmTZw4UXfffXfZa1JSUjRkyBCtWrWq0qRFVFSUTj31VK1
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Визуализация кластеров\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"# Парный график 1: sqft_living vs price\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Living Area vs Price Clusters')\n",
"plt.xlabel('Living Area (sqft)')\n",
"plt.ylabel('Price')\n",
"\n",
"# Парный график 2: bedrooms vs price\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['floors'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Floors vs Price Clusters')\n",
"plt.xlabel('Floors')\n",
"plt.ylabel('Price')\n",
"\n",
"# Парный график 3: bathrooms vs sqft_living\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['bathrooms'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Bathrooms vs Price Clusters')\n",
"plt.xlabel('Bathrooms')\n",
"plt.ylabel('Price')\n",
"\n",
"# Парный график 4: sqft_living vs bedrooms\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Living Area vs Price Clusters')\n",
"plt.xlabel('Living Area (sqft)')\n",
"plt.ylabel('Price')\n",
"\n",
"# Настройка графиков\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## KMeans (неиерархическая кластеризация) для сравнения"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Центры кластеров:\n",
" [[2.22041876e+03 5.15870327e+05 2.07766990e+00 2.47761993e+00\n",
" 3.43303826e+00]\n",
" [2.21224000e+03 5.28096070e+05 1.07634518e+00 2.19979695e+00\n",
" 3.87857868e+00]\n",
" [1.32625308e+03 3.60889221e+05 1.10303964e+00 1.36120782e+00\n",
" 2.67688806e+00]\n",
" [3.88146209e+03 1.24719106e+06 1.89801444e+00 3.31836643e+00\n",
" 4.38447653e+00]]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi0AAASgCAYAAACEzgvMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5xcVf3/8ff0ur2nbHpvJEDohIReFURFQIqAdKVJ8IsKIkivCSXUqBRRQQQCgkGK8JMSEQgkIW1TN9vr7PTy+2PdgWV3U3Zn9u5OXs/HI49k75m597Pn3pnccz73nGNKJBIJAQAAAAAAAAAAGMxsdAAAAAAAAAAAAAASSQsAAAAAAAAAADBAkLQAAAAAAAAAAAADAkkLAAAAAAAAAAAwIJC0AAAAAAAAAAAAAwJJCwAAAAAAAAAAMCCQtAAAAAAAAAAAAAMCSQsAAAAAAAAAADAgkLQAgAyXSCSMDgEAAABABqKtAQBIB5IWAFLuhz/8oX74wx922e7z+fS9731PU6dO1dKlS5OvnTBhgk455ZQe93f55ZdrwoQJuuaaa9IWc7qEQiEtXrxY3/nOd7Tnnntq9uzZOuWUU/TCCy90usFfsGCBJkyYkNJjh8Nh/fa3v9VLL72Ukv31dF770/PPP69TTjlFs2bN0owZM3Tsscfq3nvvlc/nMzSuge7555/XhAkTOv2ZNGmS9t57b/3oRz/Sf/7znx3uYyCcfwAAANoaX6GtkTrfvFf++p+ZM2dKkj744ANNmDBBH3zwgWFxGmHLli1d6mTixImaOXOmTjrpJP3lL3/Z4T7ScQ0CyGxWowMAsHvw+Xw699xztWrVKt1///2aM2dOssxsNuuTTz5RVVWVSktLO73P7/frzTff7O9wU6Kurk7nnnuutm3bph/+8IeaPn264vG43nzzTV1zzTVatmyZfvOb38hkMqXl+DU1Nfrd736nm2++OSX7u+6661Kyn95auHChHnroIf3oRz/ShRdeKJvNps8//1yPPvqo/vWvf+mZZ56RzWYzNMaBbuHChSoqKpIkxeNx1dXV6f7779eZZ56pv/zlL5o4cWKP7zX6/AMAAPSEtgZtjVQ4+eST9d3vfrfLdrOZ530l6cILL9QhhxwiqX2ETVtbm/785z/r2muvVTQa3W5y8Lvf/a4OOuigfooUQCYgaQEg7ToaEStXrtSDDz6oAw44oFP55MmTtXbtWv3973/XWWed1anszTfflMvlUnZ2dj9GnBrz589XVVWVnn32WY0cOTK5/ZBDDtGQIUN01113ae7cuTr00EONC3IXjB071rBjh8NhPfLIIzrnnHN0+eWXJ7fvv//+Gj16tC6++GItXbpURx99tGExDgaTJk3SsGHDOm2bPHmyDj/8cD399NO64YYbenyvkecfAACgJ7Q1aGukSmlpqfbYYw+jwxiwysvLu9TP/vvvr1WrVmnx4sXbTVqUlpZ2SRoCwPaQLgaQVm1tbTrvvPP05Zdf6uGHH+7SiJAkt9utOXPm6O9//3uXsldeeUVHHnmkrNbOOdZ4PK6HH35Yhx9+uKZOnaojjzxSf/jDHzq9JhaL6eGHH9Zxxx2n6dOna4899tApp5yi999/P/maBQsW6PDDD9dbb72l448/PrmvF154odO+fve73+moo47StGnTdNBBB+n666/f7pREK1eu1LvvvqtzzjmnUyOiw1lnnaXTTjtNbre72/fPmzevyxD1jil+tmzZIkkKBoO6/vrrdfDBB2vq1Kk66qij9Nhjj0lqH8Lb0UD5+c9/rnnz5iX3s2zZMp1++umaMWOGZs+erfnz56uhoaHTcSZPnqw///nPOuCAAzR79mytXbu2y5DtCRMm6KmnntK1116r2bNna+bMmfrpT3+qurq6TnE/9thjOvTQQzV9+nSdcsop+uc//9lpWHXHcOMFCxb0WJ8+n0/BYFDxeLxL2Zw5c3T55Zdr+PDhyW1NTU2aP3++Zs+erd
mzZ+u3v/2t7rvvvk71sDN1LElLly7VqaeeqpkzZybr+amnnkqWdwwT/+Mf/6i5c+dq1qxZeu+993aqrr/poYce0tSpU9Xc3Nxp++LFizVlyhTV19crHo/r7rvv1rx58zR16lTNmzdPd955pyKRSI/73Z5hw4YpLy9PlZWVyTrYmfMfDod1zz33JM/tcccdp7/+9a+d9r106VKddNJJmjZtmg444ADdeOON8vv9vYoTAADgm2hr0NZIRVujL5YvX65zzjlH++yzj2bNmqULLrhAa9as6fSampoa/fznP9ecOXM0ffp0nXzyyXrjjTc6vWbChAlauHChTjrpJE2fPl0LFy7c5fv+jz/+WBMmTOgyemjlypWaMGGC/vGPf0iSXn75ZZ1wwgmaPn269t13X1111VWqrq7u1e9vNps1adKkZFuio76feOIJHXXUUZoxY4aee+65bqeHeuGFF3TiiSdqxowZOuSQQ3TnnXcqHA4ny1evXq3zzz9fs2bN0qxZs3TxxRdr8+bNvYoTwODDSAsAaeP3+/XjH/9YK1as0GOPPaa99tqrx9cec8wxuuyyyzoN2/b5fHrnnXf0xBNP6J133un0+uuvv17PP/+8zj//fM2cOVMfffSRfvvb36qlpUUXX3yxJOmOO+7QM888oyuvvFITJkxQdXW17r//fv30pz/VW2+9JZfLJUmqra3VDTfcoAsvvFBDhw7VY489pvnz52vatGkaM2aMXn75Zd1+++2aP3++JkyYoPXr1+vWW29VIBDQrbfe2u3v869//UuSOt3Af53D4dCvfvWrXavQb/jtb3+rd999V/Pnz1dhYaHeeecd3XbbbcrNzdXxxx+vhQsX6pJLLtGFF16oI444QpL00Ucf6eyzz9a+++6re+65R83Nzbr33nt1xhln6C9/+YucTqek9kbY448/rptuukmNjY0aM2ZMtzHcfffdOvzww3XXXXdp8+bNuvnmm2WxWHTXXXdJap+O6P7779c555yjfffdV//617902WWXddpHcXGxnn322e0+eZOfn68ZM2boscceU01NjQ4//HDNmjVL+fn5stlsuuCCC5KvjcfjOvfcc7V161b97Gc/U15enh5++GFt2LBBXq93l+r4rbfe0sUXX6wzzjhDl156qYLBYHJEwtSpUzVjxozkaxcuXKhf/OIXCgaDyWtyZ+r6644//njdc889ev311zsNTV+yZIkOPPBAFRQUaNGiRXrmmWc0f/58DR8+XJ9++qnuvvtu2Ww2/eQnP9ml30+SGhsb1djYqPLy8uS2nTn/V111ld5++21deOGFmjFjht5++21dc801stlsOu644/TSSy/pqquu0vHHH6/LLrtMW7du1d133621a9fqiSeeSNtUBQAAYPdAW4O2RqraGh3i8bii0WiX7d9ManV4//33de6552qfffbRb3/7W4VCIS1atEinnHKK/vSnP2nMmDGqq6vTySefLIfDocsvv1x5eXl6/vnndfHFF+u2227TCSeckNzfQw89pCuvvFKjRo3S0KFD9cgjj+zSff+sWbNUXl6uJUuWaO7cucntL7/8snJzczVnzhz95z//0dVXX62LLrpIe++9t6qqqnT77bfryiuv1JNPPrnDOupORUVFp7aE1J6wu/baa+X1ejVjxgz9+c9/7lT+1FNP6YYbbtB3v/tdXXHFFdq8ebNuu+02NTc364YbblBFRYVOOeUUjR49Wrfeequi0agefPBB/eAHP9Df/vY3FRQU9CpWAIPHbpG0WLRokd59990uT0bsyAsvvKCHH35YmzdvVnl5uS655BKmHgF2UkcjomOB3x09XX3IIYfI5XJ1Grb9j3/8QwUFBdpzzz07vbaiokJ/+tOfdMUVV+jHP/6xJOnAAw+UyWTSokWLdOqppyovL081NTW6/PLLOz2x43A4dOmll+rLL79MDm0NBAK66aabtN9++0mSRo4cqblz5+rtt9/WmDFj9OGHH2rYsGE67bTTZD
abNXv2bLnd7i5Pw3/dtm3bJKnLVDyp9OGHH+qAAw7QscceK0naZ5995Ha7VVBQILvdrkmTJklqH8Y7efJkSdKdd96
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Убедитесь, что масштабирование применяется только к нужным признакам\n",
"features_used = ['sqft_living', 'price', 'floors', 'bathrooms', 'bedrooms']\n",
"data_to_scale = df_cleaned[features_used]\n",
"scaler = StandardScaler()\n",
"data_scaled = scaler.fit_transform(data_to_scale)\n",
"\n",
"random_state = 42\n",
"kmeans = KMeans(n_clusters=4, random_state=random_state)\n",
"labels = kmeans.fit_predict(data_scaled)\n",
"centers = kmeans.cluster_centers_\n",
"\n",
"# Отображение центроидов\n",
"centers_original = scaler.inverse_transform(centers) # Обратная стандартизация\n",
"print(\"Центры кластеров:\\n\", centers_original)\n",
"\n",
"# Визуализация результатов кластеризации KMeans\n",
"plt.figure(figsize=(16, 12))\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 0], centers[:, 1], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Square vs Price')\n",
"plt.legend()\n",
"\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['floors'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 2], centers[:, 3], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Floors vs Price')\n",
"plt.legend()\n",
"\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['bathrooms'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 1], centers[:, 4], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Bathrooms vs Price')\n",
"plt.legend()\n",
"\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 3], centers[:, 4], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Square vs Price')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### PCA для визуализации сокращенной размерности"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAAJHCAYAAADoqsXxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5QUZdbA4V9VdZycMzDEIUdJklFQMaCuObsGFHfd1dVVV8X0mXbNOYdVXBOKmAVFRUAQyTkNMMPkHDpX1fdHQcsEkDAwgPc5Z45MVXW91dXVY9++dd+rmKZpIoQQQgghhBBCCCGEEEIIcZhQW/sAhBBCCCGEEEIIIYQQQgghdiXJCyGEEEIIIYQQQgghhBBCHFYkeSGEEEIIIYQQQgghhBBCiMOKJC+EEEIIIYQQQgghhBBCCHFYkeSFEEIIIYQQQgghhBBCCCEOK5K8EEIIIYQQQgghhBBCCCHEYUWSF0IIIYQQQgghhBBCCCGEOKxI8kIIIYQQQgghhBBCCCGEEIcVSV4IIcQfiGmarX0IYjf+yK/NH/m5CyGEEEKIpuTz4R+DvM5CiN8jyQshBBdffDE5OTkNfnr27Mno0aO55557qK6ubvKY3Nxc7r77bo4//nh69+7N6NGjufHGG1m7du1ux3n88cfJycnhvvvuO5hPZ7eefvppcnJyWmXs5nz00Ufk5OSQn59/0B8XCAR44IEH+PTTT/f1MPfJeeedR05ODl9//fVBHedwey0PRE1NDf/85z9ZtGhReNnFF1/MxRdffMiOYW/fz2PHjuXWW29t0bE3bNjA+eef3yL7ys/PJycnh48++qhF9ieEEEKIw4vELa3jaIpbcnJyePrpp5ssX79+PUOHDmXUqFFs2bIlvG1OTg6PPfZYs/syDIMRI0YcsZ8/i4uL+fe//82JJ55Inz59GD58ONdcc02DuAQOTmxSVFTE1Vdfzfbt21tkf7t7XYUQRz5JXgghAOjevTvvvfde+Of111/nsssuY9q0aUyaNKnBHRHffPMNZ5xxBqtWreLaa6/l5Zdf5oYbbmDLli2cc845zJ07t8n+DcNg+vTpdOnShU8++QSv13son94fXklJCW+++SahUOigjbF582aWLFlCly5dePfddw/aOEebNWvW8Mknn2AYRnjZXXfdxV133XVIxt+f93NL+uqrr1iyZEmL7CslJYX33nuP0aNHt8j+hBBCCHH4kbjl6HYo4pbGNmzYwGWXXYbb7ebtt98mOzs7vE5VVb766qtmH/fLL79QUlJyiI6yZf36669MnDiR2bNnc8kll/DCCy9w++234/P5uPjii5k+ffpBHX/evHn88MMPLba/9957j7PPPrvF9ieEOHzYWvsAhBCHh6ioKPr27dtg2cCBA6mvr+epp55i2bJl9O3bl23btnHLLbcwYsQInnjiCTRNC28/fvx4zj//fG655Ra+++47HA5HeN1PP/1EUVERjz32GBdddBGfffaZfLg4ynz00UdkZmYyadIkbrrpJrZu3Uq7du1a+7COSJ06dTok4+zv+/lw5XA4mvwdE0IIIcTRReIW0ZI2bdrEpZdeSmRkJG+++SYZGRkN1vfv359FixaxevVqunfv3mDd559/Trdu3VizZs2hPOQDVlVVxd///neys7N5/fXXcbvd4XUnnHACV199NVOmTGH48OEkJSW14pHuPYkBhDh6SeWFEGKPevbsCUBBQQEAb731FoFAgDvuuKNBAADgdru55ZZb+NOf/tSkZHvatGl06dKFAQMGMHjwYN57773fHXvs2LE88MADXHrppfTu3Zvbb78dsD5sTZkyhWOPPZZevXpxzjnnMH/+/AaP9fv9PPjggwwbNox+/fpx22234ff7G2zTXPnrggULyMnJYcGCBeFlmzdv5i9/+QuDBg1i4MCBTJo0iU2bNjUY69///jejRo2iZ8+enHrqqXzxxRcN9msYBs899xyjR4+mT58+TJ48ud
my9sb29nGzZs3iggsuoF+/fvTs2ZMTTzyRqVOnAtZUOscddxwAt912G2PHjg0/7oMPPuDMM8+kb9++9O7dm4kTJ/Lll1822HdOTs7vThWk6zrTp09nzJgxHH/88URERDT7GgeDQR555BFGjhxJ7969ueKKK5g+fXqTcvKPP/6YCRMm0KtXL0477TTmz59P9+7d91iO/cUXX3DmmWfSr18/hg0bxpQpUxqcq6effpoTTzyRmTNncsopp9CrVy8mTpzIkiVLWLp0KWeffTa9e/fmlFNOaXI9rV+/nkmTJtG/f3/69+/PddddR15eXnj9zuvm3XffZcyYMfTv3z98J9+ezvGCBQu45JJLALjkkkvC1+Ou1+af//xnzjzzzCbPd/LkyZx22mnh3xctWsRFF11Enz59GDRoELfccgsVFRW7PV+w/+/nXZ/zru+VxscOsHLlSi699FIGDBhAv379uOyyy1i6dClgvSbPPPMM0LDU2zAMXnrpJcaNG0fPnj054YQTeOutt5qMc9NNN3H99dfTt29fLr/88ibTRn300Ud0796dZcuWce6559KrVy/GjBnDq6++2mBfJSUl3HDDDeH3+JQpU3j88ccbvFeEEEIIcXiTuEXilr2JW3a1adMmLrnkEqKjo3n77bebJC7ASowlJSU1qb4IhUJ88803nHzyyU0eszeve0VFBffccw9jxoyhZ8+eDBo0iOuuu65BTHTxxRdz++2389JLLzF69Gh69erFeeedx/Lly8Pb+Hw+7r77bkaOHBk+n40/6zY2ffp0SkpK+Ne//tUgcQFWpclNN93EhRdeSF1dXZPH7m6a1ltvvbXB67Vt2zauueYaBg8eTJ8+fTj33HPDlRYfffQRt912GwDHHXdcg9fsgw8+4OSTTw5PB/f000+j63qDcS699FLuuusu+vfvz4QJE9B1vUEssfO9MX/+fP785z/Tp08fhg0bxn/+858G+6qrq2PKlCkMHTqUfv36ccMNN/DGG28cVlO2CSEkeSGE+B25ubkAtGnTBoA5c+bQvXt3UlNTm91+6NCh3HDDDSQnJ4eXVVVV8d1333H66acDcMYZZ7BixQpWrVr1u+NPnTqVXr168dxzz3HWWWfh9/u59NJL+fbbb7nhhht45plnSEtL48orr2zwgfDmm2/m/fffZ9KkSTzxxBNUV1fzxhtv7PPzLy4u5txzz2XLli3cfffd/Oc//6GsrIxLL72UqqoqTNPkuuuu49133+Xyyy/n+eefD3/w2bXU9j//+Q/PPvssZ511Fs888wxxcXE8+uijvzv+3jzu+++/57rrrqNHjx4899xzPP3007Rp04Z7772XZcuWkZKSEv6C+Nprrw3/e+rUqUyZMoXjjz+eF198kUceeQSHw8FNN91EUVFReP/vvfcekydP3uNx/vjjj5SWlnL66afjcrk46aST+PjjjwkEAg22mzJlCm+++SYXXXQRzz77LElJSdx5550Ntpk+fTq33nor/fv357nnnuOEE05g8uTJDT5oNvbcc89x44030rdvX5566imuu+46vv76ay6++GJ8Pl94u6KiIh566CGuueYannzySWpqarj++uu58cYbOfvss3n22WcxTZMbbrgh/Ljc3FzOO+88ysvLefjhh7n//vvJy8vj/PPPp7y8vMFxPPPMM9xyyy1MmTKFfv36/e457tGjB1OmTAmfm+amijrttNNYtWoVW7duDS+rqanhxx9/ZOLEiYBVsn7ZZZfhcrl44okn+Ne//sXChQu55JJLGjz/xvbn/bwv6urquPLKK4mPj+fpp5/m8ccfx+v1csUVV1BbW8vZZ5/NWWedBTQs9b777rt56qmnOO2003jhhRc48cQTeeCBB3j22Wcb7P/LL78kMjKS559/niuvvLLZYzAMg7///e9MmDCBl156if79+/Pvf/+bOXPmANa8ypdeeimLFy/mX//6Fw8++CBr167ltdde26/nLIQQQojWIXGLxC17E7fstH
nzZi699FKioqJ4++23d3udaJrGCSec0CR5MX/+fPx+f5ObXfbmdTdNk0mTJjF37lxuuukmXn31Vf7yl78wf/78JvH
"text/plain": [
"<Figure size 1600x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pca = PCA(n_components=2)\n",
"reduced_data = pca.fit_transform(data_scaled)\n",
"\n",
"# Визуализация сокращенных данных\n",
"plt.figure(figsize=(16, 6))\n",
"plt.subplot(1, 2, 1)\n",
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.title('PCA reduced data: KMeans Clustering')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA28AAAImCAYAAADE77LsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACIkUlEQVR4nOzdd3xUVf7/8fekTnpIT4BASOgJPTQBEZG1oCti2a9iRxFxUdEfirAqKuoqCgKCIiq6q6uyIOpiAzs9oPRQAgQIpJBCes/8/ggZHUMJYZiSvJ6PRx4k95578pnJEfPmnHuuwWQymQQAAAAAcGgu9i4AAAAAAHB2hDcAAAAAcAKENwAAAABwAoQ3AAAAAHAChDcAAAAAcAKENwAAAABwAoQ3AAAAAHAChDcAAAAAcAKENwAAAABwAoQ3AAAAAHAChDcAcBC33nqrOnbsqL/97W+nbfPwww+rY8eOevzxx21YGYDGSktLU8eOHbVs2TJ7lwKgCSC8AYADcXFx0ZYtW5SRkVHvXElJiX744Qc7VAUAABwB4Q0AHEiXLl3k6empr7/+ut65H374QV5eXgoPD7dDZQAAwN4IbwDgQLy9vXXxxRefMrx9+eWX+stf/iI3N7d651atWqXrrrtOCQkJuuiii/Tcc8+ppKREkjRs2DB17NjxlB9paWmSpDVr1ujmm29W79691a9fPz3yyCNKT0+3+B6PPPLIKfs423KwuuWgp/r4o+3bt+vuu+9Wv3791KtXL913333at2+f+fyGDRvUsWNHbdiwQZK0d+9eDR8+XH/72980d+7c036PuXPnSpKWLFmiK664QvHx8Rbnz7YE9ZNPPjllv3+8rm5p3NnaNbaGhr43Z/r+pztf93N4/PHHNWzYMIvv+9FHH1m8h3/8Pps3b7Zo++9//1sdO3a06KOsrEyvvPKKRowYofj4ePXq1Ut33nmnkpOTLa49XV233nqrRZu6Ok7lz+Ojzq233mrRT3l5uV5//XVdfvnlSkhI0IgRI7Rw4ULV1NRYXPPnWjZs2NCga8/GZDJpypQp6tatm1avXt3g6wBAkur/BgAAsKsrr7xSDz30kDIyMhQRESFJKioq0s8//6x3331XP//8s0X7L774Qo8++qiuvvpqPfTQQzp69KhmzZqllJQUvfvuu5o3b54qKip0/PhxPfDAAxo/fryGDh0qSQoLC9Py5cv12GOPaeTIkRo3bpzy8vI0Z84c3XTTTfr0008VHBwsqfaX3ptuuknXXXedJJn7a4guXbroqaeeMn+9ZMkS/fe//zV/vX79eo0dO1b9+vXT888/r/Lycr355pv629/+pk8++USxsbH1+nz55ZcVHx+v8ePHKyAgQIMHD5YkTZ8+XZLM3y8iIkJJSUmaNm2arr/+ek2bNk0+Pj6S1KD6y8rKlJCQoGnTppmPne66P763f27X2BrO5b158skn1bVr11N+/48//liStHPnTj3zzDP12v5Zfn6+Zs+efcpzPj4++v7779W7d2/zsS+//FIuLpb/Jjx58mRt2rRJkyZNUnR0tA4dOqTXXntNjzzyiFasWCGDwWBue/311+uGG24wf133c7Qmk8mk++67T1u2bNEDDzygTp06acOGDZo9e7aOHDmiZ5991tz2z2M2Nja2wdeeyXPPPaf//e9/ev311zVo0CCrv0YATRvhDQAczNChQ+Xl5aWvv/5ad9xxhyRp5cqVCg4OtvhlWar9ZXTmzJkaPHiwZs6caT7etm1b3XHHHfrpp5/MYaJuli06Olo9evSQJNXU1GjmzJkaNGiQXnnlFfP1vXr10pVXXqm3335bkydPliSVlpaqbdu25mvr+msIX19f83WS9Msvv1icf+WVV9SmTRstXLhQrq6ukqRBgwbpsssu05w5c/Taa69ZtD906JBWr16tzz//XO3bt5ckc9D19fWVJIvvt2LFCknSE088YQ5NkuTh4XHW2ktLSxUSEmLR3+mu++
N7++d227Zta1QN5/LexMXFnfb71x0vLy8/Zds/mzNnjqKiopSXl1fv3JAhQ/Tdd9/p//2//ydJysjI0G+//aY+ffro6NGjkqSKigoVFxdr2rRpuvLKKyVJffv2VVFRkV588UVlZ2crNDTU3GdERIRFPXU/R2v6+eeftXbtWr366qu66qqrJEkXXXSRjEajXnvtNd12223m8fTnMfvTTz81+NrTeeWVV/Txxx9r3rx5GjJkiNVfH4Cmj2WTAOBgjEajhg0bZrF0csWKFbriiissZiok6cCBA8rIyNCwYcNUVVVl/khMTJSvr6/WrFlzxu918OBBHT9+XCNHjrQ4Hh0drZ49e2rjxo3mY+np6fLz87PCK7RUUlKi7du364orrjCHE0ny9/fXJZdcYlFDXftZs2apX79+Z/1luU63bt0kSe+8846ysrJUUVGhqqqqBl1rrdfdmBrO9b2xlr179+rjjz/WP/7xj1OeHzZsmFJTU3XgwAFJ0tdff63u3burZcuW5jYeHh56++23deWVVyozM1Pr16/XRx99ZN50p6Ki4pzrqqmpUVVVlUwm01nb1H38se3GjRvl5uamyy+/3OKaa665xnz+dM7nWkn64IMPtHDhQl111VUWs7MAcC6YeQMAB3TFFVfogQceUEZGhjw9PbVu3To99NBD9dqdOHFCUu0Ss1MtM8vKyjrj96m7PiQkpN65kJAQ7dq1S1LtDN+xY8fUqlWrc3shDVBYWCiTyXTaGgoLCy2O3XffffL397dYdnk2iYmJmjZtmhYuXKh58+adU31Hjx494/LCC1nDub431vLcc8/pqquuUs+ePU95Pjw8XPHx8fruu+/Url07ffnllxo5cqR5vNT55Zdf9Pzzz+vAgQPy8fFRp06d5O3tLUlnDGCnM3/+fM2fP1+urq4KCQnRoEGD9OCDD1ps4lM3W/1Hffv2lVS7FLRFixYWQViSeQbwTO/n+VwrSbt379agQYP0v//9T7fffru6dOlyxvYAcCqENwBwQEOGDJGPj4++/vpreXt7q1WrVoqPj6/Xzt/fX1LtvUV1v6D+UUBAwBm/T2BgoCQpOzu73rnjx4+rRYsWkqTk5GSVlZXV22TEGvz8/GQwGE5bQ12NdSZPnqyvv/5aEydO1AcffNDg5XU33nijVq9eraqqKj355JNq1aqVxo8ff8ZrampqtHXrVo0ePbpB3+PPM6PnW8O5vjfW8NVXX2nHjh0Wy2hP5dJLL9V3332nK664Qjt27NC8efMswtvhw4c1YcIEDR8+XG+++aZat24tg8GgDz74oN6yWens751U+/7deOONqqmp0bFjxzRr1izdc889+vzzz81tpk+fbhG2/3jfWkBAgPLy8lRdXW0Rwur+kaNuvJ/K+VwrSQ8++KBuu+02XXXVVZo2bZqWLFlSLwgCwNmwbBIAHJCHh4eGDx+ub775Rl999ZX5Hps/a9eunYKDg5WWlqaEhATzR3h4uF555ZV6MyF/FhMTo9DQUP3vf/+zOH7kyBFt2bJFvXr1kiT9+OOP6ty5s4KCgs75tdTU1Jzxl1Rvb2/Fx8frq6++UnV1tfl4YWGhfvzxx3r3+cXHx2vevHk6evSoXn755QbX8dprr+nHH3/Uiy++qCuuuEIJCQlnvd/s119/VUlJifr163fGdnWzSH/esON8azjX9+Z8VVRU6KWXXtKECRMs7kc7leHDh2vr1q3697//rd69eyssLMzi/I4dO1ReXq57771X0dHR5nBWF9zq3rO6nRrP9t5JtRvsJCQkqHv37rriiit0yy23aM+ePcrPzze3iYmJsfhv4Y/3F/bt21dVVVX1dnOtC39nej/P51qpdqbUaDTqySef1M6dO/Xuu++e9fUCwJ8x8wYADurKK6/UuHHj5OLiYrHT4R+5urrq4Ycf1pNPPilXV1ddcsklKigo0Pz585WZmXnW5X4uLi6aNGmSpkyZokceeUTXXHON8vLyNG/ePAUEBOjOO+/Uzp
079cEHH+iqq67Sli1bzNceP35cUu0MS25ubr1gl5ubq5SUFB06dMgcAk/nkUce0d133617771XN998syorK7Vw4UJ
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"inertias = []\n",
"clusters_range = range(1, 11)\n",
"for i in clusters_range:\n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" kmeans.fit(data_scaled)\n",
" inertias.append(kmeans.inertia_)\n",
"\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range, inertias, marker='o')\n",
"plt.title('Метод локтя для оптимального k')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Инерция')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Расчет коэффициентов силуэта"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACjZklEQVR4nOzdd1yV9eP+8dfhsAVBHODeAwe4cO/V0jIbNhxZapll9UlNyzTN1Jyl5iottczKVamVaeXeW4YbB6igKMge5/z+8Cu/yAXK4WZcz8eDh3Luca77LSoX932/b5PVarUiIiIiIiIi2cbO6AAiIiIiIiL5jYqWiIiIiIhINlPREhERERERyWYqWiIiIiIiItlMRUtERERERCSbqWiJiIiIiIhkMxUtERERERGRbKaiJSIiIiIiks1UtERERERERLKZipaIFBg9e/akZ8+eGV7bs2cPjz/+ODVr1uSnn36y6fsPGzaMdu3aZXm7du3aMWzYMBskEhFbqV69OjNmzDA6hogYyN7oACIiRrly5QqvvfYatWrVYv78+VSvXt3oSCIiIpJPqGiJSIH19ddfk5iYyMSJE/H29jY6joiIiOQjunRQRAqkq1evsmTJErp06XJLyQoNDWXQoEE0b96cunXr0rNnT/bu3ZthnX/++Ydu3brh7+9Ps2bNGDVqFNevX8+wznfffUfbtm3x9/fnnXfeITY2FoDZs2fTtGlTGjZsyKhRo0hOTk7fJjk5mdGjRxMQEEDjxo3TLz2Ki4tjyJAh1K1bl9atW/Pdd9+lb3P+/HmqV6/OihUr0l9LSkqiffv2Gc7S3e7SyZ07d1K9enV27tx528/hxpm/hg0b3nLZ408//cRjjz1G7dq1adOmDTNmzCAtLS19+e0ulfx31pvvdbuPmznvddnk7Y7pvyIiInjvvfdo2rQp9erVo0ePHuzfvz99+X8v8bJarTz33HNUr16d8+fPZ1jvblkHDRpEq1atsFgsGd7/gw8+4KGHHgLg4sWL/O9//6NJkyb4+/vTs2dPDhw4AMCMGTPu+B4384WEhPDGG2/QpEkTatWqRcuWLRk7diyJiYl3HYOtW7feNXtmjxFgw4YNPPnkk/j7+991X/+2YsUKqlevzsGDB3nyySfx8/OjS5cu/P777xnWO3/+PEOHDqVFixbUqlWLpk2bMnToUK5evZq+TnBwMC+++CL16tWjQ4cOLF26NH3Z7b5+4davk3td1vfvr7tFixbd8vdrx44d1KhRgy+++OKO+/iv6dOn4+vry8qVKzO9jYjkbTqjJSIFitVq5cKFC4wdO5bU1FReffXVDMtPnDjBs88+S4UKFRgxYgQODg4sWrSI3r17s2DBAho1asTu3bsZMGAAjz/+OO+++y7Hjx/ns88+49ixY3z77beYzWbWr1/PmDFj6NmzJ61ateKHH35g/fr1AKxdu5axY8cSFhbG5MmTcXZ2Zvjw4QBMmjSJ5cuXM3ToUHx8fJg2bRphYWGEhYXx8MMPM336dDZt2sSYMWPw8fGhffv2tz3Or776KkNJeBBTpkzh+vXrFC5cOP21uXPnMm3aNHr06MHw4cMJDg5mxowZXLhwgXHjxmVqv7Vq1eKHH34AbpS2ZcuWpX/u5uaWLdnj4uJ4/vnnSUtLY8iQIXh7e7NgwQJefvllVq5cSYUKFW7Z5ueff85QxP7t6aef5plnnkn/fPTo0RmW/fHHH+zcuZOmTZsCkJiYyO+//06/fv1ITk6mb9++pKSkMGrUKBwcHJg1axY9e/bkxx9/5JlnnqFly5YZ9jtq1CgAfHx8iIiI4MUXX6Ru3bpMmDABR0dHNm3axNdff02JEiXo37//HcchMTERHx8fPv/889tmz+wxnj17lrfeeouWLVvyzjvvpH9N3Glf//Xqq6/So0cP3nnnHZYtW8bbb7/N3Llzad26NQkJCfTq1YsiRYowatQo3N3d2b9/PzNnzsTZ2ZkxY8aQkJBAv3
79KF26NDNmzGDfvn2MGjWKUqVK0apVq0xlyKqePXuybt06Pv30U9q0aYOjoyPvv/8+devW5bXXXsvUPubPn8+sWbMYO3YsTz75pE1yikjuo6IlIgXK7t27adOmDQ4ODnz55Ze3fKM9c+ZMHB0dWbRoUfo3+23atKFz585MnDiRZcuWsWrVKipUqMD48eOxs7OjefPmuLi4MHLkSDZu3Ei7du2YM2cOjRs3ZsSIEQA0btyY5s2bc/36dcaPH0/t2rUBiImJ4csvv+T111/HYrHwww8/0L9/f3r06AFAsWLF6N69O56enkyePBkHBwdatWrFsWPHmDt37m2L1oULF/jyyy+pVasWgYGBDzRehw8f5ueff8bX15eYmBgArl+/zqxZs+jevXv68bVo0QJPT09GjBhBnz59qFq16j337ebmRt26dQHYvHkzQPrn2WXlypWEhYWxcuVKfH19Aahfvz5du3Zl9+7dt/z5x8XFMXny5DuOnY+PT4aM/y6ELVq0wMfHh1WrVqUXrT///JP4+Hi6du3KgQMHOHXqFN999x316tVLz9KxY0dmzZrFjBkz8PHxybDff7/Xli1b8PX15fPPP09f3qxZM7Zu3crOnTvvWrQSEhIoXLjwHbNn9hiDgoJISUnhnXfeoVq1avfc13/17NmTgQMHAtCyZUuefPJJvvjiC1q3bk1oaCg+Pj58+umnlC1bFoAmTZpw8OBBdu3aBUBYWBh16tTh/fffp2zZsrRo0YIlS5awefNmmxUtk8nE+PHjefzxx5k0aRJms5lr166xcOFCzGbzPbf//vvvmTRpEmPGjOHpp5+2SUYRyZ106aCIFCg1a9ZkwoQJeHh4MHz48FvO+uzatYu2bdtm+MbR3t6exx57jCNHjhAXF8cnn3zCqlWrsLOzIzU1ldTUVB566CHs7OzYvXs3qampBAUF0aJFi/R9ODk54e/vj4uLS3rJghvfnCcmJnL06FGOHj1KUlJS+lkNuPGNtpOTE35+fjg4OGTYLjAwMMOlejd9+umnNGzYkLZt2z7QWFmtVsaOHcvTTz9NjRo10l/fv38/iYmJtGvXLv34U1NT0y8T3Lp1a4b9/Hud/15Wl9kc97vt3r17KVOmTHrJAnBxceGPP/7IcNbmplmzZlGkSBGef/75LL+XnZ0dTz75JOvWrSMhIQG4UfSaNWuGj48PjRo14sCBA9StW5e0tDRSU1MpXLgwzZs3Z/fu3ffcf4sWLfj2229xcnLixIkTbNiwgdmzZxMVFZXh8tPbuXDhAu7u7lk+pv+qVasW9vb2fPvtt4SFhZGcnExqaipWqzVT2//7bI7JZKJjx44cOnSIxMREfH19WbJkCaVLlyY0NJSNGzcyf/58Tp06lX58VapUYfbs2ZQtW5bk5GQ2bdpEdHQ0lStXzvA+Foslw9fd7fLdXCcz2cuWLcvgwYNZuXIlP/30EyNGjEgvg3fz999/M3r0aBo2bMizzz57z/VFJH/RGS0RKVDc3Nx48sknqVSpEs8//zxvv/02P/zwQ/pPpqOjoylWrNgt2xUrVgyr1UpsbCyFChXCyckJuPGN57/FxMRw5coV0tLSKFKkSIZlnp6eeHh4ZHjt5qVXly9fTi9N/93Ow8MDT0/PW7ZLTU3NcO8K3CiK69ev55dffmHNmjWZGZI7WrVqFaGhocyZM4dPP/00/fVr164B3PEMSkRERPrvw8LCbhmj+8mxatUqTCYTRYsWpUGDBrz11lu3fHN9O9euXaNo0aKZep/Q0FAWLlzIV199RXh4+H1lfeqpp5gzZw7r1q2jSZMmbN++ncmTJ6cvd3R0BG7ct/Xve3Uyc2bEYrEwdepUvvvuO+Lj4ylZsiR+fn7pX4t3ExYWRunSpe/jiDIqW7YskyZNYurUqemXed7UqFGje25fokSJDJ8XLVoUq9VKTEwMzs7OfP3118yZM4dr165RrFgxateujYuLyy33P8bExBAQEABA8eLFeeSRRzIsf+mll2
557//mmzVrFrNmzcJsNlOsWDFatGjBW2+9dceJcR599FEmTJgAQPPmze95rACBgYG0adOGf/75h7/++uu+Hu8gInm
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"silhouette_scores = []\n",
"for i in clusters_range[1:]: \n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" labels = kmeans.fit_predict(data_scaled)\n",
" score = silhouette_score(data_scaled, labels)\n",
" silhouette_scores.append(score)\n",
"\n",
"# Построение диаграммы значений силуэта\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
"plt.title('Коэффициенты силуэта для разных k')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Коэффициент силуэта')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Средний коэффициент силуэта: 0.250\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1YAAAJzCAYAAAAMSoJaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddZyU1f7A8c907Wx3s7AsCyzs0imNiBhgoChgd9e9Xv15w6vXTuxCQRQLREK6e4FdOra7d2an8/n9MTCyAgoq5nm/XrxgnjzPmWeG5zvnnO+RSZIkIQiCIAiCIAiCIPxk8t+6AIIgCIIgCIIgCH90IrASBEEQBEEQBEH4mURgJQiCIAiCIAiC8DOJwEoQBEEQBEEQBOFnEoGVIAiCIAiCIAjCzyQCK0EQBEEQBEEQhJ9JBFaCIAiCIAiCIAg/kwisBEEQBEEQBEEQfiYRWAmCIAiCIAiCIPxMIrASBOGsmjp1KllZWe3+9OnTh2nTprFt27bfuniCIPzJZWVl8eqrr56w/PDhwwwcOJBhw4ZRVlZ2yv1fffVVsrKyyMnJwWq1nnSbTz75hKysLEaOHPlLFVsQhD8gEVgJgnDWde3alblz5zJ37lzmzJnDU089hUql4vrrr+fIkSO/dfEEQfiLOXLkCNdccw06nY7Zs2eTnp7+o/t4vV5WrVp10nWLFy/+hUsoCMIfkQisBEE460JCQsjNzSU3N5fevXszevRoXn31VeRyOV999dVvXTxBEP5CiouLmT59OgaDgdmzZ5OSknJa+/Xq1YslS5acsLy+vp78/Hyys7N/6aIKgvAHIwIrQRB+EzqdDo1Gg0wmCy6bOnUqU6dObbfd888/T1ZWVrsAbPbs2YwaNYq8vDyuvvpqDh8+DMDHH39MVlYWpaWl7Y7x9ddfk52dTW1tLQArVqxgypQp5OXl0b17d8aNG8fHH3/cbp+///3vJ3RhPPanqqoquM33u/58+umnJ3Q9Wrx4MePHjyc3N5dJkyaRn5/fbp8fK8/WrVvJyspi69at7fb7fn2dTv253W6efvpphg0bRnZ2drvr+qEg9/vHfuKJJ8jJyWHdunXAd92lTvbn+HKfTt03NDTwt7/9jYEDBwbf4127dgEwcuTIH31f8vPzufrqq+nZsyf9+vXjb3/7Gy0tLcHjf/XVV2RlZVFYWMjEiRPp0aMHF1xwAd9++227clgsFv73v/8xevRocnJymDBhAl988UW7bY4vT5cuXejbty933nknra2tp6xLgJKSEu644w769etH3759ufnmmykuLj7l9j9Uv8e/b2VlZdx1110MHjyY3Nxcpk6dyo4dO4Lrq6qqgvstWLCg3TlWr14dXHe8xYsXM2nSJPLy8hg8eDCPPfYYZrP5hLId72T34siRI/n73/9+ytffd6ysx1/fzp07mTx5Mjk5OQwePJjHH38cp9N5ymN8X3FxMdOmTcNoNDJ79mwSExNPe9/x48ezYcOGE7oDfvvtt3To0IEuXbqcsM+KFSuYNGlSsLz//e9/sdvtJ2xzOp//zZs3c91119GzZ08GDx7Ms88+i8/nC263ceNGLr/8cvLy8ujbty+33nrrD95TgiD88kRgJQjCWSdJEl6vF6/Xi8fjobGxkeeffx63280ll1xyyv0qKiqYOXNmu2XLli3j8ccf5/zzz+e1117D5/Nxyy234Ha7ueCCC9BoNHz99dft9pk/fz4DBw4kISGBNWvWcPvtt9OtWzdef/11Xn31VVJSUvjPf/5DYWFhu/1iYmKCXRjnzp3Lrbfe+oPXaTabeemll9ot2717Nw888AC5ubm88cYbJCQkcMstt9DU1ARwRuU5Uyerv3feeYcPP/yQ6dOn8+GHHzJ37lxmzJhxRsfdvXs3n3zyCS+99BJ5eXnt1h1fX4899li7dadzrTabjSuvvJKtW7fy4IMPMmPGDDQaDddddx1lZWXMmDGjXZlvvfXW4PliY2PZvn0711
xzDVqtlpdeeol//OMfbNu2jWnTpp3wAH7zzTczatQoZsyYQYcOHbjnnntYu3YtAE6nkylTpvDNN99www038Prrr9O7d28eeeQR3nzzzXbHGTZsGHPnzmXWrFncf//9bNy4kSeeeOKU9VdfX8/kyZMpKyvjX//6F88++yxNTU1Mnz4dk8n0g3V/fP1+/30rKipi0qRJVFVV8eijj/Lcc88hk8mYPn36CeMZDQbDCd3aFi9ejFze/rHg9ddf57777iM3N5dXXnmF22+/naVLlzJ16tQzCmh+CbW1tVx//fVEREQwY8YM7rrrLr7++mseeuih09q/pKSE6dOnExISwuzZs4mLizuj85977rn4fL6T1tv5559/wvbffPMNt99+OxkZGbz22mvccccdLFiwgNtuuw1JkoAz+/w/8MAD9O7dmzfffJMJEybw7rvv8vnnnwNQWVnJbbfdRvfu3XnjjTd44oknKC0t5aabbsLv95/RdQqC8NMpf+sCCILw57d9+3a6det2wvL77ruPjh07nnK/J598kszMTPbt2xdc1tLSwpQpU7jvvvuAQAvMsV/7s7OzGTNmDAsWLODuu+9GJpNRV1fHli1bePbZZ4HAw+fEiRN55JFHgsfMy8ujf//+bN26lZ49ewaXq9VqcnNzg69LSkp+8DpfeeUVEhMT27VW1NXVce655/Lf//4XuVxOdHQ0EyZMoKCggNGjR59Rec7Uyepv9+7ddOnSheuuuy647FhLz+k61mI4atSoE9YdX18ul6vdutO51nnz5lFdXc28efOCXat69erFxRdfzPbt27nsssvalTk1NbXdOZ9//nk6dOjAW2+9hUKhAKBnz56cf/75fPnll1x11VXBbadOncrtt98OwNChQ5k4cSKvvfYaw4YN46uvvuLw4cN8+umnweBx6NCheL1eXn/9da644grCw8MBiIyMDJahb9++bNq0qV2df9/MmTNxu9188MEHxMTEANClSxeuvPJKCgsLGTZs2Cn3Pf5av/++zZgxA7VazUcffURISAgAw4cPZ8KECTzzzDPtWtvOOecc1q9fj9vtRq1W43K5WLlyJX379g22MJrNZt544w0uv/zydkFy586dueqqq06oz7PtnXfeISIigtdeey343srlch599FEOHTp0QqvZ8crKypg2bRpNTU14PJ6fFGxER0fTt29flixZwoUXXghAdXU1hYWFPPPMM7zxxhvBbSVJ4rnnnmPo0KE899xzweXp6elcc801rF27luHDh5/R5/+yyy4L3q8DBw5kxYoVrFmzhiuuuILdu3fjdDq5+eabgwFjfHw8K1euxG63B+8HQRDOLhFYCYJw1nXr1o1///vfQOCBo62tjXXr1vHiiy9it9u59957T9hn3bp1bNq0iXfeeYdp06YFl19xxRUA+P1+7HY7y5YtQ6vVkpSUBMCll17KwoULyc/Pp2/fvsyfPx+DwcCYMWMAuOGGG4BAy0hpaSkVFRXs2bMHCARpP9Xhw4eDrRbHyggwduxYxo4diyRJ2O12lixZglwup0OHDme1PKeqv5ycHN5++22WLl3KgAEDMBgMp/2QKUkSu3btYvHixSe0hJ2O07nWHTt2kJyc3G68ik6nY+nSpT96fIfDQWFhIddff32wlRQgJSWFjh07snHjxnaBwMSJE4P/lslkjBkzhldffRWn08m2bdtISko6oUXuwgsv5IsvvmgXAB07l9/v5+DBg+zYsYNBgwadspw7duwgNzc3GFRB4CF49erVP3qNP2Tbtm2MGDGi3UO0UqkMtu7abLbg8gEDBrBu3Tq2bt3K0KFDWbduHSEhIfTp0ycYWBUUFOB2u5kwYUK78/Tp04ekpCS2bdv2swOrY3Unl8tPaC07xu/34/V6yc/PZ8iQIcGgCgIBIgTq9IcCq4ULF9K9e3defPFFrrvuOh588EFmzpzZ7pw+ny/YkgSBe+L4c0GgO+B///tfrFYrISEhLFq0iG7dupGWltZuu5
KSEurq6rj55puD9yEEAu+QkBA2btzI8OHDz+jz//17MT4+PtitsGfPnmg0Gi699FLGjRvHOeecQ//+/enRo8cp60Q
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.cluster import KMeans\n",
"\n",
"# ========================\n",
"# Применение K-Means\n",
"# ========================\n",
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
"df_clusters = kmeans.fit_predict(df_scaled)\n",
"\n",
"# ========================\n",
"# Оценка качества кластеризации\n",
"# ========================\n",
"silhouette_avg = silhouette_score(df_scaled, df_clusters)\n",
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
"\n",
"# ========================\n",
"# Визуализация кластеров\n",
"# ========================\n",
"from sklearn.decomposition import PCA\n",
"\n",
"pca = PCA(n_components=2)\n",
"df_pca = pca.fit_transform(df_scaled)\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
"plt.title('Визуализация кластеров с помощью K-Means')\n",
"plt.xlabel('Первая компонентa PCA')\n",
"plt.ylabel('Вторая компонентa PCA')\n",
"plt.legend(title='Кластер', loc='upper right')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Средний коэффициент силуэта, равный 0.250, указывает на слабо выраженную структуру кластеров: группы выделяются, но заметно перекрываются. \n",
"\n",
"Средний коэффициент силуэта (silhouette score) указывает на качество кластеризации, измеряя, насколько хорошо точки внутри одного кластера близки друг к другу по сравнению с точками из других кластеров. Значения коэффициента силуэта находятся в диапазоне от -1 до 1:\n",
"\n",
"1: Указывает на идеально плотные и четко разделенные кластеры. \n",
"0: Указывает на перекрытие кластеров или слабую структуру кластеризации. \n",
"Отрицательные значения: Указывают, что точки в кластере расположены ближе к другому кластеру, чем к своему."
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Средний коэффициент силуэта (агломеративная кластеризация): 0.225\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1YAAAJzCAYAAAAMSoJaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gU1frA8e/2bDab3juBEJKQkNBBelEERMGCUi3Ye7mWn3rVa7kWrBQLVkAUkSIKSu89QEILJb33bLLZ3n5/LNlLDCgo9vN5Hh7NzM7MmbNnZ/edc847EpfL5UIQBEEQBEEQBEH4xaR/dAEEQRAEQRAEQRD+6kRgJQiCIAiCIAiC8CuJwEoQBEEQBEEQBOFXEoGVIAiCIAiCIAjCryQCK0EQBEEQBEEQhF9JBFaCIAiCIAiCIAi/kgisBEEQBEEQBEEQfiURWAmCIAiCIAiCIPxKIrASBEEQBEEQBEH4leR/dAEE4Y8ydepU9u7d22aZVqslJSWFe+65h969e/9BJRMEQRD+qv773/9iMBh4+OGHOXHiBPfddx+7du1CJpP90UUTBOE3JgIr4R8tJSWFZ555BgCHw0FjYyNffPEFt9xyC8uWLSMxMfEPLqEgCILwV3LjjTcyZcoU+vbti0Kh4OmnnxZBlSD8Q0hcLpfrjy6EIPwRpk6dCsCCBQvaLDcajfTr149Jkybx2GOP/RFFEwRBEP7CbDYbJSUlBAUF4e/v/0cXRxCE34mYYyUIP6JWq1GpVEgkEs+yqVOnegKxVq+//jpJSUksW7bMs2zhwoUMHz6czMxMpkyZwsmTJwH4/PPPSUpKorCwsM0+vvnmG5KTk6msrARg/fr1TJo0iczMTLp27cqoUaP4/PPP22zz+OOPk5SUdNZ/ZWVlntcMGzaszXZffvklSUlJzJo1y7Ns9erVjB49moyMDCZMmEBWVlabbX6uPHv27CEpKYk9e/a02e7H9XU+9We1WnnllVcYPHgwycnJbc7rzDr+sR/v+8UXXyQtLY2tW7cCMGvWrHPW15nlPp+6r6mp4bHHHqNfv36e9/jgwYMADBs27Gffl6ysLKZMmUK3bt3o3bs3jz32GA0NDZ79L1u2jKSkJHJychg/fjzp6elcccUV/PDDD23Kodfr+e9//8uIESNIS0tj7NixfP31121ec2Z5unTpQq9evbj33ntpbGw8Z10CFBQUeIbC9urVi9tvv538/Pxzvv6n6vfM962oqIj77ruPSy65hIyMDKZOncr+/fs968vKyjzbrVy5ss0xNm3a5Fl3ptWrVzNhwgQyMzO55JJL+Pe//01TU1O7sp3pbG1x2LBhPP744+f8+8day3rm+R04cICJEyeSlpbGJZdcwvPPP4/ZbD7nPlr38+ijjzJgwABSU1Pp168fjz76aJv36Gztqqys7LzbdU1NDU888QSDBw8mPT2da665hg0bNrQpR+t27777bpvlJ0+ebNeG4eK14586/zPbw4//tV7bzue60lqW1n9du3blsssua9PGztZOWuvlzOvl+dblrFmzUCgUdOzYET8/P66//vp2dfhTxzIYDEydOpWUlBQsFovnXM9VH60cDgcffPABY8eOJT09nYyMDK6//np2797d5ljZ2dncfPPNdO/enb59+/LQQw9RXV19XnUOsGTJEsaMGUPXrl0ZMmQIs2bNwuFweNY//vjjTJ06la+//pqhQ4eSmZnJ9OnTOX78uOc1re/LmXVy6tQpUlNT27ynubm5TJ48mczMTEaMGMGXX37Z5lyOHz/OPffcQ9++fUlNTWXgwIG88MILbT57P34fof17frY2sG3bNpKSkjzXgrN97i0WC8OHDz9r+xH+mURgJfyjuVwu7HY7drsdm81GbW0tr7/+Olarlauvvvqc25WUlPDpp5+2WbZ27Vqef/55xowZw5w5c3A4HNxxxx1YrVauuOIKVCoV33zzTZttVqxYQb9+/YiIiGDz5s3cff
fdpKamMnfuXGbNmkVMTAz/+c9/yMnJabNdSEgIixcv9vy78847f/I8m5qaeOutt9osO3ToEI888ggZGRm8++67REREcMcdd1BXVwdwQeW5UGerv3nz5vHZZ58xffp0PvvsMxYvXszs2bMvaL+HDh3iiy++4K233iIzM7PNujPr69///nebdedzrgaDgRtuuIE9e/bwr3/9i9mzZ6NSqbj55pspKipi9uzZbcp85513eo4XGhrKvn37uPHGG/Hy8uKtt97i//7v/9i7dy/Tpk1r9wP89ttvZ/jw4cyePZsOHTrwwAMPsGXLFgDMZjOTJk3i22+/ZcaMGcydO5cePXrw5JNP8t5777XZz+DBg1m8eDELFizg4YcfZseOHbz44ovnrL/q6momTpxIUVERzz77LK+99hp1dXVMnz4dnU73k3V/Zv3++H3Ly8tjwoQJlJWV8dRTTzFz5kwkEgnTp09vN89Ro9GwcePGNstWr16NVNr262ru3Lk89NBDZGRk8M4773D33XezZs0apk6d+rMBzcVWWVnJLbfcQkBAALNnz+a+++7jm2++4dFHHz3nNiaTiWnTppGfn88zzzzDRx99xLRp01i1ahVvvvlmm9e2vo9ntqdWP9Wu6+rquOaaa8jKyuLBBx9k1qxZREVFcffdd7cLXs+33i9WO/658w8NDW13ffvxZ+zHznZdadW67Zw5c0hISOCxxx5rd6Prp1xIXZ7pm2++8dx8OV+LFi2irq6Ozz77DKVS6VmekpLS5v2+5ppr2mw3c+ZM5s6dy8SJE/nwww95/vnn0el03H///ZhMJgCOHTvGlClTsFgsvPrqqzz33HMcOXKEW2655bzq/P333+fpp5+mX79+vPfee0yePJl58+bx9NNPtylLbm4ub775Jvfccw+vvfYajY2NTJkyhZqamnOe94svvojdbvf8bTKZuPXWW7Hb7cyaNYtx48bxzDPPeG6a1dTUMHnyZEwmEy+//DLz5s1jzJgxLFiwgPnz519Qnf+YzWbjpZde+tnXffjhhz8ZMAv/PGKOlfCPtm/fPlJTU9stf+ihh+jYseM5t3vppZdITEzk6NGjnmUNDQ1MmjSJhx56CHD3wLTe7U9OTmbkyJGsXLmS+++/H4lEQlVVFbt37+a1114D3D8+x48fz5NPPunZZ2ZmJn369GHPnj1069bNs1ypVJKRkeH5u6Cg4CfP85133iEyMrLNnfCqqiouu+wyXnjhBaRSKcHBwYwdO5bs7GxGjBhxQeW5UGerv0OHDtGlSxduvvlmz7IL/cJq7TEcPnx4u3Vn1lfrXeBW53Ouy5cvp7y8nOXLl5OcnAxA9+7dueqqq9i3bx/XXnttmzLHxsa2Oebrr79Ohw4deP/99z3zLbp168aYMWNYunQpkydP9rx26tSp3H333QAMHDiQ8ePHM2fOHAYPHsyyZcs4efIkX375pSd4HDhwIHa7nblz53L99dd7hh4FBgZ6ytCrVy927tzZps5/7NNPP8VqtfLJJ58QEhICQJcuXbjhhhvIyclh8ODB59z2zHP98fs2e/ZslEol8+fPx8fHB4AhQ4YwduxYXn311Ta9bYMGDWLbtm1YrVaUSiUWi4UNGzbQq1cvT09MU1MT7777Ltddd12bYKJz585Mnjy5XX3+1ubNm0dAQABz5szxvLdSqZSnnnqKEydOnPVudlFREeHh4bzyyivExMQA0LdvX3JyctoFm2e+jz/2U+36k08+oaGhgTVr1hAVFQW4g7Qbb7yRV199lbFjx3oCp0GDBvHDDz9QU1PjCdy+//77NvUOF68d/9z5n3mNa72+JScnEx0dfdZ6gLNfV1qduW1ERAQbN24kNzeXDh06nHN/v7QuWxkMBmbOnElqaupPfu7O5HA4PPN8e/Xq1Wadj49Pm/d727ZtbdbX1NTw4IMPtunxUalU3HvvvZw4cYKMjAzee+89/P39+fjjj1GpVACEhoby8MMPk5+f/5N1rtfrPYHbU0
89BcCAAQPw9/fnqaee4qabbvLMS9br9bz33nv07NkTgPT0dEaMGMH8+fN55JFH2p33mjVryMnJafN+lJeXk5aWxv/
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import AgglomerativeClustering\n",
"\n",
"# ========================\n",
"# Агломеративная кластеризация\n",
"# ========================\n",
"agg_cluster = AgglomerativeClustering(n_clusters=3) \n",
"labels_agg = agg_cluster.fit_predict(df_scaled)\n",
"\n",
"# ========================\n",
"# Оценка качества кластеризации\n",
"# ========================\n",
"silhouette_avg_agg = silhouette_score(df_scaled, labels_agg)\n",
"print(f'Средний коэффициент силуэта (агломеративная кластеризация): {silhouette_avg_agg:.3f}')\n",
"\n",
"# ========================\n",
"# Визуализация кластеров\n",
"# ========================\n",
"pca = PCA(n_components=2)\n",
"df_pca = pca.fit_transform(df_scaled)\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=labels_agg, palette='viridis', alpha=0.7)\n",
"plt.title('Визуализация кластеров с помощью агломеративной кластеризации')\n",
"plt.xlabel('Первая компонентa PCA')\n",
"plt.ylabel('Вторая компонентa PCA')\n",
"plt.legend(title='Кластер', loc='upper right')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Значение коэффициента силуэта лежит в диапазоне от -1 до 1. Ближе к 1: Хорошо сформированные, плотные кластеры, четко отделенные друг от друга. \n",
"\n",
"Ближе к 0: Кластеры пересекаются или слабо разделены, не имеют четких границ. Точки расположены одинаково близко как к своему кластеру, так и к соседним. \n",
"Ближе к -1 (Отрицательные значения): Некоторые точки скорее относятся к другим кластерам, чем к текущему (в среднем они ближе к точкам соседнего кластера, чем к точкам своего). Очень плохая кластеризация. \n",
"Ближе к 1: Все точки внутри каждого кластера плотно сгруппированы и значительно удалены от точек других кластеров. Свидетельствует о четкой и хорошо разделенной структуре данных. Единица говорит об идеальной кластеризации.\n",
"\n",
"Значение 0.225 указывает на то, что границы между кластерами нечёткие; возможные причины — неоптимальный выбор числа кластеров или особенности данных, затрудняющие их разделение."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вроде усёё :)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "mai",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}