1378 lines
2.5 MiB
Plaintext
1378 lines
2.5 MiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Начинаем работу ... \n",
|
|||
|
"\n",
|
|||
|
"Датафрейм: Продажа домов в округе Кинг (вариант-6) \n",
|
|||
|
"https://www.kaggle.com/datasets/harlfoxem/housesalesprediction"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n",
|
|||
|
" 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n",
|
|||
|
" 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n",
|
|||
|
" 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"# Подключим датафрейм и выгрузим данные\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//kc_house_data.csv\")\n",
|
|||
|
"\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>date</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>bedrooms</th>\n",
|
|||
|
" <th>bathrooms</th>\n",
|
|||
|
" <th>sqft_living</th>\n",
|
|||
|
" <th>sqft_lot</th>\n",
|
|||
|
" <th>floors</th>\n",
|
|||
|
" <th>waterfront</th>\n",
|
|||
|
" <th>view</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>grade</th>\n",
|
|||
|
" <th>sqft_above</th>\n",
|
|||
|
" <th>sqft_basement</th>\n",
|
|||
|
" <th>yr_built</th>\n",
|
|||
|
" <th>yr_renovated</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>lat</th>\n",
|
|||
|
" <th>long</th>\n",
|
|||
|
" <th>sqft_living15</th>\n",
|
|||
|
" <th>sqft_lot15</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>7129300520</td>\n",
|
|||
|
" <td>20141013T000000</td>\n",
|
|||
|
" <td>221900.0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>1.00</td>\n",
|
|||
|
" <td>1180</td>\n",
|
|||
|
" <td>5650</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>7</td>\n",
|
|||
|
" <td>1180</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1955</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>98178</td>\n",
|
|||
|
" <td>47.5112</td>\n",
|
|||
|
" <td>-122.257</td>\n",
|
|||
|
" <td>1340</td>\n",
|
|||
|
" <td>5650</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>6414100192</td>\n",
|
|||
|
" <td>20141209T000000</td>\n",
|
|||
|
" <td>538000.0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>2.25</td>\n",
|
|||
|
" <td>2570</td>\n",
|
|||
|
" <td>7242</td>\n",
|
|||
|
" <td>2.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>7</td>\n",
|
|||
|
" <td>2170</td>\n",
|
|||
|
" <td>400</td>\n",
|
|||
|
" <td>1951</td>\n",
|
|||
|
" <td>1991</td>\n",
|
|||
|
" <td>98125</td>\n",
|
|||
|
" <td>47.7210</td>\n",
|
|||
|
" <td>-122.319</td>\n",
|
|||
|
" <td>1690</td>\n",
|
|||
|
" <td>7639</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>5631500400</td>\n",
|
|||
|
" <td>20150225T000000</td>\n",
|
|||
|
" <td>180000.0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>1.00</td>\n",
|
|||
|
" <td>770</td>\n",
|
|||
|
" <td>10000</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>770</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1933</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>98028</td>\n",
|
|||
|
" <td>47.7379</td>\n",
|
|||
|
" <td>-122.233</td>\n",
|
|||
|
" <td>2720</td>\n",
|
|||
|
" <td>8062</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>2487200875</td>\n",
|
|||
|
" <td>20141209T000000</td>\n",
|
|||
|
" <td>604000.0</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>3.00</td>\n",
|
|||
|
" <td>1960</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>7</td>\n",
|
|||
|
" <td>1050</td>\n",
|
|||
|
" <td>910</td>\n",
|
|||
|
" <td>1965</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>98136</td>\n",
|
|||
|
" <td>47.5208</td>\n",
|
|||
|
" <td>-122.393</td>\n",
|
|||
|
" <td>1360</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>1954400510</td>\n",
|
|||
|
" <td>20150218T000000</td>\n",
|
|||
|
" <td>510000.0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>2.00</td>\n",
|
|||
|
" <td>1680</td>\n",
|
|||
|
" <td>8080</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>1680</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1987</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>98074</td>\n",
|
|||
|
" <td>47.6168</td>\n",
|
|||
|
" <td>-122.045</td>\n",
|
|||
|
" <td>1800</td>\n",
|
|||
|
" <td>7503</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>5 rows × 21 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id date price bedrooms bathrooms sqft_living \\\n",
|
|||
|
"0 7129300520 20141013T000000 221900.0 3 1.00 1180 \n",
|
|||
|
"1 6414100192 20141209T000000 538000.0 3 2.25 2570 \n",
|
|||
|
"2 5631500400 20150225T000000 180000.0 2 1.00 770 \n",
|
|||
|
"3 2487200875 20141209T000000 604000.0 4 3.00 1960 \n",
|
|||
|
"4 1954400510 20150218T000000 510000.0 3 2.00 1680 \n",
|
|||
|
"\n",
|
|||
|
" sqft_lot floors waterfront view ... grade sqft_above sqft_basement \\\n",
|
|||
|
"0 5650 1.0 0 0 ... 7 1180 0 \n",
|
|||
|
"1 7242 2.0 0 0 ... 7 2170 400 \n",
|
|||
|
"2 10000 1.0 0 0 ... 6 770 0 \n",
|
|||
|
"3 5000 1.0 0 0 ... 7 1050 910 \n",
|
|||
|
"4 8080 1.0 0 0 ... 8 1680 0 \n",
|
|||
|
"\n",
|
|||
|
" yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
|
|||
|
"0 1955 0 98178 47.5112 -122.257 1340 \n",
|
|||
|
"1 1951 1991 98125 47.7210 -122.319 1690 \n",
|
|||
|
"2 1933 0 98028 47.7379 -122.233 2720 \n",
|
|||
|
"3 1965 0 98136 47.5208 -122.393 1360 \n",
|
|||
|
"4 1987 0 98074 47.6168 -122.045 1800 \n",
|
|||
|
"\n",
|
|||
|
" sqft_lot15 \n",
|
|||
|
"0 5650 \n",
|
|||
|
"1 7639 \n",
|
|||
|
"2 8062 \n",
|
|||
|
"3 5000 \n",
|
|||
|
"4 7503 \n",
|
|||
|
"\n",
|
|||
|
"[5 rows x 21 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.head()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>id</th>\n",
|
|||
|
" <th>price</th>\n",
|
|||
|
" <th>bedrooms</th>\n",
|
|||
|
" <th>bathrooms</th>\n",
|
|||
|
" <th>sqft_living</th>\n",
|
|||
|
" <th>sqft_lot</th>\n",
|
|||
|
" <th>floors</th>\n",
|
|||
|
" <th>waterfront</th>\n",
|
|||
|
" <th>view</th>\n",
|
|||
|
" <th>condition</th>\n",
|
|||
|
" <th>grade</th>\n",
|
|||
|
" <th>sqft_above</th>\n",
|
|||
|
" <th>sqft_basement</th>\n",
|
|||
|
" <th>yr_built</th>\n",
|
|||
|
" <th>yr_renovated</th>\n",
|
|||
|
" <th>zipcode</th>\n",
|
|||
|
" <th>lat</th>\n",
|
|||
|
" <th>long</th>\n",
|
|||
|
" <th>sqft_living15</th>\n",
|
|||
|
" <th>sqft_lot15</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>count</th>\n",
|
|||
|
" <td>2.161300e+04</td>\n",
|
|||
|
" <td>2.161300e+04</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>2.161300e+04</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" <td>21613.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>mean</th>\n",
|
|||
|
" <td>4.580302e+09</td>\n",
|
|||
|
" <td>5.400881e+05</td>\n",
|
|||
|
" <td>3.370842</td>\n",
|
|||
|
" <td>2.114757</td>\n",
|
|||
|
" <td>2079.899736</td>\n",
|
|||
|
" <td>1.510697e+04</td>\n",
|
|||
|
" <td>1.494309</td>\n",
|
|||
|
" <td>0.007542</td>\n",
|
|||
|
" <td>0.234303</td>\n",
|
|||
|
" <td>3.409430</td>\n",
|
|||
|
" <td>7.656873</td>\n",
|
|||
|
" <td>1788.390691</td>\n",
|
|||
|
" <td>291.509045</td>\n",
|
|||
|
" <td>1971.005136</td>\n",
|
|||
|
" <td>84.402258</td>\n",
|
|||
|
" <td>98077.939805</td>\n",
|
|||
|
" <td>47.560053</td>\n",
|
|||
|
" <td>-122.213896</td>\n",
|
|||
|
" <td>1986.552492</td>\n",
|
|||
|
" <td>12768.455652</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>std</th>\n",
|
|||
|
" <td>2.876566e+09</td>\n",
|
|||
|
" <td>3.671272e+05</td>\n",
|
|||
|
" <td>0.930062</td>\n",
|
|||
|
" <td>0.770163</td>\n",
|
|||
|
" <td>918.440897</td>\n",
|
|||
|
" <td>4.142051e+04</td>\n",
|
|||
|
" <td>0.539989</td>\n",
|
|||
|
" <td>0.086517</td>\n",
|
|||
|
" <td>0.766318</td>\n",
|
|||
|
" <td>0.650743</td>\n",
|
|||
|
" <td>1.175459</td>\n",
|
|||
|
" <td>828.090978</td>\n",
|
|||
|
" <td>442.575043</td>\n",
|
|||
|
" <td>29.373411</td>\n",
|
|||
|
" <td>401.679240</td>\n",
|
|||
|
" <td>53.505026</td>\n",
|
|||
|
" <td>0.138564</td>\n",
|
|||
|
" <td>0.140828</td>\n",
|
|||
|
" <td>685.391304</td>\n",
|
|||
|
" <td>27304.179631</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>min</th>\n",
|
|||
|
" <td>1.000102e+06</td>\n",
|
|||
|
" <td>7.500000e+04</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>290.000000</td>\n",
|
|||
|
" <td>5.200000e+02</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>290.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>1900.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>98001.000000</td>\n",
|
|||
|
" <td>47.155900</td>\n",
|
|||
|
" <td>-122.519000</td>\n",
|
|||
|
" <td>399.000000</td>\n",
|
|||
|
" <td>651.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>25%</th>\n",
|
|||
|
" <td>2.123049e+09</td>\n",
|
|||
|
" <td>3.219500e+05</td>\n",
|
|||
|
" <td>3.000000</td>\n",
|
|||
|
" <td>1.750000</td>\n",
|
|||
|
" <td>1427.000000</td>\n",
|
|||
|
" <td>5.040000e+03</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>3.000000</td>\n",
|
|||
|
" <td>7.000000</td>\n",
|
|||
|
" <td>1190.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>1951.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>98033.000000</td>\n",
|
|||
|
" <td>47.471000</td>\n",
|
|||
|
" <td>-122.328000</td>\n",
|
|||
|
" <td>1490.000000</td>\n",
|
|||
|
" <td>5100.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>50%</th>\n",
|
|||
|
" <td>3.904930e+09</td>\n",
|
|||
|
" <td>4.500000e+05</td>\n",
|
|||
|
" <td>3.000000</td>\n",
|
|||
|
" <td>2.250000</td>\n",
|
|||
|
" <td>1910.000000</td>\n",
|
|||
|
" <td>7.618000e+03</td>\n",
|
|||
|
" <td>1.500000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>3.000000</td>\n",
|
|||
|
" <td>7.000000</td>\n",
|
|||
|
" <td>1560.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>1975.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>98065.000000</td>\n",
|
|||
|
" <td>47.571800</td>\n",
|
|||
|
" <td>-122.230000</td>\n",
|
|||
|
" <td>1840.000000</td>\n",
|
|||
|
" <td>7620.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>75%</th>\n",
|
|||
|
" <td>7.308900e+09</td>\n",
|
|||
|
" <td>6.450000e+05</td>\n",
|
|||
|
" <td>4.000000</td>\n",
|
|||
|
" <td>2.500000</td>\n",
|
|||
|
" <td>2550.000000</td>\n",
|
|||
|
" <td>1.068800e+04</td>\n",
|
|||
|
" <td>2.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>4.000000</td>\n",
|
|||
|
" <td>8.000000</td>\n",
|
|||
|
" <td>2210.000000</td>\n",
|
|||
|
" <td>560.000000</td>\n",
|
|||
|
" <td>1997.000000</td>\n",
|
|||
|
" <td>0.000000</td>\n",
|
|||
|
" <td>98118.000000</td>\n",
|
|||
|
" <td>47.678000</td>\n",
|
|||
|
" <td>-122.125000</td>\n",
|
|||
|
" <td>2360.000000</td>\n",
|
|||
|
" <td>10083.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>max</th>\n",
|
|||
|
" <td>9.900000e+09</td>\n",
|
|||
|
" <td>7.700000e+06</td>\n",
|
|||
|
" <td>33.000000</td>\n",
|
|||
|
" <td>8.000000</td>\n",
|
|||
|
" <td>13540.000000</td>\n",
|
|||
|
" <td>1.651359e+06</td>\n",
|
|||
|
" <td>3.500000</td>\n",
|
|||
|
" <td>1.000000</td>\n",
|
|||
|
" <td>4.000000</td>\n",
|
|||
|
" <td>5.000000</td>\n",
|
|||
|
" <td>13.000000</td>\n",
|
|||
|
" <td>9410.000000</td>\n",
|
|||
|
" <td>4820.000000</td>\n",
|
|||
|
" <td>2015.000000</td>\n",
|
|||
|
" <td>2015.000000</td>\n",
|
|||
|
" <td>98199.000000</td>\n",
|
|||
|
" <td>47.777600</td>\n",
|
|||
|
" <td>-121.315000</td>\n",
|
|||
|
" <td>6210.000000</td>\n",
|
|||
|
" <td>871200.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" id price bedrooms bathrooms sqft_living \\\n",
|
|||
|
"count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
|
|||
|
"mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
|
|||
|
"std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
|
|||
|
"min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
|
|||
|
"25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
|
|||
|
"50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
|
|||
|
"75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
|
|||
|
"max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
|
|||
|
"\n",
|
|||
|
" sqft_lot floors waterfront view condition \\\n",
|
|||
|
"count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
|||
|
"mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
|
|||
|
"std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
|
|||
|
"min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
|
|||
|
"25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
|
|||
|
"50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
|
|||
|
"75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
|
|||
|
"max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
|
|||
|
"\n",
|
|||
|
" grade sqft_above sqft_basement yr_built yr_renovated \\\n",
|
|||
|
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
|||
|
"mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n",
|
|||
|
"std 1.175459 828.090978 442.575043 29.373411 401.679240 \n",
|
|||
|
"min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n",
|
|||
|
"25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n",
|
|||
|
"50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n",
|
|||
|
"75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n",
|
|||
|
"max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n",
|
|||
|
"\n",
|
|||
|
" zipcode lat long sqft_living15 sqft_lot15 \n",
|
|||
|
"count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
|
|||
|
"mean 98077.939805 47.560053 -122.213896 1986.552492 12768.455652 \n",
|
|||
|
"std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n",
|
|||
|
"min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n",
|
|||
|
"25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n",
|
|||
|
"50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n",
|
|||
|
"75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n",
|
|||
|
"max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.describe()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 15,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"id 0\n",
|
|||
|
"date 0\n",
|
|||
|
"price 0\n",
|
|||
|
"bedrooms 0\n",
|
|||
|
"bathrooms 0\n",
|
|||
|
"sqft_living 0\n",
|
|||
|
"sqft_lot 0\n",
|
|||
|
"floors 0\n",
|
|||
|
"waterfront 0\n",
|
|||
|
"view 0\n",
|
|||
|
"condition 0\n",
|
|||
|
"grade 0\n",
|
|||
|
"sqft_above 0\n",
|
|||
|
"sqft_basement 0\n",
|
|||
|
"yr_built 0\n",
|
|||
|
"yr_renovated 0\n",
|
|||
|
"zipcode 0\n",
|
|||
|
"lat 0\n",
|
|||
|
"long 0\n",
|
|||
|
"sqft_living15 0\n",
|
|||
|
"sqft_lot15 0\n",
|
|||
|
"dtype: int64\n",
|
|||
|
"id False\n",
|
|||
|
"date False\n",
|
|||
|
"price False\n",
|
|||
|
"bedrooms False\n",
|
|||
|
"bathrooms False\n",
|
|||
|
"sqft_living False\n",
|
|||
|
"sqft_lot False\n",
|
|||
|
"floors False\n",
|
|||
|
"waterfront False\n",
|
|||
|
"view False\n",
|
|||
|
"condition False\n",
|
|||
|
"grade False\n",
|
|||
|
"sqft_above False\n",
|
|||
|
"sqft_basement False\n",
|
|||
|
"yr_built False\n",
|
|||
|
"yr_renovated False\n",
|
|||
|
"zipcode False\n",
|
|||
|
"lat False\n",
|
|||
|
"long False\n",
|
|||
|
"sqft_living15 False\n",
|
|||
|
"sqft_lot15 False\n",
|
|||
|
"dtype: bool\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Процент пропущенных значений признаков\n",
|
|||
|
"for i in df.columns:\n",
|
|||
|
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
|
|||
|
" if null_rate > 0:\n",
|
|||
|
" print(f'{i} Процент пустых значений: %{null_rate:.2f}')\n",
|
|||
|
"\n",
|
|||
|
"print(df.isnull().sum())\n",
|
|||
|
"\n",
|
|||
|
"print(df.isnull().any())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"id int64\n",
|
|||
|
"date object\n",
|
|||
|
"price float64\n",
|
|||
|
"bedrooms int64\n",
|
|||
|
"bathrooms float64\n",
|
|||
|
"sqft_living int64\n",
|
|||
|
"sqft_lot int64\n",
|
|||
|
"floors float64\n",
|
|||
|
"waterfront int64\n",
|
|||
|
"view int64\n",
|
|||
|
"condition int64\n",
|
|||
|
"grade int64\n",
|
|||
|
"sqft_above int64\n",
|
|||
|
"sqft_basement int64\n",
|
|||
|
"yr_built int64\n",
|
|||
|
"yr_renovated int64\n",
|
|||
|
"zipcode int64\n",
|
|||
|
"lat float64\n",
|
|||
|
"long float64\n",
|
|||
|
"sqft_living15 int64\n",
|
|||
|
"sqft_lot15 int64\n",
|
|||
|
"dtype: object"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Проверка типов столбцов\n",
|
|||
|
"df.dtypes"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Атрибуты \n",
|
|||
|
"\n",
|
|||
|
"id - уникальный идентификатор дома \n",
|
|||
|
"\n",
|
|||
|
"date - Дата продажи дома \n",
|
|||
|
"\n",
|
|||
|
"price - Цена дома в долларах США \n",
|
|||
|
"\n",
|
|||
|
"bedrooms - Количество спален в доме \n",
|
|||
|
"\n",
|
|||
|
"bathrooms - Количество ванных комнат, включая дробные значения (например, 2.5 означает 2 ванные комнаты и 1 туалет) \n",
|
|||
|
"\n",
|
|||
|
"sqft_living - Жилая площадь дома (в кв. футах) \n",
|
|||
|
"\n",
|
|||
|
"sqft_lot - Общая площадь участка \n",
|
|||
|
"\n",
|
|||
|
"floors - Количество этажей в доме \n",
|
|||
|
"\n",
|
|||
|
"waterfront - Есть ли вид на воду (1 - да, 0 - нет) \n",
|
|||
|
"\n",
|
|||
|
"view - Оценка вида дома (0-4) \n",
|
|||
|
"\n",
|
|||
|
"condition - Оценка состояния дома (1 - плохое, 5 - отличное) \n",
|
|||
|
"\n",
|
|||
|
"grade - Оценка качества дома по архитектурным и строительным стандартам (1-13) \n",
|
|||
|
"\n",
|
|||
|
"sqft_basement - Площадь подвала дома. \n",
|
|||
|
"\n",
|
|||
|
"yr_built - Год постройки дома \n",
|
|||
|
"\n",
|
|||
|
"yr_renovated - Год последней реновации дома (0, если реновация не проводилась) \n",
|
|||
|
"\n",
|
|||
|
"zipcode - Почтовый индекс местоположения дома \n",
|
|||
|
"\n",
|
|||
|
"sqft_living15 - Средняя жилая площадь 15 ближайших соседних домов \n",
|
|||
|
"\n",
|
|||
|
"price_category - Категория цены дома (low, medium, high) \n",
|
|||
|
"\n",
|
|||
|
"**Цель:** Кластеризация домов на группы для определения схожих ценовых категорий и характеристик. \n",
|
|||
|
"\n",
|
|||
|
"Например: группировка домов для анализа рыночных трендов, определение похожих групп домов для маркетинговых или инвестиционных целей."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Очистка данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
" price bedrooms bathrooms sqft_living sqft_lot floors waterfront \\\n",
|
|||
|
"0 221900.0 3 1.00 1180 5650 1.0 0 \n",
|
|||
|
"1 538000.0 3 2.25 2570 7242 2.0 0 \n",
|
|||
|
"2 180000.0 2 1.00 770 10000 1.0 0 \n",
|
|||
|
"3 604000.0 4 3.00 1960 5000 1.0 0 \n",
|
|||
|
"4 510000.0 3 2.00 1680 8080 1.0 0 \n",
|
|||
|
"\n",
|
|||
|
" view condition sqft_basement yr_built \n",
|
|||
|
"0 0 3 0 1955 \n",
|
|||
|
"1 0 3 400 1951 \n",
|
|||
|
"2 0 3 0 1933 \n",
|
|||
|
"3 0 5 910 1965 \n",
|
|||
|
"4 0 3 0 1987 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Удалим несущественные столбцы\n",
|
|||
|
"columns_to_drop = ['id', 'date', 'grade', 'yr_renovated', 'sqft_living15', 'lat', 'long', 'sqft_lot15', 'sqft_above', 'zipcode']\n",
|
|||
|
"df_cleaned = df.drop(columns=columns_to_drop)\n",
|
|||
|
"\n",
|
|||
|
"print(df_cleaned.head()) # Вывод очищенного DataFrame"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Визуализация парных взаимосвязей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAASgCAYAAACAO9vxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU9b3/8fdMkslkmUyGsIQRw5iEYNgCoogLKta6VNHaxVa9wVqtrahtXG619dalrbU/q62Ke61Y02ptq5dK0Xqva92tCwErCCEMEENYwmQyWSfb749zJ8lk3+dM5vV8PHhAzpk588n5noTz/X7O9/uxtLe3twsAAAAAAAAAAMAkrJEOAAAAAAAAAAAAoCuSFwAAAAAAAAAAwFRIXgAAAAAAAAAAAFMheQEAAAAAAAAAAEyF5AUAAAAAAAAAADAVkhcAAAAAAAAAAMBUSF4AAAAAAAAAAABTIXkBAAAAAAAAAABMheQFAGDCaG9vj3QIAAAAAAAAGAUkLwDEtMLCQhUWFvb7mhtuuEEnn3zyoI+5evVqzZ49e6ShDclbb72l2bNna8WKFeP6uYOxceNGnXbaaQoGg6N63F/96ldasmSJFi5cqLVr1+rll1/W9ddf37G/rKxMJ598smpqakb1cwersLBQs2fPDvszb948nXTSSbr11lvl9/v7fX95eblmz56tZ599dpwiBgAAQCy64YYbety3dv3zj3/8Q5J08skn64YbbohwtOOP+3oAiJz4SAcAAGa3atUqrVy5ctCv//rXv65ly5aNYUQ9PfPMM8rLy9PWrVv14YcfavHixeP6+X1pamrS9ddfr//8z/+UzWYbteNu3bpVjz76qM477zydc845ys7O1g9+8IOw12RnZ+sLX/iCfv7zn+uOO+4Ytc8eijlz5ujmm2/u+Lq5uVn//ve/9etf/1qbN2/WU089JYvF0ut7p06dqqefflpZWVnjFS4AAABi1JQpU3Tffff1us/j8YxvMCbEfT0ARAbJCwAYwFBvMjMzM5WZmTlG0fRUU1Ojl156Sbfeeqsefvhh/elPfzJN8uLJJ59UfHy8TjnllFE9bnV1tSTpzDPP1JFHHtnn6y677DKddNJJuuiiizR37txRjWEwUlNTtXDhwrBtRx11lOrq6nTvvfeqpKSkx/4Qm83W5z4AAABgNHHv2T/u6wEgMlg2CgAG0HXZqJ/85Cc67rjj1NraGvaa2267TUcffbSam5t7LBtVWFioG2+8UY888ohOOukkzZ8/X9/85je1cePGsGO89tpr+spXvqIFCxbotNNO09///nd98Ytf1OrVq/uNb926dWppadGyZct09tln68UXX+wY3A959tlnNWfOHP3lL3/RcccdpyVLlqi0tFSS9NJLL+krX/mK5s+fr+OOO04///nPVV9fH/b+l156SRdccIEWLVqkefPm6fTTT9cf//jHfuMKBoNas2aNzjrrrLDtf//733X22WdrwYIFWrp0qa677jrt3bu3Y39bW5vuv/9+nXTSSSooKND3vvc9vfDCC5o9e7bKy8u1evXqjqW+LrroIp188skqLCzU+++/r/fff1+zZ8/We++9J8l4gmzp0qV6+OGH+4zztNNO0/e///0e28855xxdfvnlkqRdu3bpe9/7no4++mgVFBToG9/4hl5//fV+v//+zJs3T5JUUVEhybhGrrvuOn3/+9/XwoULdfHFF/c6vbysrExXXnmllixZoqOOOkrf/e53tX379o79TU1NuuOOO3TiiSdq3rx5WrFihZ5//vlhxwkAAAD0JRAI6Pbbb9cpp5yi+fPn66yzztJf//rXsNe0trbqj3/8o1asWKEFCxbopJNO0p133qmmpqaO19xwww266KKLdPPNN+uII47Ql770JbW2tuqtt97Seeedp0WLFumoo47S5ZdfHnbv2x339QAw8ZC8AIAhOOecc3TgwIGOwXHJGGx/4YUXdOaZZyohIaHX97
344ot6+eWX9V//9V/69a9/rQMHDuiqq67qSIK8++67WrVqlaZPn67Vq1frwgsv1M0336w9e/YMGNMzzzyjZcuWafLkyfryl7+s5uZm/fd//3eP17W2tuqxxx7Tbbfdph/96EfKycnRunXrdMUVVyg7O1v333+/rrzySj333HNatWpVR/Hr1157TVdccYXmzp2rBx54QKtXr9ahhx6qn/70pyopKekzrvfee0979+7Vqaee2rHtww8/1A9/+EOdeuqp+u1vf6sf/ehHevfdd3Xttdd2vOaOO+7QAw88oK9+9atavXq10tLSwqZof/3rX9dNN90kSbrpppt033336eabb9acOXM0Z84cPf3002GzLE4//XS98sorqqur6zXOs88+W6+//rpqa2s7tm3fvl1btmzROeeco7a2Nn33u99VQ0NDR2zp6em6/PLLtXPnzoGap1c7duyQJB166KEd21544QWlpKTowQcf1KWXXtrjPXv37tU3vvENeb1e3XLLLfrVr36lAwcO6KKLLlJ1dbXa29t1xRVX6E9/+pMuvvhiPfjgg1q0aJGuvvpqrV27dlhxAgAAIHa0tLT0+BPqE3TX2NioCy64QOvWrdOll16qBx54QIsXL9aNN96ohx56qON1N910U0eC48EHH9SFF16oP/zhD2H9DUn64IMPtGfPHt1///269tprVVFRoVWrVmnevHl68MEHddttt2nHjh267LLL1NbW1mtM3NcDwMQTE8tGPfzww3rzzTdVXFw8pPetXbtWjzzyiHbv3q2srCxdeeWVOuOMM8YoSgDRYPHixTrkkEP097//Xccee6wkY5B+//79Ouecc/p8X0tLi373u98pNTVVklRXV6frr79emzdv1rx587R69WrNmjVL9913X8daqRkZGbrmmmv6jeezzz7Tv//9b917772SJLfbraVLl+rpp5/WxRdf3OP13/ve93TSSSdJktrb23XnnXdq2bJluvPOOzte4/F49K1vfUuvv/66TjrpJJWWlurcc8/VjTfe2PGaRYsW6eijj9Z7772ngoKCXmN79913lZaWpsMOO6xj24cffii73a7LLrusowZGenq6Nm3apPb2dtXU1OgPf/iDVq5cqauuukqSdMIJJ+jiiy/W22+/LclYlis3N1eSlJubqzlz5khSx7ntPiV7/vz5am5u1gcffKATTzyxR5xnn322Vq9erZdeeklf/vKXJRmzQ9LS0nTyySerqqpKZWVlWrVqVcf7FyxYoPvuu2/AIuTt7e1qaWnp+Nrv9+v999/v6ICEntSSpISEBN16660d56W8vDzsWI8//njHbJYpU6ZIkg4//HCdf/75KikpUXx8vN544w395je/0Ze+9CVJ0rJly9TQ0KA777xTZ511luLjY+K/fQCIGPodAKLV559/3usyq9dee60uu+yyHtufffZZbd26VX/605+0aNEiSca9Z0tLix544AF985vf1IEDB/TXv/417BjHHXecpk6dqh/+8If65z//2XF/3dLSop/+9Kcdy++uX79ejY2N+u53v6tp06ZJMvoBL7/8surr6zvu/bvivh4AJp4JP/Pij3/8o+6+++4hv+9vf/ubbrzxRl144YVav369zjrrLF1zzTX6+OOPRz9IAFHDYrHo7LPP1ksvvdRxg7t+/Xp5PJ4+B/ElY5C96w126Aa8oaFBwWBQH3/8sU499dSwIm+nn376gDelzzzzjNLS0nTkkUeqpqZGNTU1Ou2007Rjxw69++67PV6fn5/f8e+ysjJVVlbq5JNPDnu66qijjlJqaqreeustSdKll16qX/7yl6qrq9Mnn3yi559/vmMZpv5u8nfv3q1DDjkkbNtRRx2lhoYGnXXWWbrrrrv0wQcf6Pjjj9eVV14pi8WiDRs2qLm5WV/4whfC3nf22Wf3ex76E4qhe6ch5NBDD9URRxwRNg17/fr1Ov3002Wz2TR58mTl5ubqJz/5ia6//nqtW7dObW1t+tGPfq
RZs2b1+9n/+te/NHfu3I4/xx57rK655hrNmzdPd911V1h7Z2dn91vU/MMPP9TChQs7OjiS0YF79dVXdeKJJ+qdd96
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Настройка стиля графиков\n",
|
|||
|
"sns.set(style=\"whitegrid\")\n",
|
|||
|
"\n",
|
|||
|
"# Создание фигуры\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"\n",
|
|||
|
"# График 1: Площадь vs Цена\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], alpha=0.6, color='blue')\n",
|
|||
|
"plt.title('Living Area (sqft) vs Price')\n",
|
|||
|
"plt.xlabel('Living Area (sqft)')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# График 2: Количество этажей vs Цена\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['floors'], y=df_cleaned['price'], alpha=0.6, color='green')\n",
|
|||
|
"plt.title('Floors vs Price')\n",
|
|||
|
"plt.xlabel('Floors')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# График 3: Количество ванных комнат vs Цена\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['bathrooms'], y=df_cleaned['price'], alpha=0.6, color='red')\n",
|
|||
|
"plt.title('Bathrooms vs Price')\n",
|
|||
|
"plt.xlabel('Bathrooms')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# График 4: Площадь участка vs Цена\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['sqft_lot'], y=df_cleaned['price'], alpha=0.6, color='purple')\n",
|
|||
|
"plt.title('Lot Area (sqft) vs Price')\n",
|
|||
|
"plt.xlabel('Lot Area (sqft)')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# Упорядочиваем графики\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Стандартизация данных для кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Стандартизация данных с помощью StandardScaler\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(df_cleaned)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArsAAAImCAYAAABTm0IfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d5xddZ3///ycduvc6amThJCE0HvHgqjoKupaF0X9rqtfG7q6rvWnu7a1LcqqIPJlRV1RFlxBRHStgGCjCAihhfSZTDL99nvq5/P749x7M5OZJJNkkkkmn+fjkTK3nPO5dV7nfV7v11sopRQajUaj0Wg0Gs0cxJjtBWg0Go1Go9FoNAcKLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go1Go9FoNHMWLXY1Go3mEETP+9HsDv3+0Gimjxa7Gs0Rxpve9CZWr1494c+JJ57IhRdeyKc//WkKhcKk+2zcuJFPfepTvOAFL+Dkk0/mwgsv5AMf+ABPPfXULvfzH//xH6xevZrPfvaze1zTW97yFs4++2x839/lbV72spdx2WWXAbB69WquuuqqaTzameWjH/0oF110UfPniy66iI9+9KMzuo/t27fz9re/na1btx7Q/ewP5XKZd77znZxyyimcddZZbNq0adJt7rvvPlavXs1999035TauuuoqVq9efYBXOjVvetObeNOb3nTQ9jX+s3bsscdy2mmn8apXvYrvfe97hGG419t85plneP3rX38AVqvRzE2s2V6ARqM5+Bx//PF88pOfbP4cBAGPP/44V155JU8++ST//d//jRACgF/96ld8+MMfZtWqVbzrXe+ip6eH7du381//9V+87nWv45vf/CYXXHDBhO1LKbnttts45phj+MlPfsIHP/hBUqnULtfz6le/mj/+8Y/cc889vOAFL5h0/eOPP87atWv50pe+BMDNN9/MggULZuKp2C+uvvpqstnsjG7zj3/8I7/73e8O+H72h9tuu4277rqLf/3Xf2XVqlX09PTM9pL2ivHv/YPB+M9bFEUUCgXuuecevvCFL/Dggw/y1a9+FcOYfu3pF7/4BQ8//PCBWq5GM+fQYlejOQLJZrOceuqpEy4766yzqFQqfP3rX+evf/0rp556Klu2bOEjH/kIz372s/nqV7+KaZrN21988cW8/vWv5yMf+Qh33nknjuM0r/v973/P9u3bufLKK3njG9/IHXfcwWtf+9pdrueFL3whra2t3H777VOK3R//+Mdks1le9KIXAUxa+2xx/PHHz6n9TJd8Pg/AG97whuZB0eHEypUrD+r+pvq8XXTRRRx99NF87nOf44477uDlL3/5QV2TRnMkoW0MGo2myYknnghAf38/ADfccAO+7/OJT3xigtAFSKVSfOQjH+HVr371JOvDLbfcwjHHHMMZZ5zBOeecw80337zb/SYSCS655BLuvvtuyuXyhOuCIOBnP/sZL33pS5vV4Z1tDP/1X//Fi1/8Yk466SSe/exn86lPfaq5nb6+PlavXs2tt946Ybs7WxKiKOK6667jkksu4eSTT+bUU0/l0ksv5c9//vMu1z3eXtA4LT/Vn8Za97SPW2+9lY997GMAPP/5z29ue2cbQ6lU4gtf+AIveMELOOmkk7jkkkv40Y9+NGltX//61/nSl77E+eefz8knn8xb3/rWKS0H4/E8j2984xvN5/Piiy/muuuuQ0oJxKflG4/n2GOPnVF7xWOPPcZb3/pWzjnnHE4//XTe+c538swzzzSvv/XWW1m9ejV9fX2THuv4dfzhD3/gda97HaeddhpnnXUW73rXu1i/fn3z+p1tDKtXr+YHP/gBH//4xzn77LM57bTTeN/73sfw8PCE/Vx//fU8//nP5+STT+bSSy/lzjvv3K1VY0+88Y1vZP78+dx0003Ny1
zX5Stf+QoXX3wxJ554IqeffjpvectbePLJJ4H4fXb11Vc31914LUZHR/n0pz/N8573PE488UTOPvtsLr/88knPlUZzJKLFrkajabJx40YAlixZAsC9997L8ccfz/z586e8/Xnnncc//dM/0d3d3bwsn89z55138rd/+7cAvPKVr+Sxxx7j8ccf3+2+X/3qV+N5Hr/85S8nXH7PPfcwOjq6y8rwHXfcwRVXXMFll13G9ddfz+WXX85PfvKTaXmFx/PlL3+Za665hr/7u7/jW9/6Fp/97GfJ5/O8733vo1ar7fH+r33ta7n55psn/DnjjDPIZDK85CUvmdY+LrzwQt71rncBsXXh3e9+96T9uK7LG97wBn7605/ytre9jWuuuYYzzjiDj3/841x77bUTbvu9732PDRs28IUvfIF/+7d/Y82aNXzkIx/Z5WNQSvHOd76Tb33rW7z2ta/l2muv5cUvfjFf/epXm6fhP/nJT/Ka17wGiO0kU61xPFJKwjCc9Kchnhv8+c9/bvpQP//5z/Nv//ZvbNu2jUsvvXSCUN0Tvb29vPvd7+bEE0/km9/8Jp/73OfYuHEjb3/72yftczz/8R//gZSSK6+8kg9/+MPcddddfP7zn29ef/XVV/PlL3+Zv/mbv+Gaa67hlFNO4f3vf/+01zUVhmFw3nnn8eijjza9ux/+8Ie55ZZbePvb3863v/1tPvaxj/HMM8/wz//8zyileO1rXzvh+X/ta1+LUop3vOMd/OEPf+CDH/wg119/Pe95z3v405/+dNAtGxrNoYi2MWg0RyBKqQmNMYVCgfvvv59vfvObnHbaac0K7/bt2znuuOP2ats//elPkVLyile8AojtDp/5zGe46aabditATzjhBI477jh++tOf8upXv7p5+W233cbq1as56aSTprzf/fffT09PD5dddhmGYXD22WeTTqenbLTbHYODg/zTP/3ThIpfIpHgve99L08//fQerRMLFiyY4CP+7ne/y0MPPcTVV1/NihUrpr2PpUuXAnDcccdN6YW99dZbWbt2LTfddBOnnXYaAM9+9rMJw5BrrrmGSy+9lLa2NgByuRzXXHNNsyq/ZcsWrrrqKsbGxmhvb5+07XvuuYc//vGPXHnllbz0pS8F4IILLiCZTPK1r32NN7/5zaxatar5OKdjJ/n7v//7Pd4G4Ctf+QrLli3juuuua673Wc96Fi984Qv5+te/zte+9rVpbefRRx/FdV3e8Y53NA/SFixYwG9/+1uq1eouvc/HHHMMX/jCFyZs5xe/+AUA1WqV//zP/+Syyy7jgx/8YHNttVptj2ct9kRXVxdBEJDP58nlclQqFT7xiU80D5DOPvtsyuUyX/ziFxkeHp7wPms8/wMDA80zLWeeeSYA55xzDlu2bNnv9Wk0cwEtdjWaI5AHHniAE044YcJlhmFw/vnn85nPfKbpwzRNkyiK9mrbt9xyC+eccw6O41AsFoH4NPMdd9zBRz7ykd02Wr361a/m85//PAMDA8yfP598Ps9dd93Fhz/84V3e59xzz+Xmm2/mVa96FS94wQt47nOfy8te9rK99pJ+5StfAeLTwRs2bGDz5s3cddddALtNiZiKe++9l3//93/n3e9+9wQP8kzs4/7772fx4sVNodvg5S9/OT/60Y/461//ynOf+1wATjrppAn2k4ZIqtVqU4rd+++/H8uyePGLXzxp21/72te4//77WbVq1bTW2eDTn/70pPcawA9/+EN++MMfArGYfOyxx3jPe94zYb25XI7nPe95kxr2dscpp5xCIpHgNa95DS9+8Yt5znOewznnnMPJJ5+82/vtLNwXLFjQrOg/8sgjuK476Xm55JJL9ltMNiLEhBA4jsP1118PxAJ248aNbNq0aY/vkfnz5/O9730PpRR9fX1s3ryZDRs28NBDD+31e1ejmYtosavRHIGccMIJfPrTnwbiX7KJRIKFCxdOEqKLFi1q+nenIggCCoUCXV1dADzxxB
NNb+FZZ5016fa33347b3jDG3a5vZe97GX8+7//Oz//+c95y1vews9+9jOEELtt3nnJS16ClJIbb7yRa665hquuuor
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Преобразование в DataFrame для удобства\n",
|
|||
|
"df_scaled = pd.DataFrame(data_scaled, columns=df_cleaned.columns)\n",
|
|||
|
"\n",
|
|||
|
"# Понижение размерности до 2 компонент\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"kc_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(kc_pca[:, 0], kc_pca[:, 1], alpha=0.6)\n",
|
|||
|
"plt.title(\"PCA Visualization of Housing Data\")\n",
|
|||
|
"plt.xlabel(\"Principal Component 1\")\n",
|
|||
|
"plt.ylabel(\"Principal Component 2\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Агломеративная (иерархическая) кластеризация"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1kAAAJ4CAYAAACXhikUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACBs0lEQVR4nO3dd3xUVf7/8fckM0kghCodlCYgIEWKsruAArKusrqgX1ZRbOuCSLFggV17QVYpUqTZaIKisJaf7qqgomsBQRQ7Koj0ACEhQJKZTO7vjzjD9JY7mZnwej4ePB7JzJ07Z0ou933POZ9jMQzDEAAAAADAFGmJbgAAAAAAVCWELAAAAAAwESELAAAAAExEyAIAAAAAExGyAAAAAMBEhCwAAAAAMBEhCwAAAABMRMgCAAAAABMRsgAAAADARIQsAKaZOHGi2rVrF/DfxIkTE908AB4KCgrUvXt3bdmyRQUFBRo9erSeffbZRDcLAKoEa6IbAKBqqV+/vubMmeN129ixYxPUGgDB1KpVS9ddd52GDRsmwzDUrl07/etf/0p0swCgSiBkATCN0+lU9erV1bVrV6/bMzIyEtMgACGNHTtWl19+uY4cOaLTTjtN6enpiW4SAFQJDBcEYJrS0lJlZWVFtO3GjRt11VVXqUuXLurVq5fuuusu5eXlue9fvXq12rVrp127dnk9rn///l5DDx0OR9Ahir77+vLLLzVkyBB17txZf/7zn/Xf//7Xa9+FhYV69NFHNXDgQJ155pkaPHiwXn75Zb/n932eXbt2acSIEZo4caLmz5+v3/3ud+revbtuuukm7d692+vxa9as0fDhw9WtWzd16tRJF1xwgZ5//nn3/evXr3fvd9OmTV6PXbZsmdq1a6f+/fv7tefuu+/22ragoECdOnVSu3bttH79+oifP5iXXnpJQ4cOVdeuXdW5c2ddcskl+s9//uP3HgcaIhrs8xkxYoTXc7z55psaOnSounXrpt///ve69957VVBQ4L5/9uzZateunbp16ya73e712PHjx/sNSy0pKdFjjz2mfv36qVOnTvrzn/+sN9980+tx/fv314wZMzR58mT17NlTZ599tu68807l5+dH/PpDDZNdvXq1+zP1/BwOHTqkHj16BPws27Vrp/bt26tnz54aN26cDh8+7N6mXbt2mj17tlfbXO9LLO+lJJ1yyilq1aqVPv7447BDe32f64033lDPnj01bdo0Sd7fX99/nu3+/vvvNXbsWJ1zzjnq2LGj+vTpo4cffljFxcXubex2u5544gkNGDBAnTt31uDBg/Xvf/87ovdckvbs2aPbbrtNvXr1UpcuXXTNNdfo22+/de9/165dateund544w3deOON6tKli84991w9+eSTKisr8/pcfN+T2267zeszNQxDM2fOVJ8+fdS9e3fdeOON2rt3r3t7p9OphQsXavDgwercubO6du2qyy+/XJ9++mnIz1Hy/8x9fzcMQ5dffrnX8XLixIle3y1JeuGFFwJ+fwDEBz1ZAExTVFSkWrVqhd3us88+03XXXadzzjlHTzzxhAoKCjRz5kxdffXVevnllyMOalL5ibQkzZs3T3Xr1pVUfkLsG44kadSoUbrqqqt066236uWXX9Ytt9yiBQsWqF+/fiouLtbw4cN16NAhjR8/Xk2bNtWaNWv0z3/+UwcPHtSNN97o3k+/fv100003uX9v0KCBJGnt2rWqU6eO7r77bpWVlWnatGkaMWKE3njjDVWrVk3vv/++xowZo6uvvlrjxo1TcXGxli9frgcffFCdOnVSly5d3PvMzs7Wu+++q+7du7tve/PNN5WW5n9tLDs7W++//74Mw5DFYpEkvf3223I6nV7bRfP8np5//nk9/PDDGjdunLp3766CggI99dRTuv3229WtWzc1atTIve2cOXNUv359SXJ/HpJ02WWX6f/+7//cvz/wwANezzF37lzNmjVLw4cP16233qqdO3
dq5syZ+uKLL7Ry5Uqv74TFYtEnn3yifv36SZKOHTumdevWeb03hmFozJgx+vzzzzV+/Hi1bt1a77zzjm699VbZ7Xb95S9/cW+7fPlynXbaaXr00UeVl5enadOmaceOHXrhhRdksVjCvv6bbrpJl19+uaTynqEOHTq4vx+nnnqqfvzxR7/3dNq0aSosLFTNmjW9bnd9txwOh37++Wc99thjeuSRRzR16tSAn00g0byXLg6HQ5MnT474OSSpuLhYDz74oG644Qb9+c9/9rrv3nvvVceOHd2///Wvf3X/nJubqyuvvFJdu3bVlClTlJGRoQ8++EDPPfecGjRooJEjR0qSbr/9dq1bt06jR49Wly5dtG7dOk2cOFE2my3se56Xl6fLL79c1apV0z333KNq1app8eLFuvLKK/Xyyy+rdevW7vbcf//96tevn2bPnq1NmzZpzpw5On78uO64446Ar3vjxo164403vG5btGiRFixYoDvvvFMtW7bUlClTdPPNN2vlypWSpKlTp2rFihWaMGGC2rVrp/379+vJJ5/UzTffrPfff1/VqlWL6r339Oqrr2rz5s0htykoKNATTzwR83MAiB4hC4Bp8vPz3YEjlGnTpqlly5ZasGCBe3hSly5ddNFFF2nVqlW68sorI37O48ePS5K6deumOnXqSJI+/PDDgNuOGDFCY8aMkST16dNHQ4YM0ZNPPql+/fpp9erV2rp1q1544QV169bNvU1paanmzp2ryy+/XLVr15ZUHh58h0RK5SFz9erVat68uSSpVatWGjJkiF555RVdccUV+umnnzRkyBD985//dD+mW7duOvvss7V+/XqvkNO3b1+tXbvWfaK3b98+bd68WT169PDrHevdu7fWrVunL7/80t2u//znP+rZs6dX70k0z+9p586d+tvf/uYVLJs2baqhQ4dq06ZNuuiii9y3n3HGGWrWrJnfPho1auT1ntWoUcP9c0FBgebNm6dhw4bp3nvvdd/etm1bXXnllX7fCdd74wpZ7777rurXr+/V+/Dxxx/rww8/1IwZM3ThhRdKKv88i4qKNHXqVA0ePFhWa/l/gWlpaXruueeUk5MjqfzzHTNmjD788EP17ds3otd/6qmnSiofGhvs++Hy1Vdf6dVXX9UZZ5yhI0eOeN3n+diePXvq448/1jfffBN0X76ifS9dli5dquPHj+uUU06J+Ln+3//7f7LZbLrhhhv8hhm2adMm6HuwdetWnXHGGZo5c6b7e/C73/1OH330kdavX6+RI0dq69ateuutt/SPf/xD11xzjaTy7/nu3bu1fv16DR48OOR7PmPGDOXn52vFihVq2rSppPLvzYUXXqiZM2dq1qxZ7m07duzoDrF9+/bV8ePHtXjxYo0ePdrreypJZWVlevjhh9WxY0evz+X48eO66aabdO2110oq7yV78MEHdeTIEdWsWVO5ubm69dZbvXpvMzMzNW7cOP3www8hvy+hHDt2TFOnTvVrj69Zs2apSZMmXr2iAOKL4YIATJObm6uGDRuG3KaoqEhffvml+vXrJ8MwVFpaqtLSUjVv3lytW7fWRx995LV9WVmZe5vS0lK//e3bt09paWl+J0OBDBkyxP2zxWLR+eefry1btqi4uFgbNmxQ06ZN3QHL5eKLL1ZJSYm+/PLLsPs/66yz3AFLkjp06KDmzZvrs88+kyTdcMMNmjJlio4dO6avv/5ab775phYsWCBJfsPf+vfvr19++UXbtm2TJP33v/9Vly5d3CeMnnJyctSrVy+tXbtWkpSXl6f169d7hZ9on9/TxIkTdfvtt+vIkSP64osv9Oqrr7qHGIZ6XKS++OIL2e12DR482Ov2Hj16qGnTptqwYYPX7QMGDNC7774rwzAklffwuYKUyyeffCKLxaJ+/fp5fX/69++vAwcOePUu9e/f3x2wXL9brVb352bm6zcMQw8//LAuu+wytW/fPuD9paWlstvt2rJlizZt2qROnTp5beP7N+
EZLqN9LyXp4MGDevLJJ3XXXXcpMzMzotexf/9+PfXUUxo+fHjU87j+8Ic/aNmyZcrMzNRPP/2ktWvXat68ecrLy3O
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"[ 2 7 2 ... 14 14 14]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(data_scaled, method='ward')\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title('Дендрограмма агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Индекс образца')\n",
|
|||
|
"plt.ylabel('Расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Получение результатов кластеризации с заданным порогом\n",
|
|||
|
"result = fcluster(linkage_matrix, t=60, criterion='distance')\n",
|
|||
|
"print(result) # Вывод результатов кластеризации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJ8AAAMQCAYAAACJzMTyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACgjElEQVR4nOzdd3RUdf7/8dckkwSUUKRLEUEFXbqAoIsCIqKiX1kb1b6AgigoKLuoID8RlU5UYFEpwqKIjdXVVURZG1VWLKgoqAQIIIYi6bm/P+IMU+6UO5mbKTwf53BIptz53Dt3JnNf8/68r8MwDEMAAAAAAACADVJiPQAAAAAAAAAkL8InAAAAAAAA2IbwCQAAAAAAALYhfAIAAAAAAIBtCJ8AAAAAAABgG8InAAAAAAAA2IbwCQAAAAAAALYhfAIAAAAAAIBtCJ8AAAAAAABgG8InAEBSGDx4sAYPHmx63euvv67mzZvrgQceqOBRAYjUsGHDtGLFilgPA1Hy8ssva8iQIbEeBgAgRgifAABJ7cCBA5o8eXKshwHAgldeeUU5OTm65pprYj0URMk111yj/fv36+WXX471UAAAMUD4BABIao888oiOHTumk046KdZDARCG/Px8TZ06VcOGDVNKCh9Vk4XD4dDQoUM1ffp05efnx3o4AIAKxl90AEDS+s9//qN33nlHw4cPV40aNbyuKy0t1fz583XJJZeoZcuWuvTSS7VkyRKv2wwePFgPPPCA5s6dq/PPP1/nnnuu7rzzTmVnZ3vd7r333tOAAQPUrl07tWzZUr1799bSpUvd169bt07NmzfXRx99pIEDB6p169bq1auXli1b5r7NY489pubNm+uzzz5zX/bKK6+oefPmeu2119zj8Z1aOG3aNDVv3lyvvPKKJKl58+aaM2eO123mzJmj5s2b+435L3/5i1q1aqULLrhA/+///T8dO3bM6zZbtmzRrbfeqvbt26tz584aPXq0cnJyvNZp3bp1kqTvvvtOPXv2VL9+/cLeLpI0f/58XXzxxTrnnHPUvHlz9z/fdfD0wAMPqEePHu7fn3vuObVr107Lly/32m5m/1zbSZI2bNig2267TR07dlTLli3Vo0cPzZkzR6Wlpe7bHD16VJMmTVLXrl3Vtm1bXXPNNfrggw/cz0egx/HcLkOHDlX79u3Vvn17DR8+XL/88ot7+eHsG5L/82oYhvr166fmzZtr165dkqSCggJNmDBBXbp00Xnnnaf77rtPhw4dct8nPz9f06ZNU69evdSyZUu1b99et9xyi7755puA21aSdu3a5bXtfH93PfbFF1/stZ99//337u3ru32CWblypQoKCtS9e3evy2fMmGG6rX33lRUrVuiKK65Qy5Yt1a1bN82ZM0clJSWW1lGSPv74Y9PH83wNPvDAAxo8eLBefvllde/eXe3atdNNN92kbdu2eS1/586dGjlypC644AK1bdtWgwcP1qZNm/we3/Ofa4zNmzfXCy+8oPvvv1/t2rXT+eefr0cffVQFBQXu+5eUlGj+/Pnq06ePWrdurbZt26pfv35e7yeu94F27dqpsLDQa3wjR470mprsOZ433njD67Zr1qzxex7DeXxJ6t69uwoKCrRy5UoBAE4szlgPAAAAO+Tm5mrixIn605/+pNtvv10vvfSS1/UTJkzQK6+8oqFDh6pdu3basGGDJk+erMOHD2v48OHu261evVo1atTQ+PHjVVpaqmnTpmnw4MF68803VblyZX3wwQcaPny4brzxRt11113Kz8/XsmXL9Mgjj6hly5Zq06aNe1mjRo3S1VdfrWHDhmn16tWaOHGiJGnAgAEaNWqUPvjgAz388MNatWqVDhw4oEcffVSXXXaZrr76atN1/Pnnn7Vw4ULL22bVqlW67777dOWVV+qee+5Rdna2ZsyYoe3bt+v555+Xw+HQ119/rUGDBqlNmzZ64oknVFJSomnTpum2225zh2GennzySb
Vs2VJ33HGHJIW1XV577TVNmzZNQ4cOVZcuXVS5cmVJ0g033BD2uuTk5Gj69Ol65JFHdNFFF3ldl5WVpdq1a0uS9u/frxEjRriv27Ztm26++Wb17t1bM2bMkGEYWrVqlbKystS0aVNdccUVKikp0a233uoODpo2bapXX31Vw4cP16JFi/Twww/r6NGj7jFfe+21uu666yRJZ5xxhnbs2KF+/fqpadOmevzxx1VcXKxnnnlG/fv31+uvv66aNWu6xxNs3zDz+uuv6/PPP/d7Dl577TU9+OCDqlq1qiZOnKgJEyZoxowZkqSxY8dq48aNGj16tBo3bqyffvpJs2bN0r333qs333xTDocj7O3ua8GCBe4QzOWOO+5Qenq6Jk2apDp16iglJUUrVqwIOe3qjTfeULdu3ZSenu51eX5+vnr06KGhQ4e6L/PdV+bNm6cZM2Zo0KBBGjdunL755hvNmTNHe/bssTz9Nj8/X/Xq1dOsWbPcl7meF0/ffPONfvzxR40ePVrVqlXT7NmzNWjQIL311luqU6eOtm/fruuvv15NmjTR+PHjlZaWpsWLF+umm27Sc889p06dOnlts27dukmS1/rPmjVLbdq00cyZM/XDDz9o5syZ2r9/v2bOnClJmjp1qv75z3/q3nvvVfPmzZWTk6OnnnpKd999tz744AP3a0sqq0D69NNP3a+X33//XR9++KFpldnJJ5+s999/X1dddZX7srfeekspKSleIW24j5+RkaHu3btr1apVGjhwoJWnAwCQ4AifAABJafLkyTp06JCeffZZOZ3ef+527Nihl156SaNHj3Y3wP3zn/8sh8OhefPmacCAAe5Kqby8PL3yyitq1KiRJKlp06bq27evXnvtNfXv31/bt29X37599fe//929/Hbt2um8887TunXrvMKnSy65xH27rl27at++fXr66afVv39/VapUSVOmTNGAAQM0f/58bd68WVWqVDE92PVcxzPPPFNfffWV+7KUlBQVFxcHvI9hGJo6daq6du2qqVOnui9v0qSJbr75Zn344Yfq1q2b5s6dq+rVq+u5555TRkaGJKlOnTq699579f3333st86efftJHH32kN954Q2eeeaYkhbVdvvjiC1WvXl2jR48OON5Qli9frhYtWugvf/mL33Vnn322GjZsKEl+wci2bdt0/vnn68knn3QfdF9wwQV6//33tW7dOl1xxRVau3at/ve//+mpp55Sz549JUmdO3fWL7/8os8++8wrzJKkevXqqW3btu7fH374YVWuXFkLFy5UlSpVJEldunRRz549tWDBAt1///3u2wbbN3xDod9//11Tp07Vn/70J6/n3jAMjR071t0nafPmze6G3YWFhfr99981fvx4XX755ZKkTp066ejRo5oyZYoOHDjgDuqs2rNnj/7xj394jefgwYP65Zdf9OCDD6p3797u2/73v/8NuqyjR49q69atuuyyy/yuy8vL06mnnuq1jT0dOXJETz/9tG644QaNHz9eUtnrunr16ho/frxuueUW9/4Zjry8PFWtWtXr8VzPo+/jzp07Vx06dJAktW7dWj179tTixYt13333KSsrS+np6Vq8eLH7/t26dVOfPn30xBNPeIVxjRs3Nl2/U045RXPnzpXT6dRFF12klJQUPfbYY7rrrrvUrFkz7du3T6NGjfKqysrIyNBdd92lb7/91muZF154oVavXu0On95//33Vrl3bK0zyvO1///tfFRYWKj09XQUFBVq9erU6duzoru6TZOnxW7VqpbfeektHjx413Z4AgOTEtDsAQNL58MMP9frrr2vIkCFq0aKF3/WfffaZDMNQjx49VFxc7P7Xo0cPFRQUeE2Had++vTt4kqRzzjlHjRo10oYNGyRJt99+u6ZMmaLff/9dX375pd566y3NmzdPkvymtvTt29fr9169emn//v3asWOHpLJw5uabb9ZTTz2lTz75RFOmTFG1atVM13Ht2rX65JNPvAIMSa
pZs6Z7apyZH3/8UXv37vVb944dO6pKlSr6+OOPJUmbNm3ShRde6A6eXON7//33dfbZZ7svO3bsmGbMmKHzzjvP68A
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x800 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Выбираем подмножество данных для кластеризации\n",
|
|||
|
"features = df[['price', 'sqft_living', 'floors', 'bathrooms']]\n",
|
|||
|
"\n",
|
|||
|
"scaled_features = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = linkage(scaled_features, method='ward') # Метод \"Ward\"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(12, 8))\n",
|
|||
|
"dendrogram(linkage_matrix, labels=df.index, leaf_rotation=90, leaf_font_size=10)\n",
|
|||
|
"plt.title('Иерархическая кластеризация (дендрограмма)')\n",
|
|||
|
"plt.xlabel('Индекс дома')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Визуализация распределения кластеров**"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi0AAASgCAYAAACEzgvMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVf7H8c+dljqQEEhC71VakKpS7UpZLLsq9oZix4KuugFdF1dFRUAEGzYUFcUCrj+x6yqKChZQeicJkJ5MJlPu749sIiEFSDIlyfv1PDyQcyb3fGfOTbjnfu85xzBN0xQAAAAAAAAAAECIWUIdAAAAAAAAAAAAgETSAgAAAAAAAAAAhAmSFgAAAAAAAAAAICyQtAAAAAAAAAAAAGGBpAUAAAAAAAAAAAgLJC0AAAAAAAAAAEBYIGkBAAAAAAAAAADCAkkLAAAAAAAAAAAQFkhaAAAQ5kzTDHUIAAAAAEKIMQGAxoSkBYB676KLLtJFF11U7WvuvPNOjRkz5oiPOWfOHHXv3r22oR2Vr7/+Wt27d9e4ceOC2m6wrVq1St27dy/3p0ePHhowYIDOO+88ffLJJ4c9xtH2Z21t3bpV06dP10knnaS+fftq1KhRmjp1qn7//fdyrxszZozuvPPOOm1748aNOv/88+v0mAAAAAisO++8s8I178F//vOf/0gKzPVjfXDRRRdV+Ex69+6tUaNGacaMGcrJyan2+3ft2qXu3bvrrbfeCkq8fr9fb7zxhiZNmqQhQ4ZowIABmjhxol566SUVFxeXve6tt95S9+7dtWvXrjpt/8knn9Szzz5bp8cEgHBmC3UAABAMU6ZM0cUXX3zErz/33HM1fPjwAEZU0dKlS9WtWzdt2LBBP/zwg4499tigth9s//jHP3TMMcdIKnlqKCcnR88995ymTJmiBQsWaOTIkVV+79H2Z2383//9n+644w517dpV1157rdq0aaO0tDS98MIL+utf/6r58+fr+OOPD1j7//nPf/TTTz8F7PgAAAAIjBYtWmju3LmV1nXo0CG4wYShXr16KTU1texrj8ej3377TY8++qjWr1+vV199VYZhVPq9iYmJWrJkidq1axfwOF0ul6655hqtXbtW559/vq688krZ7XZ9++23euihh/TFF19o3rx5cjgcAYth9uzZuv766wN2fAAINyQtADQKR3sxm5ycrOTk5ABFU1Fubq5WrlypGTNmaMGCBXrttdcafNKiS5cu6t+/f7mygQMHatSoUXrxxRerTVoEY3AiSTt27NC0adM0fPhwPf7447JarWV1p5xyis4//3xNmzZNn3zySUAHKQAAAKh/HA5Hhetd/Ck2NrbC5zNo0CAVFBToiSee0Nq1a6v8/IL52c6cOVM//vijXnrppXJtnnDCCerRo4duvfVWvfbaa0F7qAoAGgOWhwLQKBy8nNC9996r448/Xj6fr9xrHnjgAQ0ZMkQej6fC8lAXXXSR7r77bi1cuFCjRo1Snz59dN555+nnn38ud4zPPvtMZ511lvr27atTTz1V77//vk4++WTNmTOn2vjee+89eb1eDR8+XOPHj9eHH36o7Ozscq9566231KtXL73xxhs6/vjjNXjwYG3atEmStHLlSp111lnq06ePjj/+eP3zn/9UYWFhue9fuXKlLrjgAqWkpKh379467bTT9Morr1QbU/fu3bVhw4YKx+nevbvWrVsnSXrhhRd02mmnqU+fPho+fLimT5+u/Pz8at9vVWJjY9WxY0ft2bNH0p9LSb322msaPXq0BgwYoK+//rrC8lCmaWrRokU6/fTT1bdvX5188sl69tlny637unr1al144YXq16+fBg8erGnTpikzM7PaeEqne99zzz3lEhaSFBUVpWnTpunss8+udPp6aeyrVq0qV37ocma//vqrLrnkEh177LFKSUnRpZdeqjVr1kgqWaas9Om87t27l51Hfr9fCxcu1Mknn6zevXvr1F
NP1UsvvVShndtuu0033nij+vfvr8suu0yS9P7772v8+PHq27evhg4dqttuu03p6enVfg4AAAAIjry8PM2cOVMnnXSS+vTpo7Fjx+rNN98s9xqfz6dXXnlF48aNK1u69JFHHpHb7S57zZ133qlLLrlEqampGjBggM444wz5fD59/fXX+utf/6qUlBQNGjRI1157rTZv3lxlPKeeeqpuvPHGCuUTJkzQtddeK6nkQZ9rrrlGQ4YMUb9+/fS3v/1Nn3/+eY0/g969e0tS2ZigsuvaypaH2rJli66//noNHjxYgwYN0uTJk8u9N7fbrYceekgjR45U7969NW7cOK1YsaLaWDIzM7V06VKdffbZlSZJxo4dq8svv1xJSUmVfn9lSxkfOk7w+/167LHHNGbMGPXu3VtjxozRrFmz5PF4JKlsXDp37txyY9QNGzZo8uTJGjBggAYMGKDrrrtOO3furNDOoWOpzMxM3XrrrTr++OPVp08fTZgwQcuWLav2cwCAYGOmBYBGZ8KECXr99de1atUqHXfccZJKLhQ/+OADnXnmmbLb7ZV+34cffqjOnTvrnnvukWma+ve//60bbrhBn3zyiaxWq7799ltNmTJFo0eP1k033aTt27crNTW13OChKkuXLtXw4cPVvHlz/eUvf9GcOXP09ttvl91oLuXz+fTcc8/pgQceUFZWljp37qz33ntPt912m8aNG6ebb75Zu3fv1mOPPaZNmzbp+eefl2EY+uyzz3Tdddfp4osv1g033KCioiItXrxY9913n3r37q1+/fpViOmkk05SdHS0li9frm7dupWVv//+++ratat69eql999/Xw8//LCmTZum7t27a8uWLfr3v/8tl8ulf//730fTLZKk4uJi7dq1S3379i1XPnfuXN1zzz0qKipSSkqK3nvvvXL1Dz30kF544QVddtllOv744/XLL7/okUcekdfr1eTJk/X999/rsssu09ChQ/X4448rJydHs2fP1sUXX6w333xTkZGRlcbz5ZdfqlevXlUOQoYNG6Zhw4Yd9fsslZ+fryuvvFJDhw7VnDlzVFxcrPnz5+uKK67QZ599pnPPPVdpaWl68803tWTJkrLZP9OnT9dbb72lyZMnKyUlRd9//73+9a9/KTc3V9ddd13Z8T/44AONHz9e8+fPl9/v1w8//KA77rhDU6ZM0aBBg5SWlqaHH35Yt956q15++eUavw8AAABUzuv1ViizWq2VLntUVFSkCy64QAcOHNCNN96o1q1ba+XKlbr77ru1f/9+XXPNNZJKlll95513dNVVV2ngwIFat26d5s2bp/Xr1+uZZ54pO/bq1asVERGhefPmqbCwUHv27NGUKVN09tlna+rUqcrNzdWjjz6qq6++Wh999JEslorPtY4fP14LFy5Ufn6+YmNjJUmbN2/W77//rmuvvVZ+v1+TJ09WYmKiHnroIdlsNr344ou69tpr9cEHH6h9+/ZH/Zlt3bpVktS2bduyskOvaw+Vnp6uv/3tb0pKStL06dMVHR2tOXPm6JJLLtH777+vpk2b6rrrrtOPP/6oG2+8UZ07d9ZHH32kW265RcXFxfrLX/5SaSzffPONvF6vRo8eXWW806ZNO+r3eLCnn35ar776qqZNm6a2bdtq7dq1euyxx2S323XjjTdqyZIl+tvf/qZzzjlH5557rqSSz+i8885Tp06d9O9//1ter1fz58/X+eefr3feeUcJCQllxz90LHXDDTfowIEDmjFjhmJjY/XOO+9o2rRpSk5O1tChQ2v1XgCgrjSKpMWCBQv01VdfVXgK9XCWLVumhQsXaufOnWrXrp2uv/56nX766QGKEkCwHHvssWrdurXef//9sqTFqlWrtG/fPk2YMKHK7/N6vXr22WfLLtYLCgo0bdo0rV+/Xr1799acOXPUtWtXzZ07t2ygkJCQoKlTp1Ybzx9//KHffvtNTzzxhCSpVatWGjp0qJYsWVIhaSFJ11xzjUaNGiWpZI
bBI488ouHDh+uRRx4pe02HDh106aWX6vPPP9eoUaO0adMmTZw4UXfffXfZa1JSUjRkyBCtWrWq0qRFVFSUTj31VK1
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 1: sqft_living vs price\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Living Area vs Price Clusters')\n",
|
|||
|
"plt.xlabel('Living Area (sqft)')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 2: floors vs price\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['floors'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Floors vs Price Clusters')\n",
|
|||
|
"plt.xlabel('Floors')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 3: bathrooms vs price\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['bathrooms'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Bathrooms vs Price Clusters')\n",
|
|||
|
"plt.xlabel('Bathrooms')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# Парный график 4: sqft_living vs price\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('Living Area vs Price Clusters')\n",
|
|||
|
"plt.xlabel('Living Area (sqft)')\n",
|
|||
|
"plt.ylabel('Price')\n",
|
|||
|
"\n",
|
|||
|
"# Настройка графиков\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## KMeans (неиерархическая кластеризация) для сравнения"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Центры кластеров:\n",
|
|||
|
" [[2.22041876e+03 5.15870327e+05 2.07766990e+00 2.47761993e+00\n",
|
|||
|
" 3.43303826e+00]\n",
|
|||
|
" [2.21224000e+03 5.28096070e+05 1.07634518e+00 2.19979695e+00\n",
|
|||
|
" 3.87857868e+00]\n",
|
|||
|
" [1.32625308e+03 3.60889221e+05 1.10303964e+00 1.36120782e+00\n",
|
|||
|
" 2.67688806e+00]\n",
|
|||
|
" [3.88146209e+03 1.24719106e+06 1.89801444e+00 3.31836643e+00\n",
|
|||
|
" 4.38447653e+00]]\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi0AAASgCAYAAACEzgvMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5xcVf3/8ff0ur2nbHpvJEDohIReFURFQIqAdKVJ8IsKIkivCSXUqBRRQQQCgkGK8JMSEQgkIW1TN9vr7PTy+2PdgWV3U3Zn9u5OXs/HI49k75m597Pn3pnccz73nGNKJBIJAQAAAAAAAAAAGMxsdAAAAAAAAAAAAAASSQsAAAAAAAAAADBAkLQAAAAAAAAAAAADAkkLAAAAAAAAAAAwIJC0AAAAAAAAAAAAAwJJCwAAAAAAAAAAMCCQtAAAAAAAAAAAAAMCSQsAAAAAAAAAADAgkLQAgAyXSCSMDgEAAABABqKtAQBIB5IWAFLuhz/8oX74wx922e7z+fS9731PU6dO1dKlS5OvnTBhgk455ZQe93f55ZdrwoQJuuaaa9IWc7qEQiEtXrxY3/nOd7Tnnntq9uzZOuWUU/TCCy90usFfsGCBJkyYkNJjh8Nh/fa3v9VLL72Ukv31dF770/PPP69TTjlFs2bN0owZM3Tsscfq3nvvlc/nMzSuge7555/XhAkTOv2ZNGmS9t57b/3oRz/Sf/7znx3uYyCcfwAAANoaX6GtkTrfvFf++p+ZM2dKkj744ANNmDBBH3zwgWFxGmHLli1d6mTixImaOXOmTjrpJP3lL3/Z4T7ScQ0CyGxWowMAsHvw+Xw699xztWrVKt1///2aM2dOssxsNuuTTz5RVVWVSktLO73P7/frzTff7O9wU6Kurk7nnnuutm3bph/+8IeaPn264vG43nzzTV1zzTVatmyZfvOb38hkMqXl+DU1Nfrd736nm2++OSX7u+6661Kyn95auHChHnroIf3oRz/ShRdeKJvNps8//1yPPvqo/vWvf+mZZ56RzWYzNMaBbuHChSoqKpIkxeNx1dXV6f7779eZZ56pv/zlL5o4cWKP7zX6/AMAAPSEtgZtjVQ4+eST9d3vfrfLdrOZ530l6cILL9QhhxwiqX2ETVtbm/785z/r2muvVTQa3W5y8Lvf/a4OOuigfooUQCYgaQEg7ToaEStXrtSDDz6oAw44oFP55MmTtXbtWv3973/XWWed1anszTfflMvlUnZ2dj9GnBrz589XVVWVnn32WY0cOTK5/ZBDDtGQIUN01113ae7cuTr00EONC3IXjB071rBjh8NhPfLIIzrnnHN0+eWXJ7fvv//+Gj16tC6++GItXbpURx99tGExDgaTJk3SsGHDOm2bPHmyDj/8cD399NO64YYbenyvkecfAACgJ7Q1aGukSmlpqfbYYw+jwxiwysvLu9TP/vvvr1WrVmnx4sXbTVqUlpZ2SRoCwPaQLgaQVm1tbTrvvPP05Zdf6uGHH+7SiJAkt9utOXPm6O9//3uXsldeeUVHHnmkrNbOOdZ4PK6HH35Yhx9+uKZOnaojjzxSf/jDHzq9JhaL6eGHH9Zxxx2n6dOna4899tApp5yi999/P/maBQsW6PDDD9dbb72l448/PrmvF154odO+fve73+moo47StGnTdNBBB+n666/f7pREK1eu1LvvvqtzzjmnUyOiw1lnnaXTTjtNbre72/fPmzevyxD1jil+tmzZIkkKBoO6/vrrdfDBB2vq1Kk66qij9Nhjj0lqH8Lb0UD5+c9/rnnz5iX3s2zZMp1++umaMWOGZs+erfnz56uhoaHTcSZPnqw///nPOuCAAzR79mytXbu2y5DtCRMm6KmnntK1116r2bNna+bMmfrpT3+qurq6TnE/9thjOvTQQzV9+nSdcsop+uc//9lpWHXHcOMFCxb0WJ8+n0/BYFDxeLxL2Zw5c3T55Zdr+PDhyW1NTU2aP3++Zs+erd
mzZ+u3v/2t7rvvvk71sDN1LElLly7VqaeeqpkzZybr+amnnkqWdwwT/+Mf/6i5c+dq1qxZeu+993aqrr/poYce0tSpU9Xc3Nxp++LFizVlyhTV19crHo/r7rvv1rx58zR16lTNmzdPd955pyKRSI/73Z5hw4YpLy9PlZWVyTrYmfMfDod1zz33JM/tcccdp7/+9a+d9r106VKddNJJmjZtmg444ADdeOON8vv9vYoTAADgm2hr0NZIRVujL5YvX65zzjlH++yzj2bNmqULLrhAa9as6fSampoa/fznP9ecOXM0ffp0nXzyyXrjjTc6vWbChAlauHChTjrpJE2fPl0LFy7c5fv+jz/+WBMmTOgyemjlypWaMGGC/vGPf0iSXn75ZZ1wwgmaPn269t13X1111VWqrq7u1e9vNps1adKkZFuio76feOIJHXXUUZoxY4aee+65bqeHeuGFF3TiiSdqxowZOuSQQ3TnnXcqHA4ny1evXq3zzz9fs2bN0qxZs3TxxRdr8+bNvYoTwODDSAsAaeP3+/XjH/9YK1as0GOPPaa99tqrx9cec8wxuuyyyzoN2/b5fHrnnXf0xBNP6J133un0+uuvv17PP/+8zj//fM2cOVMfffSRfvvb36qlpUUXX3yxJOmOO+7QM888oyuvvFITJkxQdXW17r//fv30pz/VW2+9JZfLJUmqra3VDTfcoAsvvFBDhw7VY489pvnz52vatGkaM2aMXn75Zd1+++2aP3++JkyYoPXr1+vWW29VIBDQrbfe2u3v869//UuSOt3Af53D4dCvfvWrXavQb/jtb3+rd999V/Pnz1dhYaHeeecd3XbbbcrNzdXxxx+vhQsX6pJLLtGFF16oI444QpL00Ucf6eyzz9a+++6re+65R83Nzbr33nt1xhln6C9/+YucTqek9kbY448/rptuukmNjY0aM2ZMtzHcfffdOvzww3XXXXdp8+bNuvnmm2WxWHTXXXdJap+O6P7779c555yjfffdV//617902WWXddpHcXGxnn322e0+eZOfn68ZM2boscceU01NjQ4//HDNmjVL+fn5stlsuuCCC5KvjcfjOvfcc7V161b97Gc/U15enh5++GFt2LBBXq93l+r4rbfe0sUXX6wzzjhDl156qYLBYHJEwtSpUzVjxozkaxcuXKhf/OIXCgaDyWtyZ+r6644//njdc889ev311zsNTV+yZIkOPPBAFRQUaNGiRXrmmWc0f/58DR8+XJ9++qnuvvtu2Ww2/eQnP9ml30+SGhsb1djYqPLy8uS2nTn/V111ld5++21deOGFmjFjht5++21dc801stlsOu644/TSSy/pqquu0vHHH6/LLrtMW7du1d133621a9fqiSeeSNtUBQAAYPdAW4O2RqraGh3i8bii0WiX7d9ManV4//33de6552qfffbRb3/7W4VCIS1atEinnHKK/vSnP2nMmDGqq6vTySefLIfDocsvv1x5eXl6/vnndfHFF+u2227TCSeckNzfQw89pCuvvFKjRo3S0KFD9cgjj+zSff+sWbNUXl6uJUuWaO7cucntL7/8snJzczVnzhz95z//0dVXX62LLrpIe++9t6qqqnT77bfryiuv1JNPPrnDOupORUVFp7aE1J6wu/baa+X1ejVjxgz9+c9/7lT+1FNP6YYbbtB3v/tdXXHFFdq8ebNuu+02NTc364YbblBFRYVOOeUUjR49Wrfeequi0agefPBB/eAHP9Df/vY3FRQU9CpWAIPHbpG0WLRokd59990uT0bsyAsvvKCHH35YmzdvVnl5uS655BKmHgF2UkcjomOB3x09XX3IIYfI5XJ1Grb9j3/8QwUFBdpzzz07vbaiokJ/+tOfdMUVV+jHP/6xJOnAAw+UyWTSokWLdOqppyovL081NTW6/PLLOz2x43A4dOmll+rLL79MDm0NBAK66aabtN9++0mSRo4cqblz5+rtt9/WmDFj9OGHH2rYsGE67bTTZD
abNXv2bLnd7i5Pw3/dtm3bJKnLVDyp9OGHH+qAAw7QscceK0naZ5995Ha7VVBQILvdrkmTJklqH8Y7efJkSdKdd96
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Убедитесь, что масштабирование применяется только к нужным признакам\n",
|
|||
|
"features_used = ['sqft_living', 'price', 'floors', 'bathrooms', 'bedrooms']\n",
|
|||
|
"data_to_scale = df_cleaned[features_used]\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(data_to_scale)\n",
|
|||
|
"\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"kmeans = KMeans(n_clusters=4, random_state=random_state)\n",
|
|||
|
"labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
"centers = kmeans.cluster_centers_\n",
|
|||
|
"\n",
|
|||
|
"# Отображение центроидов\n",
|
|||
|
"centers_original = scaler.inverse_transform(centers) # Обратная стандартизация\n",
|
|||
|
"print(\"Центры кластеров:\\n\", centers_original)\n",
|
|||
|
"\n",
|
|||
|
"# Visualize KMeans clusters; centroids come from centers_original (original units, columns ordered as features_used) so they share the axes of df_cleaned\n",
|
|||
|
"plt.figure(figsize=(16, 12))\n",
|
|||
|
"plt.subplot(2, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers_original[:, 0], centers_original[:, 1], s=300, c='red', label='Centroids')  # x: sqft_living (col 0), y: price (col 1)\n",
|
|||
|
"plt.title('KMeans Clustering: Square vs Price')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['floors'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers_original[:, 2], centers_original[:, 1], s=300, c='red', label='Centroids')  # x: floors (col 2), y: price (col 1)\n",
|
|||
|
"plt.title('KMeans Clustering: Floors vs Price')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 3)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['bathrooms'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers_original[:, 3], centers_original[:, 1], s=300, c='red', label='Centroids')  # x: bathrooms (col 3), y: price (col 1)\n",
|
|||
|
"plt.title('KMeans Clustering: Bathrooms vs Price')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(2, 2, 4)\n",
|
|||
|
"sns.scatterplot(x=df_cleaned['sqft_living'], y=df_cleaned['price'], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.scatter(centers_original[:, 0], centers_original[:, 1], s=300, c='red', label='Centroids')  # x: sqft_living (col 0), y: price (col 1)\n",
|
|||
|
"plt.title('KMeans Clustering: Square vs Price')\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### PCA для визуализации сокращенной размерности"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABi8AAAJHCAYAAADoqsXxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5QUZdbA4V9VdZycMzDEIUdJklFQMaCuObsGFHfd1dVVV8X0mXbNOYdVXBOKmAVFRUAQyTkNMMPkHDpX1fdHQcsEkDAwgPc5Z45MVXW91dXVY9++dd+rmKZpIoQQQgghhBBCCCGEEEIIcZhQW/sAhBBCCCGEEEIIIYQQQgghdiXJCyGEEEIIIYQQQgghhBBCHFYkeSGEEEIIIYQQQgghhBBCiMOKJC+EEEIIIYQQQgghhBBCCHFYkeSFEEIIIYQQQgghhBBCCCEOK5K8EEIIIYQQQgghhBBCCCHEYUWSF0IIIYQQQgghhBBCCCGEOKxI8kIIIYQQQgghhBBCCCGEEIcVSV4IIcQfiGmarX0IYjf+yK/NH/m5CyGEEEKIpuTz4R+DvM5CiN8jyQshBBdffDE5OTkNfnr27Mno0aO55557qK6ubvKY3Nxc7r77bo4//nh69+7N6NGjufHGG1m7du1ux3n88cfJycnhvvvuO5hPZ7eefvppcnJyWmXs5nz00Ufk5OSQn59/0B8XCAR44IEH+PTTT/f1MPfJeeedR05ODl9//fVBHedwey0PRE1NDf/85z9ZtGhReNnFF1/MxRdffMiOYW/fz2PHjuXWW29t0bE3bNjA+eef3yL7ys/PJycnh48++qhF9ieEEEKIw4vELa3jaIpbcnJyePrpp5ssX79+PUOHDmXUqFFs2bIlvG1OTg6PPfZYs/syDIMRI0YcsZ8/i4uL+fe//82JJ55Inz59GD58ONdcc02DuAQOTmxSVFTE1Vdfzfbt21tkf7t7XYUQRz5JXgghAOjevTvvvfde+Of111/nsssuY9q0aUyaNKnBHRHffPMNZ5xxBqtWreLaa6/l5Zdf5oYbbmDLli2cc845zJ07t8n+DcNg+vTpdOnShU8++QSv13son94fXklJCW+++SahUOigjbF582aWLFlCly5dePfddw/aOEebNWvW8Mknn2AYRnjZXXfdxV133XVIxt+f93NL+uqrr1iyZEmL7CslJYX33nuP0aNHt8j+hBBCCHH4kbjl6HYo4pbGNmzYwGWXXYbb7ebtt98mOzs7vE5VVb766qtmH/fLL79QUlJyiI6yZf36669MnDiR2bNnc8kll/DCCy9w++234/P5uPjii5k+ffpBHX/evHn88MMPLba/9957j7PPPrvF9ieEOHzYWvsAhBCHh6ioKPr27dtg2cCBA6mvr+epp55i2bJl9O3bl23btnHLLbcwYsQInnjiCTRNC28/fvx4zj//fG655Ra+++47HA5HeN1PP/1EUVERjz32GBdddBGfffaZfLg4ynz00UdkZmYyadIkbrrpJrZu3Uq7du1a+7COSJ06dTok4+zv+/lw5XA4mvwdE0IIIcTRReIW0ZI2bdrEpZdeSmRkJG+++SYZGRkN1vfv359FixaxevVqunfv3mDd559/Trdu3VizZs2hPOQDVlVVxd///neys7N5/fXXcbvd4XUnnHACV199NVOmTGH48OEkJSW14pHuPYkBhDh6SeWFEGKPevbsCUBBQQEAb731FoFAgDvuuKNBAADgdru55ZZb+NOf/tSkZHvatGl06dKFAQMGMHjwYN57773fHXvs2LE88MADXHrppfTu3Zvbb78dsD5sTZkyhWOPPZZevXpxzjnnMH/+/AaP9fv9PPjggwwbNox+/fpx22234ff7G2zTXPnrggULyMnJYcGCBeFlmzdv5i9/+QuDBg1i4MCBTJo0iU2bNjUY69///jejRo2iZ8+enHrqqXzxxRcN9msYBs899xyjR4+mT58+TJ48ud
my9sb29nGzZs3iggsuoF+/fvTs2ZMTTzyRqVOnAtZUOscddxwAt912G2PHjg0/7oMPPuDMM8+kb9++9O7dm4kTJ/Lll1822HdOTs7vThWk6zrTp09nzJgxHH/88URERDT7GgeDQR555BFGjhxJ7969ueKKK5g+fXqTcvKPP/6YCRMm0KtXL0477TTmz59P9+7d91iO/cUXX3DmmWfSr18/hg0bxpQpUxqcq6effpoTTzyRmTNncsopp9CrVy8mTpzIkiVLWLp0KWeffTa9e/fmlFNOaXI9rV+/nkmTJtG/f3/69+/PddddR15eXnj9zuvm3XffZcyYMfTv3z98J9+ezvGCBQu45JJLALjkkkvC1+Ou1+af//xnzjzzzCbPd/LkyZx22mnh3xctWsRFF11Enz59GDRoELfccgsVFRW7PV+w/+/nXZ/zru+VxscOsHLlSi699FIGDBhAv379uOyyy1i6dClgvSbPPPMM0LDU2zAMXnrpJcaNG0fPnj054YQTeOutt5qMc9NNN3H99dfTt29fLr/88ibTRn300Ud0796dZcuWce6559KrVy/GjBnDq6++2mBfJSUl3HDDDeH3+JQpU3j88ccbvFeEEEIIcXiTuEXilr2JW3a1adMmLrnkEqKjo3n77bebJC7ASowlJSU1qb4IhUJ88803nHzyyU0eszeve0VFBffccw9jxoyhZ8+eDBo0iOuuu65BTHTxxRdz++2389JLLzF69Gh69erFeeedx/Lly8Pb+Hw+7r77bkaOHBk+n40/6zY2ffp0SkpK+Ne//tUgcQFWpclNN93EhRdeSF1dXZPH7m6a1ltvvbXB67Vt2zauueYaBg8eTJ8+fTj33HPDlRYfffQRt912GwDHHXdcg9fsgw8+4OSTTw5PB/f000+j63qDcS699FLuuusu+vfvz4QJE9B1vUEssfO9MX/+fP785z/Tp08fhg0bxn/+858G+6qrq2PKlCkMHTqUfv36ccMNN/DGG28cVlO2CSEkeSGE+B25ubkAtGnTBoA5c+bQvXt3UlNTm91+6NCh3HDDDSQnJ4eXVVVV8d1333H66acDcMYZZ7BixQpWrVr1u+NPnTqVXr168dxzz3HWWWfh9/u59NJL+fbbb7nhhht45plnSEtL48orr2zwgfDmm2/m/fffZ9KkSTzxxBNUV1fzxhtv7PPzLy4u5txzz2XLli3cfffd/Oc//6GsrIxLL72UqqoqTNPkuuuu49133+Xyyy/n+eefD3/w2bXU9j//+Q/PPvssZ511Fs888wxxcXE8+uijvzv+3jzu+++/57rrrqNHjx4899xzPP3007Rp04Z7772XZcuWkZKSEv6C+Nprrw3/e+rUqUyZMoXjjz+eF198kUceeQSHw8FNN91EUVFReP/vvfcekydP3uNx/vjjj5SWlnL66afjcrk46aST+PjjjwkEAg22mzJlCm+++SYXXXQRzz77LElJSdx5550Ntpk+fTq33nor/fv357nnnuOEE05g8uTJDT5oNvbcc89x44030rdvX5566imuu+46vv76ay6++GJ8Pl94u6KiIh566CGuueYannzySWpqarj++uu58cYbOfvss3n22WcxTZMbbrgh/Ljc3FzOO+88ysvLefjhh7n//vvJy8vj/PPPp7y8vMFxPPPMM9xyyy1MmTKFfv36/e457tGjB1OmTAmfm+amijrttNNYtWoVW7duDS+rqanhxx9/ZOLEiYBVsn7ZZZfhcrl44okn+Ne//sXChQu55JJLGjz/xvbn/bwv6urquPLKK4mPj+fpp5/m8ccfx+v1csUVV1BbW8vZZ5/NWWedBTQs9b777rt56qmnOO2003jhhRc48cQTeeCBB3j22Wcb7P/LL78kMjKS559/niuvvLLZYzAMg7///e9MmDCBl156if79+/Pvf/+bOXPmANa8ypdeeimLFy/mX//6Fw8++CBr167ltdde26/nLIQQQojWIXGLxC17E7fstH
nzZi699FKioqJ4++23d3udaJrGCSec0CR5MX/+fPx+f5ObXfbmdTdNk0mTJjF37lxuuukmXn31Vf7yl78wf/78JvH
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"reduced_data = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация сокращенных данных\n",
|
|||
|
"plt.figure(figsize=(16, 6))\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
|
|||
|
"plt.title('PCA reduced data: KMeans Clustering')\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 26,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA28AAAImCAYAAADE77LsAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACIkUlEQVR4nOzdd3xUVf7/8fekTnpIT4BASOgJPTQBEZG1oCti2a9iRxFxUdEfirAqKuoqCgKCIiq6q6uyIOpiAzs9oPRQAgQIpJBCes/8/ggZHUMJYZiSvJ6PRx4k95578pnJEfPmnHuuwWQymQQAAAAAcGgu9i4AAAAAAHB2hDcAAAAAcAKENwAAAABwAoQ3AAAAAHAChDcAAAAAcAKENwAAAABwAoQ3AAAAAHAChDcAAAAAcAKENwAAAABwAoQ3AAAAAHAChDcAcBC33nqrOnbsqL/97W+nbfPwww+rY8eOevzxx21YGYDGSktLU8eOHbVs2TJ7lwKgCSC8AYADcXFx0ZYtW5SRkVHvXElJiX744Qc7VAUAABwB4Q0AHEiXLl3k6empr7/+ut65H374QV5eXgoPD7dDZQAAwN4IbwDgQLy9vXXxxRefMrx9+eWX+stf/iI3N7d651atWqXrrrtOCQkJuuiii/Tcc8+ppKREkjRs2DB17NjxlB9paWmSpDVr1ujmm29W79691a9fPz3yyCNKT0+3+B6PPPLIKfs423KwuuWgp/r4o+3bt+vuu+9Wv3791KtXL913333at2+f+fyGDRvUsWNHbdiwQZK0d+9eDR8+XH/72980d+7c036PuXPnSpKWLFmiK664QvHx8Rbnz7YE9ZNPPjllv3+8rm5p3NnaNbaGhr43Z/r+pztf93N4/PHHNWzYMIvv+9FHH1m8h3/8Pps3b7Zo++9//1sdO3a06KOsrEyvvPKKRowYofj4ePXq1Ut33nmnkpOTLa49XV233nqrRZu6Ok7lz+Ojzq233mrRT3l5uV5//XVdfvnlSkhI0IgRI7Rw4ULV1NRYXPPnWjZs2NCga8/GZDJpypQp6tatm1avXt3g6wBAkur/BgAAsKsrr7xSDz30kDIyMhQRESFJKioq0s8//6x3331XP//8s0X7L774Qo8++qiuvvpqPfTQQzp69KhmzZqllJQUvfvuu5o3b54qKip0/PhxPfDAAxo/fryGDh0qSQoLC9Py5cv12GOPaeTIkRo3bpzy8vI0Z84c3XTTTfr0008VHBwsqfaX3ptuuknXXXedJJn7a4guXbroqaeeMn+9ZMkS/fe//zV/vX79eo0dO1b9+vXT888/r/Lycr355pv629/+pk8++USxsbH1+nz55ZcVHx+v8ePHKyAgQIMHD5YkTZ8+XZLM3y8iIkJJSUmaNm2arr/+ek2bNk0+Pj6S1KD6y8rKlJCQoGnTppmPne66P763f27X2BrO5b158skn1bVr11N+/48//liStHPnTj3zzDP12v5Zfn6+Zs+efcpzPj4++v7779W7d2/zsS+//FIuLpb/Jjx58mRt2rRJkyZNUnR0tA4dOqTXXntNjzzyiFasWCGDwWBue/311+uGG24wf133c7Qmk8mk++67T1u2bNEDDzygTp06acOGDZo9e7aOHDmiZ5991tz2z2M2Nja2wdeeyXPPPaf//e9/ev311zVo0CCrv0YATRvhDQAczNChQ+Xl5aWvv/5ad9xxhyRp5cqVCg4OtvhlWar9ZXTmzJkaPHiwZs6caT7etm1b3XHHHfrpp5/MYaJuli06Olo9evSQJNXU1GjmzJkaNGiQXnnlFfP1vXr10pVXXqm3335bkydPliSVlpaqbdu25mvr+msIX19f83WS9Msvv1icf+WVV9SmTRstXLhQrq6ukqRBgwbpsssu05w5c/Taa69ZtD906JBWr16tzz//XO3bt5ckc9D19fWVJIvvt2LFCknSE088YQ5NkuTh4XHW2ktLSxUSEmLR3+mu++
N7++d227Zta1QN5/LexMXFnfb71x0vLy8/Zds/mzNnjqKiopSXl1fv3JAhQ/Tdd9/p//2//ydJysjI0G+//aY+ffro6NGjkqSKigoVFxdr2rRpuvLKKyVJffv2VVFRkV588UVlZ2crNDTU3GdERIRFPXU/R2v6+eeftXbtWr366qu66qqrJEkXXXSRjEajXnvtNd12223m8fTnMfvTTz81+NrTeeWVV/Txxx9r3rx5GjJkiNVfH4Cmj2WTAOBgjEajhg0bZrF0csWKFbriiissZiok6cCBA8rIyNCwYcNUVVVl/khMTJSvr6/WrFlzxu918OBBHT9+XCNHjrQ4Hh0drZ49e2rjxo3mY+np6fLz87PCK7RUUlKi7du364orrjCHE0ny9/fXJZdcYlFDXftZs2apX79+Z/1luU63bt0kSe+8846ysrJUUVGhqqqqBl1rrdfdmBrO9b2xlr179+rjjz/WP/7xj1OeHzZsmFJTU3XgwAFJ0tdff63u3burZcuW5jYeHh56++23deWVVyozM1Pr16/XRx99ZN50p6Ki4pzrqqmpUVVVlUwm01nb1H38se3GjRvl5uamyy+/3OKaa665xnz+dM7nWkn64IMPtHDhQl111VUWs7MAcC6YeQMAB3TFFVfogQceUEZGhjw9PbVu3To99NBD9dqdOHFCUu0Ss1MtM8vKyjrj96m7PiQkpN65kJAQ7dq1S1LtDN+xY8fUqlWrc3shDVBYWCiTyXTaGgoLCy2O3XffffL397dYdnk2iYmJmjZtmhYuXKh58+adU31Hjx494/LCC1nDub431vLcc8/pqquuUs+ePU95Pjw8XPHx8fruu+/Url07ffnllxo5cqR5vNT55Zdf9Pzzz+vAgQPy8fFRp06d5O3tLUlnDGCnM3/+fM2fP1+urq4KCQnRoEGD9OCDD1ps4lM3W/1Hffv2lVS7FLRFixYWQViSeQbwTO/n+VwrSbt379agQYP0v//9T7fffru6dOlyxvYAcCqENwBwQEOGDJGPj4++/vpreXt7q1WrVoqPj6/Xzt/fX1LtvUV1v6D+UUBAwBm/T2BgoCQpOzu73rnjx4+rRYsWkqTk5GSVlZXV22TEGvz8/GQwGE5bQ12NdSZPnqyvv/5aEydO1AcffNDg5XU33nijVq9eraqqKj355JNq1aqVxo8ff8ZrampqtHXrVo0ePbpB3+PPM6PnW8O5vjfW8NVXX2nHjh0Wy2hP5dJLL9V3332nK664Qjt27NC8efMswtvhw4c1YcIEDR8+XG+++aZat24tg8GgDz74oN6yWens751U+/7deOONqqmp0bFjxzRr1izdc889+vzzz81tpk+fbhG2/3jfWkBAgPLy8lRdXW0Rwur+kaNuvJ/K+VwrSQ8++KBuu+02XXXVVZo2bZqWLFlSLwgCwNmwbBIAHJCHh4eGDx+ub775Rl999ZX5Hps/a9eunYKDg5WWlqaEhATzR3h4uF555ZV6MyF/FhMTo9DQUP3vf/+zOH7kyBFt2bJFvXr1kiT9+OOP6ty5s4KCgs75tdTU1Jzxl1Rvb2/Fx8frq6++UnV1tfl4YWGhfvzxx3r3+cXHx2vevHk6evSoXn755QbX8dprr+nHH3/Uiy++qCuuuEIJCQlnvd/s119/VUlJifr163fGdnWzSH/esON8azjX9+Z8VVRU6KWXXtKECRMs7kc7leHDh2vr1q3697//rd69eyssLMzi/I4dO1ReXq57771X0dHR5nBWF9zq3rO6nRrP9t5JtRvsJCQkqHv37rriiit0yy23aM+ePcrPzze3iYmJsfhv4Y/3F/bt21dVVVX1dnOtC39nej/P51qpdqbUaDTqySef1M6dO/Xuu++e9fUCwJ8x8wYADurKK6/UuHHj5OLiYrHT4R+5urrq4Ycf1pNPPilXV1ddcsklKigo0Pz585WZmXnW5X4uLi6aNGmSpkyZokceeUTXXHON8vLyNG/ePAUEBOjOO+/Uzp
079cEHH+iqq67Sli1bzNceP35cUu0MS25ubr1gl5ubq5SUFB06dMgcAk/nkUce0d133617771XN998syorK7Vw4UJ
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"inertias = []\n",
|
|||
|
"clusters_range = range(1, 11)\n",
|
|||
|
"for i in clusters_range:\n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" kmeans.fit(data_scaled)\n",
|
|||
|
" inertias.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range, inertias, marker='o')\n",
|
|||
|
"plt.title('Метод локтя для оптимального k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Расчет коэффициентов силуэта"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACjZklEQVR4nOzdd1yV9eP+8dfhsAVBHODeAwe4cO/V0jIbNhxZapll9UlNyzTN1Jyl5iottczKVamVaeXeW4YbB6igKMge5/z+8Cu/yAXK4WZcz8eDh3Luca77LSoX932/b5PVarUiIiIiIiIi2cbO6AAiIiIiIiL5jYqWiIiIiIhINlPREhERERERyWYqWiIiIiIiItlMRUtERERERCSbqWiJiIiIiIhkMxUtERERERGRbKaiJSIiIiIiks1UtERERERERLKZipaIFBg9e/akZ8+eGV7bs2cPjz/+ODVr1uSnn36y6fsPGzaMdu3aZXm7du3aMWzYMBskEhFbqV69OjNmzDA6hogYyN7oACIiRrly5QqvvfYatWrVYv78+VSvXt3oSCIiIpJPqGiJSIH19ddfk5iYyMSJE/H29jY6joiIiOQjunRQRAqkq1evsmTJErp06XJLyQoNDWXQoEE0b96cunXr0rNnT/bu3ZthnX/++Ydu3brh7+9Ps2bNGDVqFNevX8+wznfffUfbtm3x9/fnnXfeITY2FoDZs2fTtGlTGjZsyKhRo0hOTk7fJjk5mdGjRxMQEEDjxo3TLz2Ki4tjyJAh1K1bl9atW/Pdd9+lb3P+/HmqV6/OihUr0l9LSkqiffv2Gc7S3e7SyZ07d1K9enV27tx528/hxpm/hg0b3nLZ408//cRjjz1G7dq1adOmDTNmzCAtLS19+e0ulfx31pvvdbuPmznvddnk7Y7pvyIiInjvvfdo2rQp9erVo0ePHuzfvz99+X8v8bJarTz33HNUr16d8+fPZ1jvblkHDRpEq1atsFgsGd7/gw8+4KGHHgLg4sWL/O9//6NJkyb4+/vTs2dPDhw4AMCMGTPu+B4384WEhPDGG2/QpEkTatWqRcuWLRk7diyJiYl3HYOtW7feNXtmjxFgw4YNPPnkk/j7+991X/+2YsUKqlevzsGDB3nyySfx8/OjS5cu/P777xnWO3/+PEOHDqVFixbUqlWLpk2bMnToUK5evZq+TnBwMC+++CL16tWjQ4cOLF26NH3Z7b5+4davk3td1vfvr7tFixbd8vdrx44d1KhRgy+++OKO+/iv6dOn4+vry8qVKzO9jYjkbTqjJSIFitVq5cKFC4wdO5bU1FReffXVDMtPnDjBs88+S4UKFRgxYgQODg4sWrSI3r17s2DBAho1asTu3bsZMGAAjz/+OO+++y7Hjx/ns88+49ixY3z77beYzWbWr1/PmDFj6NmzJ61ateKHH35g/fr1AKxdu5axY8cSFhbG5MmTcXZ2Zvjw4QBMmjSJ5cuXM3ToUHx8fJg2bRphYWGEhYXx8MMPM336dDZt2sSYMWPw8fGhffv2tz3Or776KkNJeBBTpkzh+vXrFC5cOP21uXPnMm3aNHr06MHw4cMJDg5mxowZXLhwgXHjxmVqv7Vq1eKHH34AbpS2ZcuWpX/u5uaWLdnj4uJ4/vnnSUtLY8iQIXh7e7NgwQJefvllVq5cSYUKFW7Z5ueff85QxP7t6aef5plnnkn/fPTo0RmW/fHHH+zcuZOmTZsCkJiYyO+//06/fv1ITk6mb9++pKSkMGrUKBwcHJg1axY9e/bkxx9/5JlnnqFly5YZ9jtq1CgAfHx8iIiI4MUXX6Ru3bpMmDABR0dHNm3axNdff02JEiXo37//HcchMTERHx8fPv/889tmz+wxnj17lrfeeouWLVvyzjvvpH9N3Glf//Xqq6/So0cP3nnnHZYtW8bbb7/N3Llzad26NQkJCfTq1YsiRYowatQo3N3d2b9/PzNnzsTZ2ZkxY8aQkJBAv3
79KF26NDNmzGDfvn2MGjWKUqVK0apVq0xlyKqePXuybt06Pv30U9q0aYOjoyPvv/8+devW5bXXXsvUPubPn8+sWbMYO3YsTz75pE1yikjuo6IlIgXK7t27adOmDQ4ODnz55Ze3fKM9c+ZMHB0dWbRoUfo3+23atKFz585MnDiRZcuWsWrVKipUqMD48eOxs7OjefPmuLi4MHLkSDZu3Ei7du2YM2cOjRs3ZsSIEQA0btyY5s2bc/36dcaPH0/t2rUBiImJ4csvv+T111/HYrHwww8/0L9/f3r06AFAsWLF6N69O56enkyePBkHBwdatWrFsWPHmDt37m2L1oULF/jyyy+pVasWgYGBDzRehw8f5ueff8bX15eYmBgArl+/zqxZs+jevXv68bVo0QJPT09GjBhBnz59qFq16j337ebmRt26dQHYvHkzQPrn2WXlypWEhYWxcuVKfH19Aahfvz5du3Zl9+7dt/z5x8XFMXny5DuOnY+PT4aM/y6ELVq0wMfHh1WrVqUXrT///JP4+Hi6du3KgQMHOHXqFN999x316tVLz9KxY0dmzZrFjBkz8PHxybDff7/Xli1b8PX15fPPP09f3qxZM7Zu3crOnTvvWrQSEhIoXLjwHbNn9hiDgoJISUnhnXfeoVq1avfc13/17NmTgQMHAtCyZUuefPJJvvjiC1q3bk1oaCg+Pj58+umnlC1bFoAmTZpw8OBBdu3aBUBYWBh16tTh/fffp2zZsrRo0YIlS5awefNmmxUtk8nE+PHjefzxx5k0aRJms5lr166xcOFCzGbzPbf//vvvmTRpEmPGjOHpp5+2SUYRyZ106aCIFCg1a9ZkwoQJeHh4MHz48FvO+uzatYu2bdtm+MbR3t6exx57jCNHjhAXF8cnn3zCqlWrsLOzIzU1ldTUVB566CHs7OzYvXs3qampBAUF0aJFi/R9ODk54e/vj4uLS3rJghvfnCcmJnL06FGOHj1KUlJS+lkNuPGNtpOTE35+fjg4OGTYLjAwMMOlejd9+umnNGzYkLZt2z7QWFmtVsaOHcvTTz9NjRo10l/fv38/iYmJtGvXLv34U1NT0y8T3Lp1a4b9/Hud/15Wl9kc97vt3r17KVOmTHrJAnBxceGPP/7IcNbmplmzZlGkSBGef/75LL+XnZ0dTz75JOvWrSMhIQG4UfSaNWuGj48PjRo14sCBA9StW5e0tDRSU1MpXLgwzZs3Z/fu3ffcf4sWLfj2229xcnLixIkTbNiwgdmzZxMVFZXh8tPbuXDhAu7u7lk+pv+qVasW9vb2fPvtt4SFhZGcnExqaipWqzVT2//7bI7JZKJjx44cOnSIxMREfH19WbJkCaVLlyY0NJSNGzcyf/58Tp06lX58VapUYfbs2ZQtW5bk5GQ2bdpEdHQ0lStXzvA+Foslw9fd7fLdXCcz2cuWLcvgwYNZuXIlP/30EyNGjEgvg3fz999/M3r0aBo2bMizzz57z/VFJH/RGS0RKVDc3Nx48sknqVSpEs8//zxvv/02P/zwQ/pPpqOjoylWrNgt2xUrVgyr1UpsbCyFChXCyckJuPGN57/FxMRw5coV0tLSKFKkSIZlnp6eeHh4ZHjt5qVXly9fTi9N/93Ow8MDT0/PW7ZLTU3NcO8K3CiK69ev55dffmHNmjWZGZI7WrVqFaGhocyZM4dPP/00/fVr164B3PEMSkRERPrvw8LCbhmj+8mxatUqTCYTRYsWpUGDBrz11lu3fHN9O9euXaNo0aKZep/Q0FAWLlzIV199RXh4+H1lfeqpp5gzZw7r1q2jSZMmbN++ncmTJ6cvd3R0BG7ct/Xve3Uyc2bEYrEwdepUvvvuO+Lj4ylZsiR+fn7pX4t3ExYWRunSpe/jiDIqW7YskyZNYurUqemXed7UqFGje25fokSJDJ8XLVoUq9VKTEwMzs7OfP3118yZM4dr165RrFgxateujYuLyy33P8bExBAQEABA8eLFeeSRRzIsf+mll2
557//mmzVrFrNmzcJsNlOsWDFatGjBW2+9dceJcR599FEmTJgAQPPmze95rACBgYG0adOGf/75h7/++uu+Hu8gInm
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for i in clusters_range[1:]: \n",
|
|||
|
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
|
|||
|
" labels = kmeans.fit_predict(data_scaled)\n",
|
|||
|
" score = silhouette_score(data_scaled, labels)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Построение диаграммы значений силуэта\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициенты силуэта для разных k')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 28,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта: 0.250\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1YAAAJzCAYAAAAMSoJaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzddZyU1f7A8c907Wx3s7AsCyzs0imNiBhgoChgd9e9Xv15w6vXTuxCQRQLREK6e4FdOra7d2an8/n9MTCyAgoq5nm/XrxgnjzPmWeG5zvnnO+RSZIkIQiCIAiCIAiCIPxk8t+6AIIgCIIgCIIgCH90IrASBEEQBEEQBEH4mURgJQiCIAiCIAiC8DOJwEoQBEEQBEEQBOFnEoGVIAiCIAiCIAjCzyQCK0EQBEEQBEEQhJ9JBFaCIAiCIAiCIAg/kwisBEEQBEEQBEEQfiYRWAmCIAiCIAiCIPxMIrASBOGsmjp1KllZWe3+9OnTh2nTprFt27bfuniCIPzJZWVl8eqrr56w/PDhwwwcOJBhw4ZRVlZ2yv1fffVVsrKyyMnJwWq1nnSbTz75hKysLEaOHPlLFVsQhD8gEVgJgnDWde3alblz5zJ37lzmzJnDU089hUql4vrrr+fIkSO/dfEEQfiLOXLkCNdccw06nY7Zs2eTnp7+o/t4vV5WrVp10nWLFy/+hUsoCMIfkQisBEE460JCQsjNzSU3N5fevXszevRoXn31VeRyOV999dVvXTxBEP5CiouLmT59OgaDgdmzZ5OSknJa+/Xq1YslS5acsLy+vp78/Hyys7N/6aIKgvAHIwIrQRB+EzqdDo1Gg0wmCy6bOnUqU6dObbfd888/T1ZWVrsAbPbs2YwaNYq8vDyuvvpqDh8+DMDHH39MVlYWpaWl7Y7x9ddfk52dTW1tLQArVqxgypQp5OXl0b17d8aNG8fHH3/cbp+///3vJ3RhPPanqqoquM33u/58+umnJ3Q9Wrx4MePHjyc3N5dJkyaRn5/fbp8fK8/WrVvJyspi69at7fb7fn2dTv253W6efvpphg0bRnZ2drvr+qEg9/vHfuKJJ8jJyWHdunXAd92lTvbn+HKfTt03NDTwt7/9jYEDBwbf4127dgEwcuTIH31f8vPzufrqq+nZsyf9+vXjb3/7Gy0tLcHjf/XVV2RlZVFYWMjEiRPp0aMHF1xwAd9++227clgsFv73v/8xevRocnJymDBhAl988UW7bY4vT5cuXejbty933nknra2tp6xLgJKSEu644w769etH3759ufnmmykuLj7l9j9Uv8e/b2VlZdx1110MHjyY3Nxcpk6dyo4dO4Lrq6qqgvstWLCg3TlWr14dXHe8xYsXM2nSJPLy8hg8eDCPPfYYZrP5hLId72T34siRI/n73/9+ytffd6ysx1/fzp07mTx5Mjk5OQwePJjHH38cp9N5ymN8X3FxMdOmTcNoNDJ79mwSExNPe9/x48ezYcOGE7oDfvvtt3To0IEuXbqcsM+KFSuYNGlSsLz//e9/sdvtJ2xzOp//zZs3c91119GzZ08GDx7Ms88+i8/nC263ceNGLr/8cvLy8ujbty+33nrrD95TgiD88kRgJQjCWSdJEl6vF6/Xi8fjobGxkeeffx63280ll1xyyv0qKiqYOXNmu2XLli3j8ccf5/zzz+e1117D5/Nxyy234Ha7ueCCC9BoNHz99dft9pk/fz4DBw4kISGBNWvWcPvtt9OtWzdef/11Xn31VVJSUvjPf/5DYWFhu/1iYmKCXRjnzp3Lrbfe+oPXaTabeemll9ot2717Nw888AC5ubm88cYbJCQkcMstt9DU1ARwRuU5Uyerv3feeYcPP/yQ6dOn8+GHHzJ37lxmzJhxRsfdvXs3n3zyCS+99BJ5eXnt1h1fX4899li7dadzrTabjSuvvJKtW7fy4IMPMmPGDDQaDddddx1lZWXMmDGjXZlvvfXW4PliY2PZvn0711
xzDVqtlpdeeol//OMfbNu2jWnTpp3wAH7zzTczatQoZsyYQYcOHbjnnntYu3YtAE6nkylTpvDNN99www038Prrr9O7d28eeeQR3nzzzXbHGTZsGHPnzmXWrFncf//9bNy4kSeeeOKU9VdfX8/kyZMpKyvjX//6F88++yxNTU1Mnz4dk8n0g3V/fP1+/30rKipi0qRJVFVV8eijj/Lcc88hk8mYPn36CeMZDQbDCd3aFi9ejFze/rHg9ddf57777iM3N5dXXnmF22+/naVLlzJ16tQzCmh+CbW1tVx//fVEREQwY8YM7rrrLr7++mseeuih09q/pKSE6dOnExISwuzZs4mLizuj85977rn4fL6T1tv5559/wvbffPMNt99+OxkZGbz22mvccccdLFiwgNtuuw1JkoAz+/w/8MAD9O7dmzfffJMJEybw7rvv8vnnnwNQWVnJbbfdRvfu3XnjjTd44oknKC0t5aabbsLv95/RdQqC8NMpf+sCCILw57d9+3a6det2wvL77ruPjh07nnK/J598kszMTPbt2xdc1tLSwpQpU7jvvvuAQAvMsV/7s7OzGTNmDAsWLODuu+9GJpNRV1fHli1bePbZZ4HAw+fEiRN55JFHgsfMy8ujf//+bN26lZ49ewaXq9VqcnNzg69LSkp+8DpfeeUVEhMT27VW1NXVce655/Lf//4XuVxOdHQ0EyZMoKCggNGjR59Rec7Uyepv9+7ddOnSheuuuy647FhLz+k61mI4atSoE9YdX18ul6vdutO51nnz5lFdXc28efOCXat69erFxRdfzPbt27nsssvalTk1NbXdOZ9//nk6dOjAW2+9hUKhAKBnz56cf/75fPnll1x11VXBbadOncrtt98OwNChQ5k4cSKvvfYaw4YN46uvvuLw4cN8+umnweBx6NCheL1eXn/9da644grCw8MBiIyMDJahb9++bNq0qV2df9/MmTNxu9188MEHxMTEANClSxeuvPJKCgsLGTZs2Cn3Pf5av/++zZgxA7VazUcffURISAgAw4cPZ8KECTzzzDPtWtvOOecc1q9fj9vtRq1W43K5WLlyJX379g22MJrNZt544w0uv/zydkFy586dueqqq06oz7PtnXfeISIigtdeey343srlch599FEOHTp0QqvZ8crKypg2bRpNTU14PJ6fFGxER0fTt29flixZwoUXXghAdXU1hYWFPPPMM7zxxhvBbSVJ4rnnnmPo0KE899xzweXp6elcc801rF27luHDh5/R5/+yyy4L3q8DBw5kxYoVrFmzhiuuuILdu3fjdDq5+eabgwFjfHw8K1euxG63B+8HQRDOLhFYCYJw1nXr1o1///vfQOCBo62tjXXr1vHiiy9it9u59957T9hn3bp1bNq0iXfeeYdp06YFl19xxRUA+P1+7HY7y5YtQ6vVkpSUBMCll17KwoULyc/Pp2/fvsyfPx+DwcCYMWMAuOGGG4BAy0hpaSkVFRXs2bMHCARpP9Xhw4eDrRbHyggwduxYxo4diyRJ2O12lixZglwup0OHDme1PKeqv5ycHN5++22WLl3KgAEDMBgMp/2QKUkSu3btYvHixSe0hJ2O07nWHTt2kJyc3G68ik6nY+nSpT96fIfDQWFhIddff32wlRQgJSWFjh07snHjxnaBwMSJE4P/lslkjBkzhldffRWn08m2bdtISko6oUXuwgsv5IsvvmgXAB07l9/v5+DBg+zYsYNBgwadspw7duwgNzc3GFRB4CF49erVP3qNP2Tbtm2MGDGi3UO0UqkMtu7abLbg8gEDBrBu3Tq2bt3K0KFDWbduHSEhIfTp0ycYWBUUFOB2u5kwYUK78/Tp04ekpCS2bdv2swOrY3Unl8tPaC07xu/34/V6yc/PZ8iQIcGgCgIBIgTq9IcCq4ULF9K9e3defPFFrrvuOh588EFmzpzZ7pw+ny/YkgSBe+L4c0GgO+B///tfrFYrISEhLFq0iG7dupGWltZuu5
KSEurq6rj55puD9yEEAu+QkBA2btzI8OHDz+jz//17MT4+PtitsGfPnmg0Gi699FLGjRvHOeecQ//+/enRo8cp60Q
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Применение K-Means\n",
|
|||
|
"# ========================\n",
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
|
|||
|
"df_clusters = kmeans.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg = silhouette_score(df_scaled, df_clusters)\n",
|
|||
|
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью K-Means')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Средний коэффициент силуэта, равный 0.250, указывает на умеренно хорошую кластеризацию. \n",
|
|||
|
"\n",
|
|||
|
"Средний коэффициент силуэта (silhouette score) указывает на качество кластеризации, измеряя, насколько хорошо точки внутри одного кластера близки друг к другу по сравнению с точками из других кластеров. Значения коэффициента силуэта находятся в диапазоне от -1 до 1:\n",
|
|||
|
"\n",
|
|||
|
"1: Указывает на идеально плотные и четко разделенные кластеры. \n",
|
|||
|
"0: Указывает на перекрытие кластеров или слабую структуру кластеризации. \n",
|
|||
|
"Отрицательные значения: Указывают, что точки в кластере расположены ближе к другому кластеру, чем к своему."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 29,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средний коэффициент силуэта (агломеративная кластеризация): 0.225\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1YAAAJzCAYAAAAMSoJaAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gU1frA8e/2bDab3juBEJKQkNBBelEERMGCUi3Ye7mWn3rVa7kWrBQLVkAUkSIKSu89QEILJb33bLLZ3n5/LNlLDCgo9vN5Hh7NzM7MmbNnZ/edc847EpfL5UIQBEEQBEEQBEH4xaR/dAEEQRAEQRAEQRD+6kRgJQiCIAiCIAiC8CuJwEoQBEEQBEEQBOFXEoGVIAiCIAiCIAjCryQCK0EQBEEQBEEQhF9JBFaCIAiCIAiCIAi/kgisBEEQBEEQBEEQfiURWAmCIAiCIAiCIPxKIrASBEEQBEEQBEH4leR/dAEE4Y8ydepU9u7d22aZVqslJSWFe+65h969e/9BJRMEQRD+qv773/9iMBh4+OGHOXHiBPfddx+7du1CJpP90UUTBOE3JgIr4R8tJSWFZ555BgCHw0FjYyNffPEFt9xyC8uWLSMxMfEPLqEgCILwV3LjjTcyZcoU+vbti0Kh4OmnnxZBlSD8Q0hcLpfrjy6EIPwRpk6dCsCCBQvaLDcajfTr149Jkybx2GOP/RFFEwRBEP7CbDYbJSUlBAUF4e/v/0cXRxCE34mYYyUIP6JWq1GpVEgkEs+yqVOnegKxVq+//jpJSUksW7bMs2zhwoUMHz6czMxMpkyZwsmTJwH4/PPPSUpKorCwsM0+vvnmG5KTk6msrARg/fr1TJo0iczMTLp27cqoUaP4/PPP22zz+OOPk5SUdNZ/ZWVlntcMGzaszXZffvklSUlJzJo1y7Ns9erVjB49moyMDCZMmEBWVlabbX6uPHv27CEpKYk9e/a02e7H9XU+9We1WnnllVcYPHgwycnJbc7rzDr+sR/v+8UXXyQtLY2tW7cCMGvWrHPW15nlPp+6r6mp4bHHHqNfv36e9/jgwYMADBs27Gffl6ysLKZMmUK3bt3o3bs3jz32GA0NDZ79L1u2jKSkJHJychg/fjzp6elcccUV/PDDD23Kodfr+e9//8uIESNIS0tj7NixfP31121ec2Z5unTpQq9evbj33ntpbGw8Z10CFBQUeIbC9urVi9tvv538/Pxzvv6n6vfM962oqIj77ruPSy65hIyMDKZOncr+/fs968vKyjzbrVy5ss0xNm3a5Fl3ptWrVzNhwgQyMzO55JJL+Pe//01TU1O7sp3pbG1x2LBhPP744+f8+8day3rm+R04cICJEyeSlpbGJZdcwvPPP4/ZbD7nPlr38+ijjzJgwABSU1Pp168fjz76aJv36Gztqqys7LzbdU1NDU888QSDBw8mPT2da665hg0bNrQpR+t27777bpvlJ0+ebNeG4eK14586/zPbw4//tV7bzue60lqW1n9du3blsssua9PGztZOWuvlzOvl+dblrFmzUCgUdOzYET8/P66//vp2dfhTxzIYDEydOpWUlBQsFovnXM9VH60cDgcffPABY8eOJT09nYyMDK6//np2797d5ljZ2dncfPPNdO/enb59+/LQQw9RXV19XnUOsGTJEsaMGUPXrl0ZMmQIs2bNwuFweNY//vjjTJ06la+//pqhQ4eSmZnJ9OnTOX78uOc1re/LmXVy6tQpUlNT27ynubm5TJ48mczMTEaMGMGXX37Z5lyOHz/OPffcQ9++fUlNTWXgwIG88MILbT57P34fof17frY2sG3bNpKSkjzXgrN97i0WC8OHDz9r+xH+mURgJfyjuVwu7HY7drsdm81GbW0tr7/+Olarlauvvvqc25WUlPDpp5+2WbZ27Vqef/55xowZw5w5c3A4HNxxxx1YrVauuOIKVCoV33zzTZttVqxYQb9+/YiIiGDz5s3cff
fdpKamMnfuXGbNmkVMTAz/+c9/yMnJabNdSEgIixcv9vy78847f/I8m5qaeOutt9osO3ToEI888ggZGRm8++67REREcMcdd1BXVwdwQeW5UGerv3nz5vHZZ58xffp0PvvsMxYvXszs2bMvaL+HDh3iiy++4K233iIzM7PNujPr69///nebdedzrgaDgRtuuIE9e/bwr3/9i9mzZ6NSqbj55pspKipi9uzZbcp85513eo4XGhrKvn37uPHGG/Hy8uKtt97i//7v/9i7dy/Tpk1r9wP89ttvZ/jw4cyePZsOHTrwwAMPsGXLFgDMZjOTJk3i22+/ZcaMGcydO5cePXrw5JNP8t5777XZz+DBg1m8eDELFizg4YcfZseOHbz44ovnrL/q6momTpxIUVERzz77LK+99hp1dXVMnz4dnU73k3V/Zv3++H3Ly8tjwoQJlJWV8dRTTzFz5kwkEgnTp09vN89Ro9GwcePGNstWr16NVNr262ru3Lk89NBDZGRk8M4773D33XezZs0apk6d+rMBzcVWWVnJLbfcQkBAALNnz+a+++7jm2++4dFHHz3nNiaTiWnTppGfn88zzzzDRx99xLRp01i1ahVvvvlmm9e2vo9ntqdWP9Wu6+rquOaaa8jKyuLBBx9k1qxZREVFcffdd7cLXs+33i9WO/658w8NDW13ffvxZ+zHznZdadW67Zw5c0hISOCxxx5rd6Prp1xIXZ7pm2++8dx8OV+LFi2irq6Ozz77DKVS6VmekpLS5v2+5ppr2mw3c+ZM5s6dy8SJE/nwww95/vnn0el03H///ZhMJgCOHTvGlClTsFgsvPrqqzz33HMcOXKEW2655bzq/P333+fpp5+mX79+vPfee0yePJl58+bx9NNPtylLbm4ub775Jvfccw+vvfYajY2NTJkyhZqamnOe94svvojdbvf8bTKZuPXWW7Hb7cyaNYtx48bxzDPPeG6a1dTUMHnyZEwmEy+//DLz5s1jzJgxLFiwgPnz519Qnf+YzWbjpZde+tnXffjhhz8ZMAv/PGKOlfCPtm/fPlJTU9stf+ihh+jYseM5t3vppZdITEzk6NGjnmUNDQ1MmjSJhx56CHD3wLTe7U9OTmbkyJGsXLmS+++/H4lEQlVVFbt37+a1114D3D8+x48fz5NPPunZZ2ZmJn369GHPnj1069bNs1ypVJKRkeH5u6Cg4CfP85133iEyMrLNnfCqqiouu+wyXnjhBaRSKcHBwYwdO5bs7GxGjBhxQeW5UGerv0OHDtGlSxduvvlmz7IL/cJq7TEcPnx4u3Vn1lfrXeBW53Ouy5cvp7y8nOXLl5OcnAxA9+7dueqqq9i3bx/XXnttmzLHxsa2Oebrr79Ohw4deP/99z3zLbp168aYMWNYunQpkydP9rx26tSp3H333QAMHDiQ8ePHM2fOHAYPHsyyZcs4efIkX375pSd4HDhwIHa7nblz53L99dd7hh4FBgZ6ytCrVy927tzZps5/7NNPP8VqtfLJJ58QEhICQJcuXbjhhhvIyclh8ODB59z2zHP98fs2e/ZslEol8+fPx8fHB4AhQ4YwduxYXn311Ta9bYMGDWLbtm1YrVaUSiUWi4UNGzbQq1cvT09MU1MT7777Ltddd12bYKJz585Mnjy5XX3+1ubNm0dAQABz5szxvLdSqZSnnnqKEydOnPVudlFREeHh4bzyyivExMQA0LdvX3JyctoFm2e+jz/2U+36k08+oaGhgTVr1hAVFQW4g7Qbb7yRV199lbFjx3oCp0GDBvHDDz9QU1PjCdy+//77NvUOF68d/9z5n3mNa72+JScnEx0dfdZ6gLNfV1qduW1ERAQbN24kNzeXDh06nHN/v7QuWxkMBmbOnElqaupPfu7O5HA4PPN8e/Xq1Wadj49Pm/d727ZtbdbX1NTw4IMPtunxUalU3HvvvZw4cYKMjAzee+89/P39+fjjj1GpVACEhoby8MMPk5+f/5N1rtfrPYHbU0
89BcCAAQPw9/fnqaee4qabbvLMS9br9bz33nv07NkTgPT0dEaMGMH8+fN55JFH2p33mjVryMnJafN+lJeXk5aWxv/
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Агломеративная кластеризация\n",
|
|||
|
"# ========================\n",
|
|||
|
"agg_cluster = AgglomerativeClustering(n_clusters=3) \n",
|
|||
|
"labels_agg = agg_cluster.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"# ========================\n",
|
|||
|
"silhouette_avg_agg = silhouette_score(df_scaled, labels_agg)\n",
|
|||
|
"print(f'Средний коэффициент силуэта (агломеративная кластеризация): {silhouette_avg_agg:.3f}')\n",
|
|||
|
"\n",
|
|||
|
"# ========================\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"# ========================\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=labels_agg, palette='viridis', alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация кластеров с помощью агломеративной кластеризации')\n",
|
|||
|
"plt.xlabel('Первая компонентa PCA')\n",
|
|||
|
"plt.ylabel('Вторая компонентa PCA')\n",
|
|||
|
"plt.legend(title='Кластер', loc='upper right')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Значение коэффициента силуэта лежит в диапазоне от -1 до 1. Ближе к 1: Хорошо сформированные, плотные кластеры, четко отделенные друг от друга. \n",
|
|||
|
"\n",
|
|||
|
"Ближе к 0: Кластеры пересекаются или слабо разделены, не имеют четких границ. Точки расположены одинаково близко как к своему кластеру, так и к соседним. \n",
|
|||
|
"Ближе к -1 (Отрицательные значения): Некоторые точки скорее относятся к другим кластерам, чем к текущему (ближе к центрам других кластеров). Очень плохая кластеризация. \n",
|
|||
|
"Ближе к 1: Все точки внутри каждого кластера плотно сгруппированы и значительно удалены от точек других кластеров. Свидетельствует о четкой и хорошо разделенной структуре данных. Единица говорит об идеальной кластеризации.\n",
|
|||
|
"\n",
|
|||
|
"Значение 0.225 указывает на то, что кластеры с нечеткой границей и неоптимальный выбор числа кластеров или особенности данных, затрудняющие их разделение."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Вроде усёё :)"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "mai",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|