875 lines
1.6 MiB
Plaintext
Raw Normal View History

2024-12-12 20:25:35 +04:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n",
" 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n",
" 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n",
" 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n",
" dtype='object')"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import set_config\n",
"set_config(transform_output=\"pandas\")\n",
"\n",
"random_state = 42\n",
"\n",
"# Подключим датафрейм и выгрузим данные\n",
"df = pd.read_csv(\"data/house_data.csv\")\n",
"\n",
"df.columns"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Удалим ненужные столбцы"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sqft_living</th>\n",
" <th>price</th>\n",
" <th>floors</th>\n",
" <th>bathrooms</th>\n",
" <th>bedrooms</th>\n",
" <th>sqft_basement</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1180</td>\n",
" <td>221900.0</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2570</td>\n",
" <td>538000.0</td>\n",
" <td>2.0</td>\n",
" <td>2.25</td>\n",
" <td>3</td>\n",
" <td>400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>770</td>\n",
" <td>180000.0</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1960</td>\n",
" <td>604000.0</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4</td>\n",
" <td>910</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1680</td>\n",
" <td>510000.0</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>5420</td>\n",
" <td>1225000.0</td>\n",
" <td>1.0</td>\n",
" <td>4.50</td>\n",
" <td>4</td>\n",
" <td>1530</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>1715</td>\n",
" <td>257500.0</td>\n",
" <td>2.0</td>\n",
" <td>2.25</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>1060</td>\n",
" <td>291850.0</td>\n",
" <td>1.0</td>\n",
" <td>1.50</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>1780</td>\n",
" <td>229500.0</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>3</td>\n",
" <td>730</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>1890</td>\n",
" <td>323000.0</td>\n",
" <td>2.0</td>\n",
" <td>2.50</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" sqft_living price floors bathrooms bedrooms sqft_basement\n",
"0 1180 221900.0 1.0 1.00 3 0\n",
"1 2570 538000.0 2.0 2.25 3 400\n",
"2 770 180000.0 1.0 1.00 2 0\n",
"3 1960 604000.0 1.0 3.00 4 910\n",
"4 1680 510000.0 1.0 2.00 3 0\n",
"5 5420 1225000.0 1.0 4.50 4 1530\n",
"6 1715 257500.0 2.0 2.25 3 0\n",
"7 1060 291850.0 1.0 1.50 3 0\n",
"8 1780 229500.0 1.0 1.00 3 730\n",
"9 1890 323000.0 2.0 2.50 3 0"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"columns_to_drop = ['id', 'date', 'grade', 'yr_renovated', 'sqft_living15', 'lat', 'long', 'sqft_lot15', 'sqft_above', 'zipcode']\n",
"df_cleaned = df[['sqft_living', 'price', 'floors', 'bathrooms', 'bedrooms', 'sqft_basement']]\n",
"\n",
"df_cleaned.head(10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Визуализация данных"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0, 0.5, 'sqft_basement')"
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABSIAAAP0CAYAAAC+n0IvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVf4G8Hdq6kwaIQmEJCQhIfQWBBVFdO2KLLru2pCFnyi4FkRsiIKLq4KACiqIiCgooi6rK3Zd10VEQAGVIAQSipRQ0tvU3x8nd+70mSQzyUzyfp6HJ+TeuXfOnAwxvvme81VYrVYriIiIiIiIiIiIiIJI2d4DICIiIiIiIiIioo6PQSQREREREREREREFHYNIIiIiIiIiIiIiCjoGkURERERERERERBR0DCKJiIiIiIiIiIgo6BhEEhERERERERERUdAxiCQiIiIiIiIiIqKgYxBJREREREREREREQccgkoiIiIiIiIj8ZrVa23sIRBSmGEQSUcDdfPPNyM/Pd/jTr18/jB49GnPmzEFlZaXX648cOYL8/Hy8//77bTTiwBozZozDa+/duzfOOuss3H777dizZ09AniPc54iIiIgo2Dr7z6SBsmrVKpxzzjkYMGAAXnzxRWzfvh233XZbs++Tn5+PF154AQCwZcsW5OfnY8uWLX5dy68FUcehbu8BEFHH1KdPHzz22GO2z41GI3799VcsXLgQRUVFeOutt6BQKNxe27VrV6xbtw4ZGRltNdyAO//88zF16lQAgMlkQllZGVauXIkJEyZg48aNSEpKaucREhEREXV8nf1n0taqqanB008/jdGjR+Ovf/0r0tPT8dxzz2H//v2tum/fvn2xbt065Obm+vV4fi2IOg4GkUQUFLGxsRg0aJDDscLCQtTW1uL555/Hzp07Xc5LtFqtx3PhIjEx0eU19O/fHxdddBE++eQT3Hjjje0zMCIiIqJOpLP/TNpalZWVsFgsuOiii1BYWBiw+7r7unjDrwVRx8Gl2UTUpvr16wcAOHr0KACxZGbGjBm46667MGjQIEycONHt0osDBw7gzjvvxPDhw1FYWIgpU6Y4/Ca2sbERzzzzDM4//3z069cPV111FTZu3OhxHI2NjRg6dCiefvpph+MmkwkjRozA3//+dwDAL7/8ggkTJmDo0KEYPHgwbr31VuzYsaNFrz0uLs7t8fXr1+OKK66wLRV64YUXYDabHR7z2Wef4eqrr8aAAQMwbtw4lyXe0vKWt99+GxdccAGGDBmCTZs2AQA2bdqEG264AUOHDsVZZ52F++67D8eOHXO4vrS0FHfddRfOOeccDBo0CDfffDO2b99uOy99TT755BNMnToVgwYNwtlnn40XX3wRNTU1ePjhhzF06FCcffbZmD9/vsO+Qf/+979tYx8xYgRmzJiBEydOtGgOiYiIiAKhM/xMumnTJvzpT3/C4MGDUVhYiDvuuMOlknHt2rW45JJLMGDAANx4443YvHmzbcn0+++/jzFjxgAAHn74YeTn5+PBBx/EP//5T/z++++tWiptvzT7xx9/RH5+Pr7++muHxxQVFSE/Px+ff/65y9fi/fffR58+fbBz505cf/316N+/Py644AK8+uqrDvcoKyvDvffea/t6zZ49G4sWLbK9LiJqewwiiahNlZSUAAB69OhhO/bxxx8jJiYGL730EiZPnuxyzYkTJ3D99dejtLQUjz/+OObPn49Tp05hwoQJqKiogNVqxbRp0/D2229j4sSJeOmllzB48GDce++92LBhg9txRERE4JJLLsHHH3/sEJpt2rQJ5eXlGDt2LGpqajB58mQkJCTghRdewKJFi1BfX49Jkyahurra6+u0Wq0wmUwwmUwwGAw4evQo5s2bhy5duuCyyy6zPW7ZsmV49NFHMXLkSLz88su48cYb8corr+DRRx+1Pearr77CXXfdhfz8fCxduhSXXXYZ7r//frfPu2TJEjzwwAOYPX
s2Bg8ejA0bNuCvf/0r0tLSsHDhQjz00EP46aefcP311+P06dMAgOLiYvzxj3/EkSNHMGvWLCxYsAAKhQITJkzADz/84HD/WbNmIS8vDy+99BJGjhyJ5557Dtdeey0iIyOxZMkSXHzxxVixYgU++eQTAMD27dsxc+ZMXHzxxXjllVfw0EMP4fvvv8d9993ndf6IiIiIgqmj/0x6+PBhTJ06Ff369cNLL72EefPmoaSkBLfddhssFgsA4I033sCcOXMwatQoLF26FP369cM999xju8fo0aOxZMkSAMAdd9yBdevWYerUqTj//PORnJyMdevWYfTo0c2ZdreGDBmCjIwMfPTRRw7H//3vfyM+Ph7nn3++2+ssFgvuueceXH755Vi+fDmGDBmCZ555Bt9++y0AwGAwYMKECfjxxx/x8MMP4x//+Af27NmDlStXtnrMRNRyXJpNREEhBXGSyspK/PDDD7YfyKTfQgOARqPBnDlzoNVqAYjqO3urVq2CwWDAa6+9huTkZABA79698Ze//AU7d+6EWq3Gt99+i0WLFuHyyy8HAIwaNQr19fVYsGABrrzySqjVrt/uxo4di/feew/bt2/HsGHDAAAfffQRsrOz0b9/f+zYsQPl5eW45ZZbMGTIEABAdnY21q1bh9raWuh0Oo+vf8OGDS4/cCoUCsyfPx+JiYkAgOrqarz44ou4/vrrMWvWLADAueeei/j4eMyaNQsTJ05Er169sHTpUgwYMADz58+3vTYAePbZZ12e94YbbsCll14KQPxwtmDBApx77rkOjx0yZAguv/xyvPrqq5g5cyaWLFkCrVaL1atXIzY2FoD4wfPKK6/EM888g3fffdd27ahRo2w/oPbq1Qv//ve/kZSUhNmzZwMARowYgQ8//BA//vgjLrvsMmzfvh2RkZG47bbbbF/f+Ph4/Pzzz7BarR73ZCIiIiIKhM76M+muXbvQ0NCAKVOmICUlBQCQmpqKL7/8EnV1dYiOjsZLL72ESy65xPZz6KhRo1BbW4v169cDEFsNFRQUAAAyMjJsS6MTExMDvlT66quvxsqVK9HQ0IDIyEhYrVZs3LgRl156qe3r4cxqtWLq1Km47rrrAABDhw7F559/jv/85z8YNWoUPvjgAxw4cADvvfee7es8YsQIXHTRRQEbNxE1HysiiSgotm7dir59+9r+nH322Zg+fTr69euHZ5991iGAys7O9vgDBiCq6gYNGmT7gQ8QP0h9/fXXOP/887F582YoFAqcf/75tipEk8mEMWPG4OTJk9i3b5/b+w4fPhzdunWz/fa1sbERX3zxBcaOHQtABG2JiYm4/fbbMXv2bHz++efo0qUL7r//fqSmpnp9/RdccAHeffddvPvuu1i/fj2WLVuGsWPHYsaMGXjnnXcAAD/99BMaGhowZswYl3ED4jfhDQ0N+PXXX3HBBRc43N++qtKe9MMiIH7Tf/LkSVx55ZUOj8nIyMDgwYNt1Y4//PADLrjgAlsICQBqtRpXXHEFfvnlF9TW1tqODx482Pb3Ll26AAAGDBhgO6ZQKBAXF2f77XxhYSHq6+tx5ZVX4tlnn8W2bdtw7rnn4s4772QISUREREHXWX8mHThwICIiInDttddi3rx5+Pbbb9G7d2/ce++9iI2NRUlJCU6fPo0LL7zQ4bqrr77av4kNsKuvvhp1dXW25dk//vgjjh49apsDT+x/NtVqtUhMTERdXR0A4Pvvv0ePHj0cwubY2FiXn6uJqG2xIpKIgqJv376YM2cOABFORUREIC0tzSHsksTExHi9V0VFBdLT072et1qttt8QOysrK3MI6CQKhQJXXXUV1q9fj1mzZuHrr79GXV0drrrqKtu41qxZg5deegkff/wx1q1bh8jISIwdOxazZs3y+oNqfHw8+vfv73Bs9OjRKCsrw/z58zF+/HhUVFQAAG677TaP466srITVakVCQoLDua5du7q9Jjo62vZ36f5SYGivS5cu2L17NwBRGe
DpMVarFTU1NbZj7r5+9s/pbPDgwVi+fDlWrVqF1157DcuXL0eXLl1w++234+abb/Z4HREREVEgdNafSdPT0/Hmm29
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"sns.set(style=\"whitegrid\")\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"#Отношение цены к спальным комнатам\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['bedrooms'], color=\"brown\")\n",
"plt.title('Price vs Bedrooms')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('Bedrooms')\n",
"\n",
"#Отношение цены к квадратному футу жилплощади\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_living'], color=\"pink\")\n",
"plt.title('Price vs sqft_living')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('sqft_living')\n",
"\n",
"#Отношение цены к количеству этажей\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['floors'], color=\"green\")\n",
"plt.title('Price vs floors')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('floors')\n",
"\n",
"#Отношение цены к размеру подвала\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_basement'], color=\"orange\")\n",
"plt.title('Price vs sqft_basement')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('sqft_basement')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Иерархическая агломеративная кластеризация"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0gAAAJ4CAYAAABf6sPCAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYgElEQVR4nO3dd3wUdf7H8fcmuyGBBEIvAemEGjpWQAOiJ1hAj1MQsR0gAUTgp+AJotIOQQyE6ql0BYGjnCgeRfBOREEFOypFCd2QUJKQtr8/mJ3LJpuQbDaZkLyejwcPNrOzM5+dnZ2Z93y/M2tzOp1OAQAAAADkZ3UBAAAAAFBcEJAAAAAAwEBAAgAAAAADAQkAAAAADAQkAAAAADAQkAAAAADAQEACAAAAAAMBCQAAAAAMBCQAAAAAMBCQAEiSxo4dq/DwcI//xo4da3V5ADJJSEhQ+/btdeDAASUkJOipp57SW2+9ZXVZAFAi2K0uAEDxUbVqVcXExLgNGzZsmEXVAMhJhQoV9Nhjj6lv375yOp0KDw/X3//+d6vLAoASgYAEQJKUnp6usmXLqk2bNm7DAwICrCkIQK6GDRumBx98UOfPn1fdunXl7+9vdUkAUCLQxQ6AJCktLU2BgYF5Gnfv3r16+OGH1bp1a3Xq1EnPPfec4uLizOfXrVun8PBwHTt2zO11kZGRbt31UlNTc+zWl3Va+/fvV+/evRUREaG7775bH374odu0L1y4oKlTp6p79+5q1aqVevXqpTVr1mSbf9b5HDt2TAMGDNDYsWO1YMEC3XTTTWrfvr2GDh2q2NhYt9dv3bpV/fr1U9u2bdWyZUvdeeedWrFihfn8nj17zOnu27fP7bXLly9XeHi4IiMjs9XzwgsvuI2bkJCgli1bKjw8XHv27Mnz/HPy3nvvqU+fPmrTpo0iIiJ077336oMPPsi2jD11q8zp8xkwYIDbPDZv3qw+ffqobdu2uvnmmzVhwgQlJCSYz8+ZM0fh4eFq27atUlJS3F47YsSIbF05L1++rOnTp6tr165q2bKl7r77bm3evNntdZGRkZo1a5amTJmijh076vrrr9ezzz6r+Pj4PL//3LqWrlu3zvxMM38Of/zxhzp06ODxswwPD1fTpk3VsWNHDR8+XOfOnTPHCQ8P15w5c9xqcy0Xb5alJFWpUkUNGjTQp59+etXusFnn9f7776tjx46aOXOmJPf1N+u/zHX/+OOPGjZsmG644Qa1aNFCnTt31qRJk5ScnGyOk5KSotdff13dunVTRESEevXqpX/+8595WuaSdPz4cY0aNUqdOnVS69atNXDgQH3//ffm9I8dO6bw8HC9//77GjJkiFq3bq1bb71Vc+fOVUZGhtvnknWZjBo1yu0zdTqdio6OVufOndW+fXsNGTJEJ06cMMdPT0/XokWL1KtXL0VERKhNmzZ68MEH9dlnn+X6OUrZP/OsfzudTj344INu28uxY8e6rVuS9O6773pcfwAUDlqQAEiSkpKSVKFChauO98UXX+ixxx7TDTfcoNdff10JCQmKjo7WI488ojVr1uQ5ZElXDoIlaf78+apUqZKkKwezWYONJA0ePFgPP/ywnnnmGa1Zs0YjR47UwoUL1bVrVyUnJ6tfv376448/NGLECIWFhWnr1q3629/+prNnz2rIkCHmdLp27aqhQ4eaf1erVk2StG3bNlWsWFEvvPCCMjIyNHPmTA0YMEDvv/++goKC9PHHHysqKkqPPPKIhg8fruTkZK1cuVIvv/yyWrZsqdatW5vTLFeunLZv36727dubwzZv3iw/v+znpMqVK6ePP/5YTqdTNptNkvTRRx8pPT3dbbz8zD+zFStWaNKkSRo+fLjat2+vhIQEvfHGGxozZozatm2rGjVqmOPGxMSoatWqkmR+HpL0wAMP6M9//rP590svveQ2j3nz5mn27Nnq16+fnnnmGf3++++Kjo7W119/rdWrV7utEzabTbt371
bXrl0lSZcuXdLOnTvdlo3T6VRUVJS+/PJLjRgxQg0bNtS///1vPfPMM0pJSdF9991njrty5UrVrVtXU6dOVVxcnGbOnKmjR4/q3Xfflc1mu+r7Hzp0qB588EFJV1pkmjdvbq4f1113nX7++edsy3TmzJm6cOGCypcv7zbctW6lpqbq119/1fTp0zV58mTNmDHD42fjSX6WpUtqaqqmTJmS53lIUnJysl5++WU9+eSTuvvuu92emzBhglq0aGH+/Ze//MV8fPr0afXv319t2rTRtGnTFBAQoF27duntt99WtWrVNGjQIEnSmDFjtHPnTj311FNq3bq1du7cqbFjx8rhcFx1mcfFxenBBx9UUFCQxo8fr6CgIC1ZskT9+/fXmjVr1LBhQ7OeiRMnqmvXrpozZ4727dunmJgYJSYm6v/+7/88vu+9e/fq/fffdxu2ePFiLVy4UM8++6zq16+vadOm6emnn9bq1aslSTNmzNA777yj0aNHKzw8XKdOndLcuXP19NNP6+OPP1ZQUFC+ln1mGzZs0FdffZXrOAkJCXr99de9ngeA/CsVAWnhwoX6z3/+o2XLluVp/D179uiRRx7x+Fzt2rW1bds2X5YHFAvx8fFmWMjNzJkzVb9+fS1cuNDs0tO6dWv17NlTa9euVf/+/fM8z8TERElS27ZtVbFiRUnSJ5984nHcAQMGKCoqSpLUuXNn9e7dW3PnzlXXrl21bt06HTx4UO+++67atm1rjpOWlqZ58+bpwQcfVGhoqKQrB/5ZuxFKVwLiunXrVKdOHUlSgwYN1Lt3b61fv14PPfSQfvnlF/Xu3Vt/+9vfzNe0bdtW119/vfbs2eMWULp06aJt27aZB2knT57UV199pQ4dOmRrlbrxxhu1c+dO7d+/36zrgw8+UMeOHd1aLfIz/8x+//13PfHEE26hMCwsTH369NG+ffvUs2dPc3izZs1Uu3btbNOoUaOG2zILDg42HyckJGj+/Pnq27evJkyYYA5v0qSJ+vfvn22dcC0bV0Davn27qlat6nbW/9NPP9Unn3yiWbNm6a677pJ05fNMSkrSjBkz1KtXL9ntV3Zffn5+evvttxUSEiLpyucbFRWlTz75RF26dMnT+7/uuuskXelOmtP64fLNN99ow4YNatasmc6fP+/2XObXduzYUZ9++qm+++67HKeVVX6XpcuyZcuUmJioKlWq5Hle//rXv+RwOPTkk09m65rXqFGjHJfBwYMH1axZM0VHR5vrwU033aT//ve/2rNnjwYNGqSDBw9qy5Ytev755zVw4EBJV9bz2NhY7dmzR7169cp1mc+aNUvx8fF65513FBYWJunKenPXXXcpOjpas2fPNsdt0aKFGUC7dOmixMRELVmyRE899ZTbeipJGRkZmjRpklq0aOH2uSQmJmro0KF69NFHJV1pnXr55Zd1/vx5lS9fXqdPn9Yzzzzj1mpapkwZDR8+XD/99FOu60tuLl26pBkzZmSrJ6vZs2erVq1abq2RAApXie9it2LFinyfeWnbtq3+85//uP2LiYmRzWZz28kCJcnp06dVvXr1XMdJSkrS/v371bVrVzmdTqWlpSktLU116tRRw4YN9d///tdt/IyMDHOctLS0bNM7efKk/Pz8sh3IeNK7d2/zsc1m0+23364DBw4oOTlZn3/+ucLCwsxw5HLPPffo8uXL2r9//1Wn365dOzMcSVLz5s1Vp04dffHFF5KkJ598UtOmTdOlS5f07bffavPmzVq4cKEkZesyFhkZqSNHjujQoUOSpA8//FCtW7c2D/YyCwkJUadOncwTL3FxcdqzZ49bcMnv/DMbO3asxowZo/Pnz+vrr7/Whg0bzG55ub0ur77++mulpKSoV69ebsM7dOigsLAwff75527Du3Xrpu3bt8vpdEq60rLmCkEuu3fvls1mU9euXd3Wn8jISJ05c8atVScyMtIMR66/7Xa7+bn58v07nU5NmjRJDzzwgJo2berx+bS0NK
WkpOjAgQPat2+fWrZs6TZO1u9E5mCY32UpSWfPntXcuXP13HPPqUyZMnl6H6dOndIbb7yhfv365fu6pVtuuUXLly9
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
"\n",
"\n",
"linkage_matrix = linkage(df_cleaned, method='ward')\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(linkage_matrix)\n",
"plt.title('Дендрограмма агломеративной кластеризации')\n",
"plt.xlabel('Индекс образца')\n",
"plt.ylabel('Расстояние')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Получение результатов иерархической кластеризации"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ 2152, 18009, 1183, ..., 4881, 5049, 11343], dtype=int32)"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>sqft_living</th>\n",
" <th>price</th>\n",
" <th>floors</th>\n",
" <th>bathrooms</th>\n",
" <th>bedrooms</th>\n",
" <th>sqft_basement</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1180</td>\n",
" <td>221900.0</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2570</td>\n",
" <td>538000.0</td>\n",
" <td>2.0</td>\n",
" <td>2.25</td>\n",
" <td>3</td>\n",
" <td>400</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>770</td>\n",
" <td>180000.0</td>\n",
" <td>1.0</td>\n",
" <td>1.00</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>1960</td>\n",
" <td>604000.0</td>\n",
" <td>1.0</td>\n",
" <td>3.00</td>\n",
" <td>4</td>\n",
" <td>910</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>1680</td>\n",
" <td>510000.0</td>\n",
" <td>1.0</td>\n",
" <td>2.00</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21608</th>\n",
" <td>1530</td>\n",
" <td>360000.0</td>\n",
" <td>3.0</td>\n",
" <td>2.50</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21609</th>\n",
" <td>2310</td>\n",
" <td>400000.0</td>\n",
" <td>2.0</td>\n",
" <td>2.50</td>\n",
" <td>4</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21610</th>\n",
" <td>1020</td>\n",
" <td>402101.0</td>\n",
" <td>2.0</td>\n",
" <td>0.75</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21611</th>\n",
" <td>1600</td>\n",
" <td>400000.0</td>\n",
" <td>2.0</td>\n",
" <td>2.50</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>21612</th>\n",
" <td>1020</td>\n",
" <td>325000.0</td>\n",
" <td>2.0</td>\n",
" <td>0.75</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>21613 rows × 6 columns</p>\n",
"</div>"
],
"text/plain": [
" sqft_living price floors bathrooms bedrooms sqft_basement\n",
"0 1180 221900.0 1.0 1.00 3 0\n",
"1 2570 538000.0 2.0 2.25 3 400\n",
"2 770 180000.0 1.0 1.00 2 0\n",
"3 1960 604000.0 1.0 3.00 4 910\n",
"4 1680 510000.0 1.0 2.00 3 0\n",
"... ... ... ... ... ... ...\n",
"21608 1530 360000.0 3.0 2.50 3 0\n",
"21609 2310 400000.0 2.0 2.50 4 0\n",
"21610 1020 402101.0 2.0 0.75 2 0\n",
"21611 1600 400000.0 2.0 2.50 3 0\n",
"21612 1020 325000.0 2.0 0.75 2 0\n",
"\n",
"[21613 rows x 6 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"Text(0, 0.5, 'sqft_basement')"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABTgAAAeQCAYAAADpOTpRAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5xU5dn/8e/0ndnelypIVenSEUQ0irH3ROVnI2rUxyiWaAQpdsHeldhQYnksMTFq4qMxkSYgigqIVGnb2N6mnt8f684w7Gxh6+zu5/168drdc9/nPte5Zlhurjn3OSbDMAwBAAAAAAAAQAdkbu8AAAAAAAAAAKCpKHACAAAAAAAA6LAocAIAAAAAAADosChwAgAAAAAAAOiwKHACAAAAAAAA6LAocAIAAAAAAADosChwAgAAAAAAAOiwKHACAAAAAAAA6LAocAIAAAAAAADosChwAugQZsyYoUGDBoX9GTJkiKZOnar58+eruLi43v13796tQYMG6d13322jiFvWtGnTws598ODBGjdunK6++mpt2rSpRY7R0XMEAADQ2piTMicFEJ2s7R0AADTWkUceqblz5wZ/9nq9+uGHH/Twww9r48aN+stf/iKTyRRx34yMDL355pvq3bt3W4Xb4o499lhdc801kiSfz6fc3Fy9+OKLuuSSS/SPf/xDqamp7RwhAABA58eclDkpgOhDgRNAhxEXF6cRI0aEbRszZozKy8v1+OOP69tvv63VXsNut9fZ1lGkpKTUOoehQ4fqhBNO0Mcff6yLLrqofQIDAADoQpiTMicFEH1Yog6gwxsyZIgkae/evZKqlw7dfPPNuv766zVixAhddtllEZe6bNu2Tdddd53Gjh2rMWPG6KqrrtLWrVuD7W63Ww8++KCOPfZYDRkyRKeddpr+8Y9/1BmH2+3W0UcfrQceeCBsu8/n0/jx43X33XdLkr7//ntdcsklOvroozVy5Ehdeuml+uabb5p07omJiRG3v/322zrllFOCS6aeeOIJ+f3+sD7//Oc/dfrpp2vYsGE666yzai0rWrVqlQYNGqQ33nhDxx13nEaNGqVly5ZJkpYtW6YLL7xQRx99tMaNG6ebbrpJ+/btC9t/x44duv766zVp0iSNGDFCM2bM0Nq1a4PtNa/Jxx9/rGuuuUYjRozQxIkT9fTTT6usrEx/+tOfdPTRR2vixIlauHChDMMI7vv3v/89GPv48eN18803Kycnp0k5BAAAaAnMSWtjTgqgrVDgBNDhbd++XZLUq1ev4LaPPvpIsbGxeuaZZzRz5sxa++Tk5OiCCy7Qjh07NG/ePC1cuFD5+fm65JJLVFRUJMMwdO211+qNN97QZZddpmeeeUYjR47UjTfeqPfffz9iHA6HQyeddJI++uijsInPsmXLVFhYqDPOOENlZWWaOXOmkpOT9cQTT+iRRx5RZWWlrrjiCpWWltZ7noZhyOfzyefzyePxaO/evbrnnnuUlpamk08+Odjvueee05w5czRhwgQ9++yzuuiii/TCCy9ozpw5wT6fffaZrr/+eg0aNEhPPfWUTj75ZN1yyy0Rj/vkk0/qj3/8o+68806NHDlS77//vi6//HJ169ZNDz/8sG6//XatW7dOF1xwgfbv3y9J2rJli84++2zt3r1bs2fP1qJFi2QymXTJJZfoq6++Cht/9uzZGjhwoJ555hlNmDBBjz32mM4991zFxMToySef1IknnqjFixfr448/liStXbtWt956q0488US98MILuv3227Vy5UrddNNN9eYPAACgNTEnZU7KnBRoRwYAdAAXX3yxcdFFFxlerzf4Jz8/3/jHP/5hjB071rjggguMQCAQ7Dt8+HDD7XYH99+1a5cxcOBA45133jEMwzDuv/9+Y9iwYUZubm6wz759+4ypU6ca//73v40vv/zSGDhwoPHhhx+GxXHzzTcbkyZNMrxeb8Q4V65caQwcONBYvXp1cNstt9xiTJ
8+3TAMw1i3bp0xcOBAY+3atcH2nTt3Gg8++KCxb9++Os//uOOOMwYOHFjrz6BBg4wPPvgg2K+kpMQYNmyYceedd4bt/9ZbbxkDBw40Nm/ebBiGYZx99tnGeeedF9bnueeeC8tRzbk89dRTwT5+v9+YNGmScfnll4ftu3PnTuOoo44yHnjgAcMwDOMPf/iDMW7cOKO0tDTYx+v1GieddJJxzjnnGIYRek1uuOGGYJ+8vDxj4MCBxoUXXhjcFggEjFGjRhl33313MM6RI0eGvb7//ve/jSeeeCL4HgAAAGgNzEmZkzInBaIT9+AE0GGsXr1aRx11VNg2s9msiRMnasGCBWE3cz/88MNlt9vrHGvt2rUaMWKE0tPTg9uysrL0+eefS1Lw091jjz1WPp8v2GfatGn64IMP9NNPP+mII46oNe7YsWPVvXt3ffjhhxo9erTcbrc+/fRTXXnllZKkAQMGKCUlRVdffbWmT5+uyZMna9KkSXV+Un2g4447Ttdee62k6k/OCwoK9NFHH+nmm29WZWWlzj//fK1bt05VVVWaNm1arbil6k/ue/XqpR9++EF/+MMfwsY/+eST9dBDD9U67oHnuX37duXl5dX6ZLp3794aOXJk8JPwr776Sscdd5zi4uKCfaxWq0455RQ99dRTKi8vD24fOXJk8Pu0tDRJ0rBhw4LbTCaTEhMTg1cTjBkzRo888ohOPfVUnXTSSTr22GN1zDHH6Nhjj20whwAAAM3FnJQ5qcScFIg2FDgBdBhHHXWU5s+fL6l6guFwONStW7ewCUuN2NjYescqKipSz5496203DEOjRo2K2J6bmxtxMmkymXTaaafp7bff1uzZs/X555+roqJCp512WjCu119/Xc8884w++ugjvfnmm4qJidEZZ5yh2bNn1zsBTkpK0tChQ8O2TZ06Vbm5uVq4cKHOOeccFRUVSVJw8hop7uLiYhmGoeTk5LC2jIyMiPu4XK7g9zXj10z6DpSWlqYNGzZIkoqLi+vsYxiGysrKgtsivX4HHvNgI0eO1PPPP6+XX35ZL730kp5//nmlpaXp6quv1owZM+rcDwAAoCUwJ2VOKjEnBaINBU4AHUZsbGytyVRTxcfHq6CgoNb2FStWqGfPnoqPj5fL5dKrr74acf/DDjuszrHPOOMMPffcc1q1apX+8Y9/aMyYMerRo0ew/fDDD9fChQvl9/u1fv16/fWvf9Vf/vIX9e7dO+K9mRoyZMgQLV++XIWFhUpISJBU/Wl/nz59avVNS0tTUlKSzGaz8vPzw9pqJor1SUpKkqRa+0pSXl5ecIKamJhYZx9JSk5OVm5uboPHq8vkyZM1efJkVVZWauXKlXr11Vd19913a/jw4WGftAMAALQ05qSRMSdlTgq0Jx4yBKBLGj16tL799tuwCeX+/fs1c+ZMffHFFxo7dqwqKipkGIaGDh0a/LN582Y99dRTYUttDtavXz8dddRR+vDDD/XFF1/o9NNPD7Z9/PHHGj9+vPLy8mSxWDRy5EjNmzdPCQkJwSduHqrvvvtOiYmJSk5O1vDhw2Wz2ZSTkxMWt9Vq1cMPP6zdu3fL4XBo5MiR+uc//xl24/nPPvuswWP17dtX6enp+vvf/x62fdeuXfrmm2+CVxeMGTNGn3/+edin4n6/Xx9++KGGDh1a71UBDXnggQd0zjnnyDAMOZ1OHXfccfrjH/8oSU3OIQAAQHtgTsqcFEDL4ApOAF3SpZdeqvfff18zZ87UVVddJZvNpmeeeUZZWVk67bTTFB8frzFjxuiaa67RNddco379+mn9+vV6/PHHNXnyZKWkpNQ7/hlnnKEHHnhAVqtV06dPD24fNWqUAoGArr32Wl155ZWKjY3VRx99pNLSUp144on1jllQUKBvvvkm+HNlZaXef/99rVixQrNmzZLFYlFycrJmzpypxx57TGVlZRo3bpxycnL02GOPyWQyafDgwZKkWbNm6ZJLLtF1112nCy64QNu3b9ezzz
7bYN7MZrNmzZql22+/XTfddJNOP/10FRYW6sknn1RiYqIuu+wySdJ1112n//znP/p//+//6corr5TNZtNrr72mXbt
"text/plain": [
"<Figure size 1600x2400 with 8 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from scipy.cluster import hierarchy\n",
"\n",
"#Получение результатов\n",
"result = hierarchy.fcluster(linkage_matrix, 10, criterion=\"distance\")\n",
"\n",
"display(result)\n",
"display(df_cleaned)\n",
"sns.set(style=\"whitegrid\")\n",
"\n",
"plt.figure(figsize=(16, 24))\n",
"\n",
"#Вывод до кластеризации и после\n",
"#Отношение цены к спальным комнатам\n",
"plt.subplot(4, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['bedrooms'], color=\"brown\")\n",
"plt.title('Price vs Bedrooms')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('Bedrooms')\n",
"\n",
"plt.subplot(4, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['bedrooms'], color=\"brown\", hue=result)\n",
"plt.title('Price vs Bedrooms')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('Bedrooms')\n",
"\n",
"#Отношение цены к квадратному футу жилплощади\n",
"plt.subplot(4, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_living'], color=\"pink\")\n",
"plt.title('Price vs sqft_living')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('sqft_living')\n",
"\n",
"plt.subplot(4, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_living'], color=\"pink\", hue=result)\n",
"plt.title('Price vs sqft_living')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('sqft_living')\n",
"\n",
"#Отношение цены к количеству этажей\n",
"plt.subplot(4, 2, 5)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['floors'], color=\"green\")\n",
"plt.title('Price vs floors')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('floors')\n",
"\n",
"plt.subplot(4, 2, 6)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['floors'], color=\"green\", hue=result)\n",
"plt.title('Price vs floors')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('floors')\n",
"\n",
"#Отношение цены к размеру подвала\n",
"plt.subplot(4, 2, 7)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_basement'], color=\"orange\")\n",
"plt.title('Price vs sqft_basement')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('sqft_basement')\n",
"\n",
"plt.subplot(4, 2, 8)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_basement'], color=\"orange\", hue=result)\n",
"plt.title('Price vs sqft_basement')\n",
"plt.xlabel('Price')\n",
"plt.ylabel('sqft_basement')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Неиерархическая четкая кластеризация (k-means)\n",
"Выделим признаки, нужные для кластеризации"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[2.45509345e+03, 6.89163184e+05, 1.62145610e+00, 2.36151252e+00,\n",
" 3.61175337e+00, 3.62289981e+02],\n",
" [5.09415263e+03, 2.80610884e+06, 1.88157895e+00, 3.87368421e+00,\n",
" 4.35263158e+00, 1.03540000e+03],\n",
" [3.63203336e+03, 1.35883310e+06, 1.80105777e+00, 3.07282343e+00,\n",
" 4.03173312e+00, 6.45260374e+02],\n",
" [1.67717404e+03, 3.45165436e+05, 1.38799505e+00, 1.85914295e+00,\n",
" 3.15818379e+00, 2.07165455e+02]])"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABSIAAAP0CAYAAAC+n0IvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hURdsG8Ht7yW56JRACBBIgJIQSitRQBQQRC1IUBQFFfZVuBQUsCIhSlCpIEUTsIE0RG1WKSC8JAiG9J9t3vz/yZZclmxCSTTbl/l2Xl3LmnDmTJ4OZPDtFYLFYLCAiIiIiIiIiIiKqREJXN4CIiIiIiIiIiIhqPyYiiYiIiIiIiIiIqNIxEUlERERERERERESVjolIIiIiIiIiIiIiqnRMRBIREREREREREVGlYyKSiIiIiIiIiIiIKh0TkURERERERERERFTpmIgkIiIiIiIiIiKiSsdEJBHVWBaLxdVNICIiIqI6jOPRu2OMiOh2TEQS3YPRo0dj9OjRxa7n5eXh0UcfRWRkJPbt22e9Nzw8HMOHDy+xvpdffhnh4eGYOXNmpbW5suh0Oqxbtw7Dhg1D27ZtERsbi+HDh+Pbb7+1G2wsWbIE4eHhTn23Xq/HO++8gx9++MEp9ZX0fa0Khw8fRnh4OA4fPlwj66+NwsPDi/0TFRWFgQMHYtWqVTCbzaU+//XXXyM8PBw3btyoohYTEVFdwvGoDcej1YvRaMTMmTMRExODNm3a4NChQ9i2bRvef//9e6rnzvHrvX7/avpY7MaNG8XGohEREYiJicFDDz2Er7766q51VEafJ3IWsasbQFTT5eXlYdy4cTh//jyWLVuG7t27W8uEQiFOnjyJpKQkBAYG2j1XUFCA/fv3V3VznSItLQ3jxo3DrVu3MHr0aERFRcFsNmP//v2YOXMmjh07hjlz5kAgEFTK+1NSUrB+/Xq8++67Tqlv1qxZTqmHao+HH34YjzzyiPXPGo0Ge/bswYIFC5CTk4MpU6aU+GyPHj2wdetW+Pv7V0VTiYiIOB7leLRa+P333/HNN9/gueeeQ+fOndGiRQu8+uqriI2NrVC9jzzyCLp27Vrm+2vLWOzZZ59Fjx49ABTOKs3Pz8e2bdvw2muvwWg0lvoBw73GjKgqMRFJVAFFg75z587hk08+wX333WdX3qJFC1y+fBm7du3CmDFj7Mr2798PhUIBd3f3Kmyxc8yYMQNJSUnYunUrQkNDrdd79OiBevXqYdGiRejZsyd69erlukbeg7CwMFc3gaqZwMBAtG7d2u5ap06dcPXqVWzatAkvvvgiJBKJw2e9vb3h7e1dBa0kIiLieJTj0eojKysLAPDQQw+hQYMGTqs3MDCwWBK9NLVlLBYSElJsPNq5c2ecP38e69atKzURea8xI6pKXJpNVE75+fl45plncOHCBaxcubLYoA8AlEolunfvjl27dhUr27lzJ/r16wex2P7zALPZjJUrV6JPnz6IjIxEv379sGHDBrt7TCYTVq5ciUGDBiEqKgqtW7fG8OHDcejQIes9S5YsQZ8+ffDrr7/igQcesNb17bff2tW1fv169O/fH61atULXrl0xe/Zs5OXllfh1nzt3Dn/88QfGjh1rN+grMmbMGIwcORJKpdLh83FxccWW/ty5fEKr1WL27Nno1q0bIiMj0b9/f6xZswZA4VKFogHlK6+8gri4OGs9x44dw6hRoxAdHY3Y2FjMmDEDGRkZdu9p0aIFtm3bhvvuuw+xsbG4fPlysaUw4eHh2LRpE1577TXExsYiJiYG//vf/5CWlmbX7jVr1qBXr16IiorC8OHD8csvv9gtIylaVrFkyZIS41nk8uXLGDFiBFq1aoU+ffoU+56XpV8AwJYtW9CvXz9ERUVh1KhRSExMLBZrRzEACvvkQw89hJiYGNx333148803kZ2dbff86dOnMXbsWHTo0AFt2rTBxIkTcenSJWt50VKagwcPWmcn9O
jRA9u2bUNKSgqef/55xMTEoHv37li3bp1d3ffSF5OSktC8eXNs3LjR7npGRgZatmxprfvPP//Eo48+ipiYGLRv3x7PPvssrly5UvI34i4iIyORn59vjUt4eDiWLl2Khx56CFFRUVi6dKnD5UAHDhzA8OHD0bp1a3Tp0gVvvvkmcnJyrOWJiYmYPHkyYmNjER0djSeffBJnz54tdzuJiKhu4HiU41FnjUfv9j3Q6XSYN28eOnfujJiYGEyfPh2bN2+2Lv+dOXOmNaa9e/fG6NGjERcXh5s3b+Kbb76p0FLp25cZf/rpp4iMjCw2Rl23bh1atmyJ9PT0Yt/LmTNnYsyYMdi+fTv69euHyMhIDBkyBL/99ptdHSdOnMDIkSPRunVr9OjRA+vXr8eYMWNK3Lbg+PHjCA8PLzar+Ny5cwgPD8fevXsBAD/++CMGDx6MqKgodOzYEVOnTkVycnK5YiEUCtG8eXPrGL/o+/vZZ5+hf//+iI6Oxvbt2x0uzf72228xdOhQREdHo0ePHli4cCH0er21/OLFi5gwYQLatGmDNm3aYNKkSbh+/Xq52klUGiYiicqhoKAA48ePx9mzZ7Fq1Sp06NChxHsHDBhgXQ5TJC8vD7/99hsGDRpU7P7Zs2fj448/xuDBg/Hpp5+if//+eOedd7Bs2TLrPQsWLMDy5cvx2GOPYfXq1ZgzZw6ysrLwv//9DxqNxnpfamoq3n77bTzxxBNYuXIl6tevjxkzZlgTMT/++CM++OADjBw5EmvWrMGkSZPw3XffYc6cOSV+Pb///jsA2A24bieTyfDmm2+iU6dOJdZxN++88w5+++03zJgxwzq4mj9/PrZv3w5/f38sXboUQOFyhaL/Pnr0KMaMGQO5XI7Fixfj1VdfxZEjR/DEE09Aq9Va6zaZTFi7di3mzZuHV155BU2aNHHYhg8//BBmsxmLFi3C9OnTsX//frzzzjvW8qVLl2LBggW4//77sXz5ckRHR+Oll16yq8Pf3x9bt261W+JbknfffRetW7fGJ598gq5du2Lu3LlYv369tbws/WLjxo2YNWsWunfvbm3TG2+8UexdjmKwfPlyTJ48Ga1bt8bHH3+MSZMmYffu3Rg9erQ1focOHcLjjz9u/R7NnTsXt27dwvDhw4sl9yZPnoy4uDisWLECjRo1wqxZs/DEE0+gadOmWL58OaKiovDuu+/in3/+AXDvfTEwMBCxsbHYsWOH3fVdu3bBYrFg4MCBuH79Op577jlERkbik08+wbx58xAfH4/x48ffdZ/HksTHx8PNzQ0+Pj7Wa59++ikeeOABfPzxx+jXr1+xZ/bv348JEybAx8cHixcvxtSpU7Fv3z68/PLLAAqTp8OHD8eZM2fwxhtvYOHChTCbzRg5cmSFkqZERFS7cTzK8aizxqNl+R5MnToV27Ztw/jx47F48WJkZWVh8eLF1vLnnnsOzz77rLVds2bNwtKlS+Hn54fu3bs7ban0Aw88AKPRiD179thd37FjB7p06WI3Rrvdv//+izVr1uDFF1/EsmXLIBKJ8MILL1gTmleuXLHOGF60aBFeeOEFrFy5En///XeJbWnTpg1CQkKKjUd//PFHeHp6onv37vj7778xffp09O3bF6tWrcIrr7yCQ4cOlbrNz93Ex8cjJCTE7tqSJUvwzDPPYP78+Q4/kNi0aRNmzJiBli1bYunSpRg/fjw2bNiAuXPnWuscPnw40tPT8f7772PevHm4fv06Hn/8caSnp5e7rUSOcGk20T0qGvQV/VAqKCgo9f4ePXpAoVDYLYfZu3cvfHx80LZtW7t74+Pj8eWXX2Ly5MkYP348AKBLly4QCARYsWIFRowYAS8vL6SkpODll1+2+9RUJpPhhRdewIULF6xT+DUaDebNm2cdhIWGhqJnz544cOAAmjRpgiNHjqB+/foYOXIkhEIhYmNjoVQqi33CeLtbt24BAOrXr1/2oN2jI0eO4L777sPAgQMBAB06dIBSqY
SPjw+kUimaN28OoHC5QosWLQAACxcuRKNGjbBixQqIRCIAQHR0NAYOHIjt27dj5MiR1vonTpxo3W+lJM2aNbPb8+e
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"scaler = StandardScaler()\n",
"\n",
"features_used = ['sqft_living', 'price', 'floors', 'bathrooms', 'bedrooms', 'sqft_basement']\n",
"data_to_scale = df_cleaned\n",
"\n",
"random_state = 42\n",
"kmeans = KMeans(n_clusters=4, random_state=random_state)\n",
"labels = kmeans.fit_predict(data_to_scale)\n",
"centers = kmeans.cluster_centers_\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['bedrooms'], hue=labels,)\n",
"plt.scatter(centers[:, 1], centers[:, 4], s=40, color='red')\n",
"plt.title('KMeans Clustering: bedrooms vs Price')\n",
"\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_living'], hue=labels,)\n",
"plt.scatter(centers[:, 1], centers[:, 0], s=40, color='red')\n",
"plt.title('KMeans Clustering: sqft_living vs Price')\n",
"\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['floors'], hue=labels,)\n",
"plt.scatter(centers[:, 1], centers[:, 2], s=40, color='red')\n",
"plt.title('KMeans Clustering: floors vs Price')\n",
"\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['price'], y=df_cleaned['sqft_basement'], hue=labels,)\n",
"plt.scatter(centers[:, 1], centers[:, 5], s=40, color='red')\n",
"plt.title('KMeans Clustering: sqft_basement vs Price')\n",
"\n",
"display(centers)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Понижение размерности"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Text(0.5, 1.0, 'PCA reduced data: KMeans Clustering')"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABTgAAAImCAYAAAB3kuWeAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3xTVf8H8E/26N5lFRAou7TsLUtARBAQB0NkCAiKD0PAB2SIIo9MWSIiAoIyfiAoIgqKisgqsmTIasvq3mmzc39/1CaEpAPoSMrn/Xrx0t5z77knJwl8+71niARBEEBERERERERERETkhsTl3QAiIiIiIiIiIiKih8UEJxEREREREREREbktJjiJiIiIiIiIiIjIbTHBSURERERERERERG6LCU4iIiIiIiIiIiJyW0xwEhERERERERERkdtigpOIiIiIiIiIiIjcFhOcRERERERERERE5LaY4CQickIQhPJuAhXgcX5vHufXTkRERCWHMcXjge8zPU6Y4CR6AEOHDkXdunXt/jRq1AidOnXC3LlzkZmZ6XBNTEwM5syZg27duiEiIgKdOnXCpEmTcPny5QLvs3TpUtStWxfz5s0rzZdToBUrVqBu3brlcm9ndu3ahbp16+L27dulfp3BYMD8+fPx3XffPWgzH8hLL72EunXr4scffyzV+7jae/kosrKyMHXqVERHR1uPDR06FEOHDi2zNhT3+9ylSxdMnz69RO999epVvPzyyyVS1+3bt1G3bl3s2rWrROojIqKKgbFu+ahIsW7dunWxYsUKh+NXrlxBmzZt8OSTTyI2NtZ6bt26dbFkyRKndVksFnTo0MFtY5bExER89NFH6NmzJ5o0aYL27dtj7NixdrEsUDrxbEJCAkaPHo07d+6USH0Fva9EroQJTqIH1KBBA2zbts3654svvsCrr76KnTt3YsyYMXZPyX766Sf069cPFy5cwOuvv47PPvsMEydORGxsLF544QUcOXLEoX6LxYLdu3cjPDwce/bsgVarLcuX99hLSkrCxo0bYTKZSu0eN27cwOnTpxEeHo6tW7eW2n0qmkuXLmHPnj2wWCzWY7Nnz8bs2bPL5P4P830uSfv378fp06dLpK7g4GBs27YNnTp1KpH6iIio4mCsW7GVRax7v6tXr+LVV1+FSqXC5s2bUaNGDWuZWCzG/v37nV538uRJJCUllVErS9apU6fQt29fHDp0CK+88grWrFmDGTNmQKfTYejQodi9e3ep3v/PP//Eb7/9VmL1bdu2DQMHDiyx+ohKg7S8G0Dkbjw9PREZGWl3rEWLFsjJycHy5ctx9uxZREZG4ubNm5g2bRo6dOiAZcuWQSKRWM/v3r07Xn75ZUybNg2//PIL5HK5teyPP/5AQkIClixZgiFDhmDv3r38x6SC2bVrF6pUqYIxY8ZgypQpiIuLQ/Xq1cu7WW6pdu3aZXKfh/0+uyq5XO7w9xgRERHAWJdK1vXr1zFs2DB4eHhg48aNqFy5sl1506ZNER0djYsXL6JBgwZ2Zd9//z3q16+PS5culWWTH1lGRgb+85//oEaNGvjiiy+gUqmsZT169MDo0aMxa9YstG/fHoGBgeXY0uJj3EjugCM4iUpIo0aNAAB3794FAHz55ZcwGAyYOXOmXcAHACqVCtOmTcOAAQMcpvrs3LkT4eHhaNasGVq1aoVt27YVee8uXbpg/vz5GDZsGCIiIjBjxgwAef+4zpo1C23btkXjxo3xwgsv4OjRo3bX6vV6fPjhh2jXrh2ioqLwzjvvQK/X253jbNrE8ePHUbduXRw/ftx67MaNG3jjjTfQsmVLtGjRAmPGjMH169ft7vXRRx/hySefRKNGjfDss89i3759dvVaLBasXr0anTp1QpMmTTBu3Din06HuV9zrDh48iEGDBiEqKgqNGjVCz549sWXLFgB503a7du0KAHjnnXfQpUsX63U7duxA//79ERkZiYiICPTt2xc//P
CDXd1169Ytclqy2WzG7t270blzZ3Tr1g1qtdrpe2w0GrFo0SJ07NgRERERGDlyJHbv3u0wDembb75Br1690LhxY/Tp0wdHjx5FgwYNCp3Gs2/fPvTv3x9RUVFo164dZs2aZddXK1asQM+ePXHgwAH07t0bjRs3Rt++fXH69GmcOXMGAwcOREREBHr37u3webpy5QrGjBmDpk2bomnTphg/fjxu3bplLc//3GzduhWdO3dG06ZNraM7Cuvj48eP45VXXgEAvPLKK9bP472fzREjRqB///4Or3fcuHHo06eP9efo6GgMGTIETZo0QcuWLTFt2jSkpaUV2F/Aw3+f733N935X7m87APz9998YNmwYmjVrhqioKLz66qs4c+YMgLz3ZOXKlQDspwhZLBasXbsWTz31FBo1aoQePXrgyy+/dLjPlClTMGHCBERGRmL48OEOU9R37dqFBg0a4OzZs3jxxRfRuHFjdO7cGZ9//rldXUlJSZg4caL1Oz5r1iwsXbrU7rtCREQVE2NdxrrFiXXvdf36dbzyyivw8vLC5s2bHZKbQF7yPDAw0GEUp8lkwk8//YRnnnnG4ZrivO9paWmYO3cuOnfujEaNGqFly5YYP368XRw9dOhQzJgxA2vXrkWnTp3QuHFjvPTSSzh37pz1HJ1Ohzlz5qBjx47W/rw/Prrf7t27kZSUhP/+9792yU0gb8TqlClTMHjwYGg0GodrC1pGaPr06Xbv182bNzF27Fi0atUKTZo0wYsvvmgdsblr1y688847AICuXbvavWc7duzAM888Y116YsWKFTCbzXb3GTZsGGbPno2mTZuiV69eMJvNdvFn/nfj6NGjGDFiBJo0aYJ27dph4cKFdnVpNBrMmjULbdq0QVRUFCZOnIgNGza41PIQVLEwwUlUQmJiYgAA1apVAwAcPnwYDRo0QEhIiNPz27Rpg4kTJyIoKMh6LCMjA7/88guee+45AEC/fv1w/vx5XLhwocj7b9myBY0bN8bq1avx/PPPQ6/XY9iwYfj5558xceJErFy5EqGhoRg1apRdAPD2229j+/btGDNmDJYtW4bMzExs2LDhgV9/YmIiXnzxRcTGxmLOnDlYuHAhUlJSMGzYMGRkZEAQBIwfPx5bt27F8OHD8cknn1j/obt3isbChQuxatUqPP/881i5ciV8fX2xePHiIu9fnOt+/fVXjB8/Hg0bNsTq1auxYsUKVKtWDe+99x7Onj2L4OBgaxLp9ddft/7/li1bMGvWLHTr1g2ffvopFi1aBLlcjilTpiAhIcFa/7Zt2zBu3LhC2/n7778jOTkZzz33HJRKJZ5++ml88803MBgMdufNmjULGzduxJAhQ7Bq1SoEBgbi3XfftTtn9+7dmD59Opo2bYrVq1ejR48eGDdunF1gcb/Vq1dj0qRJiIyMxPLlyzF+/Hj8+OOPGDp0KHQ6nfW8hIQELFiwAGPHjsXHH3+MrKwsTJgwAZMmTcLAgQOxatUqCIKAiRMnWq+LiYnBSy+9hNTUVPzvf//DBx98gFu3buHll19GamqqXTtWrlyJadOmYdasWYiKiiqyjxs2bIhZs2ZZ+8bZtPQ+ffrgwoULiIuLsx7LysrC77//jr59+wLIm+r06quvQqlUYtmyZfjvf/+LEydO4JVXXrF7/fd7mO/zg9BoNBg1ahT8/PywYsUKLF26FFqtFiNHjkR2djYGDhyI559/HoD9FKE5c+Zg+fLl6NOnD9asWYOePXti/vz5WLVqlV39P/zwAzw8PPDJJ59g1KhRTttgsVjwn//8B7169cLatWvRtGlTfPTRRzh8+DCAvDW7hg0bhr/++gv//e9/8eGHH+Ly5ctYv379Q71mIiJyL4x1GesWJ9bNd+PGDQwbNgyenp7YvHlzgZ8TiUSCHj16OCQ4jx49Cr1e7/AQtTjvuyAIGDNmDI4cOYIpU6bg888/xxtvvIGjR486xJA//vgjfv75Z8ycORNLlixBSkoK3nzzTW
s8PX/+fPz++++YNm0aPv/8c3Tt2hUfffQRdu7cWeBrP3z4MAIDAxEREeG0vF69epg2bZrdVP0HYbFYMGbMGGi1Wnz
"text/plain": [
"<Figure size 1600x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"pca = PCA(n_components=2)\n",
"reduced_data = pca.fit_transform(data_to_scale)\n",
"\n",
"# Визуализация сокращенных данных\n",
"plt.figure(figsize=(16, 6))\n",
"plt.subplot(1, 2, 1)\n",
"sns.scatterplot(x=reduced_data.iloc[:, 0], y=reduced_data.iloc[:, 1], hue=result)\n",
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"sns.scatterplot(x=reduced_data.iloc[:, 0], y=reduced_data.iloc[:, 1], hue=labels)\n",
"plt.title('PCA reduced data: KMeans Clustering')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Выбор количества кластеров на основе инерции"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1IAAAImCAYAAABZ4rtkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB4LklEQVR4nO3dd3hU1drG4WcmvUM6RWroSYBAiGCoYi9HsfeCBUX5EBRFkWNDLHSxgA1RUGwH8aBwxA5KU3oPJbT0SkhP5vsjZGRISCOZnfK7ryuXZM/aM+8MS82T9e61TRaLxSIAAAAAQJWZjS4AAAAAABoaghQAAAAAVBNBCgAAAACqiSAFAAAAANVEkAIAAACAaiJIAQAAAEA1EaQAAAAAoJoIUgAAAABQTQQpAAAAAKgmghQAAAAAVBNBCgDOcMcdd6hLly66+eabzzrmscceU5cuXfTUU0/ZsTIANXX06FF16dJFX3/9tdGlAGgkCFIAUA6z2azNmzcrPj6+zGPZ2dn6+eefDagKAADUFwQpAChH9+7d5eLiohUrVpR57Oeff5abm5uCgoIMqAwAANQHBCkAKIe7u7sGDx5cbpD67rvvdMkll8jR0bHMY6tWrdKIESMUFhamCy64QC+99JKys7MlScOGDVOXLl3K/Tp69Kgkac2aNbr11lvVp08fRUVFafz48YqLi7N5jfHjx5f7HJW1LJW2LJb3dbpt27Zp5MiRioqKUkREhEaNGqV9+/ZZH1+3bp26dOmidevWSZL27t2r4cOH6+abb9Ybb7xx1td44403JElffPGFLrvsMoWGhto8Xlmb5Oeff17u855+Xmn7VmXjalpDVT+bil7/bI+X/j089dRTGjZsmM3rfvbZZzaf4emv89dff9mM/eSTT9SlSxeb58jNzdX06dN18cUXKzQ0VBEREbrnnnu0a9cum3PPVtcdd9xhM6a0jvKcOT9K3XHHHTbPk5eXpzfffFOXXnqpwsLCdPHFF2v+/PkqLi62OefMWtatW1elcytjsVg0ceJEhYeHa/Xq1VU+DwBKlf0pAJKkefPmafXq1fr4449r9fxJkybpiy++sDnWqlUr/fTTTzWuFUDduPzyyzV27FjFx8crODhYkpSVlaXffvtNH374oX777Teb8d9++60ef/xxXXXVVRo7dqyOHTummTNnKiYmRh9++KHmzp2r/Px8JSUl6ZFHHtFDDz2kIUOGSJICAwO1dOlSPfnkk7ryyiv14IMPKi0tTXPmzNFNN92k//znP/Lz85NU8gPoTTfdpBEjRkiS9fmqonv37vr3v/9t/f6LL77Ql19+af1+7dq1uu+++xQVFaWXX35ZeXl5mjdvnm6++WZ9/vnn6tixY5nnfP311xUaGqqHHnpIPj4+GjhwoCTp+eeflyTr6wUHB2vDhg2aNGmSrr/+ek2aNEkeHh6SVKX6c3NzFRYWpkmTJlmPne280z/bM8fVtIbqfDaTJ09Wjx49yn39JUuWSJJ27NihF154oczYM2VkZGjWrFnlPubh4aGffvpJffr0sR777rvvZDbb/p50woQJ2rhxo8aNG6c2bdooNjZWs2fP1vjx47V8+XKZTCbr2Ouvv1433HCD9fvSv8faZLFYNGrUKG3evFmPPPKIunbtqnXr1mnWrFk6cuSIXnzxRevYM+dsx44dq3xuRV566SX997//1Ztvvqno6Ohaf48AGj+CVDkWLVqkWbNmqW/fvrV+/p49ezRq1Cjdfvvt1mMODg41rhVA3RkyZIjc3Ny0YsUK3X333ZKkH374QX5+fjY/uEolPxhOmzZNAwcO1LRp06zH27Vrp7vvvlu//vqr9Qf70tWnNm3aqFevXpKk4uJiTZs2TdHR0Zo+fbr1/IiICF1++eV6//33NWHCBElSTk6O2rVrZz239PmqwtPT03qeJP3+++82j0+fPl1t27bV/Pnzrf9tio6O1kUXXaQ5c+Zo9uzZNuNjY2O1evVqLV
u2TJ06dZIka+j09PSUJJvXW758uSTp6aeftgYYSXJ2dq609pycHPn7+9s839nOO/2zPXPc1q1ba1RDdT6bkJCQs75+6fG8vLxyx55pzpw5atmypdLS0so8NmjQIP3444964oknJEnx8fHatGmT+vbtq2PHjkmS8vPzdfLkSU2aNEmXX365JKlfv37KysrSK6+8ouTkZAUEBFifMzg42Kae0r/H2vTbb7/pjz/+0IwZM3TFFVdIki644AK5urpq9uzZuvPOO63z6cw5++uvv1b53LOZPn26lixZorlz52rQoEG1/v4ANA209p0mISFBo0aN0rRp09SuXbtaP99isSgmJkahoaEKCAiwfvn6+p578QBqnaurq4YNG2bT3rd8+XJddtllNr/Bl6QDBw4oPj5ew4YNU2FhofUrMjJSnp6eWrNmTYWvdfDgQSUlJenKK6+0Od6mTRv17t1b69evtx6Li4uTl5dXLbxDW9nZ2dq2bZsuu+wym1/weHt7a+jQoTY1lI6fOXOmoqKiKv3BtVR4eLgk6YMPPlBiYqLy8/NVWFhYpXNr633XpIbqfja1Ze/evVqyZImeffbZch8fNmyYDh06pAMHDkiSVqxYoZ49e6pVq1bWMc7Oznr//fd1+eWXKyEhQWvXrtVnn31m3TAlPz+/2nUVFxersLBQFoul0jGlX6ePXb9+vRwdHXXppZfanHP11VdbHz+bczlXKvll5/z583XFFVfYrFoCQHURpE6zY8cOOTk5admyZerZs2eZx3/++WeNGDFC4eHhuuiiizRr1iyb/wFVdv7hw4eVnZ2tDh061On7AFB7LrvsMuvufWlpafrzzz+tvwU/XXp6uqSSNqgePXrYfGVlZSkxMbHC1yk939/fv8xj/v7+OnHihKSSX8gcP35crVu3Prc3Vo4TJ07IYrFUWkOpUaNGac2aNdVq/YqMjNSkSZP0+eefa+DAgQoLC1OPHj2sqycVOXbsmE1AqKma1FDdz6a2vPTSS7riiivUu3fvch8PCgpSaGiofvzxR0klbX2lq06n+/3333XZZZdp0KBBevjhh7Vs2TLrKllFYehs3nrrLev8HjRokJ5++mklJCTYjLn77rtt/j3YsGGD9bGMjAw1b968TEdG6cpYRZ/nuZwrSbt371Z0dLT++9//aufOnZW/WQA4C1r7TjNs2LAyF/iW+u233zR27FhNnDhRAwYM0OHDh/Xiiy/q4MGD1naOis6XSn6zKEkff/yxfvvtN5nNZg0aNEiPPfZYnfx2GcC5GzRokDw8PLRixQq5u7urdevWCg0NLTPO29tbUsm1KP369SvzuI+PT4Wv06xZM0lScnJymceSkpLUvHlzSdKuXbuUm5tbZoOI2uDl5SWTyXTWGkprLDVhwgStWLFCY8aM0aJFi6rcAnbjjTdq9erVKiws1OTJk9W6dWs99NBDFZ5TXFysLVu26LrrrqvSa5y5YniuNVT3s6kN33//vbZv327T6lmeCy+8UD/++KMuu+wybd++XXPnzrUJCIcPH9bo0aM1fPhwzZs3T+edd55MJpMWLVpUprVTqvyzk0o+vxtvvFHFxcU6fvy4Zs6cqfvvv1/Lli2zjin9pUKp069z8vHxUVpamoqKimwCUekvHErne3nO5VxJ+r//+z/deeeduuKKK6zXLdNiD6AmWJGqonfeeUc33nijbr75ZrVp00bR0dF6/vnntWLFiipfn7B3716ZzWYFBgbqnXfe0VNPPaXVq1fr4YcfrtZOQwDsx9nZWcOHD9fKlSv1/fffl7saJUkdOnSQn5+fjh49qrCwMOtXUFCQpk+fXulvvtu3b6+AgAD997//tTl+5MgRbd68WREREZKkX375Rd26datRS3BxcXGFPzC6u7srNDRU33//vYqKiqzHT5w4oV9++aXMdWGhoaGaO3eujh07ptdff73KdcyePVu//PKLXnnlFV122WUKCwur9Pqkv//+W9nZ2YqKiqpwXOnqypmbLZ
xrDdX9bM5Vfn6+XnvtNY0ePdrm+qXyDB8+XFu2bNEnn3yiPn36KDAw0Obx7du3Ky8vTw888IDatGljDUqlIar0Myv
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import math\n",
"from sklearn import cluster\n",
"\n",
"max_clusters = int(math.sqrt(len(df)))\n",
"clusters_range = range(1, 11)\n",
"kmeans_per_k = [\n",
" cluster.KMeans(n_clusters=k, random_state=random_state).fit(data_to_scale)\n",
" for k in clusters_range\n",
" ]\n",
"inertias = [model.inertia_ for model in kmeans_per_k]\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range, inertias, marker='o')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Инерция')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Расчет коэффициентов силуэта\n",
"Средний коэффициент силуэта (silhouette score) используется для оценки качества кластеризации. Его значение лежит в диапазоне от -1 до 1. Что означают различные значения:\n",
"\n",
"0.71.0 : Кластеры хорошо разделены и компактны. Это отличный результат кластеризации.\n",
"\n",
"0.5-0.7 : Кластеры четко различимы, но есть некоторое пересечение между ними. Это хороший результат. \n",
"\n",
"0.25-0.5 : Кластеры перекрываются, что указывает на менее четкую границу между группами. Качество кластеризации удовлетворительное, но может потребоваться уточнение числа кластеров или доработка данных. \n",
"\n",
"Близко к 0.0: Кластеры сильно перекрываются или распределение данных не позволяет выделить четкие группы. В этом случае нужно пересмотреть выбор числа кластеров, алгоритм или исходные данные. \n",
"\n",
"Меньше 0.0: Плохая кластеризация: точки ближе к центрам чужих кластеров, чем к своим. Это сигнал о том, что данные плохо структурированы для текущей кластеризации."
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2MAAAIRCAYAAADHrGV3AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACGJElEQVR4nOzdeVxVdf7H8fcFLpssAsom4i6KG5poLqilY03ZZNa02q42bc5YqTk5ZatNuZT608bSyimrKc3JscWspnIHc2dxRUFlERRk3+7vD+SON1CRuJ4LvJ6PBw/hnO8593M+468fb8/3fI/JYrFYBAAAAAC4rJyMLgAAAAAAmiLCGAAAAAAYgDAGAAAAAAYgjAEAAACAAQhjAAAAAGAAwhgAAAAAGIAwBgAAAAAGcDG6gMZg+/btslgsMpvNRpcCAAAAwEClpaUymUzq3bv3RcdyZ6weWCwWOcq7sy0Wi0pKShymnsaIHtsX/bUv+mtf9Ne+6K990V/7or/25Uj9vZRswJ2xelB1R6xHjx4GVyIVFBQoISFBHTt2lKenp9HlNEr02L7or33RX/uiv/ZFf+2L/toX/bUvR+rv7t27az2WO2MAAAAAYADCGAAAAAAYgDAGAAAAAAYgjAEAAACAAQhjAAAAAGAAwhgAAAAAGIAwBgAAAAAGIIwBAAAAgAEIYwAAAABgAMIYAAAAABiAMAYAAAAABiCMAQAAAIABCGMAAAAAYADCGAAAAAAYgDDWiJRXWLT3cLZ2Jxdo7+FslVdYjC4JAAAAwHm4GF0A6sfGXce1eNVuZeUUSZJWbMxWgO9eTRjdQwN7hhpcHQAAAIBf485YI7Bx13HNfD/WGsSqZOUUaeb7sdq467hBlQEAAAA4H8JYA1deYdHiVbsvOObtf+9hyiIAAADgYAhjDVz8oaxqd8R+7eTpQsUfyrpMFQEAAACoDcJYA5ede+EgdqnjAAAAAFwehLEGzt/HvV7HAQAAALg8CGMNXGT7AAX4XjhotWjuocj2AZepIgAAAAC1QRhr4JydTJowuscFx4y/sbucnUyXqSIAAAAAtUEYawQG9gzVtHuja7xD9sSdfXjPGAAAAOCAeOlzIzGwZ6j6dw/RLwnHtCfhkP67t0DZucVyMnFHDAAAAHBE3BlrRJydTOrWzl892zXTsN6Vd8N+3nHM4KoAAAAA1IQw1kgN6B4kSdqWmKGColKDqwEAAADwa4SxRqp1kJdaB3mprLxCm/ekGV0OAAAAgF8hjDVSJpNJg3u1kiSt38lURQAAAMDREMYascG9Kp8b256UobxCpioCAAAAjoQw1oiFB/uoTbC3ysot2rz7hNHlAAAAADiH4WGsoqJC8+bNU0xMjKKiojR+/HilpKTUOHb+/PmKiIio8WvatGnWcStWrNANN9ygqKgojRw5UosXL1Z5ebl1/xdffFHjOVJTU+1+vZfb4CimKgIAAACOyPD3jC1cuFDLly/Xq6++quDgYL3++usaN26cVq9eLVdXV5uxDzzwgG6//Xabbe+++64++ugj3XfffZIqg9Zzzz2nv/3tbxowYID27Nmjv/3tbyopKdFjjz0mSUpKSlK/fv00Z84cm3P5+/vb70INMrhXqD78OlE79mUqN79EPs1cL34QAAAAALsz9M5YSUmJli5dqokTJ2rYsGHq0qWL5s6dq7S0NK1du7ba+GbNmqlly5bWr8zMTC1btkzPPvusIiIiJEkfffSRRo8erdtuu03h4eG67rrr9MADD+izzz6znmffvn2KiIiwOVfLli3l7Ox82a79cgkL9Fa7UB+VV1i0eQ9TFQEAAABHYeidscTEROXn52vAgAHWbT4+PoqMjFRsbKxGjRp1weNfeOEF9e3bVzfddJN121NPPVXtDpeTk5NycnKsPyclJenqq6+up6uoZLFYVFBQUK/nrIvCwkKbPyWpf2SgDh/P1Y/bjmpwj5ZGldZo1NRj1B/6a1/0177or33RX/
uiv/ZFf+3LkfprsVhkMplqNdbQMJaWVvn+q5CQEJvtgYGB1n3n88MPP2j79u1atWqVzfYrrrjC5uczZ87oo48+UkxMjCQpJydH6enpiouL0/Lly3Xq1Cn17NlTkydPVrt27ep8LaWlpUpISKjz8fUtOTnZ+n2gR5kkafehbMVt36Nm7o3vDqARzu0x6h/9tS/6a1/0177or33RX/uiv/blKP399eNW52NoGKtKrr8u1s3NzeZOVk3effddXXXVVeratet5x+Tn5+uRRx5RcXGxpkyZIknav3+/pMrEOnPmTBUVFWnRokW68847tXr1arVo0aJO12I2m9WxY8c6HVufCgsLlZycrLZt28rDw8O6/T/b8nXo+BmdKmuuvl3DDKyw4Ttfj1E/6K990V/7or/2RX/ti/7aF/21L0fq74EDB2o91tAw5u7uLqny2bGq7yWpuLj4gk08fvy4tmzZosWLF593TGZmph566CGlpqZqyZIlCgurDCB9+/bVpk2b5OfnZ719uGDBAg0bNkwrV67UhAkT6nQtJpNJnp6edTrWHjw8PGzqGdK7tQ4dj9fW+EzdOLSzgZU1Hr/uMeoX/bUv+mtf9Ne+6K990V/7or/25Qj9re0URcngBTyqpidmZGTYbM/IyFBQUNB5j1u3bp38/f01aNCgGvcfPHhQt956q7KysvThhx+qR48eNvv9/f1tmuTh4aGwsDClp6fX9VIcXtUS93sOntSpM0UGVwMAAADA0DDWpUsXeXl5acuWLdZtubm5io+PV3R09HmPi4uLU79+/eTiUv3GXkpKiu699155eHjo448/VqdOnWz2f/LJJ+rfv7/NYht5eXlKTk52iGmG9hLk76nO4c1VYZE27mJVRQAAAMBohoYxV1dXjR07VrNmzdJ3332nxMRETZo0ScHBwRo5cqTKy8uVmZmpoiLbOznx8fHq0qVLjef861//qpKSEs2ZM0cuLi7KzMy0fknSkCFDVFFRoSlTpmj//v3avXu3Hn/8cfn7+2vMmDF2v2YjDe7FC6ABAAAAR2H4S58nTpyosrIyTZ8+XUVFRYqOjtaSJUtkNpuVmpqq4cOHa+bMmTZBKTMzU82bN692rvT0dG3dulWSdOONN1bbn5SUpJCQEL333nuaPXu27rjjDlksFg0aNEjLli2Tm5ub3a7TEQzqFaqlq/dq76EsZeUUKsCXh0cBAAAAoxgexpydnTV58mRNnjy52r6wsDAlJSVV275z584azxUUFFTj+F/r1q2bli5deunFNnCBfp7q0sZPiUdOaeOuE7ohpr3RJQEAAABNlqHTFHH5xZxdyOPnHUxVBAAAAIxEGGtiBvUKlckkJSRn6+Rp499QDgAAADRVhLEmJsDXQ5HtAiRJG3YdN7gaAAAAoOkijDVBg3uFSmKqIgAAAGAkwlgTNLBn5VTFpCOnlJFdcPEDAAAAANQ7wlgT5O/jru7tW0iS1u9kqiIAAABgBMJYEzU4qnKqIi+ABgAAAIxBGGuiBvYIlZNJ2p9yWmlZ+UaXAwAAADQ5hLEmqrm3m3p0ZKoiAAAAYBTCWBPGC6ABAAAA4xDGmrAru4fIycmkQ8dydDwzz+hyAAAAgCaFMNaE+Xq5KapTS0lMVQQAAAAuN8JYE8cLoAEAAABjEMaauCt7hMjZyaTkE7lKST9jdDkAAABAk0EYa+K8PV3VOyJQElMVAQAAgMuJMAbrVEVeAA0AAABcPoQxqH/3ELk4O+lo2hkdScs1uhwAAACgSSCMQV4eZvWpmqq4g6mKAAAAwOVAGIMkKSbqf6sqWiwWg6sBAAAAGj/CGCRJ/boFy+zipGOZeUo+wVRFAAAAwN4IY5AkebqbdUUXVlUEAAAALhfCGKxiolpJYqoiAAAAcDkQxmAVHRksV7OzTpzM16FjOUaXAwAAADRqhDFYebi5KLprkKTKu2MAAAAA7IcwBhuDo6peAH2cqYoAAACAHRHGYKNv1yC5uTorPbtA+1NOG10OAA
AA0GgRxmDD3dVF/SKDJbGqIgAAAGBPhDFUE2OdqsiqigAAAIC9EMZQTZ8uQfJwc1bmqUIlHT1ldDkAAABAo0QYQzV
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.metrics import silhouette_score\n",
"\n",
"silhouette_scores = []\n",
"for i in clusters_range[1:]: \n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" labels = kmeans.fit_predict(data_to_scale)\n",
" score = silhouette_score(data_to_scale, labels)\n",
" silhouette_scores.append(score)\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Инерция')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Средний коэффициент силуэта: 0.593\n"
]
},
{
"data": {
"text/plain": [
"Text(0, 0.5, 'Вторая компонентa PCA')"
]
},
"execution_count": 84,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAJzCAYAAACLe2tcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVeLG8e/0mfReCARC6L0FQUUR7KIu9t47q7vW1dVdXV39ua5txbbqqmvH3lFX7Io0KdJbEkp6r9Pv74+YkTEJJEBCgPfzPD5L7rn33DMnk33mnXPuOSbDMAxERERERERkjzLv6QaIiIiIiIiIwpmIiIiIiEi3oHAmIiIiIiLSDSiciYiIiIiIdAMKZyIiIiIiIt2AwpmIiIiIiEg3oHAmIiIiIiLSDSiciYiIiIiIdAMKZyIiIiIiIt2AwpmIdHvnnnsuAwcODPtv3LhxnHfeecyfP39PN09E9nEDBw5k5syZLY6vXbuWiRMncuihh5KXl9fm9TNnzmTgwIEMHz6curq6Vs959dVXGThwIFOmTNldzRaRvZDCmYjsFYYMGcKsWbOYNWsWr7zyCvfeey82m42LL76YdevW7enmich+Zt26dVxwwQW4XC5eeukl+vTps8Nr/H4/X3zxRatlH3/88W5uoYjsjRTORGSvEBUVxahRoxg1ahRjx47l8MMPZ+bMmZjNZt5+++093TwR2Y9s2LCB888/n8jISF566SV69erVruvGjBnD7NmzWxwvLi5m4cKFDB48eHc3VUT2MgpnIrLXcrlcOBwOTCZT6Ni5557LueeeG3beAw88wMCBA8NC3EsvvcTUqVMZPXo055xzDmvXrgXg5ZdfZuDAgeTm5obV8d577zF48GAKCwsB+PzzzznrrLMYPXo0w4YN4+ijj+bll18Ou+bmm29uMR2z+b8tW7aEzvntNKbXXnutxTSqjz/+mGOPPZZRo0Zx0kknsXDhwrBrdtSeefPmMXDgQObNmxd23W/7qz395/V6+cc//sGhhx7K4MGDw17X9oLyb+u+++67GT58ON988w3w69Sv1v7btt3t6fuSkhL+9Kc/MXHixNDvePHixQBMmTJlh7+XhQsXcs455zBy5EjGjx/Pn/70JyoqKkL1v/322wwcOJClS5cyffp0RowYwfHHH88nn3wS1o7a2lr+7//+j8MPP5zhw4czbdo03nzzzbBztm3PoEGDyMnJ4eqrr6aysrLNvgTYuHEjv//97xk/fjw5OTlcfvnlbNiwoc3zt9e/2/7e8vLyuOaaazjooIMYNWoU5557LosWLQqVb9myJXTd+++/H3aPL7/8MlS2rY8//piTTjqJ0aNHc9BBB/HXv/6V6urqFm3bVmvvxSlTpnDzzTe3+fNvNbd129f3008/cfrppzN8+HAOOugg7rrrLtxud5t1/NaGDRs477zziI6O5qWXXqJHjx7tvvbYY4/lu+++azG18ZNPPiErK4tBgwa1uObzzz/npJNOCrX373//Ow0NDS3Oac/f/9y5c7nooosYOXIkBx10EP/85z8JBAKh877//ntOO+00Ro8eTU5ODldeeeV231MisvspnInIXsEwDPx+P36/H5/PR2lpKQ888ABer5eTTz65zes2bdrE888/H3bss88+46677uK4447jscceIxAIcMUVV+D1ejn++ONxOBy89957Yde8++67TJw4kfT0dL766itmzJjB0KFDefzxx5k5cya9evXizjvvZOnSpWHXJScnh6Zjzpo1iyuvvHK7r7O6upqHH3447NiyZcu44YYbGDVqFE888QTp6elcccUVlJWVAXSoPR3VWv89/fTT/Pe//+X888/nv//9L7NmzeLRRx/tUL3Lli3j1Vdf5eGHH2b06NFhZdv211//+tewsva81vr6es4880zmzZvHjTfeyKOPPorD4eCiiy4iLy+PRx99NKzNV155Zeh+KSkpLFiwgAsuuACn08nDDz/Mn/
/8Z+bPn895553X4kP85ZdfztSpU3n00UfJysrij3/8I19//TUAbrebs846iw8++IBLLrmExx9/nLFjx3Lrrbfy5JNPhtVz6KGHMmvWLF588UWuv/56vv/+e+6+++42+6+4uJjTTz+dvLw87rjjDv75z39SVlbG+eefT1VV1Xb7ftv+/e3vbf369Zx00kls2bKF2267jfvvvx+TycT555/f4vnOyMjIFlP0Pv74Y8zm8I8Wjz/+ONdddx2jRo3ikUceYcaMGXz66aece+65HQpFu0NhYSEXX3wx8fHxPProo1xzzTW899573HTTTe26fuPGjZx//vlERUXx0ksvkZqa2qH7H3XUUQQCgVb77bjjjmtx/gcffMCMGTPo27cvjz32GL///e95//33ueqqqzAMA+jY3/8NN9zA2LFjefLJJ5k2bRrPPPMMb7zxBgCbN2/mqquuYtiwYTzxxBPcfffd5ObmctlllxEMBjv0OkVk51n3dANERNpjwYIFDB06tMXx6667juzs7Davu+eee+jfvz8rVqwIHauoqOCss87iuuuuA5pGgppHHQYPHswRRxzB+++/zx/+8AdMJhNFRUX8+OOP/POf/wSaPsBOnz6dW2+9NVTn6NGjOeCAA5g3bx4jR44MHbfb7YwaNSr088aNG7f7Oh955BF69OgRNmpSVFTEUUcdxd///nfMZjNJSUlMmzaNJUuWcPjhh3eoPR3VWv8tW7aMQYMGcdFFF4WONY84tVfzyOXUqVNblG3bXx6PJ6ysPa/1nXfeYevWrbzzzjuhaWJjxozhd7/7HQsWLODUU08Na3NmZmbYPR944AGysrL497//jcViAWDkyJEcd9xxvPXWW5x99tmhc88991xmzJgBwKRJk5g+fTqPPfYYhx56KG+//TZr167ltddeCwXQSZMm4ff7efzxxznjjDOIi4sDICEhIdSGnJwcfvjhh7A+/63nn38er9fLc889R3JyMgCDBg3izDPPZOnSpRx66KFtXrvta/3t7+3RRx/FbrfzwgsvEBUVBcDkyZOZNm0a9913X9io3yGHHMK3336L1+vFbrfj8XiYM2cOOTk5oZHO6upqnnjiCU477bSwoD1gwADOPvvsFv3Z2Z5++mni4+N57LHHQr9bs9nMbbfdxpo1a1qM3m0rLy+P8847j7KyMnw+304FlqSkJHJycpg9ezYnnHACAFu3bmXp0qXcd999PPHEE6FzDcPg/vvvZ9KkSdx///2h43369OGCCy7g66+/ZvLkyR36+z/11FND79eJEyfy+eef89VXX3HGGWewbNky3G43l19+eSh0pqWlMWfOHBoaGkLvBxHpXApnIrJXGDp0KH/729+Apg8tNTU1fPPNNzz00EM0NDRw7bXXtrjmm2++4YcffuDpp5/mvPPOCx0/44wzAAgGgzQ0NPDZZ5/hdDrJyMgA4JRTTuHDDz9k4cKF5OTk8O677xIZGckRRxwBwCWXXAI0jdDk5uayadMmfv75Z6Ap6O2stWvXhkZPmtsIcOSRR3LkkUdiGAYNDQ3Mnj0bs9lMVlZWp7anrf4bPnw4Tz31FJ9++ikTJkwgMjKy3R9UDcNg8eLFfPzxxy1G5NqjPa910aJF9OzZM+z5HZfLxaeffrrD+hsbG1m6dCkXX3xxaLQWoFevXmRnZ/P999+HhYnp06eH/m0ymTjiiCOYOXMmbreb+fPnk5GR0WJk8IQTTuDNN98MC1HN9woGg6xevZpFixZx4IEHttnORYsWMWrUqFAwg6YP0l9++eUOX+P2zJ8/n8MOOyzsg7jVag2NMtfX14eOT5gwgW+++YZ58+YxadIkvvnmG6Kiohg3blwonC1ZsgSv18u0adPC7jNu3DgyMjKYP3/+Loez5r4zm80tRu2aBYNB/H4/Cxcu5OCDDw4FM2gKmdDUp9sLZx9++CHDhg3joYce4qKLLuLGG2/k+eefD7tnIBAIjWhB03ti23tB09TGv//979TV1REVFc
VHH33E0KFD6d27d9h5GzdupKioiMsvvzz0PoSm8B4VFcX333/P5MmTO/T3/9v3YlpaWmiK5MiRI3E4HJxyyikcffT
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Применение K-Means\n",
"kmeans = KMeans(n_clusters=4, random_state=42) \n",
"df_clusters = kmeans.fit_predict(data_to_scale)\n",
"\n",
"# Оценка качества кластеризации\n",
"silhouette_avg = silhouette_score(data_to_scale, df_clusters)\n",
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
"\n",
"# Визуализация кластеров\n",
"pca = PCA(n_components=2)\n",
"df_pca = pca.fit_transform(data_to_scale)\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x=df_pca.iloc[:, 0], y=df_pca.iloc[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
"plt.title('Визуализация кластеров с помощью K-Means')\n",
"plt.xlabel('Первая компонентa PCA')\n",
"plt.ylabel('Вторая компонентa PCA')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"В нашем случае средний коэффициент силуэта 0.593 указывает на то, что кластеры хорошо различимы, но есть небольшие пересечения. Данный результат соответствует хорошему качеству кластеризации."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "kernel",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}