236 lines
393 KiB
Plaintext
236 lines
393 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Бизнес-цель: pазделить клиентов на группы для предложения им справедливой цены за страховку"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAroAAAIkCAYAAADxkYgqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXhU5fnw8e+ZPbNk3/cNCPuO7LsgKmrFFduKS6vWaldbbUtRq6221apv/amtdRertgqoFRAFcWERJOxb9pA9k0xm38/7x5CYkAlJEEgCz+e6uDQzZ2bOnDlz5j73uZ/7kWRZlhEEQRAEQRCEc4yir1dAEARBEARBEM4EEegKgiAIgiAI5yQR6AqCIAiCIAjnJBHoCoIgCIIgCOckEegKgiAIgiAI5yQR6AqCIAiCIAjnJBHoCoIgCIIgCOckEegKgiAIgiAI5yQR6AqCIAiCIAjnJBHoCoIgCIIgCOckEej2Q2VlZUiS1OmfwWBg1KhRPPDAA9jt9r5eTUEQBEEQhH5NkmVZ7uuVEDoqKysjJyeHvLw8vvvd7wIgyzINDQ18+OGHlJWVMXnyZD7//HOUSmUfr60gCIIgCEL/pOrrFRC6lp+fz/3339/hNo/Hw5QpU9i6dSuffvopc+fO7ZuVEwRBEARB6OdE6cIAo9VqmTNnDgCNjY0d7svOziY7O7vTY9asWdNW/rBp06a22wsLC1m8eDG5ubkYDAaio6MZN24cTzzxBD6fD4BgMEhWVhZxcXF4PJ6w6zRz5kxUKhXHjh0DoKWlhUcffZRZs2aRmpqKRqMhNTWV73//+xQXF3f53pYtWxa2ZEOSJJYtW9ZhWUmSmD17dqfneOqpp9oeU1ZW1nb7hg0bWLhwIZmZmeh0OuLi4pgyZQovvfRSp+d49913uf7668nPz0ev1xMVFcWMGTP473//22nZ1jKTE9fvxPfUfl2g958VgNls5s477yQ7OxuNRtNpG/XGpk2butzW4dbrTG+Tl156CUmSwn4eEH57dbVtT3T//fd32J6yLHPxxRcjSRJvvvlmh2VlWWbRokVh7zvZc5/sX7j9tLy8nFtuuYW0tDQ0Gg3p6enccsstVFRUhH0dm83GAw88wKhRo9q2/9ixY1m+fHnbd7VVV6VP4b4Xre/5hRdeYNq0aURGRqLX65kwYQIvvPBCt++/1cn2p5Ptow6HgxUrVlBQUIBOpyM2NpZLLrmEL774osttfeL3olW4bT179uywr1tYWIhSqey0z3388cdotVpmzpyJ2+3u9vlLSkpITk4mKyuLqqqqDvd5vV4ef/xxxo0bh8FgwGQyMWPGDNasWdNpfU62L3f1feqrY0j77+rq1auZNGkSer2ehIQEbr75Zurq6jo9pjfHj8rKSpYsWcLgwYMxmUwYjUaGDx/Ogw8+2Klkr/XzNZlM2Gy2Ts/16quvtr23cMeWPXv2cN1115GSkoJGoyErK4u77roLs9ncYbn2n8H+/fu55JJLiI6Oxmg0smDBAnbu3Nnpubva9+Cb78uJiSyAL774gksuuYTY2Fh0Oh0FBQWsWLECp9MZ9rlKSkr44Q9/SE5ODlqtlsTERGbPnt32fls/r+7+td+Xevu96e9ERneA8Xq9bV+SMWPG9Gj5X/ziF2HvO3bsGI2NjcybN4+EhAQcDgfr1q3jZz/7Gfv27eP5559HoVBw66238vvf/57//ve/LF26tMNzHD58mM8++4xLLrmE9PR0AA4ePMjvf/975syZw3e+8x0MBgOHDh1i5cqVfPDBB3z99ddkZWV1uc4/+clPiI6OBsBisfDkk0/2aNs0NjaGPXAAFBcX43a7ufjii4mNjcVisfD+++9z0003cezYMX73u9+1LXvfffeh0WiYPn06KSkpNDQ0sGbNGq666iqeeuop7rrrrh6tT2+d7LNqDcC++uorxowZw1VXXYXRaARCB7Ly8vJTes1Zs2Z1+PF+4oknwi7XV9vkTJAkiRdffJFRo0Zx2223MXny5Lb98YknnmDt2rUsW7aMa6+9tsfPeeONN4YNOh544IFOtx05coTp06fT0NDA4sWLGT58OPv27eOFF17gvffe4/PPP2fw4MFty9fX1zNr1iwOHTrEmDFjuOOOOwgGgxw6dIhHH32UX/ziF23fl/ZGjx7NFVdc0fb3qlWr2L17d4dlZFnmhhtu4I033mDQoEEsXboUjUbDRx99xC233MKBAwf461//2uPtcOL+1CrcPup2u5k7dy7bt29n3Lhx/PSnP6Wuro4333yTdevW8cYbb3D11Vf3+LV74+677yYYDHa6fd68ebzyyitcf/31LF26lP/85z8oFOHzQfX19SxcuBCfz8emTZtIS0tru8/j8XDRRRexadMmxowZwy233ILP5+ODDz7g8ssv5//9v//Hj3/849P+vs7mMeS///0v69at46qrrmL+/Pls3bqVF198kc8++4zt27cTExPTtmxvjh9NTU2UlZUxbdo0kpKS8Pl8fPrpp6xYsYLNmzezYcOGTuvicrl4/fXXuf322zvc/swzz6BUKgkEAp0es2bNGq655hoUCgWXX345GRkZHDhwgL///e+sW7eObdu2dXgPEAoqp02bxrhx47jjjjsoLy/n7bffZubMmXzyySdccMEFvdqGJ3r77be5/vrr0Wq1XHvttSQmJrJ+/XoefPBB1q1bx6ZNm9DpdG3Lf/7551xyySXYbDYWLlzIddddR3NzM7t27eLJJ59k2bJljBkzhhUrVrQ9pvU39cTjQ7hjyIm6+t70e7LQ75SWlsqAnJeXJ69YsUJesWKF/Pvf/17+0Y9+JOfl5ck6nU7+y1/+0ulxWVlZclZWVofbHnnkERmQx48fLwPyxo0bT/raXq9XzsvLkw0GQ9ttVVVVskqlkmfPnt1p+V/+8pcyIK9atartNovFIpvN5k7LfvLJJ7JCoZBvvfXWsK99ww03yIBcVlbWaVvceOONHZYF5FmzZnW47fbbb5cVCoU8ZswYGZBLS0tP+l6tVqtsMBjk4cOHd7i9uLi407I2m00eOXKkHBUVJTscjm7Xr9WNN94Ydl16+1nt379fBuSxY8fKfr+/w+NmzZol9/arvGHDBhmQ77///m7XS5bP/DZ58cUXZUB+8cUXwz4m3Hp1tW1PtGLFirD7/ocffihLkiRPnTpV9vv98q5du2SNRiMPGjRIttlsJ33O7p67Vbj9dM6cOTIgP/fccx1uf/rpp2VAnjt3bofblyxZIgPyb37zm07PX1tbK/t8vg63HT16VAbkZcuWdbg93Pb6xz/+IQPyTTfdJHu93rbbPR6PvHjxYhmQd+zY0dXbb7Nx40YZkFesWBH2/nD76AMPPCAD8g033CAHg8G227/++mtZo9HI0dHRstVqbbv9VLZ1uNf997//3eF7Fm6fe/LJJ2VAvv3228M+v81mkydMmCBHRETIX375ZafH/+Y3v5EBefny5R3em9VqlSdMmCBrNBq5qqqq7faT7ctdfZ/66hjS+l0F5LVr13a4795775UB+cc//nGH23tz/AgnGAzKM2fOlAG5oaGh03pffvnl8qhRozo8prCwUAbkK664otPn3NjYKEdGRsppaWkdfm9kWZbfeOONTu+h9TMA5HvvvbfD8mvXrpUBeeTIkR1uP9k2Dfd9aWlpkaOiomStVivv3r277fZAICBfe+21MiA/+OCDbbe73W45LS1NVigU8ocfftjpNSorK8O+dnfH567WvSffm/5KlC70Y8XFxTzwwAM88MADPPjgg/zf//0fxcXFzJ8/n/nz53f7+NraWh5++GEWLFjApZde2u3yVquVt956i6qqqg7ZqdTUVBYvXsynn35KUVFR2+0+n49XXnmFlJQULrnkkrbbo6KiiI2N7fT8c+bMYfjw4WHPyFufD0LlGb21e/du/vnPf3LLLbcwevTobpc3m828+OKLOByOTpm43NzcTssbjUaWLVtGS0sLX331Va/XrzvdfVatl62GDBlyWgYgulwuADQaTY+W74ttcqZddNFF/OQnP+HLL7/k3nvv5frrr0eWZd544422TNfpVlFRwcaNGxk2bBg
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler, LabelEncoder\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"\n",
|
|||
|
"data = pd.read_csv(\"../dataset.csv\")\n",
|
|||
|
"\n",
|
|||
|
"label_encoders = {}\n",
|
|||
|
"for column in ['sex', 'smoker', 'region']:\n",
|
|||
|
" label_encoders[column] = LabelEncoder()\n",
|
|||
|
" data[column] = label_encoders[column].fit_transform(data[column])\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"reduced_data = PCA(n_components=2).fit_transform(data)\n",
|
|||
|
"\n",
|
|||
|
"pca_df = pd.DataFrame(reduced_data, columns=['PC1', 'PC2'])\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"sns.scatterplot(x='PC1', y='PC2', data=pca_df, s=50, alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация данных после понижения размерности', fontsize=14)\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAIjCAYAAAA0vUuxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABkdklEQVR4nO3dd3hUZd7G8fvMpEIKCZACBAiCFOlIaAqiKFhYWHfVRV3EwqovqMjqK+jadbPqq2JFQRd2FxXXAnYUUUAE6VGaSAmEkgKEVMgkmTnvHyEDQ3oIOTPJ93Nd50rynOec+c1sdHP7lGOYpmkKAAAAAFAhm9UFAAAAAIC3IzgBAAAAQBUITgAAAABQBYITAAAAAFSB4AQAAAAAVSA4AQAAAEAVCE4AAAAAUAWCEwAAAABUgeAEAAAAAFUgOAEAAABAFQhOANAIbNmyRTfeeKNat26twMBAtWrVSjfccIO2bNlidWkAAPgEwzRN0+oiAABnz8cff6xx48YpMjJSt956q+Lj47Vnzx69/fbbOnLkiObPn6/f//73VpcJAIBXIzgBQAO2a9cu9ezZU23bttXy5cvVsmVL97nDhw/rwgsv1L59+/TLL7+oQ4cOFlYKAIB3Y6oeADRgzz33nI4dO6ZZs2Z5hCZJatGihd58803l5+fr2Wefdbc/9thjMgxDhw8f9ui/bt06GYahuXPnerS7XC7NmDFD5513noKCghQdHa3bb79dR48e9ejXvn17XXXVVWVqnDx5sgzD8GgzDEOPPfaY++fi4mJdccUVioyM1NatW93tc+bM0cUXX6yoqCgFBgaqW7dumjlzZrU+mwkTJqh9+/Yebfv27VNwcLAMw9CePXtqVbskzZs3T/369VNwcLAiIyP1pz/9Sfv27fPoc9FFF6l79+5av369Bg8erODgYMXHx+uNN97w6Ld06VIZhqEPP/ywzOuEhIRowoQJHm3t27cv0/bBBx/IMAyP97tnz55y//ecNGmSDMMocw8AaOwITgDQgH322Wdq3769LrzwwnLPDx06VO3bt9cXX3xR69e4/fbbdf/992vIkCF66aWXdPPNN+udd97RyJEjVVRUVOv7nuq2227T0qVL9dlnn6lbt27u9pkzZ6pdu3Z68MEH9fzzzysuLk7/8z//o9dee61Wr/PII4+ooKDgjGp9+umnNX78eHXq1EkvvPCCpkyZoiVLlmjo0KHKysry6Hv06FFdccUV6tevn5599lm1adNGd955p/75z3+eUQ2nKi4u1kMPPVStvjt37tTs2bPr7LUBoCHxs7oAAMDZkZ2drYMHD2rMmDGV9uvZs6c+/fRT5ebmKjQ0tEavsWLFCr311lt65513dP3117vbhw8frlGjRumDDz7waK+NBx98UPPmzdNHH32kIUOGeJxbtmyZgoOD3T9PnjxZo0aN0gsvvKBJkybV6HW2bNmif//737r88sv11Vdf1arWvXv36tFHH9VTTz2lBx980N1+9dVXq0+fPnr99dc92g8ePKjnn39eU6dOlVQSQgcMGKDp06frz3/+s/z9/WtVx6lmz56tlJQUDR8+XLt3766070MPPaTOnTsrOzv7jF8XABoaRpwAoIHKzc2VpCrDUOn5nJycGr/GBx98oPDwcF166aU6fPiw++jXr59CQkL0/fffe/QvKiry6Hf48OFKR3heffVVJSYm6uWXXy43AJ4amrKzs3X48GENGzZMu3fvrvEf/9OnT1ffvn11zTXXlHu+OrV//PHHcrlcuvbaaz36xcTEqFOnTmU+Dz8/P91+++3unwMCAnT77bcrIyND69evr1H95Tl27JieeOIJTZ48WW3btq207/r16/XBBx8oMTFRNht/HgDA6Rr1vxmXL1+u0aNHq1WrVjIMQwsXLqzR9QUFBZowYYJ69OghPz8/jR07tkyf1NRUXX/99Tr33HNls9k0ZcqUOqkdAKpSGohKA1RFqhuwyrNjxw5lZ2crKipKLVu29Djy8vKUkZHh0f+bb74p0+/tt98u995fffWV7rnnHklSZmZmuX1+/PFHjRgxQk2bNlWzZs3UsmVL94hOTYLTihUr9Nlnn+mZZ54pd81SdWvfsWOHTNNUp06dyvTdtm1bmc+jVatWatq0qUfbueeeK0kea6xq64UXXlBBQYHHKFdFpk2bpgsvvLDctVwAgEY+VS8/P1+9evXSLbfcoquvvrrG1zudTgUHB+vuu+/WRx99VG4fh8Ohli1b6m9/+5tefPHFMy0ZAKotPDxcsbGx+uWXXyrt98svv6h169YKCwur8Wu4XC5FRUXpnXfeKff86RtSDBgwQE899ZRH26uvvqpPPvmkzLVr1qzRxIkT1bRpUz311FO65ppr1LlzZ/f5Xbt26ZJLLlGXLl30wgsvKC4uTgEBAfryyy/14osvyuVyVft9PPDAAxo5cqQuvvjiMpsl1KR2l8slwzD01VdfyW63l7lHSEhItWs6U4cPH9Zzzz2n6dOnKzIystK+33zzjb799lutWrWqnqoDAN/TqIPT5Zdfrssvv7zC8w6HQw899JDee+89ZWVlqXv37nrmmWd00UUXSZKaNm3q3r3pxx9/LLPoVyrZ3eill16SpDpd7AsA1XHVVVdp9uzZWrFihS644IIy53/44Qft2bPHY7pYTZxzzjn69ttvNWTIEI9pcxVp0aKFRowY4dFW0Wj/pZdeqpkzZ6qgoEALFy7UX/7yF/cOc1LJxhcOh0OffvqpxzS006fDVWXhwoVatWqVNmzYcMa1n3POOTJNU/Hx8e6Ro8ocPHhQ+fn5HqNOv/32mySV2fGvpp566imFhoa6R+0qYpqmpk2bpt///vcaOHDgGb0mADRkjXqqXlUmT56sVatWaf78+frll190zTXXaNSoUdqxY4fVpQFAtdx///0KDg7W7bffriNHjnicy8zM1B133KEmTZro/vvvr9X9r732WjmdTj355JNlzhUXF5f7H5Sqa/DgwbLb7WratKneeOMNLV++3GPHt9IRnVMfR5idna05c+ZU+zWcTqcefPBBXX/99erdu3etay119dVXy2636/HHH9fpj0k0TbPM/wbFxcV688033T8XFhbqzTffVMuWLdWvX79a17Fnzx7NnDlTjz32WJWBtvT/4xITE2v9egDQGDTqEafKpKSkaM6cOUpJSVGrVq0kSffdd58WLVqkOXPm6O9//7vFFQJA1Tp16qR//etfuuGGG9SjRw/deuutio+P1549e/T222/r8OHDeu+993TOOeeUufa7777zmL5X+h+NNm3apE2bNqlHjx4aNmyYbr/9diUmJiopKUmXXXaZ/P39tWPHDn3wwQd66aWX9Mc//vGM38fIkSN144036n//9381evRoxcbG6rLLLlNAQIBGjx6t22+/XXl5eZo9e7aioqKUmpparfvu37/fPb2vLpxzzjl66qmnNH36dO3Zs0djx45VaGiokpOTtWDBAv3lL3/Rfffd5+7fqlUrPfPMM9qzZ4/OPfdcvf/++0pKStKsWbPK7KiXlJRUZqqf0+nUgQMHtGzZMg0bNszdvmzZMnXt2lU333xzlTV/8803mjhxosc0SABAWQSnCmzatElOp7PMVAuHw6HmzZtbVBUA1Nw111yjLl26KDEx0R2WmjdvruHDh+vBBx9U9+7dy73uuuuuK7f9hRde0JEjR9xrgd544w3169dPb775ph588EH5+fmpffv2uvHGG8tsH34mZsyYoa+//lqTJk3Sxx9/rM6dO+vDDz/U3/72N913332KiYnRnXfeqZYtW+qWW26p9n3vvPPOM54Wd6pp06bp3HPP1YsvvqjHH39ckhQXF6fLLrtMv/vd7zz6RkRE6F//+pfuuusuzZ49W9HR0Xr11Vc1ceLEMvd9+umny329b7/9Vjt27CizmcTf//73ctdZnS44ONjjYcMAgPIZ5ulzCRopwzC0YMEC985477//vm644QZt2bKlzP/xhISEKCYmxqNtwoQJysrKqnRnvosuuki9e/fWjBkz6rh6AKg/EyZMkKQKN1FA9Vx00UU6fPiwNm/efEb3mTt3rh577LE62YU
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA18AAAIjCAYAAAD80aFnAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACZyUlEQVR4nOzdeVhUZf8G8Htm2LdhHTZRBBXEDUVFMhUUUzPLLFPTNDO33KlMW9S0VyvLzCUtc+vnmmbumYrgiqK4LyCbosgqsgiyzZzfH8jkCCiDwGG5P9c11wvPnOV7AHu5eZ7zPRJBEAQQERERERFRlZKKXQAREREREVF9wPBFRERERERUDRi+iIiIiIiIqgHDFxERERERUTVg+CIiIiIiIqoGDF9ERERERETVgOGLiIiIiIioGjB8ERERERERVQOGLyIiIiIiomrA8EVERERERFQNGL6IiMrh2rVrGDZsGBwdHaGvrw8HBwcMHToU165dE7u0em/ixImQSCRa7/fDDz9AIpHg1q1blV8UERFRKRi+iIieY8eOHWjXrh0CAwMxcuRI/PLLLxg1ahSCgoLQrl07/P3332KXSERERLWAjtgFEBHVZNHR0Xjvvffg4uKCY8eOwcbGRv3elClT0KVLF7z33nu4fPkyXFxcRKyUiIiIajrOfBERPcPChQuRk5OD3377TSN4AYC1tTV+/fVXZGdn4/vvv1ePz5kzBxKJBKmpqRrbnzt3DhKJBOvWrdMYV6lUWLx4MVq0aAEDAwPY2tpi7NixePDggcZ2zs7OeO2110rUWNqyO4lEgjlz5qg/LywsxKuvvgpLS0tcv35dPb527Vp0794dCoUC+vr68PDwwIoVK8r1tXn//ffh7OysMbZhwwZIpVJ8++23GuNHjhxBly5dYGxsDHNzc7zxxhu4ceOG+v2srCx8+OGHaNSoEfT19dGgQQOMGzcOSUlJJc7777//olmzZjAxMcHkyZMhCAIAIDg4GK6urjAzM0NAQACUSqXGfmFhYWjbti2MjIwwZMgQ5ObmAgCuXr2K1q1bw9jYGMOHD0dOTo56n1u3bpX6PZswYQIkEgnef/999di6desgkUhw7tw5jW1TU1NLfD+Akt8joOjnTSKRwNfXV2M8Ly8Ps2fPRpMmTaCvrw8nJydMnz4deXl5JY45ceLEEl+z1157Tf29Kr6mZ72KrystLQ2ffPIJWrVqBRMTE5iZmaFPnz64dOlSiXOUZcOGDejYsSOMjIxgYWGBrl274uDBg+r3nZ2dNb6OALBt2zZIJJISP1/Af/++yqo5KCgIEomk1BnpTZs2QSKRICQkBPHx8RgyZIh6KbGLiwumT5+OrKys557ryVdwcDAA4Pjx4xg4cCAaNmyo/h5NmzYNjx49KvfXiojqPs58ERE9w549e+Ds7IwuXbqU+n7Xrl3h7OyMffv2VfgcY8eOxbp16zBy5EhMnjwZsbGxWLZsGS5cuICTJ09CV1e3wscu9uGHHyI4OBiHDh2Ch4eHenzFihVo0aIFXn/9dejo6GDPnj346KOPoFKpMGHCBK3OcfDgQXzwwQeYOHEiZsyYoR4/fPgw+vTpAxcXF8yZMwePHj3C0qVL0blzZ5w/fx7Ozs5IS0vD5cuX8eGHH8LOzg5RUVFYuXIlDhw4gNDQUCgUCgBATEwM+vfvjyZNmmD+/Pk4cOCAOuxMmDABkyZNwoULF/DTTz/BxsYGM2fOBABkZmaid+/eMDQ0xDfffIMLFy5gyZIlAIrC67hx45CYmIglS5bA0NAQv/76a5nXGRUVhVWrVmn1tSmP9PR0LFiwoMS4SqXC66+/jhMnTmDMmDFo3rw5rly5gp9++gk3b97Ezp07tTqPjY0N/u///k/9+Y4dO/D3339rjLm6ugIo+nrv3LkTAwcOROPGjZGUlIRff/0V3bp1w/Xr1+Hg4PDMc3399deYM2cOXnrpJcydOxd6eno4c+YMjhw5gldeeaXUfQoLC/HFF1889zqerHfatGnqj319feHk5ISNGzfizTff1Nhn48aNcHV1hY+PD44dO4akpCRMmjQJFhYWuHbtGpYsWYLAwECcOHEChoaGGDBgAJo0aaJxnubNm2PMmDHqsebNmwMoCow5OTkYP348rKysEBoaiqVLl+Lu3bvYtm3bc6+HiOoJgYiISpWeni4AEN54441nbvf6668LAITMzExBEARh9uzZAgAhJSVFY7uzZ88KAIS1a9eqx44fPy4AEDZu3Kix7YEDB0qMN2rUSOjbt2+J80+YMEF4+j/nAITZs2cLgiAIM2fOFGQymbBz584S++bk5JQY69Wrl+Di4vLMaxYEQRgxYoTQqFEjQRAE4dy5c4KJiYkwcOBAQalUamzn6ekpKBQK4f79++qxS5cuCVKpVBg+fHiZx7969aqgr68vfPDBB+qxyZMnC6ampkJqaqogCIJQUFAgdOrUSQAgnDlzRr3dkCFDBIVCIeTm5gqCIAiLFi0SJBKJEB4ert7m7bffFgAIW7duVY/NnDlT0NfXFxITEwVBEITY2NgS37N33nlHaNmypeDk5CSMGDFCPb527VoBgHD27FmN60hJSdH4fhR7emz69OmCQqEQvLy8hG7duqnH/+///k+QSqXC8ePHNfZfuXKlAEA4efKkxjEnTJhQ4mvZt29f9ffqacU/r6XJzc0t8f2MjY0V9PX1hblz55a6T7HIyEhBKpUKb775ZoljqFQq9ceNGjXS+Dr+8ssvgr6+vuDn51dqzV988YUgkUg0xp4+RvH3MT09XT2WnJws6OjolPg+POnQoUMCgDKv7enzPKm0f0sLFiwQJBKJcPv27TLPSUT1C5cdEhGVoXj5kamp6TO3K34/MzNT63Ns27YNcrkcPXv2RGpqqvrl5eUFExMTBAUFaWxfUFCgsV1qaqp6+Vxpli1bhgULFmDJkiV44403SrxvaGio/jgjIwOpqano1q0bYmJikJGRUa5riImJQd++feHp6Yn/+7//g1T63/+1JCQk4OLFi3j//fdhaWmpHm/dujV69uyJ/fv3q8dUKpXGddna2uLVV1/FX3/9BZVKBQAIDAxE165dYWVlBQDQ0dGBl5cXAKBjx47qYw0YMADJycm4evWqej8PDw+4ubmpt/H29i51v7y8PJw4caLUaw0LC8O2bduwYMECjet8UfHx8Vi6dCm++uormJiYaLy3bds2NG/eHO7u7hpfn+7duwNAiZ+R3NzcEj8jBQUFFapLX19ffZ1KpRL379+HiYkJ3NzccP78+Wfuu3PnTqhUKsyaNavE16qs7pQ5OTmYO3cuJk6ciIYNG5a6TX5+PvT19Z957uHDhyMvLw/bt29Xj23duhWFhYUYNmyYeuzpf0+enp5o3759hWaqnvy3lJ2djdTUVLz00ksQBAEXLlzQ+nhEVDdx2SERURmKQ9WT94CUprwhrTSRkZHIyMhQL6t7WnJyssbnBw8eLHHvWVn++ecf9ZK8tLS0Urc5efIkZs+ejZCQEI17nYCiMCaXy595juzsbPTq1QtJSUmwsrIq8Uv17du3AUAj9BRr3rw5/v33X2RnZ8PY2BhxcXFo3LhxqedJTU2FQqHAnTt30Llz52fWBACOjo4AgDt37sDLywt37txRj5V3v9LMmDEDXbp0wWuvvVbqvVUVNXv2bDg4OGDs2LEagQEo+hm5ceNGmd/3p39GVq9ejdWrV5fYrlGjRlrXpVKp8PPPP+OXX35BbGysxn10xQG4LNHR0ZBKpRrLXJ9n0aJFyM3Nxeeff46AgIBSt0lPTy8RUJ/m7u6ODh06YOPGjRg1ahSAoiWHnTp10lhGePLkSfj5+ZXY/8kgVV5xcXGYNWsWdu/eXeJ+zfL+IYOI6j6GLyKiMsjlctjb2+Py5cvP3O7y5ctwdHSEmZmZ1udQqVRQKBTYuHFjqe8//Qu3t7c3vvnmG42xZcuWYdeuXSX2DQ0NxejRo2FsbIxvvvkGAwcO1AhB0dHR6NGjB9zd3bFo0SI4OTlBT08P+/fvx08//aSebXqW1NRUGBsbY8+ePejfvz8WLFiA2bNnl+fSS7Czs8OhQ4c0xtasWYP
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Оценка инерции\n",
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"inertia = []\n",
|
|||
|
"for k in range(1, 11):\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" kmeans.fit(reduced_data)\n",
|
|||
|
" inertia.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(range(1, 11), inertia, marker='o')\n",
|
|||
|
"plt.title('Оценка инерции')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Оценка коэффициента силуэта\n",
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for k in range(2, 11):\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" kmeans.fit(reduced_data)\n",
|
|||
|
" score = silhouette_score(reduced_data, kmeans.labels_)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.plot(range(2, 11), silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Оценка коэффициента силуэта')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Оптимальным решением будет взять четыре кластера. Хотя коэффиуиент силуета максимален при двух кластерах, при четырех от все еще высок и при этом на четырех кластерах виден \"локоть\" на графике оценки инерции"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAlgAAAHLCAYAAAAUfOasAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB5WElEQVR4nO3dd1xTV/8H8E8IJMwEQYYoIiqK24qKOKuiqNhWa91tcVuLC3xcraVqh9Vq3aPWPmKrtI6ntXUUtbiq4kJxz4rFBaiMAMrM/f3hL7cJBEgwiODn/Xrlpbn33HO/dyR8c+6550oEQRBARERERCZjVt4BEBEREVU2TLCIiIiITIwJFhEREZGJMcEiIiIiMjEmWEREREQmxgSLiIiIyMSYYBERERGZGBMsIiIiIhNjgkVERERkYkywiIjohbt79y7Cw8PF97dv38amTZvKLyAiE2OCVclt27YNEolE76tx48blHR4RvaIkEgmCg4OxZ88e3L59G9OmTcNff/1V3mERmYx5eQdAL8ZHH32EBg0aiO+/+OKLcoyGiF511atXx+jRo9GjRw8AQLVq1XDw4MHyDYrIhCR82HPltm3bNvTv3x8HDhzA66+/Lk5//fXX8ejRI1y8eLH8giOiV97ff/+NR48eoXHjxrCxsSnvcIhMhpcIK7mcnBwAgJmZYYc6NTUVkydPhru7O+RyOerWrYv58+dDrVaLZW7fvg2JRIKFCxcWWr5x48Y6iZzG7Nmz9V6mLFj29ddfR+PGjRETE4O2bdvCysoKnp6eWLNmTaE6k5KSMHLkSLi4uMDS0hLNmjXDhg0bdMpoYtX32rhxIwAgPDwcEokEhw8fxtixY+Ho6AiFQoH3338fKSkpOvX99ttvCAwMhJubG+RyOerUqYPPPvsM+fn5hbZDIpGgT58+heIeO3ZsoUu02nFu375dp3xWVhaqVKlSaJ//888/+PDDD1G/fn1YWVnB0dER/fv3x+3btwutsyiabS/4qlWrlt7yRe1L7XVmZGRgypQpqF27NiwsLHTKPXr0qNh4Xn/99ULnxKlTp8TlC9q4cSN8fHxgZWUFBwcHDBo0CHfu3ClUpyHnVE5ODsLCwuDj4wOlUgkbGxt06NABBw4c0Cmnff4vXrwYHh4esLKyQqdOnXR+sCQlJcHJyQmvv/46tH/H3rx5EzY2Nhg4cKDR233w4EFIJJJCLT3Dhg0rdMzUajWWLFmCRo0awdLSEi4uLhg7dmyhcxoA/vjjD3Tq1Al2dnZQKBRo1aoVIiIiio3viy++gJmZmU45ANi6dat4TKpWrYp3330X9+7dKzLeOnXqwNfXF8nJybCysip0Pumjb3s3btwIMzMzfPXVV4XKG3Lerl+/Hl26dIGzszPkcjkaNmyI1atX611/cftL89kv7lUwblOdw0WdH4GBgZBIJJg9e7Y4bfXq1WjWrJl4rjdr1gzff/+9znLnz5/HsGHDULt2bVhaWsLV1RUjRozA48ePdcppvt8Lfr5Pnz4NiURSqK9dwWkAEBwcDIlEgmHDhonTNN9P2sdJrVajadOmeut42fASYSWnSbDkcnmJZZ88eYJOnTrh3r17GDt2LGrWrIljx45h5syZePDgAZYsWfLc8axevRq2trYAgJkzZ+otk5KSgl69emHAgAEYPHgwtmzZgnHjxkEmk2HEiBEAgKdPn+L111/HzZs3MX78eHh6emLr1q0YNmwYUlNTMWnSJJ06Bw8ejF69eulMa9eunc778ePHw97eHrNnz8a1a9ewevVq/PPPP+KXFvDsA29ra4vQ0FDY2tpi//79CAsLg0qlwtdff61Tn6WlJXbt2oWkpCQ4OzuLcW/evBmWlpZ6t93S0hLr16/XScx++eUXZGVlFSp76tQpHDt2DIMGDUKNGjVw+/ZtrF69Gq+//jouX74Ma2trvevQZ+7cufD09AQALFq0SO8fYY2+ffvi7bffBgD89ddfWLt2rc78qVOnYs2aNRg5ciTatWsHCwsL/PLLL/j1118Njkfb9OnT9U7/4osv8Mknn2DAgAEYNWoUHj58iOXLl6Njx444e/Ys7O3txbKGnFMqlQrr1q3D4MGDMXr0aKSnp+P7779HQEAATp48iebNm+us/4cffkB6ejqCg4ORlZWFpUuXokuXLrhw4QJcXFzg7OyM1atXo3///li+fDkmTpwItVqNYcOGwc7ODqtWrSrVdhtq7NixCA8Px/DhwzFx4kTExcVhxYoVOHv2LI4ePQoLCwsAz87pESNGoFGjRpg5cybs7e1x9uxZREZGYsiQIXrrXr9+PWbNmoVFixbplNGsr1WrVpg3bx4SExOxdOlSHD16tNAxKSgsLEzveW6IvXv3YsSIERg/fjxmzJiht0xJ5+3q1avRqFEjvPnmmzA3N8eOHTvw4YcfQq1WIzg4WGcbi9tfH3/8MUaNGgUAePToEUJCQjBmzBh06NChUEymPof1OXz4MHbv3l1oenp6Orp37446depAEARs2bIFo0aNgr29Pfr16wcA2LdvH27duoXhw4fD1dUVly5dwtq1a3Hp0iUcP35c74+e0rh58ya+++47g8r++OOPuHDhgknWW+YEqtSWLFkiABDOnTunM71Tp05Co0aNdKZ99tlngo2NjXD9+nWd6TNmzBCkUqkQHx8vCIIgxMXFCQCEr7/+utD6GjVqJHTq1KnQ9I8++kgAIDx69KjYsp06dRIACIsWLRKnZWdnC82bNxecnZ2FnJwcne3auHGjWC4nJ0fw8/MTbG1tBZVKVWKsGuvXrxcACD4+PmL9giAICxYsEAAIv/32mzjtyZMnhZYfO3asYG1tLWRlZelsR6NGjYSmTZsKCxcuFKf/+OOPQo0aNYQOHTro7H9NnIMHDxbMzc2FhIQEcV7Xrl2FIUOGFNoOfbFER0cLAIQffvihyO3VtnbtWgGAcPr0aXFaYGCg4OHhUahsbm6uAECYM2eOOE2z7+Li4sRp1apVEwICAnSW/fTTTwUAwsOHD4uNp1OnTjrnxO7duwUAQo8ePQTtr6vbt28LUqlU+OKLL3SWv3DhgmBubq4z3dBzKi8vT8jOztapLyUlRXBxcRFGjBghTtMcKysrK+Hu3bvi9BMnTggAhJCQEJ06Bg8eLFhbWwvXr18Xvv76awGAsH379lJt96FDhwQAwv79+3WWDwoK0jlmf/31lwBA2LRpk065yMhInempqamCnZ2d4OvrKzx9+lSnrFqt1hvfrl27BHNzc2HKlCk65XNycgRnZ2ehcePGOnXt3LlTACCEhYUVGe/FixcFMzMzoWfPnoXOJ320lz99+rRga2sr9O/fX8jPzy9U1tDzVt/nKSAgQKhdu7b43tD9paE5V9avX19oXlmcwwcOHBAACAcOHBDL+fr6ivv1008/LRSHRl5enqBQKITx48eL0/Ttk59++kkAIBw+fFicVtTn+9SpU4W2X98+GTBggNC4cWPB3d1dCAoKEqcXPE5ZWVlCzZo1xe3Rt19fJrxEWMlpmnKdnJxKLLt161Z06NABVapUwaNHj8SXv78/8vPzcfjwYZ3yT5480Sn36NGjQpfKNDS/TItqudFmbm6OsWPHiu9lMhnGjh2LpKQkxMTEAAB2794NV1dXDB48WCxnYWGBiRMnIiMjA4cOHSpxPQWNGTNG/FUPAOPGjYO5ubnOrz8rKyvx/+np6Xj06BE6dOiAJ0+e4OrVq4XqHD58ONavXy++X79+PYKCgoq8ZNuiRQs0atQIP/74I4BnlwEPHDig02yuL5bc3Fw8fvwYdevWhb29Pc6cOWPQNhtzXAxtDU1PT4ejo6NB6y+OIAiYOXMm+vXrB19fX515v/zyC9RqNQYMGKBz/rm6usLLy6vQZT1DzimpVAqZTAbg2WWI5ORk5OXloWXLlnr3Z58+fVC9enXxfevWreHr61uotWDFihVQKpV455138Mknn+C9997DW2+9Vart1rSE3r17t8jlgWefZaVSiW7duunsHx8fH9ja2or7Z9++fUhPT8eMGTMKnQP6WidOnjyJAQMGoF+/foVabE+fPo2kpCR8+OGHOnUFBgbC29sbu3b
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAj4AAAHHCAYAAAC/R1LgAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd5hURdaH3+rcPTkHcs5BcpIgWUSSCiaSq66KBMP64bqm1cWcBTNgwARiFgWVDJJzkJyZHDt33/r+aBimmZ4Z0hDrfZ55oKvq1q17p6f73Drn/I6QUkoUCoVCoVAorgB0F3oBCoVCoVAoFOcLZfgoFAqFQqG4YlCGj0KhUCgUiisGZfgoFAqFQqG4YlCGj0KhUCgUiisGZfgoFAqFQqG4YlCGj0KhUCgUiisGZfgoFAqFQqG4YlCGj0KhUCgUiisGZfgoFAqFQqG4YlCGj0Jxjpk2bRpCCFatWhXUnpeXR5s2bbBYLMyZM6fMY4UQLF68uES/lJIqVaoghOC6666rkPVfani9Xho2bIgQgpdeeulCL0ehUFzkKMNHoTgP5Ofn06tXLzZs2MDs2bPp06dPmeMtFgszZswo0b5gwQIOHjyI2WyuqKVecrz55pvs37//Qi9DoVBcIijDR6GoYAoKCujduzfr1q1j1qxZ9O3bt9xjrr32Wr7++mt8Pl9Q+4wZM2jZsiXJyckVtdxLivT0dJ5++mkeeeSRC70UhUJxiaAMH4WiAiksLKRPnz6sWbOGWbNm0a9fv1M67uabbyYrK4u5c+cWtXk8HmbOnMktt9wS8hhN03jttddo1KgRFouFpKQk7r77bnJycoLGfffdd/Tr14/U1FTMZjO1atXiv//9L36/P2hc165dady4MVu2bKFbt27YbDYqVarECy+8UOLcb775Jo0aNcJmsxETE0OrVq1C7lidjMvl4sknn6Ru3bpYLBZSUlIYPHgwu3btOpXbxP/93/9Rr149brvttlMaD7B3716EEEybNq2oraCggJYtW1KjRg2OHDkSNP7JJ58scj8W/xk5cmTRmH379nHvvfdSr149rFYrcXFx3Hjjjezdu7fE+XNzc5kwYQLVq1fHbDZTuXJlhg8fTmZmJvPnzw95ruI/Tz75ZNFchw4dYvTo0SQlJWE2m2nUqBEfffRR0PmOz/nll1/y6KOPkpycTFhYGNdffz0HDhwIGrto0SJuvPFGqlatitlspkqVKkyYMAGn0xk0buTIkQghaN68eYnrmzRpEkIIwsPDg9pPXjvAiy++iBCCrl27lphHoagoDBd6AQrF5Yrdbqdv376sXLmSmTNnnlZMTvXq1Wnfvj2ff/550Q7RL7/8Ql5eHsOGDeONN94occzdd9/NtGnTGDVqFGPHjmXPnj289dZbrF27liVLlmA0GoFAHFF4eDgPPPAA4eHh/PHHHzz++OPk5+fz4osvBs2Zk5NDnz59GDx4MDfddBMzZ87kkUceoUmTJkXrev/99xk7diw33HAD48aNw+VysWHDBv76669SjTQAv9/Pddddx++//86wYcMYN24cBQUFzJ07l02bNlGrVq0y79GKFSuYPn06ixcvRghxyvf2ZLxeL0OGDGH//v0sWbKElJSUkOM++eSTov9PmDAhqG/lypUsXbqUYcOGUblyZfbu3cuUKVPo2rUrW7ZswWazAQFD+Oqrr2br1q2MHj2aFi1akJmZyffff8/Bgwdp0KBB0Hnee+89tm7dyquvvlrU1rRpUwDS0tJo164dQgjGjBlDQkICv/zyC3fccQf5+fmMHz8+aI3PPvssQggeeeQR0tPTee211+jRowfr1q3DarUC8PXXX+NwOLjnnnuIi4tjxYoVvPnmmxw8eJCvv/46aD6DwcDmzZtZu3YtV111VVH7tGnTsFgs5d733NxcJk2aVO44heKcIxUKxTll6tSpEpDVqlWTRqNRfvvtt6d97MqVK+Vbb70lIyIipMPhkFJKeeONN8pu3bpJKaWsVq2a7NevX9FxixYtkoD87LPPguabM2dOifbj8xXn7rvvljabTbpcrqK2Ll26SEB+/PHHRW1ut1smJyfLIUOGFLUNGDBANmrU6JSv8TgfffSRBOQrr7xSok/TtDKP1TRNtmnTRt58881SSin37NkjAfniiy+We97jY6dOnSo1TZO33nqrtNls8q+//go5/t///rcUQgS1VatWTY4YMaLodah7umzZshL37/HHH5eA/Oabb0Je08mMGDFCVqtWLeS67rjjDpmSkiIzMzOD2ocNGyajoqKK1vTnn39KQFaqVEnm5+cXjfvqq68kIF9//fUyr2PSpElSCCH37dsXtK6wsDDZv39/OWbMmKL2RYsWSavVKgcOHCjDwsKC5gHkE088UfT6X//6l0xMTJQtW7aUXbp0CXmNCkVFoFxdCkUFkZaWhsVioUqVKmd0/E033YTT6eTHH3+koKCAH3/8sdQdlK+//pqoqCh69uxJZmZm0U/Lli0JDw/nzz//LBp7/OkeAi6ezMxMrr76ahwOB9u2bQuaNzw8PMiNZDKZaNOmDbt37y5qi46O5uDBg6xcufK0rm/WrFnEx8dz//33l+grbwdn2rRpbNy4keeff/60znkyDz/8MJ999hlfffUVbdq0CTnG4/GUG0xe/J56vV6ysrKoXbs20dHRrFmzpqhv1qxZNGvWjEGDBpWY43R2raSUzJo1i/79+yOlDPqd9+7dm7y8vKDzAgwfPpyIiIii1zfccAMpKSn8/PPPIa/DbreTmZlJhw4dkFKydu3aEusYPXo0M2bMwO12AzB16lQGDx5MVFRUmes/dOgQb775Jv/5z39KuMQUiopGGT4KRQXx7rvvYjKZ6NOnD9u3by9q9/v9HD16NOjH4/GUOD4hIYEePXowY8YMvvnmG/x+PzfccEPIc+3YsYO8vDwSExNJSEgI+iksLCQ9Pb1o7ObNmxk0aBBRUVFERkaSkJBQZNzk5eUFzVu5cuUSX8gxMTFBcUOPPPII4eHhtGnThjp16nDfffexZMmScu/Prl27qFevHgbD6Xnc8/PzmThxIg8//PAZG5UQ+P28/PLLACXioIqTm5tb7pez0+nk8ccfp0qVKpjNZuLj40lISCA3Nzfonu7atYvGjRuf8ZqPk5GRQW5uLu+9916J3/eoUaMAgn7nAHXq1Al6LYSgdu3aQXFI+/fvZ+TIkcTGxhIeHk5CQgJdunQBSr43APr164fBYOC7777Dbrfz1VdfFZ2/LJ544glSU1O5++67T/fSFYqzRsX4KBQVRMOGDfn555/p3r07PXv2ZMmSJVSpUoUDBw5Qo0aNoLF//vlnyADPW265hTvvvJOjR4/St29foqOjQ55L0zQSExP57LPPQvYnJCQAgS/xLl26EBkZydNPP02tWrWwWCysWbOGRx55BE3Tgo7T6/Uh55NSFv2/QYMGbN++nR9//JE5c+Ywa9YsJk+ezOOPP85TTz1V2u05Y1566SU8Hg9Dhw4t+tI+ePAgEDBg9u7dS2pqKiaTqcx5li9fzrPPPsvKlSuZMGECffr0IT4+vsS4o0ePlptFd//99zN16lTGjx9P+/btiYqKQgjBsGHDStzTc8HxOW+77TZGjBgRcszxWKBTxe/307NnT7Kzs3nkkUeoX78+YWFhHDp0iJEjR4a8DqPRyG233cbUqVNxOBzExcVxzTXXBMUpnczWrVuZNm0an376aVHcmUJxPlGGj0JRgbRp04Zvv/2Wfv360bNnTxYtWkRycnJQthZAs2bNQh4/aNAg7r77bpYvX86XX35Z6nlq1arFvHnz6NixY5C74mTmz59PVlYW33zzDZ07dy5q37Nnz2leWTBhYWEMHTqUoUOH4vF4GDx4MM8++ywTJ04sNdC1Vq1a/PXXX3i93tP6Aty/fz85OTk0atSoRN///vc//ve//7F27dqQGUfFGT16NI8++iiHDx+mYcOGTJgwIeQX9pYtW2jRokWZc82cOZMRI0YU7SBBIGMtNzc3aFytWrXYtGlTmXOdCgkJCUREROD3++nRo8cpHbNjx46g11JKdu7cWWQgbdy4kb///pvp06c
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"import scipy.cluster.hierarchy as sch\n",
|
|||
|
"optimal_k = 4\n",
|
|||
|
"# Иерархический\n",
|
|||
|
"dendrogram = sch.dendrogram(sch.linkage(reduced_data, method='ward'))\n",
|
|||
|
"plt.title('Дендрограмма для иерархической кластеризации')\n",
|
|||
|
"plt.xlabel('Объекты')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"hierarchical = AgglomerativeClustering(n_clusters=optimal_k)\n",
|
|||
|
"clusters_hierarchical = hierarchical.fit_predict(reduced_data)\n",
|
|||
|
"\n",
|
|||
|
"# Неиерархический\n",
|
|||
|
"kmeans = KMeans(n_clusters=optimal_k, random_state=42)\n",
|
|||
|
"clusters_kmeans = kmeans.fit_predict(reduced_data)\n",
|
|||
|
"\n",
|
|||
|
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=clusters_kmeans, palette='viridis', s=50)\n",
|
|||
|
"plt.title(f\"K-Means с {optimal_k} кластерами\")\n",
|
|||
|
"plt.xlabel(\"Главный компонент 1\")\n",
|
|||
|
"plt.ylabel(\"Главный компонент 2\")\n",
|
|||
|
"plt.legend(title=\"Кластеры\")\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Качество кластеризации для K-Means:\n",
|
|||
|
"Коэффициент силуэта: 0.6121\n",
|
|||
|
"Коэффициент Дэвиса-Болдина: 0.4656\n",
|
|||
|
"\n",
|
|||
|
"Качество кластеризации для агломеративной кластеризации:\n",
|
|||
|
"Коэффициент силуэта: 0.6077\n",
|
|||
|
"Коэффициент Дэвиса-Болдина: 0.4677\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.metrics import silhouette_score, davies_bouldin_score\n",
|
|||
|
"\n",
|
|||
|
"# Оценка качества для K-Means\n",
|
|||
|
"silhouette_kmeans = silhouette_score(reduced_data, clusters_kmeans)\n",
|
|||
|
"davies_bouldin_kmeans = davies_bouldin_score(reduced_data, clusters_kmeans)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Качество кластеризации для K-Means:\")\n",
|
|||
|
"print(f\"Коэффициент силуэта: {silhouette_kmeans:.4f}\")\n",
|
|||
|
"print(f\"Коэффициент Дэвиса-Болдина: {davies_bouldin_kmeans:.4f}\\n\")\n",
|
|||
|
"\n",
|
|||
|
"# Оценка качества для агломеративной кластеризации\n",
|
|||
|
"silhouette_hierarchical = silhouette_score(reduced_data, clusters_hierarchical)\n",
|
|||
|
"davies_bouldin_hierarchical = davies_bouldin_score(reduced_data, clusters_hierarchical)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Качество кластеризации для агломеративной кластеризации:\")\n",
|
|||
|
"print(f\"Коэффициент силуэта: {silhouette_hierarchical:.4f}\")\n",
|
|||
|
"print(f\"Коэффициент Дэвиса-Болдина: {davies_bouldin_hierarchical:.4f}\")"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aimenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|