565 lines
1.3 MiB
Plaintext
Raw Normal View History

2024-11-15 17:21:29 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Начало лабораторной\n",
"\n",
"Цены на кофе - https://www.kaggle.com/datasets/mayankanand2701/starbucks-stock-price-dataset"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Атрибуты\n",
"\n",
"Date — Дата\n",
"\n",
"Open — Открытие\n",
"\n",
"High — Макс. цена\n",
"\n",
"Low — Мин. цена\n",
"\n",
"Close — Закрытие\n",
"\n",
"Adj Close — Скорректированная цена закрытия\n",
"\n",
"Volume — Объем торгов"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Бизнес-цель: Улучшение финансового планирования\n",
"\n",
"Использование подходов кластеризации для предсказания объемов продаж и доходов по различным сегментам"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Выгружаем данные**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Date Open High Low Close Adj Close Volume\n",
"0 1992-06-26 0.328125 0.347656 0.320313 0.335938 0.260703 224358400\n",
"1 1992-06-29 0.339844 0.367188 0.332031 0.359375 0.278891 58732800\n",
"2 1992-06-30 0.367188 0.371094 0.343750 0.347656 0.269797 34777600\n",
"3 1992-07-01 0.351563 0.359375 0.339844 0.355469 0.275860 18316800\n",
"4 1992-07-02 0.359375 0.359375 0.347656 0.355469 0.275860 13996800\n"
]
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.decomposition import PCA\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import silhouette_score\n",
"\n",
"df = pd.read_csv(\"./static/csv/Starbucks Dataset.csv\")\n",
"print(df.head()) "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Очистка данных**"
]
},
{
"cell_type": "code",
"execution_count": 102,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Open High Low Close Adj Close Volume\n",
"0 0.328125 0.347656 0.320313 0.335938 0.260703 224358400\n",
"1 0.339844 0.367188 0.332031 0.359375 0.278891 58732800\n",
"2 0.367188 0.371094 0.343750 0.347656 0.269797 34777600\n",
"3 0.351563 0.359375 0.339844 0.355469 0.275860 18316800\n",
"4 0.359375 0.359375 0.347656 0.355469 0.275860 13996800\n"
]
}
],
"source": [
"\n",
"df_cleaned = df.drop(columns=['Date'], errors='ignore').dropna()\n",
"print(df_cleaned.head()) # Вывод очищенного DataFrame\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**Визуализация парных взаимосвязей**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAASgCAYAAABWngGUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5xcd33v/9cp02d22s72Xe2qd1tWc7ckN8AN4xQSIBAuSbhgHAIBkh8kQAKX5GJDCCUJLclNaAkQejEY4YYtS7Zsy7Jk9dX2vjM7vZzz+2OlxbLkgsHe9er9fDz0QHvOmZ3vd75eHvvR+1sM13VdRERERERERERERERE5hBzthsgIiIiIiIiIiIiIiLyVAowRERERERERERERERkzlGAISIiIiIiIiIiIiIic44CDBERERERERERERERmXMUYIiIiIiIiIiIiIiIyJyjAENEREREREREREREROYcBRgiIiIiIiIiIiIiIjLnKMAQEREREREREREREZE5RwGGiIiIiIiIiIiIiIjMOQowRETOAg8++CBve9vbuOiii1izZg2XX34573vf+zh8+PBsN+1F0dvby7Jly/jmN795xvvf/OY3WbZsGb29vWf8+rl4Pq8REREREXkped3rXsfrXve62W7GCy6TyfCpT32K6667jnXr1nHBBRfw+te/np/97GenPPcXf/EXbNu2bZZaKSJydlCAISIyz332s5/lNa95DYVCgf/v//v/+MIXvsCb3/xmHn/8cW688Ua+//3vz3YT55wtW7bwta99jYaGhtluioiIiIiIvIgOHz7MK1/5Sr7+9a9z44038ulPf5q//du/JRaL8b//9//mM5/5zGw3UUTkrGLPdgNEROSFs337dm677Tbe9ra3cfPNN89c37RpE6985St55zvfyV/8xV+wdOlSlixZMostnVsSiQSJRGK2myEiIiIiIi+iSqXC29/+djweD1/+8pdJJpMz96644gr+6q/+ik984hNs27aN5cuXz2JLRUTOHlqBISIyj33qU59i4cKFvPWtbz3tnsfj4W/+5m+wLIvPfe5zM9eXLVvGf/7nf/Ke97yHdevWceGFF/LhD3+YUql0yut/+tOf8qpXvYo1a9Zw0UUX8aEPfYh8Pj9z/5Of/CRXXnklP//5z7nuuutYvXo1V199Nd/61reetr3f/e53WbZsGQcOHDjtvZYtW8bjjz8OwL//+7/zspe9jDVr1nDJJZfwgQ98gGw2+3w+ojM603ZQ//M//8MrXvEK1qxZw/XXX899993HypUrT9uW6pFHHuHVr341a9asYcuWLXz+85//jbVLREREROSl4N577+X3f//3Wb9+PZs3b+ad73wnAwMDANxxxx2n/G4P8K1vfYtly5bx3//93zPX9u3bx7Jly9i9e/dp3/+f//mfWb16Nel0+pTr//Zv/8aqVasYGxvDcRw+/vGPs23bNlavXs22bdu47bbbqFQqT9vuO++8kwMHDvCnf/qnp4QXJ91yyy289rWvpVqtnvH1tVqNL33pS1x33XWsXbuWLVu2cOutt55SS42Pj/POd75zZnvfG2644bQaqb+/n3e84x1s2rSJc845h9e//vWnfF4iImcTBRgiIvPU+Pg4jz32GFu3bsUwjDM+E4vFuPDCC7njjjtOuf6JT3yCsbEx/uEf/oE3velNfO1rX+M973nPzP3vfve7vPWtb2XhwoV8+tOf5uabb+Y73/kOb3nLW3Bdd+a5kZER/uZv/oY/+IM/4LOf/SxtbW285z3vedqzN6644gqCweBp21p973vfY8mSJaxcuZLvfe97fPSjH+U1r3kNX/jCF3jrW9/Kt7/9bf72b//2WT8Tx3GoVqun/XEc5xlf961vfYu/+Iu/4LzzzuMzn/kMV199NW95y1uo1WqnPfuBD3yAa665hs9+9rOsW7eOj370o2zfvv1Z2yYiIiIiMh9861vf4o1vfCPNzc187GMf4y//8i/ZvXs3v/u7v8vY2BgXXHABXq+XX/ziFzOvuf/++wHYtWvXzLW77rqLRCLBOeecc9p7XHfddVSrVW6//fZTrn//+9/n4osvJplM8rnPfY6vfOUrvPWtb+WLX/wiv/d7v8cXvvAF/umf/ulp237XXXdhWRaXXXbZGe+nUin+6q/+itWrV5/x/l//9V/zkY98hCuuuIJ/+qd/4jWveQ3/+Z//eUqd9K53vYvDhw/zwQ9+kM997nOsXLmS97znPTOfwfj4OK9+9avZu3cvf/VXf8Vtt92G4zi85jWvOWvOMBQReTJtISUiMk/19fUB0Nra+ozPLViwgDvuuIN0Ok00GgWmt1D653/+Z2zb5rLLLsM0TT7ykY/wtre9jYULF3LrrbdyySWXcOutt858n87OTt7whjdw5513smXLFgAKhQIf/vCHueCCC2ae2bp1K3feeSeLFi06rS2BQICrr76aH/zgB/zZn/0ZALlcju3bt8+sInnggQdoa2vjNa95DaZpsmnTJoLB4Gmzr87kve99L+9973uf9bmn+sQnPsHWrVv50Ic+BMAll1yCx+PhtttuO+3Zd7zjHfze7/0eAOeeey4/+clPuP/++9m6deuv/L4iIiIiIi8ljuNw6623cvHFF5/yu/J5553HK17xCr7whS/w7ne/m02bNnHffffxpje9CYD77ruPVatWsXPnzpnX3H333TO1yFO1trayceNGvve97/Hbv/3bABw/fpxHH32Uj3/848B03bB69WpuuukmYHob3UAgQCQSedr2Dw4OEo/HCYVCv3LfDx06xNe//nXe+c538sd//McAXHTRRTQ0NPDud7+bu+66i8suu4wHHniAt771rVxxxRUz7YrFYni9XmB6tfnk5CRf+cpXZmq5Sy+9lFe84hV84hOf4B//8R9/5baJiLyUaQWGiMg8dXKGj8fjecbnLMs65XmYntFk27/MuK+++moAdu7cyZEjRxgcHGTbtm2nrGLYuHEj4XCYe++995Tvf+655878vampCeCUraae6oYbbpgpPmB6iXm5XOb6668H4Pzzz+fo0aO86lWv4lOf+hR79uzhuuuu43Wve90z9hPg5ptv5utf//ppf558PshTdXd309/fz8te9rJTrl9zzTVnfH7Dhg0zfw8EAtTX15PJZJ61bSIiIiIiL3VHjx5lZGSEa6+99pTrHR0drFu3jgceeACALVu28OCDD1Iulzl69CiDg4O8+c1vpq+vj76+PrLZLLt3756ZGHUm119/PTt37mRkZASYXn0RDofZtm0bAJs3b57Zyurzn/88hw4d4rWvfS033HDD035Py7LOuMr6uTjZt6fWCddccw2WZbFjx46Zdn3yk5/klltu4b//+78ZHR3lPe95D+eddx4wHeasWLGCxsbGmVrLNE0uvfTSU1atiIicLRRgiIjMUydn65xcifF0enp6CIVCxGKxmWuNjY2nPHNy/9d0Os3k5CQAH/zgB1m1atUpf7LZLMPDw6e8NhAIzPz95OypJ4clT7V582YaGxtntpH6/ve/z6ZNm2bCj1e84hXcdtttBINBPvOZz/Bbv/VbXH755fzgBz94xn7C9GeyZs2a0/480yqV8fHxUz6Dk+rr68/4/JP7C9N9fqb+ioiIiIjMFydrhTP9rlxfX8/U1BQwHWAUCgUeeugh7rvvPrq6uti6dSvBYJCdO3fyi1/8AsMwuPjii5/2vV72spdh2zY//OEPgem64eqrr8bv9wPwpje9ib/+67+mWCxy6623cs0113DttdfObNV0Jq2traTTaXK53NM+Mzg4eMbrJ1eEp1KpU67btk08Hp/p+8c//nHe8IY38Nhjj/G+972Pyy67jP/1v/7XTN02OTnJww8/fFqt9aUvfYmpqSkKhcLTtk1EZD7SFlIiIvNUMpnk3HPP5cc//jF/+qd/esal19lslnvvvXdmltJJExMTp3w9OjoKTG8tVVdXBzCz9PupTm5D9XyZpsl1113H9773Pd785jdz77338jd/8zenPHPttddy7bXXMjU1xT333MPnPvc53vWud7F+/frTwpdf18ngZGxs7JTrT/1aRER
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"sns.set(style=\"whitegrid\")\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['Open'], y=df_cleaned['High'], alpha=0.6)\n",
"plt.title('Open vs High')\n",
"\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['Low'], y=df_cleaned['Close'], alpha=0.6)\n",
"plt.title('Low vs Close')\n",
"\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['High'], y=df_cleaned['Adj Close'], alpha=0.6)\n",
"plt.title('High vs Adj Close')\n",
"\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['Volume'], y=df_cleaned['Adj Close'], alpha=0.6)\n",
"plt.title('Volume vs Adj Close')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**Стандартизация данных для кластеризации**\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 112,
"metadata": {},
"outputs": [],
"source": [
"scaler = StandardScaler()\n",
"data_scaled = scaler.fit_transform(df_cleaned)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**Агломеративная (иерархическая) кластеризация**\n"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1kAAAJ1CAYAAAArGDrKAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABuhklEQVR4nO3deXgTdeLH8U/apAe0hXK05T6l3DcIuwJaFFlFXcBFBfFkQbkUcBUUFTyQVQ4R5PLiEAQUFvG3KgoouB4IKIIXqAjKUQqUQqFHkia/P2rGpk3bNAxNC+/X8/R50pnJ5JvJZDKf+R5jcbvdbgEAAAAATBES7AIAAAAAwIWEkAUAAAAAJiJkAQAAAICJCFkAAAAAYCJCFgAAAACYiJAFAAAAACYiZAEAAACAiQhZAAAAAGAiQhYAAAAAmIiQBcA048ePV2Jios+/8ePHB7t4API4deqUOnTooF27dunUqVO699579eqrrwa7WABwQbAGuwAALizVq1fXnDlzvKaNHDkySKUBUJhKlSrpzjvv1IABA+R2u5WYmKh///vfwS4WAFwQCFkATJOTk6MKFSqobdu2XtPDwsKCUyAARRo5cqRuvvlmnT59WvXq1VNoaGiwiwQAFwSaCwIwjdPpVEREhF/Lbt++XbfeeqvatGmjzp0766GHHlJqaqoxf82aNUpMTNTBgwe9npeUlOTV9NDhcBTaRDH/ur755hv17dtXrVu31nXXXaf333/fa93p6el65plndOWVV6pVq1bq06eP3nrrrQKvn/91Dh48qMGDB2v8+PGaP3++/vKXv6hDhw4aPny4Dh065PX8DRs2aODAgWrXrp1atmyp3r17a9myZcb8rVu3GuvdsWOH13Nff/11JSYmKikpqUB5Jk6c6LXsqVOn1LJlSyUmJmrr1q1+v35h3nzzTfXr109t27ZV69atdcMNN+i9994rsI19NREt7PMZPHiw12u8++676tevn9q1a6e//vWveuyxx3Tq1Clj/uzZs5WYmKh27drJbrd7PXf06NEFmqVmZ2fr2WefVY8ePdSyZUtdd911evfdd72el5SUpJkzZ2rKlCnq1KmTLr30Uj344INKS0vz+/0X1Ux2zZo1xmea93M4ceKEOnbs6POzTExMVNOmTdWpUyeNGjVKJ0+eNJZJTEzU7Nmzvcrm2S6BbEtJqlatmho2bKjPPvus2Ka9+V/rv//9rzp16qTp06dL8t5/8//lLfePP/6okSNHqkuXLmrRooW6deump556SllZWcYydrtdzz//vHr27KnWrVurT58++s9//uPXNpekw4cPa+zYsercubPatGmj22+/Xd9//72x/oMHDyoxMVH//e9/dc8996hNmza6/PLL9eKLL8rlcnl9Lvm3ydixY70+U7fbrVmzZqlbt27q0KGD7rnnHh05csRYPicnRwsXLlSfPn3UunVrtW3bVjfffLO++OKLIj9HqeBnnv9/t9utm2++2et4OX78eK99S5JWrFjhc/8BcH5QkwXANJmZmapUqVKxy23btk133nmnunTpoueff16nTp3SrFmzdNttt+mtt97yO6hJuSfSkjRv3jxVqVJFUu4Jcf5wJEnDhg3TrbfeqjFjxuitt97S/fffrwULFqhHjx7KysrSwIEDdeLECY0ePVq1atXShg0b9Mgjj+j48eO65557jPX06NFDw4cPN/6Pi4uTJG3cuFGxsbGaOHGiXC6Xpk+frsGDB+u///2vIiMj9fHHH2vEiBG67bbbNGrUKGVlZWn58uV64okn1LJlS7Vp08ZYZ8WKFbVp0yZ16NDBmPbuu+8qJKTgtbGKFSvq448/ltvtlsVikSR98MEHysnJ8VquJK+f17Jly/TUU09p1KhR6tChg06dOqWXXnpJDzzwgNq1a6eEhARj2Tlz5qh69eqSZHweknTjjTfqH//4h/H/5MmTvV5j7ty5euGFFzRw4ECNGTNGv//+u2bNmqWdO3dq1apVXvuExWLR559/rh49ekiSzp49q82bN3ttG7fbrREjRuirr77S6NGj1ahRI3344YcaM2aM7Ha7/v73vxvLLl++XPXq1dMzzzyj1NRUTZ8+XQcOHNCKFStksViKff/Dhw/XzTffLCm3Zqh58+bG/lG3bl399NNPBbbp9OnTlZ6erpiYGK/pnn3L4XDol19+0bPPPqunn35a06ZN8/nZ+FKSbenhcDg0ZcoUv19DkrKysvTEE09oyJAhuu6667zmPfbYY2rRooXx/0033WQ8TklJ0aBBg9S2bVtNnTpVYWFh2rJli1577TXFxcVp6NChkqQHHnhAmzdv1r333qs2bdpo8+bNGj9+vGw2W7HbPDU1VTfffLMiIyP16KOPKjIyUosXL9agQYP01ltvqVGjRkZ5Jk2apB49emj27NnasWOH5syZo4yMDP3rX//y+b63b9+u//73v17TFi1apAULFujBBx9UgwYNNHXqVN13331atWqVJGnatGl64403NG7cOCUmJuro0aN68cUXdd999+njjz9WZGRkibZ9Xm+//ba+/vrrIpc5deqUnn/++YBfA0DJEbIAmCYtLc0IHEWZPn26GjRooAULFhjNk9q0aaNrr71Wq1ev1qBBg/x+zYyMDElSu3btFBsbK0n65JNPfC47ePBgjRgxQpLUrVs39e3bVy+++KJ69OihNWvWaO/evVqxYoXatWtnLON0OjV37lzdfPPNqly5sqTc8JC/SaSUGzLXrFmjOnXqSJIaNmyovn37au3atbrlllv0888/q2/fvnrkkUeM57Rr106XXnqptm7d6hVyunfvro0bNxonesnJyfr666/VsWPHArVjXbt21ebNm/XNN98Y5XrvvffUqVMnr9qTkrx+Xr///rvuvvtur2BZq1Yt9evXTzt27NC1115rTG/WrJlq165dYB0JCQle2ywqKsp4fOrUKc2bN08DBgzQY489Zkxv0qSJBg0aVGCf8GwbT8jatGmTqlev7lX78Nlnn+mTTz7RzJkzdc0110jK/TwzMzM1bdo09enTR1Zr7k9gSEiIXnvtNUVHR0vK/XxHjBihTz75RN27d/fr/detW1dSbtPYwvYPj927d+vtt99Ws2bNdPr0aa95eZ/bqVMnffbZZ/ruu+8KXVd+Jd2WHkuXLlVGRoaqVavm92v93//9n2w2m4YMGVKgmWHjxo0L3QZ79+5Vs2bNNGvWLGM/+Mtf/qJPP/1UW7du1dChQ7V3716tX79eDz/8sG6//XZJufv5oUOHtHXrVvXp06fIbT5z5kylpaXpjTfeUK1atSTl7jfXXHONZs2apRdeeMFYtkWLFkaI7d69uzIyMrR48WLde++9XvupJLlcLj311FNq0aKF1+eSkZGh4cOH64477pCUW0v2xBNP6PTp04qJiVFKSorGjBnjVXsbHh6uUaNGac+ePUXuL0U5e/aspk2bVqA8+b3wwguqWbOmV60ogPOL5oIATJOSkqL4+Pgil8nMzNQ333yjHj16yO12y+l0yul0qk6dOmrUqJE+/fRTr+VdLpexjNPpLLC+5ORkhYSEFDgZ8qVv377GY4vFoquuukq7du1SVlaWvvzyS9WqVcsIWB7XX3+9srOz9c033xS7/vbt2xsBS5KaN2+uOnXqaNu2bZKkIUOGaOrUqTp79qy+/fZbvfvuu1qwYIEkFWj+lpSUpP3792vfvn2SpPfff19t2rQxThjzio6OVufOnbVx40ZJUmpqqrZu3eoVfkr6+nmNHz9eDzzwgE6fPq2dO3fq7bffNpoYFvU8f+3cuVN2u119+vTxmt6xY0fVqlVLX375pdf0nj17atOmTXK73ZJya/g8Qcrj888/l8ViUY8ePbz2n6SkJB07dsyrdikpKckIWJ7/rVar8bmZ+f7dbreeeuop3XjjjWratKnP+U6nU3a7Xbt27dKOHTvUsmVLr2XyfyfyhsuSbktJOn78uF588UU99NBDCg8P9+t9HD16VC+99JIGDhxY4n5cl112mV5//XWFh4fr559/1saNGzVv3jylpqYa29PTVLZXr15ez509e7aefPLJYl/j888/V7NmzRQ
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[11 9 6 ... 14 14 15]\n"
]
}
],
"source": [
"linkage_matrix = linkage(data_scaled, method='ward')\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(linkage_matrix)\n",
"plt.title('Дендрограмма агломеративной кластеризации')\n",
"plt.xlabel('Индекс образца')\n",
"plt.ylabel('Расстояние')\n",
"plt.show()\n",
"\n",
"# Получение результатов кластеризации с заданным порогом\n",
"result = fcluster(linkage_matrix, t=10, criterion='distance')\n",
"print(result) # Вывод результатов кластеризации"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**Визуализация распределения кластеров**\n"
]
},
{
"cell_type": "code",
"execution_count": 107,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAASgCAYAAABWngGUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5ydVZ348c9Tbu93Zu70mpn0DoTQQ0BBAVF0d93Fti7ruoIV64oUFcsKogKuYt2fYltRlGJFREQINYH0Nr232+tTfn8MGR2TUDNzJ8n3/XrlReY5995zznMyw/nO9znnKLZt2wghhBBCCCGEEEIIIYQQQswjarkbIIQQQgghhBBCCCGEEEII8fckgSGEEEIIIYQQQgghhBBCiHlHEhhCCCGEEEIIIYQQQgghhJh3JIEhhBBCCCGEEEIIIYQQQoh5RxIYQgghhBBCCCGEEEIIIYSYdySBIYQQQgghhBBCCCGEEEKIeUcSGEIIIYQQQgghhBBCCCGEmHckgSGEEEIIIYQQQgghhBBCiHlHEhhCCCHEPGDbdrmbIIQQQgghhDjKSVwhhDjWSAJDCCFm2RNPPMG73/1uTjvtNFasWME555zDVVddxb59+8rdtDnR19fHokWL+NnPfnbI8p/97GcsWrSIvr6+Q379QryU9/ytZDLJLbfcwkUXXcSaNWs45ZRTeOtb38of/vCHGa/76Ec/ysaNG19SHc9V94c//GEef/zxI/q5QgghhBDi2PLmN7+ZN7/5zeVuxqwr59z85ZK4QgghjjxJYAghxCy67bbbuPTSS8nlcvzXf/0X3/rWt3jnO9/J9u3bed3rXsc999xT7ibOOxs2bODHP/4xsVhsTurbt28fr33ta/npT3/K6173Om699VY+9alPEQ6H+c///E+++tWvzmr9O3bs4Be/+AWWZc1qPUIIIYQQQsx35Z6bvxzlbrvEFUKIY5Ve7gYIIcSx6v777+fGG2/k3e9+N1dcccX09XXr1vHa176WK6+8ko9+9KMsXLiQjo6OMrZ0folGo0Sj0Tmpq1Qq8b73vQ+Hw8EPfvADKioqpsvOPfdcPvGJT/DlL3+ZjRs3snjx4jlpkxBCCCGEEMejo3lufjS3XQgh5jtZgSGEELPklltuoa2tjcsvv/ygMofDwSc/+Uk0TeMb3/jG9PVFixbx/e9/n4985COsWbOGU089leuvv55CoTDj/b///e+55JJLWLFiBaeddhqf/vSnyWaz0+U333wzr3jFK/jjH//IRRddxPLlyznvvPO48847D9veu+66i0WLFrF79+6D6lq0aBHbt28H4H//9385//zzWbFiBWeccQbXXnst6XT6pdyiQzrUdlA///nPefWrX82KFSt4zWtew8MPP8zSpUsP2pZqy5YtvPGNb2TFihVs2LCBb37zm89Z1wMPPMDu3bt573vfOyPIOOA973kPb3rTmzAM45DvX7RoETfffPOMazfffDOLFi2a/npiYoIrr7xyeguxiy++eHocNm3axFve8hYA3vKWt8zYEuCFjvEtt9zCunXrOP3000kkEmzdupW3vvWtnHDCCaxZs4a3ve1tbN68+TnvgxBCCCGEOHY89NBD/Mu//AsnnHACJ598MldeeSWDg4MA3HfffTPm9gB33nknixYt4v/+7/+mr+3YsYNFixbx1FNPHfT5X/va11i+fDmJRGLG9e9+97ssW7aM8fFxLMvipptuYuPGjSxfvpyNGzdy4403UiqVDtvulzs3N02T22+/nYsuuoiVK1eyYcMGbrjhhhmx1HPNzQ8YGBjgAx/4AOvWrWPVqlW89a1vnXG/ZqPtElcIIcThSQJDCCFmwcTEBFu3buXss89GUZRDviYcDnPqqady3333zbj+5S9/mfHxcb70pS9x2WWX8eMf/5iPfOQj0+V33XUXl19+OW1tbdx6661cccUV/PKXv+Rd73rXjAPbRkdH+eQnP8lb3vIWbrvtNhoaGvjIRz5y2LM3zj33XLxe70HbWt199910dHSwdOlS7r77br7whS9w6aWX8q1vfYvLL7+cX/ziF3zqU5963ntiWRaGYRz05/mWON9555189KMfZe3atXz1q1/lvPPO413vehemaR702muvvZYLLriA2267jTVr1vCFL3yB+++//7Cf/ac//QlN0zjrrLMOWV5VVcUnPvEJli9f/rz9O5wPfehD7Nu3j+uuu45vfOMbLF26lI985CM88sgjLFu2jKuvvhqAq6++mmuuuQZ44WM8MDDAAw88wE033cTHPvYxNE3jsssuIxKJcPPNN3PTTTeRy+X4t3/7N1Kp1EvugxBCCCGEODrceeedvP3tb6e2tpYvfvGLfOxjH+Opp57in/7pnxgfH+eUU07B6XTyl7/8Zfo9jzzyCMCMsxP+9Kc/EY1GWbVq1UF1XHTRRRiGwW9/+9sZ1++55x5OP/10Kioq+MY3vsEPf/hDLr/8cr797W/zz//8z3zrW9/if/7nfw7b9pc7N7/66qv57Gc/y7nnnsv//M//cOmll/L9739/xhz6uebmMBXHvfGNb2Tbtm184hOf4MYbb8SyLC699NLnPMNQ4gohhJg9soWUEELMgv7+fgDq6+uf83XNzc3cd999JBIJQqEQMLWF0te+9jV0Xeess85CVVU++9nP8u53v5u2tjZuuOEGzjjjDG644Ybpz2lpaeFtb3sbDzzwABs2bAAgl8tx/fXXc8opp0y/5uyzz+aBBx5gwYIFB7XF4/Fw3nnnce+99/L+978fgEwmw/333z+9iuTRRx+loaGBSy+9FFVVWbduHV6v96Cnrw7l4x//OB//+Mef93V/78tf/jJnn302n/70pwE444wzcDgc3HjjjQe99gMf+AD//M//DMDq1av53e9+xyOPPMLZZ599yM8eGhoiEong8/ledLteqEcffZTLL7+cc889F5jaQiwcDuN0OvH7/bS3twPQ3t5Oe3s7tm2/4DE2DIOPfOQjnHjiiQBs3ryZyclJ3vKWt7B27VoA2tra+PGPf0wmkyEQCMxaP4UQQgghRHlZlsUNN9zA6aefPmOuvHbtWl796lfzrW99iw9/+MOsW7eOhx9+mMsuuwyAhx9+mGXLlvHYY49Nv+fBBx+cjkX+Xn19PSeddBJ33303//AP/wBAT08PTz/9NDfddBMwNQdevnw5r3/964GpObDH43nO+ejLmZvv3buXn/70p1x55ZW84x3vAOC0004jFovx4Q9/mD/96U+cddZZzzk3h6nV5vF4nB/+8IfTsdyZZ57Jq1/9ar785S/zla985Yi3/YWSuEIIcbySFRhCCDELDjzN4nA4nvN1mqbNeD1MPdGk63/NL5933nkAPPbYY+zfv5+hoSE2btw4YxXDSSedhN/v56GHHprx+atXr57+e01NDcCM5cJ/7+KLL54OPmBqiXmxWOQ1r3kNAOvXr6ezs5NLLrmEW265hWeeeYaLLrpoxhLlw7niiiv46U9/etCfvz0f5O91d3czMDDA+eefP+P6BRdccMjXH5hww1RCprKykmQyedjP1zTtkCs5jqSTTz6Zm2++mfe85z383//9H2NjY3zkIx+ZDgT+3osd4yVLlkz/vaOjg2g0yjvf+U6uvvpqfve731FZWcmHPvSh6fEXQgghhBDHps7OTkZHR7nwwgtnXG9qamLNmjU8+uijAGzYsIEnnniCYrFIZ2cnQ0NDvPOd76S/v5/+/n7S6TRPPfXU9C+3D+U1r3kNjz32GKOjo8DU6gu/38/GjRuBqTnwga2svvnNb7J3717e9KY3cfHFFx/2M1/O3PxA3/4+TrjgggvQNI1NmzZNt+u55uYPP/wwS5Ysobq6enoerqoqZ5555oxVK0ey7S+UxBVCiOOVJDCEEGIWHHha58BKjMPp7e3F5/MRDoenr1VXV894zYE9VBOJBPF4HIDrrruOZcuWzfiTTqcZGRmZ8V6PxzP99wNPT/1tsuTvnXzyyVRXV09vI3XPPfewbt266Unqq1/9am688Ua8Xi9f/epXecMb3sA555zDvffe+5z9hKl7smLFioP+PNcqlYmJiRn34IDKysp
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"plt.figure(figsize=(16, 12))\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['Open'], y=df_cleaned['High'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Open vs High Clusters')\n",
"\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['Low'], y=df_cleaned['Close'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Low vs Close Clusters')\n",
"\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['High'], y=df_cleaned['Adj Close'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('High vs Adj Close Clusters')\n",
"\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['Volume'], y=df_cleaned['Adj Close'], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('Volume vs Adj Close Clusters')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**KMeans (неиерархическая кластеризация) для сравнения**\n"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Центры кластеров:\n",
" [[1.11514668e+01 1.12730106e+01 1.10263573e+01 1.11517428e+01\n",
" 8.88448878e+00 1.38211576e+07]\n",
" [7.75090604e+01 7.82228147e+01 7.67780822e+01 7.75181228e+01\n",
" 7.11609142e+01 8.57799102e+06]\n",
" [6.49827065e+00 6.66207109e+00 6.35238963e+00 6.51956023e+00\n",
" 5.11050859e+00 4.27841811e+07]]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAASgCAYAAABWngGUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5wcdf348dfMbO/X++Uu7dIraYQkEEroRVHwC1IUpEpTRH+gglIEKUoTCKAoVUWRJlWkBEgjkJCeXMr1tru3t313Zn5/HDk5Umi53CV5Px+PPHI7Mzufz8xn7x7z3venKKZpmgghhBBCCCGEEEIIIYQQQgwgan9XQAghhBBCCCGEEEIIIYQQ4rMkgSGEEEIIIYQQQgghhBBCiAFHEhhCCCGEEEIIIYQQQgghhBhwJIEhhBBCCCGEEEIIIYQQQogBRxIYQgghhBBCCCGEEEIIIYQYcCSBIYQQQgghhBBCCCGEEEKIAUcSGEIIIYQQQgghhBBCCCGEGHAkgSGEEEIIIYQQQgghhBBCiAFHEhhCCCG+FtM0+7sKQgghhBBCiH2ExBdCCCE+TRIYQoi9zne/+12++93vbrc9Go3y7W9/mzFjxvDaa6/1HFtTU8Opp5660/Ndfvnl1NTU8NOf/rTP6txXUqkUf/rTn/jmN7/J5MmTmTp1KqeeeirPPPNMrwf/u+66i5qamt1adjqd5sYbb+S5557bLefbWbvuSel0mr/85S9861vfYtKkSUyaNImTTjqJhx9+mEQi0a9121M+77Py2Xb6Ku02ENpaCCGEEGIbiS/+R+KL3aO+vp6amhr+8Y9/9Ev5e9KKFSu48sorOfjggxk3bhyHHXYYP//5z6mrq+t1XE1NDXfddVc/1VIIsTez9HcFhBBid4hGo5xzzjmsWbOGe+65hzlz5vTsU1WVDz/8kObmZoqLi3u9Lx6P88Ybb+zp6u4W7e3tnHPOOTQ1NfHd736XcePGYRgGb7zxBj/96U9ZsmQJv/71r1EUpU/Kb21t5ZFHHuGmm27aLef75S9/uVvO81V1dXVx7rnnsmbNGr7zne9wySWXoCgKS5Ys4Q9/+AP//Oc/mT9//nafof1df7ebEEIIIURfkPhC4gvx+R577DFuvPFGpk2bxo9+9CMKCwvZsmULDz30EK+88gqPPPIII0aM6O9qCiH2cpLAEELs9bYFF6tXr+YPf/gDM2fO7LV/1KhRbNiwgZdeeomzzjqr17433ngDp9OJz+fbgzXePa666iqam5t56qmnqKqq6tl+8MEHU1payu23384hhxzCoYce2n+V/BKGDh3ar+VfffXVrF+/nieffLLXQ/ZBBx3ECSecwHe+8x1+/OMf85e//KXPgra9UX+3mxBCCCHE7ibxhcQX4vMtXbqUG264gdNOO42rr766Z/u0adM47LDDOPHEE/l//+//7RejUIQQfUumkBJC7NVisRjnnnsua9eu5YEHHtguuABwuVzMmTOHl156abt9L774IvPmzcNi6Z3PNQyDBx54gMMPP5wxY8Ywb948/vKXv/Q6Rtd1HnjgAY499ljGjRvHhAkTOPXUU3n//fd7jrnrrrs4/PDD+e9//8txxx3Xc65nnnmm17keeeQRjjzySMaOHcusWbO49tpriUajO73u1atX88477/D973+/V3CxzVlnncVpp52Gy+Xa4fvnzp273ZD2f/zjH9TU1FBfXw9AMpnk2muvZfbs2YwZM4YjjzyShx56COgeEr0tcPnZz37G3Llze86zZMkSTj/9dMaPH8/UqVO56qqrCAaDvcoZNWoUf/vb35g5cyZTp05lw4YN2w3xrqmp4bHHHuPqq69m6tSpTJw4kUsvvZT29vZe9X7ooYc49NBDGTduHKeeeir/+c9/qKmpYeHChT11/bzhyuvXr+fll1/mvPPO22EPoerqai699FIWL17c077b7tdHH33ESSedxLhx4zjuuOO2+5ylUiluueUW5syZw5gxYzjuuON48cUXt2uPO++8k5tvvpkDDzyQcePG8f3vf5/NmzfvtM7f+973+MY3vrHd9gsvvJDjjz8egGAwyI9+9CNmzpzJ2LFjOeGEE7b77H1dn223aDTKL37xC2bMmMHEiRO5/PLL+dOf/rTdFAOmaTJ//vyeoeannHIKy5cv3611E0IIIYT4siS+kPhid8QXX1QqleKee+7paasjjjiCBx54AMMwALj44ot7nu23OfPMMxkzZgzJZLJn2w033MC8efN2WMa8efO45JJLttt+wgkncMEFFwCwdetWzj//fKZNm8b48eM55ZRTePPNN3dZ94ceegiv18sVV1yx3b7c3Fx++tOfcuihhxKPx3f4/tbWVn72s58xZ84cxo0bx8knn8zrr7/e65gFCxbw7W9/m4kTJzJlyhQuuOACNm7c2OuY1157jW984xuMHTuWmTNncv311++0TCHE3kkSGEKIvVY8HucHP/gBq1atYv78+UybNm2nxx599NE9w7y3iUajvPXWWxx77LHbHX/ttddy5513cvzxx3Pfffdx5JFHcuONN3LPPff0HHPrrbdy7733csopp/Dggw/y61//mnA4zKWXXtprvYS2tjZ+9atfccYZZ/DAAw9QXl7OVVdd1fPg9fzzz/Pb3/6W0047jYceeoiLLrqIf/3rX/z617/e6fW8/fbbAL0e7D/Nbrf3fIn8Vd1444289dZbXHXVVT0P8bfccgtPP/00hYWF3H333QBccMEFPT8vXryYs846C4fDwe9+9zv+3//7fyxatIgzzjij1wO2rus8/PDD3HDDDfzsZz9jyJAhO6zDHXfcgWEY3H777fzkJz/hjTfe4MYbb+zZf/fdd3Prrbdy1FFHce+99zJ+/Hguu+yyXucoLCzkqaee4lvf+tZOr/Xz7id0f4YURdnuofq8887j0EMP5e6776a6uprLLrus52HfNE0uuuginnzySc4++2z+8Ic/9Hyp/9kg889//jO1tbXcdNNNXH/99Xz88cdcddVVO63P8ccfz8qVK9myZUvPtkgkwltvvcUJJ5wAwJVXXsnGjRu57rrrmD9/PqNGjeKqq67qFQTvTDab3eG/z1tU8cILL+Tf//43P/zhD7njjjuIxWLcdttt2x23dOlSXn31VX7+85/z29/+ltbWVi644AKy2ezn1k0IIYQQoi9IfCHxxe6KL74I0zQ5//zzefDBB/nWt77V87n43e9+1zP91Zw5c1i3bh0dHR1Ad8Jj2bJlZDIZPvzww55zvfXWWxxyyCE7LOf444/nzTff7JXA2rhxI2vWrOGEE07AMAzOO+88EokEt9xyC/feey+BQIALLrigV6zx2bq/8847zJgxA6fTucNjjj76aC666KIdJr3a29s5+eSTWbJkCZdffjl33XUXZWVlXHTRRTz77LMA1NXVceGFFzJmzBj+8Ic/cMMNN7Bp0yZ+8IMf9CR4nnvuOS666CIGDx7MPffcw8UXX8yzzz7LhRdeKIvBC7EPkSmkhBB7pW3BxdKlS3te78rBBx+M0+nsNcz71VdfJS8vj8mTJ/c6dtOmTfz1r3/liiuu4Ac/+AHQPY2Qoijcf//9/N///R85OTm0trZy+eWX9+rVY7fb+eEPf8jatWuZMGECAIlEghtuuKHnYb+qqopDDjmEN998kyFDhrBo0SLKy8s57bTTUFWVqVOn4nK56Ozs3On1NDU1AVBeXv7Fb9qXtGjRImbOnMkxxxwDdA8Fdrlc5OXlYbPZGDlyJACVlZWMGjUKgNtuu43q6mruv/9+NE0DYPz48RxzzDE8/fTTnHbaaT3nP//88zn44IN3WYfhw4f3mgN3+fLlPT3d4vE48+fP57TTTuPHP/4x0N1OiUSCp556quc9Nputpy12ZluvsLKysp0e4/f78fv9NDQ09Nr+3e9+l4suugiAWbNmcdJJJ/XMk/zuu+/y9ttvc8cdd3D00Uf3HJNIJLj11ls59thje3rn+Xw+7r333p77tnXrVu666y5CoRA5OTnb1eeII47guuuu4/nnn+8p/5VXXkHX9Z6gedGiRVx00UUcdthhAEydOpVAIIDNZtv
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"random_state = 9\n",
"kmeans = KMeans(n_clusters=3, random_state=random_state)\n",
"labels = kmeans.fit_predict(data_scaled)\n",
"centers = kmeans.cluster_centers_\n",
"\n",
"# Отображение центроидов\n",
"centers = scaler.inverse_transform(centers) # Обратная стандартизация\n",
"print(\"Центры кластеров:\\n\", centers)\n",
"\n",
"# Визуализация результатов кластеризации KMeans\n",
"plt.figure(figsize=(16, 12))\n",
"plt.subplot(2, 2, 1)\n",
"sns.scatterplot(x=df_cleaned['Open'], y=df_cleaned['High'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 0], centers[:, 1], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Open vs High')\n",
"plt.legend()\n",
"\n",
"plt.subplot(2, 2, 2)\n",
"sns.scatterplot(x=df_cleaned['Low'], y=df_cleaned['Close'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 2], centers[:, 3], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Low vs Close')\n",
"plt.legend()\n",
"\n",
"plt.subplot(2, 2, 3)\n",
"sns.scatterplot(x=df_cleaned['High'], y=df_cleaned['Adj Close'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 1], centers[:, 4], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: High vs Adj Close')\n",
"plt.legend()\n",
"\n",
"plt.subplot(2, 2, 4)\n",
"sns.scatterplot(x=df_cleaned['Volume'], y=df_cleaned['Adj Close'], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.scatter(centers[:, 3], centers[:, 4], s=300, c='red', label='Centroids')\n",
"plt.title('KMeans Clustering: Volume vs Adj Close')\n",
"plt.legend()\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**PCA для визуализации сокращенной размерности**\n"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjAAAAJHCAYAAAA+Dx+UAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yTVfvH8W+apruUQmnLLntT9lBBqIoLRRB/TgQUZTkRQR+QoQ+KCooyFERFsT6iooBbcKIPoihD2cgebSm0dKdpmt8ffRoJbYGOO0np5/16+dKek9znytW05vS6zzkmh8PhEAAAAAAAAAAAgBfx8XQAAAAAAAAAAAAAZ6KAAQAAAAAAAAAAvA4FDAAAAAAAAAAA4HUoYAAAAAAAAAAAAK9DAQMAAAAAAAAAAHgdChgAAAAAAAAAAMDrUMAAAAAAAAAAAABehwIGAAAAAAAAAADwOhQwAOAC4HA4PB0CSlCVvzdV+bUDAADAFZ8Nqwa+zwAqGgUM4AI2ZMgQtWjRwuWftm3bqk+fPpo+fbpOnTpV5Dn79u3TtGnTdPnll6t9+/bq06ePxo0bpx07dpQ4zosvvqgWLVroqaeeMvLllGju3Llq0aKFR8YuzkcffaQWLVro8OHDhj8vNzdXTz/9tD755JPShlkqt9xyi1q0aKGvvvrK0HG87XtZHmlpaZowYYI2bNjgbBsyZIiGDBnithjO9+c5Li5Ojz32WIWOvXv3bt16660Vcq3Dhw+rRYsW+uijjyrkegAAwHswZ/GMC2nO0qJFC82dO7dI+65du9SzZ09deuml2r9/v/OxLVq00AsvvFDstfLz89WrV69K+9kzMTFRzz33nK666irFxsbqkksu0ahRo1zmJJIx85KEhATde++9OnLkSIVcr6TvK4CqhwIGcIFr3bq1li1b5vznzTff1LBhw7R8+XKNHDnS5e6Ir7/+WgMHDtTWrVs1evRovfbaa3r44Ye1f/9+/d///Z9+/vnnItfPz8/XihUr1Lx5c61cuVLZ2dnufHlVXlJSkt566y3l5eUZNsbevXu1ceNGNW/eXO+9955h41xotm/frpUrVyo/P9/ZNnXqVE2dOtUt45fl57kiffnll9q4cWOFXCsyMlLLli1Tnz59KuR6AADAuzBnubC5Y85ypt27d2vYsGEKDAzUO++8o5iYGGefj4+Pvvzyy2Kf99tvvykpKclNUVas33//XQMGDNB3332nO++8U6+++qomTZqknJwcDRkyRCtWrDB0/P/+97/64YcfKux6y5Yt00033VRh1wNQefl6OgAAxgoJCVGHDh1c2rp27arMzEy9/PLL2rx5szp06KCDBw9q4sSJ6tWrl+bMmSOz2ex8fL9+/XTrrbdq4sSJ+vbbb+Xn5+fs++mnn5SQkKAXXnhBd9xxhz799FM+ZFxgPvroI9WtW1cjR47U+PHjdeDAATVs2NDTYVVKTZs2dcs4Zf159lZ+fn5Ffo8BAIALB3MWVKS///5bQ4cOVXBwsN566y3VqVPHpb9Tp07asGGDtm3bptatW7v0ffbZZ2rVqpW2b9/uzpDLLTU1VQ899JBiYmL05ptvKjAw0Nl35ZVX6t5779WUKVN0ySWXKCIiwoORnj8+/wMoxAoMoIpq27atJOno0aOSpKVLlyo3N1eTJ092mQhIUmBgoCZOnKgbb7yxyBLu5cuXq3nz5urcubO6d++uZcuWnXPsuLg4Pf300xo6dKjat2+vSZMmSSr40DVlyhRddNFFateunf7v//5P69atc3mu1WrVM888o4svvlgdO3bU448/LqvV6vKY4pbDrl+/Xi1atND69eudbXv37tV9992nbt26qWvXrho5cqT+/vtvl7Gee+45XXrppWrbtq2uu+46ff755y7Xzc/P14IFC9SnTx/FxsZqzJgxxS5zP9P5Pm/NmjW67bbb1LFjR7Vt21ZXXXWV4uPjJRVsq3PZZZdJkh5//HHFxcU5n/fBBx9o0KBB6tChg9q3b68BAwboiy++cLl2ixYtzrltkN1u14oVK9S3b19dfvnlCgoKKvZ7bLPZNGvWLPXu3Vvt27fX3XffrRUrVhRZXv7xxx/rmmuuUbt27XT99ddr3bp1at269VmXZ3/++ecaNGiQOnbsqIsvvlhTpkxxydXcuXN11VVXafXq1erfv7/atWunAQMGaOPGjdq0aZNuuukmtW/fXv379y/yftq1a5dGjhypTp06qVOnTho7dqwOHTrk7C9837z33nvq27evOnXq5Lyr72w5Xr9+ve68805J0p133ul8P57+3rzrrrs0aNCgIq93zJgxuv76651fb9iwQXfccYdiY2PVrVs3TZw4USdPniwxX1LZf55Pf82n/6ycGbsk/fXXXxo6dKg6d+6sjh07atiwYdq0aZOkgu/JvHnzJLku/c7Pz9eiRYt0xRVXqG3btrryyiu1dOnSIuOMHz9eDzzwgDp06KDhw4cX2ULqo48+UuvWrbV582bdfPPNateunfr27avXX3/d5VpJSUl6+OGHnT/jU6ZM0YsvvujyswIAALwXcxbmLOczZznd33//rTvvvFOhoaF65513ihQvpILiWERERJFVGHl5efr666917bXXFnnO+XzfT548qenTp6tv375q27atunXrprFjx7rMh4YMGaJJkyZp0aJF6tOnj9q1a6dbbrlFW7ZscT4mJydH06ZNU+/evZ35PPNz7plWrFihpKQk/etf/3IpXkgFK07Gjx+v22+/XRkZGUWeW9J2rY899pjL9+vgwYMaNWqUunfvrtjYWN18883OFRcfffSRHn/8cUnSZZdd5vI9++CDD3Tttdc6t4abO3eu7Ha7yzhDhw7V1KlT1alTJ11zzTWy2+0u84jCn41169bprrvuUmxsrC6++GI9//zzLtfKyMjQlClT1LNnT3Xs2FEPP/ywlixZ4lXbtwEoPQoYQBW1b98+SVL9+vUlSWvXrlXr1q0VFRVV7ON79uyphx9+WLVq1XK2paam6ttvv9UNN9wgSRo4cKD+/PNPbd269Zzjx8fHq127dlqwYIEGDx4sq9WqoUOH6ptvvtHDDz+sefPmKTo6WiNGjHD5YPjoo4/q/fff18iRIzVnzhydOnVKS5YsKfXrT0xM1M0336z9+/dr2rRpev7555WcnKyhQ4cqNTVVDodDY8eO1Xvvvafhw4frlVdecX4AOn3p7fPPP6/58+dr8ODBmjdvnqpXr67Zs2efc/zzed7333+vsWPHqk2bNlqwYIHmzp2r+vXr68knn9TmzZsVGRnp/CPx6NGjnf8dHx+vKVOm6PLLL9fChQs1a9Ys+fn5afz48UpISHBef9myZRozZsxZ4/zxxx91/Phx3XDDDQoICNDVV1+tjz/+WLm5uS6PmzJlit566y3dcccdmj9/viIiIvTEE0+4PGbFihV67LHH1KlTJy1YsEBXXnmlxowZ4/KB80wLFizQuHHj1KFDB7388ssaO3asvvrqKw0ZMkQ5OTnOxyUkJGjmzJkaNWqUXnrpJaWlpemBBx7QuHHjdNNNN2n+/PlyOBx6+OGHnc/bt2+fbrnlFp04cULPPvusZsyYoUOHDunWW2/ViRMnXOKYN2+eJk6cqClTpqhjx47nzHGbNm00ZcoUZ26K2zbq+uuv19atW3XgwAFnW1pamn788UcNGDBAUsES9mHDhikgIEBz5szRv/71L/3666+68847XV7/mcry81waGRkZGjFihMLDwzV37ly9+OKLys7O1t1336309HTddNNNGjx4sCTXpd/Tpk3Tyy+/rOuvv16vvvqqrrrqKj399NOaP3++y/W/+OILBQcH65VXXtGIESOKjSE/P18PPfSQrrnmGi1atEidOnXSc889p7Vr10oq2Gt56NCh+uOPP/Svf/1LzzzzjHbs2KE33nijTK8ZAAC4H3MW5iznM2cptHfvXg0dOlQhISF65513SnyfmM1mXXnllUUKGOvWrZPVai1ys8v5fN8dDodGjhypn3/+WePHj9frr7+u++67T+vWrSsyF/jqq6/0zTffaPLkyXrhhReUnJys+++/3zkvevrpp/X
"text/plain": [
"<Figure size 1600x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pca = PCA(n_components=2)\n",
"reduced_data = pca.fit_transform(data_scaled)\n",
"\n",
"# Визуализация сокращенных данных\n",
"plt.figure(figsize=(16, 6))\n",
"plt.subplot(1, 2, 1)\n",
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=result, palette='Set1', alpha=0.6)\n",
"plt.title('PCA reduced data: Agglomerative Clustering')\n",
"\n",
"plt.subplot(1, 2, 2)\n",
"sns.scatterplot(x=reduced_data[:, 0], y=reduced_data[:, 1], hue=labels, palette='Set1', alpha=0.6)\n",
"plt.title('PCA reduced data: KMeans Clustering')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**Анализ инерции для метода локтя (метод оценки суммы квадратов расстояний)**\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2cAAAImCAYAAADXOPIYAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABy+UlEQVR4nO3dd3TV9f3H8ddNcrMHZJABBkLCTsKQISgyVFxoBWpxgKOiqFh+ihZFqa0DtQoiiKgUZ5WCClItVeseIAgOEkgYIYSZvSE7ub8/wr1wSQghJPd7kzwf5+SEfNd938unJS8/y2SxWCwCAAAAABjKxegCAAAAAACEMwAAAABwCoQzAAAAAHAChDMAAAAAcAKEMwAAAABwAoQzAAAAAHAChDMAAAAAcAKEMwAAAABwAoQzAAAAAHAChDMAAAAAcAKEMwBwgKlTp6pXr1667rrrTnnNfffdp169eumhhx5yYGUAmurgwYPq1auX1qxZY3QpANoIwhkAOIiLi4t+++03ZWRk1DlXUlKir7/+2oCqAACAsyCcAYCD9O3bVx4eHvr000/rnPv666/l5eWl0NBQAyoDAADOgHAGAA7i7e2tUaNG1RvO/vvf/+rSSy+Vm5tbnXNffPGFJk6cqLi4OJ1//vl68sknVVJSIkkaO3asevXqVe/XwYMHJUnr16/XDTfcoHPPPVfDhg3T/fffr/T0dLvXuP/+++t9xumGa1mHa9b3daLExETddtttGjZsmAYNGqQ777xTu3fvtp3ftGmTevXqpU2bNkmSdu3apYsvvljXXXedXnzxxVO+xosvvihJev/993X55ZcrNjbW7vzphoi+99579T73xPusQ9dOd11Ta2jsZ9PQ65/qvPXv4aGHHtLYsWPtXnflypV2n+GJr/Pzzz/bXfvOO++oV69eds8oKyvTggULNG7cOMXGxmrQoEG69dZblZycbHfvqeqaOnWq3TXWOupzcvuwmjp1qt1zysvL9dJLL+myyy5TXFycxo0bp2XLlqmmpsbunpNr2bRpU6PuPR2LxaI5c+YoPj5eP/zwQ6PvAwCrur8FAABazBVXXKF7771XGRkZCgsLkyQdOXJE3333nd544w199913dtd//PHHeuCBB3TVVVfp3nvv1aFDh7Rw4UKlpKTojTfe0JIlS1RRUaHs7Gzdc889uuuuuzR69GhJUqdOnbR27Vo9+OCDGj9+vKZPn678/HwtXrxYkydP1ocffqigoCBJtb/UTp48WRMnTpQk2/Mao2/fvvrrX/9q+/n999/XBx98YPt548aNmjZtmoYNG6annnpK5eXlevXVV3XdddfpvffeU3R0dJ1nPvfcc4qNjdVdd92lgIAAjRw5UpL02GOPSZLt9cLCwrR582bNnTtXv//97zV37lz5+PhIUqPqLysrU1xcnObOnWs7dqr7TvxsT76uqTWcyWfz6KOPql+/fvW+/qpVqyRJ27dv1+OPP17n2pMVFhbqhRdeqPecj4+PvvrqK5177rm2Y//973/l4mL/33Nnz56tLVu2aNasWYqMjNS+ffu0aNEi3X///Vq3bp1MJpPt2t///ve69tprbT9b/x6bk8Vi0Z133qnffvtN99xzj3r37q1NmzbphRde0IEDB/TEE0/Yrj25zUZHRzf63oY8+eST+s9//qOXXnpJF1xwQbO/RwBtH+EMABxo9OjR8vLy0qeffqpbbrlFkvT5558rKCjI7pdhqfaXzfnz52vkyJGaP3++7Xi3bt10yy236Ntvv7WFBWsvWWRkpAYMGCBJqqmp0fz583XBBRdowYIFtvsHDRqkK664Qq+99ppmz54tSSotLVW3bt1s91qf1xi+vr62+yTp+++/tzu/YMECde3aVcuWLZOrq6sk6YILLtAll1yixYsXa9GiRXbX79u3Tz/88IM++ugj9ejRQ5JsQdbX11eS7F5v3bp1kqSHH37YFookyd3d/bS1l5aWKjg42O55p7rvxM/25OsSEhKaVMOZfDYxMTGnfH3r8fLy8nqvPdnixYsVERGh/Pz8OucuvPBCffnll/rzn/8sScrIyNCvv/6qwYMH69ChQ5KkiooKHT16VHPnztUVV1whSRo6dKiOHDmiZ555Rjk5OQoJCbE9MywszK4e699jc/ruu++0YcMGPf/887ryyislSeeff748PT21aNEi3XTTTbb2dHKb/fbbbxt976ksWLBAq1at0pIlS3ThhRc2+/sD0D4wrBEAHMjT01Njx461G9q4bt06XX755XY9DZKUmpqqjIwMjR07VlVVVbavIUOGyNfXV+vXr2/wtfbu3avs7GyNHz/e7nhkZKQGDhyon376yXYsPT1dfn5+zfAO7ZWUlCgxMVGXX365LXxIkr+/v8aMGWNXg/X6hQsXatiwYaf9ZdgqPj5ekvT6668rKytLFRUVqqqqatS9zfW+m1LDmX42zWXXrl1atWqV/vKXv9R7fuzYsUpLS1Nqaqok6dNPP1X//v3VuXNn2zXu7u567bXXdMUVVygzM1MbN27UypUrbYvaVFRUnHFdNTU1qqqqksViOe011q8Tr/3pp5/k5uamyy67zO6eq6++2nb+VM7mXkl69913tWzZMl155ZV2vasAcKboOQMAB7v88st1zz33KCMjQx4eHvrxxx9177331rmuoKBAUu0QsPqGgWVlZTX4Otb7g4OD65wLDg5WUlKSpNoeusOHD6tLly5n9kYaobi4WBaL5ZQ1FBcX2x2788475e/vbzcs8nSGDBmiuXPnatmyZVqyZMkZ1Xfo0KEGh/+1ZA1n+tk0lyeffFJXXnmlBg4cWO/50NBQxcbG6ssvv1T37t313//+V+PHj7e1F6vvv/9eTz31lFJTU+Xj46PevXvL29tbkhoMWKeydOlSLV26VK6urgoODtYFF1yg//u//7NbJMfa23yioUOHSqodqtmxY0e7oCvJ1oPX0Od5NvdK0o4dO3TBBRfoP//5j26++Wb17du3wesB4FQIZwDgYBdeeKF8fHz06aefytvbW126dFFsbGyd6/z9/SXVzu2x/gJ6ooCAgAZfp0OHDpKknJycOueys7PVsWNHSVJycrLKysrqLOLRHPz8/GQymU5Zg7VGq9mzZ+vTTz/VzJkz9e677zZ6+Nsf/vAH/fDDD6qqqtKjjz6qLl266K677mrwnpqaGm3dulWTJk1q1Guc3LN5tjWc6WfTHD755BNt27bNbphrfS666CJ9+eWXuvzyy7Vt2zYtWbLELpzt379fM2bM0MUXX6xXX31V55xzjkwmk9599906w1ql0392Uu3n94c//EE1NTU6fPiwFi5cqNtvv10fffSR7ZrHHnvMLkyfOG8sICBA+fn5qq6utgtZ1v+IYW3v9TmbeyXp//7v/3TTTTfpyiuv1Ny5c/X+++/XCXoA0BgMawQAB3N3d9fFF1+szz77TJ988oltjsvJunfvrqCgIB08eFBxcXG2r9DQUC1YsKBOT8bJoqKiFBISov/85z92xw8cOKDffvtNgwYNkiR988036tOnjwIDA8/4vdTU1DT4S6i3t7diY2P1ySefqLq62na8uLhY33zzTZ15drGxsVqyZIkOHTqk5557rtF1LFq0SN98842eeeYZXX755YqLizvtfK9ffvlFJSUlGjZsWIPXWXuBTl4Q42xrONPP5mxVVFTo2Wef1YwZM+zmg9Xn4osv1tatW/XOO+/o3HPPVadOnezOb9u2TeXl5brjjjsUGRlpC1/WYGb9zKwrHZ7us5NqF7CJi4tT//79dfnll+vGG2/Uzp07VVhYaLsmKirK7n8LJ87vGzp0qKqqquqshmoNdw19nmdzr1Tb0+np6alHH31U27dv1xtvvHHa9wsA9aHnDAAMcMUVV2j69OlycXGxWynwRK6urrrvvvv06KOPytXVVWPGjFFRUZGWLl2qzMzM0w7Hc3Fx0axZszRnzhzdf//9uvrqq5Wfn68lS5YoICBAt956q7Zv3653331XV155pX777TfbvdnZ2ZJqe0jy8vLqBLe8vDylpKRo3759tpB3Kvfff79uu+023XHHHbrhhhtUWVmpZcuWqaK
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"inertias = []\n",
"clusters_range = range(1, 11)\n",
"for i in clusters_range:\n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" kmeans.fit(data_scaled)\n",
" inertias.append(kmeans.inertia_)\n",
"\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range, inertias, marker='o')\n",
"plt.title('Метод локтя для оптимального k')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Инерция')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"**Расчет коэффициентов силуэта**\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1oAAAImCAYAAABKNfuQAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACef0lEQVR4nOzdd3hUZcLG4d9k0kmnBUjoEkoKNUjv9gIooFJcFbAgWEFdXbviCogCgqjggoqIFAWxIEWRHmoChNAhEEiAQEJ6mfn+iOQzhpKEmZyU576uXJCZc8488+6s5Ml5z3tMVqvVioiIiIiIiNiMg9EBREREREREKhoVLRERERERERtT0RIREREREbExFS0REREREREbU9ESERERERGxMRUtERERERERG1PREhERERERsTEVLRERERERERtT0RIREREREbExFS0RqTSGDh3K0KFDCzy2detW7rrrLpo3b853331n19d/8cUX6dmzZ7H369mzJy+++KIdEomIvQQFBTF16lSjY4iIgRyNDiAiYpRz587x2GOP0aJFC2bNmkVQUJDRkURERKSCUNESkUrriy++ICMjg/fff5+aNWsaHUdEREQqEE0dFJFK6fz588ybN48777yzUMk6evQoY8aMoVOnTrRs2ZKhQ4eybdu2Atv8/vvv9O/fn7CwMDp27Mhrr73GxYsXC2zz9ddf06NHD8LCwnjmmWdISUkBYMaMGXTo0IG2bdvy2muvkZWVlb9PVlYWb7zxBu3ataN9+/b5U49SU1MZO3YsLVu2pFu3bnz99df5+5w4cYKgoCAWL16c/1hmZia9evUqcJbuclMnN2/eTFBQEJs3b77s95B35q9t27aFpj1+99133H777QQHB9O9e3emTp1Kbm5u/vOXmyr596yXXutyX5dyXmva5OXe0z8lJCTwwgsv0KFDB1q1asWQIUPYsWNH/vP/nOJltVq57777CAoK4sSJEwW2u1rWMWPG0LVrVywWS4HXf/nll7n55psBOH36NM8++yw33ngjYWFhDB06lJ07dwIwderUK77GpXz79u3jySef5MYbb6RFixZ06dKFt99+m4yMjKuOwfr166+avajvEWDVqlX069ePsLCwqx7r7xYvXkxQUBC7du2iX79+hIaGcuedd/LLL78U2O7EiROMGzeOzp0706JFCzp06MC4ceM4f/58/jbR0dEMHjyYVq1a0bt3b+bPn5//3OU+v1D4c3KtaX1//9zNnTu30P+/Nm3aRNOmTfn444+veIx/mjJlCs2aNWPJkiVF3kdEyjed0RKRSsVqtXLq1CnefvttcnJyePTRRws8f/DgQQYOHEj9+vV55ZVXcHJyYu7cuTz44IPMnj2b8PBwIiIiePzxx7nrrrt47rnnOHDgAB9++CH79+/nq6++wmw2s3LlSt58802GDh1K165d+fbbb1m5ciUAP/30E2+//TYnT55k4sSJuLq68tJLLwEwYcIEFi1axLhx4/D392fy5MmcPHmSkydPcssttzBlyhTWrl3Lm2++ib+/P7169brs+/z8888LlITrMWnSJC5evIiXl1f+YzNnzmTy5MkMGTKEl156iejoaKZOncqpU6d49913i3TcFi1a8O233wJ5pW3hwoX533t4eNgke2pqKvfffz+5ubmMHTuWmjVrMnv2bB5++GGWLFlC/fr1C+3zww8/FChif3fvvfcyYMCA/O/feOONAs/9+uuvbN68mQ4dOgCQkZHBL7/8wogRI8jKymL48OFkZ2fz2muv4eTkxPTp0xk6dCgLFixgwIABdOnSpcBxX3vtNQD8/f1JSEhg8ODBtGzZkvfeew9nZ2fWrl3LF198QY0aNRg5cuQVxyEjIwN/f38++uijy2Yv6ns8fvw4Tz31FF26dOGZZ57J/0xc6Vj/9OijjzJkyBCeeeYZFi5cyNNPP83MmTPp1q0b6enpDBs2DF9fX1577TU8PT3ZsWMH06ZNw9XVlTfffJP09HRGjBhBnTp1mDp1Ktu3b+e1116jdu3adO3atUgZimvo0KGsWLGC//73v3Tv3h1nZ2f+/e9/07JlSx577LEiHWPWrFlMnz6dt99+m379+tklp4iUPSpaIlKpRERE0L17d5ycnPjss88K/aA9bdo0nJ2dmTt3bv4P+927d+eOO+7g/fffZ+HChXz//ffUr1+f8ePH4+DgQKdOnXBzc+PVV1/ljz/+oGfPnnzyySe0b9+eV155BYD27dvTqVMnLl68yPjx4wkODgYgOTmZzz77jCeeeAKLxcK3337LyJEjGTJkCADVqlVj0KBB+Pj4MHHiRJycnOjatSv79+9n5syZly1ap06d4rPPPqNFixbs2bPnusYrKiqKH374gWbNmpGcnAzAxYsXmT59OoMGDcp/f507d8bHx4dXXnmFhx56iBtuuOGax/bw8KBly5YA/PnnnwD539vKkiVLOHnyJEuWLKFZs2YAtG7dmr59+xIREVHof//U1FQmTpx4xbHz9/cvkPHvhbBz5874+/vz/fff5xet3377jbS0NPr27cvOnTs5fPgwX3/9Na1atcrP0qdPH6ZPn87UqVPx9/cvcNy/v9a6deto1qwZH330Uf7zHTt2ZP369WzevPmqRSs9PR0vL68rZi/qe9y7dy/Z2dk888wzNGnS5JrH+qehQ4cyatQoALp06UK/fv34+OOP6datG0ePHsXf35///ve/BAYGAnDjjTeya9cutmzZAsDJkycJCQnh3//+N4GBgXTu3Jl58+bx559/2q1omUwmxo8fz1133cWECRMwm81cuHCBOXPmYDabr7n/N998w4QJE3jzzTe599577ZJRRMomTR0UkUqlefPmvPfee3h7e/PSSy8VOuuzZcsWevToUeAHR0dHR26//XZ2795Namoq77zzDt9//z0ODg7k5OSQk5PDzTffjIODAxEREeTk5LB37146d+6cfwwXFxfCwsJwc3PLL1mQ98N5RkYGMTExxMTEkJmZmX9WA/J+0HZxcSE0NBQnJ6cC++3Zs6fAVL1L/vvf/9K2bVt69OhxXWNltVp5++23uffee2natGn+4zt27CAjI4OePXvmv/+cnJz8aYLr168vcJy/b/PPaXVFzVHSfbdt20ZAQEB+yQJwc3Pj119/LXDW5pLp06fj6+vL/fffX+zXcnBwoF+/fqxYsYL09HQgr+h17NgRf39/wsPD2blzJy1btiQ3N5ecnBy8vLzo1KkTERER1zx+586d+eqrr3BxceHgwYOsWrWKGTNmkJiYWGD66eWcOnUKT0/PYr+nf2rRogWOjo589dVXnDx5kqysLHJycrBarUXa/+9nc0wmE3369CEyMpKMjAyaNWvGvHnzqFOnDkePHuWPP/5g1qxZHD58OP/9NW7cmBkzZhAYGEhWVhZr164lKSmJRo0aFXgdi8VS4HN3uXyXtilK9sDAQJ5//nmWLFnCd999xyuvvJJfBq9mzZo1vPHGG7Rt25aBAwdec3sRqVh0RktEKhUPDw/69etHw4YNuf/++3n66af59ttv838znZSURLVq1QrtV61aNaxWKykpKVSpUgUXFxcg7wfPv0tOTubcuXPk5ubi6+tb4DkfHx+8vb0LPHZp6tXZs2fzS9M/9/P29sbHx6fQfjk5OQWuXYG8orhy5UqWLl3K8uXLizIkV/T9999z9OhRPvnkE/773//mP37hwgWAK55BSUhIyP/7yZMnC41RSXJ8//33mEwmqlatSps2bXjqqacK/XB9ORcuXKBq1apFep2jR48yZ84cPv/8c+Li4kqU9Z577uGTTz5hxYoV3HjjjWzcuJGJEyfmP+/s7AzkXbf192t1inJmxGKx8MEHH/D111+TlpZGrVq1CA0Nzf8sXs3JkyepU6dOCd5RQYGBgUyYMIEPPvggf5rnJeHh4dfcv0aNGgW+r1q1KlarleTkZFxdXfniiy/45JNPuHDhAtWqVSM4OBg3N7dC1z8mJyfTrl07AKpXr86tt95a4Pl//etfhV77n/mmT5/O9OnTMZvNVKtWjc6dO/PUU09dcWGc2267jffeew+ATp06XfO9AuzZs4f
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"silhouette_scores = []\n",
"for i in clusters_range[1:]: \n",
" kmeans = KMeans(n_clusters=i, random_state=random_state)\n",
" labels = kmeans.fit_predict(data_scaled)\n",
" score = silhouette_score(data_scaled, labels)\n",
" silhouette_scores.append(score)\n",
"\n",
"# Построение диаграммы значений силуэта\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(clusters_range[1:], silhouette_scores, marker='o')\n",
"plt.title('Коэффициенты силуэта для разных k')\n",
"plt.xlabel('Количество кластеров')\n",
"plt.ylabel('Коэффициент силуэта')\n",
"plt.grid(True)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Средний коэффициент силуэта: 0.547\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA00AAAJzCAYAAADTBPhFAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hUVf7H8fedlkoIJPRelN6kWlDAsq4dyyoKisoKCvby01XXiusq6ipF2rqI6IoFC/ZesNEEVBCX3nsJaVPP74+bDISEIROSmZB8Xs/DY+beO/d+5zCR+cw59xzLGGMQERERERGREjniXYCIiIiIiEhlptAkIiIiIiISgUKTiIiIiIhIBApNIiIiIiIiESg0iYiIiIiIRKDQJCIiIiIiEoFCk4iIiIiISAQKTSIiIiIiIhEoNImIiIiIiESg0CQiZTZkyBDatGlT5E+PHj248sormTt3brzLE5Eqrk2bNowdO7bY9j/++IPjjz+eU045hTVr1hzy+WPHjqVNmzZ06tSJ7OzsEo/573//S5s2bRgwYEB5lS0iRyGFJhE5Iu3bt2fmzJnMnDmTV155hccffxy32821117L//73v3iXJyLVzP/+9z+GDh1KUlISM2bMoHnz5od9TiAQ4Isvvihx3wcffFDOFYrI0UihSUSOSGpqKl27dqVr1650796d0047jbFjx+JwOJg1a1a8yxORamTlypVcddVVpKSkMGPGDJo0aVKq5x133HF8+OGHxbZv3bqV+fPn065du/IuVUSOMgpNIlLukpKSSEhIwLKs8LYhQ4YwZMiQIsc99dRTtGnTpki4mjFjBqeeeirdunVj8ODB/PHHHwC8/PLLtGnThtWrVxc5xzvvvEO7du3YvHkzAJ999hmXX3453bp1o2PHjpx55pm8/PLLRZ5z9913FxtWWPhnw4YN4WMOHo7z6quvFhsO9MEHH3DWWWfRtWtXLrzwQubPn1/kOYer56effqJNmzb89NNPRZ53cHuVpv18Ph///Oc/OeWUU2jXrl2R1xUpwB587tGjR9OpUye++eYbYP8QppL+HFh3adp+27Zt/N///R/HH398+O/4559/BmDAgAGH/XuZP38+gwcPpkuXLvTq1Yv/+7//Y9euXeHzz5o1izZt2rB48WIGDhxI586dOffcc/noo4+K1LFv3z7+8Y9/cNppp9GpUyfOOecc3njjjSLHHFhP27Zt6dmzJzfeeCO7d+8+ZFsCrFq1ilGjRtGrVy969uzJ8OHDWbly5SGPj9S+B/69rVmzhptuuokTTzyRrl27MmTIEBYsWBDev2HDhvDz3n333SLX+PLLL8P7DvTBBx9w4YUX0q1bN0488UT+/ve/s3fv3mK1Haik9+KAAQO4++67D/n4YIW1Hvj6Fi5cyKWXXkqnTp048cQTeeSRR8jPzz/kOQ62cuVKrrzySmrUqMGMGTNo2LBhqZ971llnMWfOnGJD9D766CNatGhB27Ztiz3ns88+48ILLwzX++ijj5Kbm1vsmNL8/v/www9cc801dOnShRNPPJEnn3ySYDAYPu67777jL3/5C926daNnz55cf/31Ed9TIlL+FJpE5IgYYwgEAgQCAfx+P9u3b+epp57C5/Nx0UUXHfJ569atY9q0aUW2ffLJJzzyyCOcffbZjB8/nmAwyIgRI/D5fJx77rkkJCTwzjvvFHnO22+/zfHHH0+DBg346quvGDlyJB06dGDChAmMHTuWJk2a8PDDD7N48eIiz6tTp054WOHMmTO5/vrrI77OvXv38q9//avItiVLlnDHHXfQtWtXnn/+eRo0aMCIESPYsWMHQFT1RKuk9psyZQovvvgiV111FS+++CIzZ85k3LhxUZ13yZIl/Pe//+Vf//oX3bp1K7LvwPb6+9//XmRfaV5rTk4OgwYN4qeffuLOO+9k3LhxJCQkcM0117BmzRrGjRtXpObrr78+fL26desyb948hg4dSmJiIv/617/429/+xty5c7nyyiuLfbgePnw4p556KuPGjaNFixbccsstfP311wDk5+dz+eWXM3v2bIYNG8aECRPo3r079957LxMnTixynlNOOYWZM2fy0ksvcfvtt/Pdd98xevToQ7bf1q1bufTSS1mzZg0PPvggTz75JDt27OCqq65iz549Edv+wPY9+O9txYoVXHjhhWzYsIH77ruPMWPGYFkWV111VbH7B1NSUooNNfvggw9wOIr+kz9hwgRuu+02unbtynPPPcfIkSP5+OOPGTJkSFRhpTxs3ryZa6+9llq1ajFu3Dhuuukm3nnnHe66665SPX/VqlVcddVVpKamMmPGDOrVqxfV9f/0pz8RDAZLbLezzz672PGzZ89m5MiRtGzZkvHjxzNq1CjeffddbrjhBowxQHS//3fccQfdu3dn4sSJnHPOOUydOpXXX38dgPXr13PDDTfQsWNHnn/+eUaPHs3q1au57rrrCIVCUb1OESk7V7wLEJGj27x58+jQoUOx7bfddhutWrU65PMee+wxjjnmGH777bfwtl27dnH55Zdz2223AXbPSeG39O3ateP000/n3Xff5eabb8ayLLZs2cKPP/7Ik08+CdgfLAcOHMi9994bPme3bt3o3bs3P/30E126dAlv93g8dO3aNfx41apVEV/nc889R8OGDYv0MmzZsoU//elPPProozgcDjIzMznnnHNYtGgRp512WlT1RKuk9luyZAlt27blmmuuCW8r7KEprcKevlNPPbXYvgPby+v1FtlXmtf61ltvsXHjRt56663wcKfjjjuOCy64gHnz5nHJJZcUqblp06ZFrvnUU0/RokULJk2ahNPpBKBLly6cffbZvPnmm1xxxRXhY4cMGcLIkSMB6Nu3LwMHDmT8+PGccsopzJo1iz/++INXX301HAz79u1LIBBgwoQJXHbZZaSnpwNQu3btcA09e/bk+++/L9LmB5s2bRo+n4///Oc/1KlTB4C2bdsyaNAgFi9ezCmnnHLI5x74Wg/+exs3bhwej4fp06eTmpoKQL9+/TjnnHN44oknivSSnXzyyXz77bf4fD48Hg9er5fPP/+cnj17hnsG9+7dy/PPP89f/vKXIgH42GOP5YorrijWnhVtypQp1KpVi/Hjx4f/bh0OB/fddx/Lly8v1tt1oDVr1nDllVeyY8cO/H5/mYJEZmYmPXv25MMPP+S8884DYOPGjSxevJgnnniC559/PnysMYYxY8bQt29fxowZE97evHlzhg4dytdff02/fv2i+v2/5JJLwu/X448/ns8++4yvvvqKyy67jCVLlpCfn8/w4cPDYbB+/fp8/vnn5Obmht8PIlKxFJpE5Ih06NCBhx56CLA/TGRlZfHNN9/wzDPPkJuby6233lrsOd988w3ff/89U6ZM4corrwxvv+yyywAIhULk5ubyySefkJiYSKNGjQC4+OKLee+995g/fz49e/bk7bffJiUlhdNPPx2AYcOGAXaPxurVq1m3bh2//PILYAewsvrjjz/CvQ2FNQKcccYZnHHGGRhjyM3N5cMPP8ThcNCiRYsKredQ7depUycmT57Mxx9/TJ8+fUhJSSn1B0hjDD///DMffPBBsR6s0ijNa12wYAGNGzcucn9IUlISH3/88WHPn5eXx+LFi7n22mvDvZsATZo0oVWrVnz33XdFPuQPHDgw/LNlWZx++umMHTuW/Px85s6dS6NGjYr1pJ133nm88cYbRcJN4bVCoRC///47CxYs4IQTTjhknQsWLKBr167hwAT2B9wvv/zysK8xkrlz59K/f/8iH5BdLle4VzYnJye8vU+fPnzzzTf89NNP9O3bl2+++YbU1FR69OgRDk2LFi3C5/NxzjnnFLlOjx49aNSoEXPnzj3i0FTYdg6Ho1gvV6FQKEQgEGD+/PmcdNJJ4cAEdvgDu00jhab33nuPjh078swzz3DNNddw5513Mm3atCLXDAaD4R4gsN8TB14L7CF6jz76KNnZ2aSmpvL+++/ToUMHmjVrVuS4VatWsWXLFoYPHx5+H4IdqlNTU/nuu+/o169fVL//B78X69evHx7q16VLFxISErj
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.cluster import KMeans\n",
"\n",
"# ========================\n",
"# Применение K-Means\n",
"# ========================\n",
"kmeans = KMeans(n_clusters=3, random_state=42) \n",
"df_clusters = kmeans.fit_predict(df_scaled)\n",
"\n",
"# ========================\n",
"# Оценка качества кластеризации\n",
"# ========================\n",
"silhouette_avg = silhouette_score(df_scaled, df_clusters)\n",
"print(f'Средний коэффициент силуэта: {silhouette_avg:.3f}')\n",
"\n",
"# ========================\n",
"# Визуализация кластеров\n",
"# ========================\n",
"from sklearn.decomposition import PCA\n",
"\n",
"pca = PCA(n_components=2)\n",
"df_pca = pca.fit_transform(df_scaled)\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"sns.scatterplot(x=df_pca[:, 0], y=df_pca[:, 1], hue=df_clusters, palette='viridis', alpha=0.7)\n",
"plt.title('Визуализация кластеров с помощью K-Means')\n",
"plt.xlabel('Первая компонентa PCA')\n",
"plt.ylabel('Вторая компонентa PCA')\n",
"plt.legend(title='Кластер', loc='upper right')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Средний коэффициент силуэта, равный 0.547, указывает на умеренно хорошую кластеризацию."
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}