390 lines
599 KiB
Plaintext
390 lines
599 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"metadata": {},
|
|||
|
"cell_type": "markdown",
|
|||
|
"source": [
|
|||
|
"# Бизнес-цели:\n",
|
|||
|
"\n",
|
|||
|
"Классификация: Предсказать вероятность инсульта на основе данных пациента."
|
|||
|
],
|
|||
|
"id": "2ad9ed4c50755332"
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"id": "initial_id",
|
|||
|
"metadata": {
|
|||
|
"collapsed": true,
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:26.262589Z",
|
|||
|
"start_time": "2024-12-24T19:29:26.246360Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"source": [
|
|||
|
"# Импорт необходимых библиотек\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.cluster import KMeans, AgglomerativeClustering\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"\n",
|
|||
|
"# Загрузка данных\n",
|
|||
|
"df = pd.read_csv(\"healthcare-dataset-stroke-data.csv\")\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"print(df)\n",
|
|||
|
"df = df.drop(['gender', 'ever_married', 'heart_disease'], axis=1)\n",
|
|||
|
"\n",
|
|||
|
"# Обработка пропущенных значений\n",
|
|||
|
"df[\"bmi\"] = df[\"bmi\"].fillna(df[\"bmi\"].median())\n",
|
|||
|
"\n",
|
|||
|
"# Удаление столбца 'id' и целевой переменной 'stroke'\n",
|
|||
|
"X = df.drop(['id', 'stroke'], axis=1)\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование категориальных переменных\n",
|
|||
|
"categorical_features = ['work_type', 'Residence_type', 'smoking_status']\n",
|
|||
|
"numerical_features = ['age', 'hypertension', 'avg_glucose_level', 'bmi']"
|
|||
|
],
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['id', 'gender', 'age', 'hypertension', 'heart_disease', 'ever_married',\n",
|
|||
|
" 'work_type', 'Residence_type', 'avg_glucose_level', 'bmi',\n",
|
|||
|
" 'smoking_status', 'stroke'],\n",
|
|||
|
" dtype='object')\n",
|
|||
|
" id gender age hypertension heart_disease ever_married \\\n",
|
|||
|
"0 9046 Male 67.0 0 1 Yes \n",
|
|||
|
"1 51676 Female 61.0 0 0 Yes \n",
|
|||
|
"2 31112 Male 80.0 0 1 Yes \n",
|
|||
|
"3 60182 Female 49.0 0 0 Yes \n",
|
|||
|
"4 1665 Female 79.0 1 0 Yes \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"5105 18234 Female 80.0 1 0 Yes \n",
|
|||
|
"5106 44873 Female 81.0 0 0 Yes \n",
|
|||
|
"5107 19723 Female 35.0 0 0 Yes \n",
|
|||
|
"5108 37544 Male 51.0 0 0 Yes \n",
|
|||
|
"5109 44679 Female 44.0 0 0 Yes \n",
|
|||
|
"\n",
|
|||
|
" work_type Residence_type avg_glucose_level bmi smoking_status \\\n",
|
|||
|
"0 Private Urban 228.69 36.6 formerly smoked \n",
|
|||
|
"1 Self-employed Rural 202.21 NaN never smoked \n",
|
|||
|
"2 Private Rural 105.92 32.5 never smoked \n",
|
|||
|
"3 Private Urban 171.23 34.4 smokes \n",
|
|||
|
"4 Self-employed Rural 174.12 24.0 never smoked \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"5105 Private Urban 83.75 NaN never smoked \n",
|
|||
|
"5106 Self-employed Urban 125.20 40.0 never smoked \n",
|
|||
|
"5107 Self-employed Rural 82.99 30.6 never smoked \n",
|
|||
|
"5108 Private Rural 166.29 25.6 formerly smoked \n",
|
|||
|
"5109 Govt_job Urban 85.28 26.2 Unknown \n",
|
|||
|
"\n",
|
|||
|
" stroke \n",
|
|||
|
"0 1 \n",
|
|||
|
"1 1 \n",
|
|||
|
"2 1 \n",
|
|||
|
"3 1 \n",
|
|||
|
"4 1 \n",
|
|||
|
"... ... \n",
|
|||
|
"5105 0 \n",
|
|||
|
"5106 0 \n",
|
|||
|
"5107 0 \n",
|
|||
|
"5108 0 \n",
|
|||
|
"5109 0 \n",
|
|||
|
"\n",
|
|||
|
"[5110 rows x 12 columns]\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 13
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:26.498330Z",
|
|||
|
"start_time": "2024-12-24T19:29:26.337254Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Создание ColumnTransformer для обработки данных\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_features),\n",
|
|||
|
" ('cat', OneHotEncoder(), categorical_features)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование данных\n",
|
|||
|
"X_scaled = preprocessor.fit_transform(X)\n",
|
|||
|
"\n",
|
|||
|
"# Понижение размерности с помощью PCA\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"X_pca = pca.fit_transform(X_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация данных после PCA\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(X_pca[:, 0], X_pca[:, 1], alpha=0.5)\n",
|
|||
|
"plt.title('Данные после PCA')\n",
|
|||
|
"plt.xlabel('PC1')\n",
|
|||
|
"plt.ylabel('PC2')\n",
|
|||
|
"plt.show()"
|
|||
|
],
|
|||
|
"id": "dd008dbe6af34ca4",
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
],
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAq0AAAIhCAYAAABkLoMIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeZydeVXg/8+z37X2VFVSnc7S6X3vtrtBFrtRdH4qIowLLsj4cqYdXFEZwWVwd1QYFmnBGXF4MQ7aKCCI6DiKdDcg0NAL0J3eklT21Harbt3tuc/2/f7+eO69qX1JqpJK5bxfL0gnufXUU99blXvu+Z7vOYbWWiOEEEIIIcQWZl7sGxBCCCGEEGI1ErQKIYQQQogtT4JWIYQQQgix5UnQKoQQQgghtjwJWoUQQgghxJYnQasQQgghhNjyJGgVQgghhBBbngStQgghhBBiy5OgVQghhBBCbHkStAohhBBCiC3Pvtg3IIQQc731rW/lb//2b5f8u9e85jX8wR/8wQW+I3EuXv/61/Poo4/O+zPHcRgYGOC+++7jTW96E93d3fP+fnR0lA996EN8/vOfZ2Jigr6+Pu644w7uv/9+rrvuuiU/z7ve9S7+9E//lB/90R/lv/7X/7ppX48Q4uKToFUIseXs2LGDBx54YN6f/czP/MxFuhtxrm644QZ+4zd+o/P7KIp4+umneec738kzzzzDX/3VX2EYBgD/7//9P375l3+Zq6++mje+8Y1cccUVjI2N8aEPfYgf+IEf4P3vfz8veclL5l1fKcUnPvEJrrnmGj75yU/y5je/mWw2e0G/RiHEhSNBqxBiS0mShFwux2233Tbvz13XvTg3JM5ZoVBY9Dzedddd1Ot1/viP/5ivfe1r3HbbbRw/fpy3vOUtvOxlL+Pd7343lmV1Hv/t3/7t/NAP/RBvectb+Nd//dd53wef//znGRsb453vfCc/+qM/yt///d/z/d///RfqyxNCXGBS0yqE2FLiOCaTyazpsX/zN3/Da1/7Wm677TZuueUWXv3qV/OP//iPnb//+Mc/zrXXXsvJkyfnfdwrXvEK3vrWtwJw8uRJrr32Wj7+8Y/Pe8xb3/pWXvGKVyz5MUt5/etfz+tf//pF9/dd3/Vd3HTTTdx77728973vJUmSFb+ma6+9dsn/zb2XJEn48Ic/zKte9SpuueUW7r33Xt7xjncQBMG8az388MO87nWv47bbbuOlL30pb3vb26hUKvPuebnPN9e//Mu/8NrXvpabb76Zl7zkJfzu7/4ujUZjxa9jJTfddBMAp0+fBuAv/uIvCMOQX//1X58XsAJks1ne8pa38O///b9ndnZ23t997GMf45prruHOO+/knnvu4SMf+cg535MQYuuTTKsQYkvxfX9RreNSPvzhD/O7v/u7/OzP/ix33nkns7Oz/Nmf/RlvfvObuf322xkeHr4Ad7u8//E//gfvete7+NEf/VF+5Vd+hWeeeYb3vve9nDlzht///d9f8WO/7/u+b17G8H3vex+HDh3q/P5tb3sbn/zkJ/lP/+k/8U3f9E0cPHiQP/mTP+GZZ57hAx/4AIZh8NnPfpY3vvGNfOu3fivvfve7KZfL/NEf/RGnTp3iz//8zzvXWriF/zd/8zd89KMf7fz+U5/6FG9+85t51atexZve9CZOnTrFu971Lg4dOsQHP/jBzvb+eoyOjgKwe/duAD73uc9xww03MDQ0tOTjX/ziF/PiF7943p+Vy2X+9V//lTe96U1AWu/8y7/8yzz99NPceOON674nIcTWJ0GrEGJLKZfLDA4Orvq4EydO8BM/8RP81E/9VOfPRkZGeO1rX8tjjz3Gd33Xd23mba6oWq3yvve9jx/8wR/k13/91wF46UtfSk9PD7/+67/Oj//4j3P11Vcv+/HDw8PzttX7+vo6/33o0CE++tGP8ku/9Evcf//9ALzkJS9hcHCQX/7lX+aRRx7hW77lW3jve9/L9ddfzwMPPNAJLF3X5T3veQ9TU1MMDAwAi7fwP/e5z3X+W2vNO97xDl72spfxjne8o/Pne/fu5T/8h//Aww8/zL333rvs16G1Jo7jzu9nZ2d59NFHef/738/tt9/eybiOjY1x/fXXL3udpXzqU59CKcWrX/1qIC0j+O3f/m0efPBBfud3fmdd1xJCXBokaBVCbCkTExPcfPPNqz6uvVVfqVQ4cuQIx44d48tf/jIAYRjOe6xSal7wtJSFj9FaL3pMOwgzTRPTXL666oknnqDZbPKKV7xi3jXbW/xf+MIXVgxaV9I+kb8wKP+u7/oufuVXfoUvf/nL3HPPPRw8eJCf/dmfnZcJ/c7v/E6+8zu/c82f68iRI4yNjfGTP/mT876Ou+66i0KhwBe+8IUVg9avfOUri7Kepmnyzd/8zfz2b/92594sy1q1bGKhj33sY9xzzz24rtspeXjFK17B3//93/OWt7yFQqGwrusJIbY+CVqFEFtGGIaMjY2xf//+VR97/Phx3va2t/HFL34Rx3HYv39/py3SwoDzla985arX+7Vf+zV+7dd+bd6fjYyMzPv9Jz7xCT7xiU9gGAb9/f3ceeed/PzP/zxXXXXVvMeVy2WATiZ0oYmJiVXvZzntus4dO3bM+3Pbtunt7aVarTI7O4vWmv7+/nP+PHD26/it3/otfuu3fmvR36/2ddx4442djzMMA8/z2Llz56KActeuXZ361qVEUcTs7GwnO3zw4EGeeeYZIA2gF/q7v/s7fviHf3jFexNCXHokaBVCbBnPPPMMSZJw4MCBFR+nlOL+++/HcRw++tGPcv3112PbNocOHeKTn/zkose///3vnxfkvfGNb1z0mJ/5mZ+ZlzX8kz/5E55//vl5j7nvvvv46Z/+abTWTE5O8qd/+qe84Q1v4F/+5V/mPa6rqwuAd7zjHezdu3fR52oHX+eiXe87OTk5L6iOooiZmRl6e3spFAoYhsH09PS8jw2CgC996Uvceuut9PT0rPq52l/HL//yL3P33Xcvey/Lyefza8qav/SlL+VDH/oQk5OTi4JxSA+U/fRP/zQPPPAAr3zlK/n4xz9OLpfjfe9736KM99ve9jY+8pGPSNAqxDYk3QOEEFvGww8/TLFY5NZbb13xcTMzM4yOjvJ93/d93Hzzzdh2+v77kUceAdKgdq5rrrmGm2++ufO/pdpnjYyMzHvMUkFdT08PN998M7fccgvf+q3fyk/+5E8yOTnJkSNH5j3u1ltvxXEcxsfH513Ttm3e+c53LupmsB7t4PHTn/70vD//9Kc/TZIk3HnnneTzea6//no++9nPznvMI488wv3339/JkCqlFp3Wn2v//v309/dz8uTJeV/H0NAQ//2//3cOHjx4zl/HXD/yIz+C4zj83u/93qIygUajwR//8R/T29vLy1/+csIw5FOf+hSveMUrePGLX8w999wz73/f+73fy7PPPsuTTz65IfcmhNg6JNMqhNgSHn/8cf76r/+aG264YclgKAxDpqenOX78OFdeeSUjIyN8+MMfZnh4mK6uLj73uc/xv//3/wbSDgSbYXp6mieffJI4jjlz5gwf/OAHGRgYWJRN7e3t5T/+x//Ie97zHmq1Gvfccw/j4+O85z3vwTCMZac7rcWBAwd4zWtewx//8R/j+z533XUXzzzzDA888AD33HMPL3vZywD4uZ/7Od74xjfyi7/4i3zv934vU1NTvPOd7+Tbvu3bGBgY4NFHH+XYsWPccccdy34uy7L4hV/4Bd72trdhWRb33XcflUqF973vfYyPj2/YKf0rrriC3/zN3+TXfu3X+JEf+RFe97rXsXPnTo4fP84HP/hBTpw4wZ//+Z/jeR7/8A//QLlc5ru/+7uXvNarX/1q3vOe9/Dggw8u6hErhLi0SdAqhNgSfuiHfghIt71/8Ad/cMnHPPzww/T19fEHf/AHvO997+P3fu/3eOtb34rruhw4cID3v//9/P7v/z5f/epXF/VM3QgPP/wwDz/8MJAGpjfddBO///u/Ty6XW/TYN73pTezYsYO//Mu/5AMf+ADd3d28+MUv5hd/8RcpFovndR+/93u/x54
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 14
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:26.685881Z",
|
|||
|
"start_time": "2024-12-24T19:29:26.513644Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Определение оптимального количества кластеров с помощью метода локтя\n",
|
|||
|
"inertia = []\n",
|
|||
|
"for k in range(1, 11):\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" kmeans.fit(X_scaled)\n",
|
|||
|
" inertia.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация метода локтя\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.plot(range(1, 11), inertia, marker='o')\n",
|
|||
|
"plt.title('Метод локтя')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.show()"
|
|||
|
],
|
|||
|
"id": "df93661e1340ff3",
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
],
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsQAAAIhCAYAAABJ3KyyAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAB9g0lEQVR4nO3dd1xWdf/H8dfFBpmyZKm4N7jQSu+ULMustJ3dlmnjLscvNSu1u9tuK8vRMLWy1DQty7SlTdNKy5UJblNcyBJQEJTN9fuDvO7IhQqcC877+XjwSM78nPPpwreH7znHYrVarYiIiIiImJSD0QWIiIiIiBhJgVhERERETE2BWERERERMTYFYRERERExNgVhERERETE2BWERERERMTYFYRERERExNgVhERERETE2BWERERERMTYFYRERERExNgVhEpBINHDiQ5s2bc/fdd59zmZEjR9K8eXOefvrpaqxMRETORYFYRKSSOTg4EBcXR2pq6hnzTp06xerVqw2oSkREzkWBWESkkrVq1QpXV1e++eabM+atXr0ad3d3goODDahMRETOxsnoAkREahsPDw+uvvpqvvnmGwYNGlRu3ldffUXv3r35+eefz1hv5cqVzJo1i7179+Lt7c0NN9zAqFGj8PDwIDY2lqSkpLPu74cffiA8PJxffvmFmTNnsmfPHpycnOjWrRtPPPEEISEhtmVHjx7N8uXLz9jGpEmTuPXWW895TAMHDmTjxo1nnbdnzx7bn7dt28Zrr73G9u3bKSoqIiYmhtGjR9O0aVMANmzYwH333ceCBQvo0qULf/zxB4899hgBAQFcddVVzJgx46z7GDZsGMOHD2fJkiXMnTuXxMREioqKbPP79+/PSy+9dM76RUTOR4FYRKQK9OnTh8cff5zU1FTq1asHQG5uLj///DPz5s07IxB/+eWXPPHEE9x00008/vjjJCUl8eqrr7Jv3z7mzZvHjBkzKCwsJD09nWHDhvHoo4/So0cPAIKCgvjss8946qmn6Nu3L4888gjHjx9n+vTp3HXXXXz66af4+/sDUFBQwF133WULv6e3VxGtWrXiP//5j+37JUuW8Mknn9i+X79+PQ8++CBdunThxRdfpKCggLfffpu7776bjz/+mMaNG5+xzSlTptCmTRseffRRfHx86N69OwDPPfccgG1/9erVY9OmTTzzzDPcfvvtPPPMM9SpUwegwvWLiJyLArGISBXo0aMH7u7u5a4Sf//99/j7+9OxY8dyy1qtVqZOnUr37t2ZOnWqbXrDhg0ZNGgQP/30ky38HjlyBID69esTHR0NQGlpKVOnTqVbt25MmzbNtn6HDh3o06cPc+bM4cknnwQgLy+Phg0b2tY9vb2K8PT0tK0HsGbNmnLzp02bRoMGDZg9ezaOjo4AdOvWjWuvvZbp06fz+uuvl1v+0KFDrF27li+++MJ2Bfn0Px48PT0Byu1vxYoVAIwbN84WhgFcXFwqfAwiImejMcQiIlXAzc2N2NjYcuOIV6xYwQ033IDFYim37P79+0lNTSU2Npbi4mLbV+fOnfH09OSXX345774OHDhAeno6ffv2LTe9fv36tG/fvtxQh5SUFLy8vCrhCMs7deoU27Zt44YbbrCFYQBvb2969ux5xnCLU6dO8eqrr9KlSxdbGL6Qdu3aATB37lyOHj1KYWEhxcXFlXcQImJaukIsIlJFbrjhBoYNG0Zqaiqurq6sW7eOxx9//IzlsrKygLJhAqeHCvzV0aNHz7uf0+sHBAScMS8gIICdO3cCZVeik5OTCQ8Pv7gDqYCcnBysVus5a8jJySk37V//+hfe3t7lhlxcSOfOnXnmmWeYPXv2Occai4hcCgViEZEq8o9//IM6derwzTff4OHhQXh4OG3atDljOW9vbwCefPJJYmJizpjv4+Nz3v34+voCkJGRcca89PR0/Pz8ANi1axf5+fk0b978Yg/lgry8vLBYLOes4XSNpz355JN88803jBgxgkWLFtmGSFzInXfeydq1aykuLubZZ58lPDycRx99tDIOQURMTEMmRESqiIuLC7169eLbb7/l66+/5sYbbzzrco0aNcLf358jR47Qtm1b21dwcDDTpk2zXeE9l8jISAIDA894ekRiYiJxcXF06NABgB9//JGWLVtSt27diz6W0tLSckMh/s7Dw4M2bdrw9ddfU1JSYpuek5PDjz/+eMa46TZt2jBjxgySkpKYMmVKhet4/fXX+fHHH3nppZe44YYbaNu2rcYQi8hlUyAWEalCffr0YcuWLWzYsOGcgdjR0ZGRI0eyePFinn/+eX755Re+/vprhgwZws6dO2nduvV59+Hg4MCoUaNYu3Yto0eP5qeffuKzzz7jgQcewMfHhwceeIAdO3awaNEiOnfuTFxcnO1r165dABw+fJhjx46dse1jx46xceNGDh06ZLuSfS6jR4/mwIEDPPzww/zwww9888033H///RQWFjJ06NAzlg8ODubxxx/n448/ZuvWrefdNsC+ffuYP38+vXv35uqrr77g8iIiFaUhEyIiVejKK6/E29ubkJCQsz527LQ77riDOnXq8O677/LRRx/h4eFBhw4dmDp1KhERERfcz6233kqdOnV4++23GTp0KJ6ennTv3p1Ro0YRGBjIXXfdRUZGBvPnz2f+/PlnrP/mm29Sv379M55F/MsvvzBmzBhCQkK4//77z1vDFVdcwbx585g+fTqjRo3CxcWFTp068fLLL5/zxrkBAwawbNkynnvuOZYsWYKDw7mv0zz33HO4uLgwbty4C54PEZGLYbFarVajixARkaoVGxvLsGHDzvnyjQvNFxGpzTRkQkRERERMTYFYRMQELnQz3aXebCciUhtoyISIiIiImJquEIuIiIiIqSkQi4iIiIipKRCLiIiIiKkpEIuIiIiIqenFHJchMzMH3ZJYtSwW8Pf30rk2EfXcfNRzc1Lfzae6e356fxWhQHwZrFb0Ia4mOtfmo56bj3puTuq7+dhjzzVkQkRERERMTYFYRERERExNgVhERERETE2BWERERERMTYFYRERERExNgVhERERETE2BWERERERMTYFYREREREzN0EB86NAhhgwZQvv27enRowfvvvuubV5iYiKDBg0iOjqaPn36sHbt2nLr/vrrr/Tt25eoqCjuu+8+EhMTy81/77336N69O+3bt2fcuHHk5eXZ5hUUFDBu3Dg6depEt27dmDt3btUeqIiIiIjYLcMCcWlpKQ8//DB+fn58+umnPPfcc7z55pt8+eWXWK1Whg4dSkBAAEuXLuWWW25h2LBhJCcnA5CcnMzQoUO59dZb+eSTT6hbty6PPfYY1j9fe/Ltt98yY8YM/vvf/zJ//nzi4+OZMmWKbd+TJ09m+/btzJ8/n//85z/MmDGDb775xpDzICIiIiLGMuzVzRkZGbRs2ZIJEybg6elJw4YNueKKK9i8eTMBAQEkJiayePFiPDw8aNy4MevWrWPp0qUMHz6cJUuW0KZNGwYPHgzApEmTuOqqq9i4cSNdunRhwYIF3H///fTs2ROA5557jiFDhjBmzBisVitLlizhnXfeoXXr1rRu3Zq9e/eyaNEirr/+eqNOh4iIiIgYxLArxEFBQbz22mt4enpitVrZvHkzmzZtIiYmhvj4eFq1aoWHh4dt+Y4dOxIXFwdAfHw8nTp1ss1zd3endevWxMXFUVJSwrZt28rNj46OpqioiN27d7N7926Ki4tp3759uW3Hx8dTWlpa9QcuIiIiInbFsCvEfxUbG0tycjI9e/akd+/evPjiiwQFBZVbxt/fn9TUVADS09PPOf/EiRMUFBSUm+/k5ISvry+pqak4ODjg5+eHi4uLbX5AQAAFBQVkZWVRt27dCtdtsVzK0crFOH2Oda7NQz03H/XcnNR386nunl/MfuwiEE+fPp2MjAwmTJjApEmTyMvLKxdYAVxcXCgsLAQ47/z8/Hzb92ebb7VazzoPsG2/ovz9vS5q+UtRUmpl44FjHM3JJ8jLjZjIujg6mO+nR3Wca7Ev6rn5qOfmpL6bjz323C4Ccdu2bYGypz888cQT3HbbbeW
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 15
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:29.605508Z",
|
|||
|
"start_time": "2024-12-24T19:29:26.713790Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Определение оптимального количества кластеров с помощью коэффициента силуэта\n",
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for k in range(2, 11): # Коэффициент силуэта не определен для k=1\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" kmeans.fit(X_scaled)\n",
|
|||
|
" score = silhouette_score(X_scaled, kmeans.labels_)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация коэффициента силуэта\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.plot(range(2, 11), silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициент силуэта')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.show()"
|
|||
|
],
|
|||
|
"id": "94de6a586c29ba3",
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
],
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAsAAAAIhCAYAAABANwzIAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACEo0lEQVR4nO3deVzT9R8H8NcO2LivIR54gRciAkoeqXmnad6ZWqllanlXnqglpXngkalpWlqaluaZR2aZVtrPLEnBCxXwFhGQ+xiM7fcHbLrYlCnsu7HX8/HgEfvuu+/3vbczX3z4fD9fkUaj0YCIiIiIyEaIhS6AiIiIiMicGICJiIiIyKYwABMRERGRTWEAJiIiIiKbwgBMRERERDaFAZiIiIiIbAoDMBERERHZFAZgIiIiIrIpDMBEREREZFMYgInI4g0dOhRDhw7V23bq1Cn07t0bjRs3xvbt2yv0/DNmzECnTp1Mfl2nTp0wY8aMCqiIiIiehlToAoiITJWamoq3334bgYGBWL9+PRo2bCh0SUREZEUYgInI6nz11VfIz89HZGQkfHx8hC6HiIisDKdAEJFVSUtLw7fffotevXqVCr/Xrl3DxIkT0aZNG4SEhGDo0KGIiorS2+e3335D//79ERwcjGeffRZz5sxBVlaW3j5btmxBx44dERwcjHfffRfZ2dkAgDVr1qB169YICwvDnDlzUFBQoHtNQUEBPvzwQzzzzDNo2bIlVq5cCQDIycnB1KlTERISgvbt22PLli2619y6dQsNGzbErl27dNuUSiU6d+6sN6ptaArIyZMn0bBhQ5w8edLgY6B4pDwsLKzU9I3t27ejZ8+eaNKkCTp06ICVK1eiqKhI97yhKR8P16o9l6Gv/9b5sHv37mH69Olo3bo1QkND8dprr+H06dO65xs2bKjrGwBoNBoMHjwYDRs2xK1bt/T2e9S5J06ciOeeew5qtVrv/LNmzUK3bt0AAHfv3sV7772HVq1aITg4GEOHDsWZM2cAACtXrjR6Dm19sbGxGD9+PFq1aoXAwEC0a9cO8+bNQ35+vtH3T0SWgyPARGQVNBoNEhMTMW/ePKhUKrz11lt6z8fFxeHll19GnTp1MHv2bNjZ2WHTpk0YPnw4NmzYgBYtWuCff/7BmDFj0Lt3b0yePBlXrlzB8uXLcfnyZWzevBkSiQSHDx/GRx99hKFDh+K5557Dtm3bcPjwYQDAjz/+iHnz5uH27dtYsmQJ5HI5wsPDAQCLFy/Gzp07MW3aNFStWhWffPIJbt++jdu3b6N79+5YsWIF/vjjD3z00UeoWrUqOnfubPB9fvnll3ph72ksXboUWVlZcHV11W1bu3YtPvnkE7z22msIDw/HxYsXsXLlSiQmJmL+/PllOm5gYCC2bdsGoDhM79ixQ/fY2dnZ4GtycnIwZMgQFBUVYerUqfDx8cGGDRswYsQI7N69G3Xq1Cn1mh9++EEvID/spZdewsCBA3WPP/zwQ73nDh06hJMnT6J169YAgPz8fPz0008YNWoUCgoKMHLkSBQWFmLOnDmws7PD6tWrMXToUHz//fcYOHAg2rVrp3fcOXPmAACqVq2Ke/fu4dVXX0VISAgWLlwIe3t7/PHHH/jqq69QpUoVjB49ukx9JCLhMAATkVX4559/0KFDB9jZ2eGLL74oFZhWrVoFe3t7bNq0SRfCOnTogBdffBGRkZHYsWMH9uzZgzp16mDBggUQi8Vo06YNHBwc8MEHH+D3339Hp06d8Pnnn6Nly5aYPXs2AKBly5Zo06YNsrKysGDBAjRp0gQAkJmZiS+++AJjx46FWq3Gtm3bMHr0aLz22msAAIVCgUGDBsHd3R1LliyBnZ0dnnvuOVy+fBlr1641GIATExPxxRdfIDAwEOfPn3+qfp09exY//PADAgICkJmZCQDIysrC6tWrMWjQIN37a9u2Ldzd3TF79my88cYbqF+//mOP7ezsjJCQEADAsWPHAED32Jjdu3fj9u3b2L17NwICAgAAzZo1Q9++ffHPP/+U+vPMycnBkiVLjPaiatWqeud8OHi3bdsWVatWxZ49e3QB+JdffkFubi769u2LM2fOICEhAVu2bEFoaKiulq5du2L16tVYuXIlqlatqnfch891/PhxBAQE4NNPP9U9/+yzz+LPP//EyZMnGYCJrACnQBCRVWjcuDEWLlwINzc3hIeHlxol/fvvv9GxY0e9ICSVStGzZ0+cO3cOOTk5+Pjjj7Fnzx6IxWKoVCqoVCp069YNYrEY//zzD1QqFS5cuIC2bdvqjiGTyRAcHAwHBwdd+AWKQ1Z+fj4uXbqES5cuQalU6kYNgeLAJJPJ0LRpU9jZ2em97vz583pTDrQWLVqEsLAwdOzY8al6pdFoMG/ePLz00kto1KiRbvvp06eRn5+PTp066d6/SqXSTXf4888/9Y7z8D7/nU5gqqioKPj6+urCLwA4ODjg0KFDeiO5WqtXr4aHhweGDBli8rnEYjH69euHn3/+GXl5eQCKA/izzz6LqlWrokWLFjhz5gxCQkJQVFQElUoFV1dXtGnTBv/8889jj9+2bVts3rwZMpkMcXFx+PXXX7FmzRrcv39fb1oMEVkujgATkVVwdnZGv3794OfnhyFDhuCdd97Btm3bIJFIAAAZGRlQKBSlXqdQKKDRaJCdnQ0nJyfIZDIAxb/Gf1hmZiZSU1NRVFQEDw8Pvefc3d3h5uamt007rSAlJUUXZv/7Ojc3N7i7u5d6nUqlQlpamt72v//+G4cPH8bevXtx4MCBsrTEqD179uDatWv4/PPPsWjRIt329PR0ADA6Qnnv3j3d97dv3y7Vo6eRnp4OLy+vMu177do1bNy4EV9++SXu3LnzROcbMGAAPv/8c/z8889o1aoVTpw4gSVLluiet7e3B1A8L3j37t267drP06Oo1WosW7YMW7ZsQW5uLqpVq4amTZvqPltEZPkYgInIqgQHB+vm9X733Xe6KQdubm5ISUkptX9ycjKA4nCakZEBlUoFLy8v7NixA0DxxWuvvfYaPDw84OLiApFIhIyMDL1j5OTklLq46f79+wAAb29v5ObmAigOebVq1dLtk52dDaVSqfe61NRU2NnZwcPDA4mJiQCAoqIizJs3D8OGDYOfn98T90Zb69KlSzFx4sRSgVwb2pcsWWJwzu3DP0B4e3tjzZo1usfJyckYM2bME9fl4uJicG7zv//+Czc3N/j7++u2zZ8/H507d0arVq30LhB8mEgkeuT5atasiRYtWuDgwYNIT0+Hs7MzunTpAqD4z/zevXvw9fXFuHHj8OqrrwIAPv30U1y8ePGx72XdunX4+uuv8eGHH+L555+Hi4sLgOK5x0RkHTgFgoiszvjx4+Hj44MVK1bogugzzzyDo0eP6lZsAIqD5YEDBxAUFAR7e3uMGzcOU6ZMAQAEBQUhKCgIGRkZUKvVaNGiBRwdHdG4cWMcP35c7xjnzp1Deno6bt++rdt+5MgRODo6okGDBrppDg+/Li4uDrm5ubhw4QI0Go1u+9GjR9GkSRO9kcbvv/8e9+/fx9ixY5+6N2vWrIGXlxcGDx5c6rng4GDY2dkhKSlJ9/6DgoIglUqxbNkyvYBqb2+vt0+DBg2eqq6wsDDcvHkTV65c0W1TKpWYMGGC7ocRAPjjjz9w8uRJTJ8+3eBxtFMxxOLH//P10ksv4X//+x/279+PHj166EZo9+3bhy5duiAxMRE1a9ZEUFAQ6tevj8uXL6NFixaPPW5UVBTq1auHAQMG6MJvUlISLl++/NRTRYjIPDgCTERWx8nJCdOmTcPkyZOxdOlSfPzxxxg/fjz++OMPDBs2DKNHj4adnR02b96Mmzdv4ssvvwQAjBo1CqNHj8a8efPQuXNn3Lp1C8uXL8czzzyjm/c7duxYjBs3Dh988AG6du2KAwcO6EaRJ0+ejDFjxuDSpUvYtGkTRowYoZsa8corr2D16tWQy+Xw9/fHqlWrAAA3b97ErFmz0KNHDxw4cABnz57V1aMVExODRYsWGV1BASgeTdYu0wUUB2ztf7UXcmmPpV3R4r88PDwwcuRIfPrpp8j
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 16
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:29.624882Z",
|
|||
|
"start_time": "2024-12-24T19:29:29.615706Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Кластеризация с использованием K-Means\n",
|
|||
|
"optimal_k = 3 # Выберите оптимальное количество кластеров на основе графиков\n",
|
|||
|
"kmeans = KMeans(n_clusters=optimal_k, random_state=42)\n",
|
|||
|
"kmeans_labels = kmeans.fit_predict(X_scaled)"
|
|||
|
],
|
|||
|
"id": "828e39778998aeb1",
|
|||
|
"outputs": [],
|
|||
|
"execution_count": 17
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:30.119292Z",
|
|||
|
"start_time": "2024-12-24T19:29:29.642049Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Кластеризация с использованием Agglomerative Clustering\n",
|
|||
|
"agg_clustering = AgglomerativeClustering(n_clusters=optimal_k)\n",
|
|||
|
"agg_labels = agg_clustering.fit_predict(X_scaled)"
|
|||
|
],
|
|||
|
"id": "93601a295d1187c4",
|
|||
|
"outputs": [],
|
|||
|
"execution_count": 18
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:30.434026Z",
|
|||
|
"start_time": "2024-12-24T19:29:30.134155Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Визуализация результатов кластеризации\n",
|
|||
|
"plt.figure(figsize=(16, 8))\n",
|
|||
|
"\n",
|
|||
|
"# K-Means\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=kmeans_labels, cmap='viridis', alpha=0.5)\n",
|
|||
|
"plt.title('K-Means Clustering')\n",
|
|||
|
"plt.xlabel('PC1')\n",
|
|||
|
"plt.ylabel('PC2')"
|
|||
|
],
|
|||
|
"id": "8d3d6852adc608e0",
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"Text(0, 0.5, 'PC2')"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1600x800 with 1 Axes>"
|
|||
|
],
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAnQAAAK7CAYAAAB71xKuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeZild13n/ffvXs6+1b72vifdnZUAIZiFTUBw4wKVwXkch6Dgow+MI6gzjzjzMIrMBeMliIPAIILLiIAiEYZFYlgSQvak97Wqa6+z7/f2e/64T510pao73Z1Od1fn+7rMhVXnnPvc9avq7k/9lu9Xaa01QgghhBBizTIu9w0IIYQQQojnRgKdEEIIIcQaJ4FOCCGEEGKNk0AnhBBCCLHGSaATQgghhFjjJNAJIYQQQqxxEuiEEEIIIdY4CXRCCCGEEGucBDohhFjjpD68EEICnRDirN72trfxtre9bcXna7Uab37zm9m9ezff/OY3z/j6Bx54gB07drBjxw6++93vrvqco0ePdp9z6tSpi3bvl8oTTzzBf/yP/5E77riDvXv38spXvpL//J//M5OTk8uet2PHDv7kT/7kor73Qw89xN13331RrrX0vXrggQcuyvWEEJeOBDohxHmr1Wr8+3//7zlw4AAf+9jHeOUrX/msrzEMg6997WurPnbPPfdc7Fu8ZD7/+c/zcz/3c+Tzef7Df/gP/Pmf/zl33303P/zhD3nTm97EgQMHntf3/7u/+zuOHj16Ua517bXX8rd/+7dce+21F+V6QohLRwKdEOK8LIW5/fv38/GPf5zbb7/9nF5344038o1vfAPP81Y8ds8997Br166LfavPu4ceeogPfOAD/MIv/AKf/vSnecMb3sCLX/xi3vzmN/PXf/3XRKNRfud3fudy3+Y5S6VSXH/99aRSqct9K0KI8ySBTghxzur1Om9/+9s5ePAgn/jEJ3jZy152zq993eteR6lU4v7771/2+QMHDnDixAle+9rXrnjNoUOHeMc73sGNN97IjTfeyLve9a4Vy5gHDhzg137t13jJS17Ctddey8tf/nL+v//v/6PVanWfs2PHDj7/+c/zu7/7u9xyyy3ccMMN/MZv/AaLi4vd50xMTPArv/IrvPjFL+a6667jLW95C/fee+9Zv6ZPfepTpNNp3vOe96x4rLe3l/e973284hWvoNForHj8i1/84qpLzHfddRfve9/7uh9/73vf481vfjM33HADL3rRi/jVX/3V7ozc+973Pr70pS8xNTXFjh07+OIXvwhAu93mj/7oj7j99tvZvXs3b3jDG1bMgt511138t//23/i3//bfsnfvXn73d393xZLrn/zJn/CqV72K73znO7zhDW9g9+7dvOY1r+HLX/7ysmsdPXqUt7/97dx4443ceuutfOQjH+G3f/u3V12qF0I8P6zLfQNCiLWh0Whw9913s2/fPj71qU9x8803n9frt27dyrZt2/ja177Gbbfd1v38V7/6VW655RYGBgaWPf/48eP83M/9HJs3b+aDH/wgnufx8Y9/nJ//+Z/nH/7hH+jr62N+fp63vvWtXH/99fzhH/4hkUiEf/3Xf+V//a//xeDg4LK9ZR/5yEd41atexYc//GEmJyf5gz/4A0zT5MMf/jBBEPCOd7yDwcFB/uiP/gjLsvjsZz/Lr/7qr/LP//zPbNiwYcXXo7Xmu9/9LnfddRfxeHzVr/l1r3vdeY3RM01OTvLOd76Tn/3Zn+U973kPlUqFD3/4w9x999184xvf4J3vfCeFQoF9+/bx0Y9+lPXr16O15l3vehcPP/wwv/7rv86WLVv4xje+wbvf/W4cx+Gnfuqnutf//Oc/zy/90i/x9re/nWQyieM4K+5hYWGB//Jf/gu/+qu/ytjYGJ/61Kd473vfy549e9iyZQuFQoF/82/+DX19ffzBH/wBvu/zx3/8x0xPT3P99dc/p69fCHHuJNAJIZ7VUph76KGHuh9fiNe+9rV89rOf5f3vfz+WFf71c8899/Arv/IrK5770Y9+lHg8zmc+85nuEuBLX/pSXvnKV/LJT36S9773vRw6dIhdu3bxx3/8x93n3HrrrXzve9/jgQceWBbotm/fzh/8wR90P3788ce7e/ry+TzHjh3jne98Z3cJee/evXz0ox9dNeQAFItF2u024+PjFzQW5+Lxxx+n1Wrxjne8g6GhIQCGh4f51re+RaPRYP369fT29hKJRLrh6Xvf+x733XcfH/nIR7qB8uUvfznNZpP//t//Oz/xEz/RHfvR0VF+8zd/s/t+qx2GaDabfOADH+ClL30pABs3buTOO+/k3nvvZcuWLfzlX/4l9XqdL3/5y917vO6663jNa17zvI2LEGIlWXIVQjyrJ598ksOHD/P5z3+eDRs28L73vY+FhYVlzwmCAM/zlv33TM9cdn3ssceYm5vj1a9+9Yrn3n///dxyyy3EYrHu9VKpFDfffDPf//73Abjtttv43Oc+RzQa5ciRI3zrW9/i4x//OIVCYUUQe+Zs0fDwMM1mE4D+/n62bt3Kf/7P/5n3vve9fOUrXyEIAn77t3+bbdu2rTompmkC4Pv+OYzghbnuuuuIRqO86U1v4gMf+AD33XcfO3fu5N3vfvcZ97n94Ac/QCnF7bffvux7cdddd7GwsMDhw4e7zz3XfYunj93w8DDwdKi///77ueGGG7phDmBsbIwbbrjhfL9cIcRzIDN0Qohnlc1m+Yu/+At27tzJH/3RH/ELv/ALvPe97+VTn/oUSikAPvaxj/HRj3502esOHjy47ONNmzaxa9eu7rLrPffcw2233UY2m13xnqVSiXvuuWfVE7C9vb1AGCI//OEP8/nPf55Go8HIyAh79+4lGo2ueM0zl0UNw+jWb1NK8elPf5qPf/zjfOMb3+DLX/4ytm3zyle+kt///d9f9f6y2SzJZJLp6ekzjluj0cB13VVffy7Gx8f53Oc+xyc+8Qm+8IUv8NnPfpZMJsMv/MIv8P/8P/9Pd+xPVyqV0Fpz4403rnrN+fn5bpBLJBLndB+nj51hhPMAS2NXKBRWPRXb39+/bI+iEOL5JYFOCPGsduzYwc6dO4Fw1ugd73gHH/vYx/jkJz/J29/+dgDe/OY3c8cddzzrtV73utfxqU99it/7vd/ja1/72rIlv9Ol02luvfVWfumXfmnFY0tLhp/4xCf4zGc+w+///u/z6le/mnQ6DcCb3vSm8/4ah4aGeP/738/v/d7vceDAAb72ta/x53/+5/T09PB7v/d7q77mtttu44EHHqDdbq8aIv/3//7ffPCDH+QLX/jCitCzFMaCIFj2+Xq9vuzj05d+H3roIf72b/+WP/uzP2Pnzp2rHiRJp9MkEgk++9nPrnrPq+0HfC6Gh4dXDW75fP6ivo8Q4uxkyVUIcd7e+c53smfPHv74j/+Yxx9/HAgD0Z49e5b9t5rXvva1lEol/uzP/oxyucwrXvGKVZ93yy23cOTIEXbt2tW93u7du/nMZz7DN77xDSAsG7J161Z+9md/thvm5ubmOHTo0IqgdDaPPPIIt956K48//jhKKXbt2sW73/1utm/fftYZuH/37/4dpVKJ//E//seKxxYWFvj0pz/N1q1bV53BWloynZ2d7X7u6NGjlEql7sef+cxnuPPOO3Ech0gkwktf+lL+63/9rwDd+1qaMVtyyy230Gg00Fov+14cOnSIj33sY6suhT8XL3rRi3j00UeXLcHPz8/z6KOPXtT3EUKcnQQ6IcR5syyLD33oQ1iWxXve8x5qtdo5v3bdunXs2bOH//k//yd33nnnGZf93vnOdzIxMcE73vEOvvnNb3Lffffxf//f/zdf/epXu7OFe/fu7ZZQ+eEPf8jf/d3f8da3vhXHcbr7487FNddcQywW47d+67f46le/ygMPPMBHPvIR9u/ff9bN/ddffz2/8Ru/wac//Wnuvvtu7rnnHn7wgx/w2c9+lje96U202+1Vwx7Ai1/8YmKxGH/4h3/Ivffeyz333MO73vUucrlc9zkveclLWFhY4F3vehf33nsv3/3ud/nt3/5tIpEId955JwC
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 19
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:30.606152Z",
|
|||
|
"start_time": "2024-12-24T19:29:30.449140Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Agglomerative Clustering\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"plt.scatter(X_pca[:, 0], X_pca[:, 1], c=agg_labels, cmap='viridis', alpha=0.5)\n",
|
|||
|
"plt.title('Agglomerative Clustering')\n",
|
|||
|
"plt.xlabel('PC1')\n",
|
|||
|
"plt.ylabel('PC2')\n",
|
|||
|
"\n",
|
|||
|
"plt.show()"
|
|||
|
],
|
|||
|
"id": "d20b274c9e7faf65",
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
],
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAASIAAAHFCAYAAABW//SMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOy9d5xddZ3///ycdnuZ3jKTSZskpJEACV0EBWzAuu5XRRF1Fcuuu6vyWEQXddcfa9d1bevquuKuBRUWFVFpApFOSO9lMpleb2+nfX5/nMmQIQlJIMlMkvN8PHgwuefec9+fc8993c/n/XkXIaWU+Pj4+EwhylQb4OPj4+MLkY+Pz5TjC5GPj8+U4wuRj4/PlOMLkY+Pz5TjC5GPj8+U4wuRj4/PlOMLkY+Pz5TjC5HPlHCmx9Ge6eN/Mb4QvUI+/vGPM3/+fH74wx+esPe4++67mT9/Pj09PSfsPU4Wpmnyr//6r/z2t7+deOwTn/gEl19++UmzYXBwkC996UtcffXVLFu2jIsvvpgPfvCDPPfcc5Oed8MNN3DDDTcc1/ceGBjgpptuore397icb/78+Xzzm988LueaSnwhegXkcjkefPBBOjo6uPPOO/1fuaNgaGiIO+64A9u2Jx778Ic/zLe+9a2T8v5r1qzh2muv5U9/+hPvete7+I//+A8+9alPUS6XueGGG7jnnntO6Ps/8cQTPProo8ftfHfeeSd/9Vd/ddzON1VoU23Aqcy9994LwKc+9SluvPFGnnrqKS644IIpturUo62t7aS8Tzqd5h/+4R9ob2/nv//7vwmFQhPHrrrqKm666SY+/elPc/HFF1NbW3tSbHqlnH322VNtwnHBnxG9Au666y4uuOACzj//fGbOnMnPf/7zg57zX//1X1xxxRUsXbqUt73tbTz88MPMnz+fp59+euI5jzzyCG9+85tZunQpV111Fffeey+vfe1rX3LK/fjjj3P99ddzzjnnsGrVKj7+8Y/T398/cfzuu+9myZIlPPfcc/zlX/4lS5Ys4aqrruLhhx9mz5493HjjjSxbtozXvva1/O53v5t07r6+Pj72sY+xcuVKli1bxo033siWLVsmjvf09DB//nz++7//e2J5c9dddwHw4IMPcv3117N8+XIWL17M1VdfzU9+8pOJ111xxRUA3HrrrRPLsQOXZrfddhsXXXQRjuNMsun2229n1apVWJYFwI4dO/jABz7AihUrWLFiBX/zN39Dd3f3S35e99xzD0NDQ3zyk5+cJEIAiqJw88038453vIN8Pn/Qa/eP+e677570+IuXlfv27eODH/wgq1atYtmyZbz1rW+dmAHdfffd3HrrrQBcccUVfOITn5h43S9/+Uve8IY3sHjxYi677DK++c1vTroGn/jEJ7jxxhv5zGc+w4oVK3j961+P4ziTlmZPP/008+fP58knn+S9730vy5Yt46KLLuLLX/7ypHPl83k+/elPc8EFF7B8+XI++tGP8qMf/Yj58+e/5PU7kfhC9DLZuXMnGzdu5LrrrgPguuuu46GHHmJkZGTiOd/61rf4yle+wute9zq+853vsGzZMv7hH/5h0nmeeuopPvzhD9PU1MQ3v/lN3vGOd/CZz3xmkqi8mHvuuYf3vve9NDU18bWvfY1bb72VtWvX8ta3vpXR0dGJ59m2zcc//nHe9ra38d3vfpdQKMTNN9/MBz/4QS677DL+4z/+g/r6em655RYGBgYAGBsb421vexubN2/mtttu46tf/Squ6/KOd7yD3bt3T7Ljm9/8Ju9///v50pe+xEUXXcQjjzzC3/zN37Bo0SK+853v8M1vfpPW1lb+5V/+hfXr11NfXz+xBPvQhz50yOXYtddey8jIyCShdl2X3//+97zhDW9A13U6Ozt529vexujoKF/84he5/fbb6e7u5u1vf/uk8b+Y1atXU1tby9KlSw95fMGCBdxyyy20t7cf9hwvheu6fOADH6BUKvGlL32J73znOySTST70oQ/R1dXFZZddxoc+9CHAuzc+/OEPA/C9732P2267jQsuuID/+I//4B3veAff//73ue222yad/7nnnqO/v59vf/vbfPzjH0dV1UPacfPNN3POOefwH//xH7zxjW/kBz/4Ab/85S8njn/4wx/m97//PR/5yEf4+te/TqFQ4Ktf/erLGvNxQ/q8LD7/+c/LlStXykqlIqWUsq+vTy5YsEB+97vflVJKWSgU5NKlS+XnPve5Sa+77bbbZEdHh3zqqaeklFJef/318pprrpGu6048595775UdHR3y3//936WUUt51112yo6NDdnd3S8dx5EUXXSTf+973TjpvV1eXXLRokfziF7846TU//elPJ57zu9/9TnZ0dMh/+7d/m3hs48aNsqOjQz7wwANSSim/9rWvySVLlsienp6J51QqFXnFFVfIj3zkI1JKKbu7u2VHR4f85Cc/OcmG73//+/KWW26Z9FgqlZIdHR3ye9/73qTX3nXXXRPPueWWW+SrX/1qKaWUruvKV7/61fLWW2+dOP7EE0/Ijo4OuW7dOimllB/72MfkhRdeKHO53KT3Oeecc+QXvvAFeThe//rXy7/6q7867PEX8853vlO+853vPKzdL7Z9aGhIdnR0yN/85jcTx7PZrPzXf/1XuWPHDinl5M9y//GlS5fKT3/605PO+4tf/EJ2dHRMvO6WW26RHR0dsr+/f9LzDrxPnnrqKdnR0SG//vWvT3rO5ZdfLj/wgQ9IKV+4ln/84x8njjuOI1/3utfJjo6Oo742xxt/RvQysCyL3/zmN7zmNa+hXC6TzWaJRCKcc845/OIXv8B1XdatW0e5XObqq6+e9No3vvGNE3+bpsnatWu58sorEUJMPH711VejaYd233V2djI8PDzpPOD5WZYvX84zzzwz6fHly5dP/F1TUwPAsmXLJh5LJpMAZLNZAJ588kkWLlxIQ0MDtm1j2zaKonDppZfyxBNPTDr3woULJ/37fe97H1/4whcoFAps2rSJ++67j+9973sTYz0ahBBcc801PPjggxOv+d3vfkd7e/uE3U899RQrV64kGAxO2BiNRjn33HMPsvFAVFU9aMl3PKmtrWXu3Lncdttt3HLLLfz2t7/FdV1uvfVW5s2bd8jXrF27lnK5zOWXXz4xFtu2J5Z7jz/++MRzk8kkjY2NR7TjwM8coLGxkWKxCHjXTtd1XvOa10wcVxSF17/+9cc83uOJ76x+GTzyyCOMjo7yq1/9il/96lcHHV+9ejW5XA6A6urqScf2iwF4zlPHcSY9Bt4XZr9AvJh0Og1wSGdqbW3tJF8OQDQaPeh5L/aPvPj8XV1dLFq06JDHS6XSxN/hcHjSsbGxMT7zmc/w4IMPIoRg5syZnHvuucCxxc1ce+21fPe732X16tVccskl3H///dx4442TbLzvvvu47777Dnrti6/3gTQ3N7Nhw4aXfO/+/n6ampqO2tYDEULwwx/+kO9+97s88MAD3HPPPRNf+n/+538mkUgc9Jr9n+dNN910yHMODQ1N/B2JRI7KjmAwOOnfiqJMXP9UKkUymURRJs9BXnwPnmx8IXoZ3HXXXbS2tnL77bdPelxKyd/+7d/y85//nL/+678GYHR0lNmzZ088Z2xsbOLvmpoadF2f5FcCz9ew/wZ9MfsF6sWvARgeHqaqqurlDGmCWCzGypUr+cd//MdDHjcM47Cvvfnmm9mzZw8/+tGPWL58OYZhUCqV+MUvfnFMNsyaNYulS5fy+9//HkVRyGazXHPNNZNsvPDCC3nPe95z0GsPN5MEuOSSS/jTn/7Exo0bWbJkyUHHt27dynXXXcett97Ku9/97knH9s9YXzyj2j/T2E9DQwOf/exn+cxnPsO2bdv4wx/+wPe//32qqqr4zGc+c9B7xuNxAL7yla8c0jd1vHfvGhoaSKVSuK47SYxeyrd2MvCXZsfI8PAwq1ev5g1veAOrVq2a9N/555/P1VdfzaOPPkpTUxOxWIwHHnhg0uvvv//+ib9VVWXFihU89NBDk57z8MMPT4qzOZBZs2ZRV1c3ETqwn+7ubtatW8eKFSte0fh
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 20
|
|||
|
},
|
|||
|
{
|
|||
|
"metadata": {
|
|||
|
"ExecuteTime": {
|
|||
|
"end_time": "2024-12-24T19:29:31.228189Z",
|
|||
|
"start_time": "2024-12-24T19:29:30.620784Z"
|
|||
|
}
|
|||
|
},
|
|||
|
"cell_type": "code",
|
|||
|
"source": [
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"print(\"Коэффициент силуэта для K-Means:\", silhouette_score(X_scaled, kmeans_labels))\n",
|
|||
|
"print(\"Коэффициент силуэта для Agglomerative Clustering:\", silhouette_score(X_scaled, agg_labels))"
|
|||
|
],
|
|||
|
"id": "dd0eb66cf8fddfc5",
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Коэффициент силуэта для K-Means: 0.20758611793922405\n",
|
|||
|
"Коэффициент силуэта для Agglomerative Clustering: 0.23799044339356212\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"execution_count": 21
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 2
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython2",
|
|||
|
"version": "2.7.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 5
|
|||
|
}
|