278 lines
346 KiB
Plaintext
278 lines
346 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Lab 5 Malafeev PIbd-31**\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Выберем бизнес-цель: сегментация клиентов для разработки персонализированных маркетинговых стратегий. Цель кластеризации: разделить клиентов на группы на основе их возраста, дохода и расходов, чтобы определить наиболее перспективные сегменты. Будем использовать признаки Age, Income и Total_Spending из предыдущей лабораторной работы."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAArEAAAIkCAYAAAAJcXPdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hc1bX4/e+ZXiWNerOKZbk3cMHG4IbBdEggyaWbC4QeEpL80sEkcAMJN+HykguhwyWkkACmhI6NMdjYxr1Ilq1eR9JoRtPbOe8fIwsby0XGxgivz/P4eaQzZ85s7RnbS/usvZaiaZqGEEIIIYQQQ4juWA9ACCGEEEKIwZIgVgghhBBCDDkSxAohhBBCiCFHglghhBBCCDHkSBArhBBCCCGGHAlihRBCCCHEkCNBrBBCCCGEGHIkiBVCCCGEEEOOBLFCCCGEEGLIkSBWCCGEEEIMORLECnEQ9fX1KIqyzx+73c7EiRO56667CAQCx3qYQggBDPxvlslkYtiwYVx66aVs2rRpwOclEgmeeuopzj77bPLz8zGZTKSnpzNt2jR++ctf0tDQsN/XfPbZZ/tfa82aNUfrRxNiL4qmadqxHoQQX2X19fWUl5dTUVHB5ZdfDoCmaXR2dvLGG29QX1/PjBkzWLFiBXq9/hiPVghxvBvo36xAIMCqVav46KOPMJvNvPfee8yaNav/OQ0NDVxwwQVs3LiRvLw8Tj/9dIYNG0YwGGTdunWsXLkSg8HAli1bGDFixD6vOWfOHD788EM0TeOGG27g4Ycf/tJ+XnEc04QQB1RXV6cB2sKFC/d5LBKJaCeccIIGaO+9994xGJ0QQuztQP9m/eIXv9AAbc6cOf3Hent7tVGjRmmA9uMf/1iLRCL7PK+mpkY777zztPXr1+/z2I4dOzRAO//887WRI0dq6enpWigUOpI/khADknQCIb4As9nMvHnzAOjq6trrsbKyMsrKyvZ5ziuvvNJ/223ZsmX9xzds2MB5553H8OHDsdvtZGRkcOKJJ/LAAw8Qj8cBUFWV0tJSsrKyiEajA45p9uzZGAwGmpubAfD5fNx3333MmTOHwsJCTCYThYWFXHnllezatWu/P9uiRYsGTKNQFIVFixbtda6iKMydO3efazz44IP9z6mvr+8//u6777Jw4UJKSkqwWCxkZWUxc+ZMnn766X2u8dJLL3HJJZcwYsQIbDYb6enpnHrqqfzrX//a59zdt1E/P77P/0x7jgUG/14BdHd3c/PNN1NWVobJZNpnjgZj2bJl+53rgcZ1tOfk6aefRlGUAd8PGHi+9je3n7d48eK95lPTNM4++2wUReHvf//7XudqmsZZZ5014GMHuvaB/gz0OW1oaOCaa66hqKgIk8lEcXEx11xzDY2NjQO+jt/v56677mLixIn983/CCSfwq1/9qv/v6m77S0ca6O/F7p/5ySefZNasWaSlpWGz2Zg6dSpPPvnkQX/+Q3HrrbcC7HXL//7776e6uprLL7+c3/3ud5jN5n2eN2LECF555RXGjh27z2O7x3bllVdyxRVX4PP5+Oc//3lExivEgRiO9QCEGMpisVh/ADJ58uRDOv+HP/zhgI81NzfT1dXFaaedRk5ODsFgkLfeeosf/OAHbNmyhccffxydTse1117LHXfcwb/+9S8uvfTSva5RXV3Nhx9+yDnnnENxcTEA27dv54477mDevHl84xvfwG63U1VVxfPPP8/rr7/OunXrKC0t3e+Yb7vtNjIyMgDwer38z//8zyHNTVdXF4sXLx7wsV27dhGJRDj77LPJzMzE6/Xy2muvcfXVV9Pc3Mwvf/nL/nN/9rOfYTKZOOWUUygoKKCzs5NXXnmFiy++mAcffLD/P+Uj7UDv1e7gas2aNUyePJmLL74Yh8MBpALAA+UOHsicOXP2CrIeeOCBAc87VnNyNCiKwlNPPcXEiRO5/vrrmTFjRv/n8YEHHuDNN99k0aJFfOc73z
nka1511VUDBv933XXXPsd27NjBKaecQmdnJ+eddx7jxo1jy5YtPPnkk7z66qusWLGCkSNH9p/vdruZM2cOVVVVTJ48mRtvvBFVVamqquK+++7jhz/8Yf/flz1NmjSJCy+8sP/7l19+mY0bN+51jqZpXHbZZfz1r3+lsrKSSy+9FJPJxDvvvMM111zDtm3buP/++w95Hg5kz1+0dgehd9xxx0GfZzKZ9vo+mUzyzDPP4HK5OPfcc5k6dSp33HEHTzzxBFdcccURGasQ+3VM14GFGAJ235qrqKjQ7rzzTu3OO+/U7rjjDu2mm27SKioqNIvFov3+97/f53mlpaVaaWnpXsfuvfdeDdCmTJmiAdrSpUsP+NqxWEyrqKjQ7HZ7/7GWlhbNYDBoc+fO3ef8H/3oRxqgvfzyy/3HvF6v1t3dvc+577//vqbT6bRrr712wNe+7LLLNECrr6/fZy6uuuqqvc7lc7cnNU3TbrjhBk2n02mTJ0/WAK2uru6AP2tvb69mt9u1cePG7XV8165d+5zr9/u1CRMmaOnp6VowGDzo+Ha76qqrBhzLYN+rrVu3aoB2wgknaIlEYq/nzZkzRxvsP63vvvuuBmiLFy8+6Lg07ejPyVNPPaUB2lNPPTXgcwYa1/7m9vPuvPPOAT/7b7zxhqYoinbyySdriURCW79+vWYymbTKykrN7/cf8JoHu/ZuA31O582bpwHan//8572O/+lPf9IAbf78+Xsdv+iiizRA+/nPf77P9dvb27V4PL7XsZqaGg3QFi1atNfxgebr0Ucf1QDt6quv1mKxWP/xaDSqnXfeeRqgrV27dn8/fr8DpRPccccdGqDNmzdP0zRNq6+v1wCtuLj4oNcdyCuvvKIB2vXXX99/bPbs2ZqiKFpNTc1hXVOIQyXpBEIcol27dnHXXXdx11138etf/5r//d//ZdeuXSxYsIAFCxYc9Pnt7e3cc889nHHGGZx77rkHPb+3t5d//OMftLS07LWqVFhYyHnnnccHH3zAzp07+4/H43GeffZZCgoKOOecc/qPp6enk5mZuc/1582bx7hx43j33XcHfP3dt0UHurV4MBs3buSxxx7jmmuuYdKkSQc9v7u7m6eeeopgMLjPCtrw4cP3Od/hcLBo0SJ8Pt9R2Ql9sPcqFAoBMGrUqCOymS8cDgP7rnLtz7GYk6PtzDPP5LbbbuPjjz/mpz/9KZdccgmapvHXv/61f5X7SGtsbGTp0qWMHTuW6667bq/HbrjhBkaPHs37779PU1MTkPpcvPjii1RUVAx4lyEvLw+DYe8bnIP5e/TQQw9ht9v505/+hNFo7D9uMpm45557APjrX/96yD/fzp07Wbx4MYsXL+bHP/4xs2fP5te//jUWi6X/eu3t7QD9d24G64knngBSqQS7XXnllf1pEUIcTZJOIMQhWrhwIW+++Wb/993d3Xz00UfcdtttzJo1i/fff5+TTjppv8//6U9/Sjgc5o9//CP/+Mc/9nvetdde2/8fA8DYsWN55pln9jrn+uuv56WXXuLxxx/n3nvvBVL5m263m5///Of7/Ee6bNkyHnjgAT755BO6urpIJBL9j+0vcOrt7QXAYrHsd6z78/3vfx+Hw8E999zDj3/84/2et2DBAt57773+72fMmMGDDz641zlut5t7772XN954g4aGhv6Ab7fW1tZBj+9gDvZejRo1CqfTyZIlS3jxxRdZuHAhdrv9sF+vp6cHAJvNdkjnH86cbNiwYcDAa8OGDft9nZdffnnAHFev1zvgLXNIpQBkZGSg0+nIyclh5MiRzJs3D53u4Gsm9957L8uWLeu/ZX7fffcxZcqUgz7vcO3+2efMmbNPHrNOp2P27NlUVVWxYcMGhg0bxtq1a9E0jXnz5u0VZB7Iof49CoVCbN68mcLCQu677759Ht8dDFdVVR3S68Jnv3gDGI1G8vLyuPTSS/npT3/KhAkTDvk6+9Pe3s7rr7/OiBEjOPnkk/uPf+tb3+
LWW2/lmWee4Te/+Y1UbRFHjQSxQhymrKwszj//fGw2G6effjq//OUveeeddwY8d/Xq1Tz77LPceuutA26M2NP5559
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем данные\n",
|
|||
|
"data = pd.read_csv(\".//datasetlab1//marketing_campaign.csv\", sep=\"\\t\")\n",
|
|||
|
"# Фильтрация и выбор нужных столбцов\n",
|
|||
|
"data['Age'] = 2024 - data['Year_Birth'] \n",
|
|||
|
"data['Total_Spending'] = (data['MntWines'] + data['MntFruits'] + data['MntMeatProducts'] +\n",
|
|||
|
" data['MntFishProducts'] + data['MntSweetProducts'] + data['MntGoldProds'])\n",
|
|||
|
"data = data[['Age', 'Income', 'Total_Spending']].dropna()\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизация данных\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"data_scaled = scaler.fit_transform(data)\n",
|
|||
|
"\n",
|
|||
|
"# Применение PCA\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"data_pca = pca.fit_transform(data_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Создание DataFrame с результатами PCA\n",
|
|||
|
"pca_df = pd.DataFrame(data_pca, columns=['PC1', 'PC2'])\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"sns.scatterplot(x='PC1', y='PC2', data=pca_df, s=50, alpha=0.7)\n",
|
|||
|
"plt.title('Визуализация данных после PCA', fontsize=14)\n",
|
|||
|
"plt.xlabel('Первая главная компонента')\n",
|
|||
|
"plt.ylabel('Вторая главная компонента')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Выше мы выполнили понижение размерности (PCA до двух главных компонент) и визуализировали данные."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"3. Теперь выберем оптимальное количество кластеров."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 12,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABKUAAAHqCAYAAADVi/1VAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADd2klEQVR4nOzdd1yV5f/H8ddhD8UNoiDiFveWXDiprBz1y8pcpZVpOcq+2jLLsmk2HGWlpQ3TshzlSMWRuFDcW3CDK8WJjPv3x4mTCCggcAPn/Xw8zsNzrvu6r/P5HI6cm8+57uu2GIZhICIiIiIiIiIikocczA5ARERERERERETsj4pSIiIiIiIiIiKS51SUEhERERERERGRPKeilIiIiIiIiIiI5DkVpUREREREREREJM+pKCUiIiIiIiIiInlORSkREREREREREclzKkqJiIiIiIiIiEieU1FKRERERERERETynIpSIiIi6bBYLISEhJgdhoiIiIgUcH379sVisRAdHW12KPmOilIiGYiOjsZisXD33Xenuz08PByLxYLFYqFv3755G5yIiIjkudsdG5w5c4a6detisVgYOnRo3gZ3kzstrIeFhWGxWHjjjTdyLCYREZGbqSglkk0vvfSS2SGIiIhIPnH27Fnat2/P9u3bef7555kwYYLZIYmIiOR7KkqJZMO8efNYs2YNlStXNjsUERERMVlKQWrbtm0899xzfPLJJ2aHJCIiUiCoKCWSRUlJSbz88ss4OztnOKU9JCQEi8WS7rZbTYePioqif//+VKhQAVdXV3x9fenbty+HDx9O0zdlWv6xY8d49NFHKV26NB4eHrRo0YK//vorTf9bnceccjrCzachTp8+HYvFwvTp01O1Jycn07hx4wxPDZg7dy6tW7fGy8vLdopjyi2rpwGkvJbp3W6O6+Zc07vdnGNSUhLvv/8+devWxd3dPU3/sLCw28Z44+u3c+dOOnfuTPHixSlSpAidOnUiIiIizT4REREMHjyY2rVrU6xYMdzd3alTpw7vvvsuCQkJafo/9dRT1K9fn5IlS+Lm5kZgYCBPPfUUUVFRd/waZPRz/PTTT2373Pi+yej9ndH7qGLFilSsWDHdONN7j2U0Tnpujv3vv//GycmJ+vXrEx8fn6rvrbZllmEYDBs2DIvFQs+ePdP9WYmIfTl37hwdOnRg69atDBo0iE8//TTdfpcvX2b06NHUqFEDNzc3SpYsSefOnfn7779T9fvnn3944YUXCAoKokiRIhQtWpSmTZvy6aefZvg7548//qBOnTq4u7vTpk0bDh48aNu2aNEiatasadu2bdu2dMd4//338fPzo0iRIvTr14/Lly/btn3wwQf4+vri5eVFr169OHv2bKp9b3Vs0717dywWS5rPgbw8Lrl48SKjR4+mVq1auLu7U7x4cUJDQ1mzZk2avlk5hkuJMzO3zK4js2rVKrp27YqPjw+urq74+/vTvXv3VLG+8cYb6R6jXLlyBX9//1t+ht4qxpTxvvrqKywWC++//366YyxfvhyLxcLTTz8NwJo1a3jwwQfx9/fH1dUVHx8funXrlua9nfL6ZeaWYt++fbz00ks0bNiQUqVK4ebmRrVq1Rg5ciSXLl3K1Gt6o5T3UHq3jE53vV3cN1u1ahWhoaGULFkSBweHWx6DZSTlfXjt2jVGjhxJhQoVcHNzo2bNmnz22WcYhpGq/4ULF3jvvfdo06YN5cqVw8XFhXLlytG7d+9Uvw9S/PTTT7Rt25Zy5crZfmbt27dn3rx5d/waZPR/+8SJExQtWjTN74qM/l/f+DrcKKP3f4r0fpa3+n99q9jj4+OpX78+Tk5Oad7Pt9qWFWFhYRQvXpwKFSqwZ8+ebI9TkDmZHYBIQfPtt9+yc+dOnn32WapUqZJj465fv57Q0FAuX77MfffdR9WqVYmOjub777/nzz//JDw8nEqVKqXa559//qFFixaUKVOG/v37c/r0aW
bNmsXdd9/NnDlz6Nq1a47Fd6Np06alW2gBWLBgAd27d8fd3Z0HH3yQwMBAHBwciI6O5ttvv832c44ePdp2PzIykt9///22+wwZMoTixYsDcP78+XS/uR45ciQffvghvr6+PPHEE5QpUwawfkCsXLkySzEeOnSIFi1a0LBhQwYOHMjhw4eZPXs2rVu3Zvny5TRr1szWd+rUqcyfP5/WrVtz7733cuXKFcLCwhg1ahQbN27kl19+STX2pk2bqFGjBu3bt8fZ2ZmdO3fy9ddf8/vvv7N7925KliyZ7dcgPWfOnCmQ64i0aNGCV199lTFjxvC///3PdvrM+fPn6dmzJ66urvz444+4urpmeeyEhAT69u3LDz/8wNChQxk/fnymDnBEpPBKKUhFRkby7LPP8vnnn6fb79q1a7Rr144NGzbQsGFDhg4dSmxsLLNmzWLx4sX8+OOP/N///R8Ahw8fZsqUKXTs2JHOnTuTmJjI8uXLGTJkCL/99ht//vlnqt9hK1asoEuXLri7u9OzZ08uXbpE586dAeuXXT179qRHjx6cO3eOOXPmEBISQkREBIGBgbYx3nrrLV5//XUCAwN58skn2bhxI8OHDwfgxx9/5MqVKzz22GNEREQwc+ZM9uzZQ3h4OE5Ot/5TYvny5cydO/eOXuPMuNVxyblz52jdujU7d+6kRYsWPPPMM8TFxfH777/Ttm1bZs+ene3jpeLFi6c6PgFsnzs3rymW8ll8K5988gnDhg3D3d2dbt26UaFCBY4fP86aNWuYM2cOLVu2vOX+48aN49ixY7d9noCAgFTFkZuPeR599FFeeOEFvv7663SXq5g6dSoAAwYMAGDOnDlERkbSvn17ypUrx8mTJ/n999+ZN28eEydO5JlnngGsX1Ld/HqNGTMmTTw3+vXXX/n6669p27YtISEhJCcns27dOt577z1WrlzJqlWrcHZ2vm3ON+vSpQv169dPFcfttGnTJlWxY/r06Wm+ON6yZQsdOnTAMAy6du1K9erVcXJyytIx2I0efvhhtmzZwoMPPgjAL7/8wvPPP090dDQfffSRrd/u3bt5/fXXadu2Ld26dcPT05M9e/bwww8/sHDhQjZv3kxAQICt/65du3BycqJ79+4UK1aM2NhYfv/9d7p06cKMGTN4/PHHs/0aZOR///tftgqJZko5bmzUqBE9e/Zk69atFCtWDLAu5bJ161beeOMNWrRoka3xf/nlF3r27EnlypVZvHgxfn5+ORl+wWGISLqioqIMwAgNDbW1Xb161fDz8zOKFClixMTEGOHh4QZg9OnTJ9W+bdq0MTL677VixQoDMEaPHm1ru379ulGxYkWjaNGixubNm1P1X716teHo6Gjcd999qdoBAzAee+wxIzk52da+detWw8XFxShTpoxx5coVW3ufPn0MwIiKisow15vzmDZtmgEY06ZNs7VduHDB8PHxMRo1amQARps2bVLt83//938GYPz666+3zTszWrZsmea1TC+uG/Xs2dMAjOjo6NvmWKZMGcPV1dU4ceJEqvbRo0cbgLFixYrbxpgyNmCMHDky1bZFixYZgFGnTp1U7YcPHzYSExNTtSUnJxtPPPGEARhr1qy57fO+9tprBmDMnj07zbasvAbp/RyfeeYZw8HBwahfv36a901ISIgBpHrf3Wr8gIAAIyAgIN0c0vtZZjROetKLPTEx0WjRooVhsViMP/74wzAMw3j44YcNwPjiiy9uO2Z6Y1+8eNHo1KmTARjjxo3L9BgiUrjceGxw7tw5o2HDhgZg3HXXXWl+J95ozJgxBmD07NkzVb/NmzcbLi4uRvHixY24uDjDMAzj0qVLxoULF1Ltn5ycbAwYMMAAjFGjRqXaVrduXcPFxcXYsmWLre2jjz4yAMPV1TXV58msWbMMwHj00UdtbTExMYaLi4tRrVo149y5c4ZhWH+Pdu3a1QCMSpUqGbGxsbb+AwcOTPP7NL3P+MTERKN27dqGn5+f4ePjk+
ZzIK+OSx577DEDMKZOnZqqPTY21vD39zfKlCljXL161dae1WO4m93qM+9WIiMjDQcHB6NcuXJpXpPk5GTj+PHjtsf
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x500 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"# Выбираем признаки для кластеризации\n",
|
|||
|
"features = data[['Age', 'Income', 'Total_Spending']].dropna()\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизация данных\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"features_scaled = scaler.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Списки для хранения метрик\n",
|
|||
|
"inertia = []\n",
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"\n",
|
|||
|
"# Оценка для числа кластеров от 2 до 10\n",
|
|||
|
"k_values = range(2, 11)\n",
|
|||
|
"for k in k_values:\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" labels = kmeans.fit_predict(features_scaled)\n",
|
|||
|
" \n",
|
|||
|
" inertia.append(kmeans.inertia_)\n",
|
|||
|
" silhouette_scores.append(silhouette_score(features_scaled, labels))\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация инерции\n",
|
|||
|
"plt.figure(figsize=(12, 5))\n",
|
|||
|
"\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"plt.plot(k_values, inertia, marker='o')\n",
|
|||
|
"plt.title('Инерция для различных k', fontsize=14)\n",
|
|||
|
"plt.xlabel('Количество кластеров (k)')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация коэффициента силуэта\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"plt.plot(k_values, silhouette_scores, marker='o', color='orange')\n",
|
|||
|
"plt.title('Коэффициент силуэта для различных k', fontsize=14)\n",
|
|||
|
"plt.xlabel('Количество кластеров (k)')\n",
|
|||
|
"plt.ylabel('Силуэт')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"По графику инерции оптимальное значение находится около k = 3 (точка изгиба \"локтя\"). <br>\n",
|
|||
|
"Коэффициент силуэта максимален при k = 2, но при k = 3 он всё ещё достаточно высок. Выбираем k = 3 как компромисс между методом локтя и коэффициентом силуэта."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"4. Выполним кластеризацию иерархическим (агломеративным) и неиерархическим (K-Means) алгоритмами."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0wAAAJ0CAYAAAAhw6PDAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACc4klEQVR4nOzdeXRU9f3/8ddMZiaZbJM9IaxhX4MgCogKCi60rlhRqoK7WHe0RbtY2/5aazetFm2tFlGLG61aq2AFRdoKiCyiFVkEBISwBwLZk/v7g++9zkzmJjNhkpkkz8c5OTAzd+687/55389yHYZhGAIAAAAANOCMdQAAAAAAEK9ImAAAAADABgkTAAAAANggYQIAAAAAGyRMAAAAAGCDhAkAAAAAbJAwAQAAAIANEiYAAAAAsEHCBAAAAAA2SJgAAAAAwAYJEwB0QPPmzZPD4Qj5N3jw4FiHBwBA3HDFOgAAQOx8//vf14ABA6zXP//5z2MYDQAA8YeECQA6sLPOOkvjxo2zXj/11FPat29f7AICACDO0CQPADqg6upqSZLT2fRloLS0VHfeeae6du2qxMRE9e7dWw899JDq6+utabZu3SqHw6Hf/OY3Db4/ePDggKTM9MADD4RsEhg87bhx4zR48GCtXLlSp5xyirxer4qKivTHP/6xwTz37Nmj6667Tvn5+UpKStLQoUM1Z86cgGnMWEP9Pf/885KkZ555Rg6HQ0uWLNFNN92k7Oxspaena+rUqTp48GDA/F5//XV985vfVGFhoRITE9WrVy/97Gc/U11dXYPlcDgcuuiiixrEfdNNNzVoDukf52uvvRYwfWVlpTIzMxus8y+//FLf+c531K9fP3m9XmVnZ+vSSy/V1q1bG/wmACA81DABQAdkJkyJiYmNTldeXq6xY8fqq6++0k033aRu3brpgw8+0H333addu3bpkUceOe5YnnjiCaWmpkqS7rvvvpDTHDx4UN/4xjc0efJkTZkyRS+//LJuvvlmeTweXXvttZKkiooKjRs3Tps2bdKtt96qoqIivfLKK7r66qtVWlqqO+64I2CeU6ZM0Te+8Y2A98aMGRPw+tZbb1VGRoYeeOABrV+/Xk888YS+/PJLLV68WA6HQ9Kx5Co1NVUzZsxQamqq3n33Xd1///06fPiwfv3rXwfMLykpSW+++ab27NmjvLw8K+6XXnpJSUlJIZc9KSlJs2fPDki0/v73v6uysrLBtCtWrNAHH3ygyy+/XF26dNHWrVv1xBNPaNy4cfrss8+UnJwc8jcAAI0wAAAdziOPPGJIMj7++OOA98eOHWsMGjTIev2zn/3MSElJMTZs2BAw3b333mskJCQY27ZtMwzDMLZs2WJIMn796183+K1BgwYZY8eObfD+97//fUOSsW/fvkanHTt2rCHJ+O1vf2u9V1VVZZxwwglGXl6eUV1dHbBMzz//vDVddXW1MXr0aCM1NdU4fPhwk7GaZs+ebUgyTjzxRGv+hmEYv/rVrwxJxuuvv269V15e3uD7N910k5GcnGxUVlYGLMegQYOM4uJi4ze/+Y31/nPPPWd06dLFOO200wLWvRnnlClTDJfLZZSUlFifjR8/3vj2t7/dYDlCxbJ06VJDkvHss8/aLi8AwB5N8gCgA9q/f78kKTc3t9HpXnnlFZ122mnKzMzUvn37rL8JEyaorq5OS5YsCZi+vLw8YLp9+/Y1aJpmMmtI7GpW/LlcLt10003Wa4/Ho5tuukl79uzRypUrJUlvvfWWCgoKNGXKFGs6t9ut22+/XUeOHNH777/f5O8Eu/HGG+V2u63XN998s1wul9566y3rPa/Xa/2/rKxM+/bt02mnnaby8nJ9/vnnDeZ5zTXXaPbs2dbr2bNna9q0abbNI4cPH65Bgwbpueeek3Ss2d17772nq6++usG0/rHU1NRo//796t27tzIyMrRq1arwFxwAYCFhAoAO6Msvv5TL5WoyYdq4caMWLF
ig3NzcgL8JEyZIOtZnyN+Pf/zjBtOGShokad++fXK73WE1EyssLFRKSkrAe3379pUkq3/Ol19+qT59+jRIPMxRAL/88ssmfydYnz59Al6npqaqU6dOAX2C/ve//+niiy+Wz+dTenq6cnNzdeWVV0qSDh061GCeV1xxhTZs2KAPP/xQW7du1eLFi0MmP/78k6xnnnlGp5xySoPYpGPN++6//36rv1lOTo5yc3NVWloaMhYAQNPowwQAHdD69evVs2dPuVyNXwbq6+t11lln6Xvf+17Iz82kxXTjjTfq0ksvDXjvhhtuCPndrVu3qlu3blZfoLaotLRUY8eOVXp6un7605+qV69eSkpK0qpVqzRz5syAgTFMubm5Ov/88zV79mzl5+drzJgx6t27d6O/c+WVV+p73/ueli1bpjlz5uiHP/xhyOluu+02zZ49W3feeadGjx4tn88nh8Ohyy+/PGQsAICmkTABQAdTVVWlNWvWhBytLVivXr105MgRq0apKX369GkwbXDNkCTV1tbq448/1rnnnhvWfHfu3KmjR48GzGvDhg2SpB49ekiSunfvrrVr16q+vj6glsms4erevXtYv+Vv48aNOuOMM6zXR44c0a5du6zBIhYvXqz9+/fr73//u04//XRrui1btjQ632uvvVZXXHGFfD6fHnjggSbjyM7O1gUXXGA1Q5w8eXLI4d/nzZunadOm6be//a31XmVlpUpLS5v8DQBAaDTJA4AOZu7cuaqqqtL48eObnHby5MlaunSp3n777QaflZaWqra2tlkx/Otf/9KhQ4d04YUXhjV9bW2t/vSnP1mvq6ur9ac//Um5ubk68cQTJUnf+MY3VFJSopdeeinge4899phSU1M1duzYiON88sknVVNTY71+4oknVFtbq4kTJ0qSEhISJEmGYQTE9vjjjzc633PPPVcpKSk6cOCAJk+eHFYs1157rdauXatLL73UGlUwWEJCQkAskvTYY4/Z9iMDADSNGiYA6CCOHj2qxx57TD/96U+tgrX53CHT7t27deTIET3//PM666yz9N3vflf/+Mc/dN555+nqq6/WiSeeqKNHj+qTTz7RvHnztHXrVuXk5EQUx0svvaR77rlHiYmJqqioCIjh0KFDqqur02uvvRZQA1ZYWKiHHnpIW7duVd++ffXSSy9pzZo1evLJJ61BGW688Ub96U9/0tVXX62VK1eqR48emjdvnv773//qkUceUVpaWsTrrLq6WuPHj9fkyZO1fv16Pf744zr11FN1wQUXSJJOOeUUZWZmatq0abr99tvlcDj03HPPNUhagiUkJGjdunUyDCNkDVwo5557rvbu3WubLEnSeeedp+eee04+n08DBw7U0qVLtXDhQmVnZ4e/0ACAACRMANBB7N27N+A5R/6jzgW76qqr9N5772ncuHF6//339Ytf/EKvvPKKnn32WaWnp6tv3776yU9+Ip/PF3EcM2fO1I4dOyRJ1113Xchp7rzzzoCEKTMzU3PmzNFtt92mP//5z8rPz9cf/vCHgP5RXq9Xixcv1r333qs5c+bo8OHD6tevn2bPnt3koAp2/vCHP+ivf/2r7r//ftXU1GjKlCl69NFHrX5X2dnZ+uc//6m7775bP/zhD5WZmakrr7xS48eP1znnnNPovNPT0yOKxeFwNJmc/v73v1dCQoL++te/qrKyUmPGjNHChQubjAUAYM9hNHUbDADQLmzdulVFRUVWInS80zVXjx499MADD9gmMeaoceZIdOPGjdO+ffv06aefRj0WO88884yuueYarVixQiNGjGi13wUAxB/6MAEAAACADRImAOggUlNTdcUVVyg/Pz8q0zXXxRdfrF69etl+np+fr4svvrhFfhsAgEjRJA8AENdokgcAiCUSJgAAAACwQZM8AAAAALBBwgQAAAAANtr9c5jq6+u1c+dOpaWlWc/NAAAAANDxGIahsrIyFRYWyukMr+6o3SdMO3fuVNeuXWMdBgAAAI
A4sX37dnXp0iWsadt9wpSWlibp2EqJ9KnqAAAAANqPw4cPq2vXrlaOEI52nzCZzfDS09NJmAAAAABE1FWHQR8AAAA
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x700 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
|
|||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
|||
|
"\n",
|
|||
|
"# Выполняем агломеративную кластеризацию иерархически\n",
|
|||
|
"linkage_matrix = linkage(features_scaled, method='ward')\n",
|
|||
|
"plt.figure(figsize=(10, 7))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title('Дендрограмма')\n",
|
|||
|
"plt.xlabel('Объекты')\n",
|
|||
|
"plt.ylabel('Евклидово расстояние')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"hierarchical = AgglomerativeClustering(n_clusters=3)\n",
|
|||
|
"data['Cluster_Hierarchical'] = hierarchical.fit_predict(features_scaled)\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAtEAAAIkCAYAAADYlnetAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAD/d0lEQVR4nOzdd3hUVf7H8ffMZNI7gYTee+8GEGUJhCLSVLAtCBYUVEBF2XXtK8rasK/rT7GhgAooPVIFAtKl9x5SKOllJjP39wdmZEwhg0CIfF7Pk0fn3jP3fnNPgE/OnHuuyTAMAxERERERKTVzWRcgIiIiIlLeKESLiIiIiHhIIVpERERExEMK0SIiIiIiHlKIFhERERHxkEK0iIiIiIiHFKJFRERERDykEC0iIiIi4iGFaBERERERDylEi4jIZffWW2/h7e3N4cOHy7qUv5Snn36aoKAgkpKSyroUkWuOQrSIlDuHDx/GZDLRq1evIvefOnWKFi1aYDKZGDt27JUtTgo5e/YsL774IiNGjKBWrVqu7cuXL8dkMjFq1KhC7zEMg8ceewyTyUTLli1JTEws9vjPPfccJpMJk8nE448/Xmy7J5980tXuueee+zPf0lXjsccew2w28+yzz5Z1KSLXHIVoEflLOX36NN27d2fbtm088sgjvPXWW2Vd0jXvzTff5MyZMzzxxBOlau9wOBg5ciRvvPEGnTt3ZsWKFURFRV3wfV5eXnz55Zfk5+cX2pefn8/nn3+Ol5eXx/VfzcLCwrj33nv5+OOPOXLkSFmXI3JNUYgWkb+MggD966+/8vDDDzNlypSyLumal5+fz8cff0znzp2pW7fuBdvn5eVxyy238Omnn9KnTx/i4uIIDQ0t1bl69+5NUlISc+fOLbRv/vz5JCYm0qdPH0+/haveXXfdhcPh4OOPPy7rUkSuKQrRIvKXcObMGWJiYti6dSujR4/m7bffLrbt8OHDXR/r//Fr+PDhbm1nzZrF7bffTr169fD39yckJITrr7+e7777rtjjb926lTvvvJNq1arh4+ND5cqV6dWrFz/++CPgPv2gpK8bb7zR7bjJycmMGzeOevXq4ePjQ0REBIMHD2b79u2FaqhVqxa1atUiNTWVBx54gKioKHx9fWndujVff/11ofYFNS1fvrz4iwxMnToVk8nE1KlTS2xXYOHChZw8eZJbb731gm0zMjLo06cPs2fP5o477mD27Nn4+fmV6jwAgwYNIjQ0lE8++aTQvk8++YSwsDAGDhxY7Ps9ub7Lli1jxIgRNGzYkMDAQAIDA2nXrh0fffRRkccu6M+kpCSGDRtGREQEfn5+XHfddUVe85MnT/Loo49Sv359/Pz8CA0NpXHjxowaNYq0tDS3tq1bt6ZevXql7hMRuTT+Wp9ricg1qSBAb9myhYceeoh33323VO979NFHXaOcqampRY5cT5w4EW9vb7p06ULlypVJSUnhhx9+4JZbbuHtt9/m4Ycfdmv/3Xffcccdd2AYBv369aNhw4YkJyezbt06/u///o9+/foVCseHDx/ms88+44YbbnDbd/784QMHDnDjjTdy/PhxevbsyYABA0hOTua7775j0aJFLFmyhI4dO7od12azERMTQ2ZmJnfffTdZWVnMmDGDO+64g1OnThWq/XJYsmQJANddd12J7U6dOkXv3r3ZsGEDY8aM4e2338ZkMnl0Ll9fX26//Xb+97//kZSURGRkJABJSUnMmzeP+++/H19f3yLf6+n1ffXVV9m/fz/XXXcdAwcOJDU1lYULF/LAAw+wZ88eXn/99ULnSE1NpUuXLoSEhHD33XeTnJzM9OnTiY2NZePGjTRr1gyA7OxsOnfuzOHDh+nZsycDBw7EZrNx6NAhvvjiCx5//HFCQkLcjh0dHc0XX3zB3r17adCggUfXTUQukiEiUs4cOnTIAIzY2FjjzJkzRps2bQzA6NSpk+F0Oi/4/jvvvNMAjMOHDxc65r
Bhw9zaHjhwoND7MzIyjObNmxshISFGVlaWa3tiYqIREBBgBAQEGJs2bSr0vmPHjhVZz7JlywzAePbZZ4utuVOnTobFYjEWLlzotn3Pnj1GUFCQ0bx5c7ftNWvWNACja9euRl5enlsNERERho+Pj3H8+HHX9meffdYAjGXLlhVbg2EYxqeffmoAxqefflpiuwLt27c3zGazkZubW2hfwfd90003GY0bNzYA45lnninVcc9XUPvXX39tbNiwwQCMyZMnu/ZPnjzZAIyNGzcaX3/9dZHX2tPre/DgwUJ12O12o0ePHobFYjGOHDnitg8wAOOhhx4yHA6Ha/vHH39sAMYDDzzg2vbDDz8YgDF27NhC58jIyCjyWk6ZMsUAjE8++aSIKyQil4Omc4hIuZWWlkaPHj3YtGkTvr6+rF27ljVr1lzwfXa7HQAfH58Ltq1Tp06hbYGBgQwfPpy0tDTWr1/v2v7ZZ5+RlZXFY489RuvWrQu9r1q1ahc8X1E2b97MmjVrGDZsGLGxsW77GjRowH333ce2bduKnHbw8ssv4+3t7VbDo48+Sl5eHt98881F1eOJ48ePExoaWuK1njt3Lrt27WLo0KE8//zzf+p8bdu2pUWLFnz66aeubZ9++iktW7akTZs2Rb7nYq5v7dq1Cx3Hy8uLUaNG4XA4WLZsWaH9AQEBvPrqq5jNv//TO2zYMLy8vNx+jgoUNZUlMDCwyGtZMOp+/PjxIr9HEbn0NJ1DRMqttWvXAnDPPffwwAMP0KVLF4YNG8bWrVsJCAgo9n3p6ekAxX60f77k5GReeeUVFixYwJEjR8jJyXHbn5CQ4Pr/X375BYCePXt6/L2UpOD7TEpKKnJptt27d7v+WzAlAM6Fuujo6ELtr7/+euBcePyjqVOnupaeCw8Pp06dOvTs2ROr1XpRtZ8+ffqCvzx07NiRXbt2MWPGDGJjYwvNS1++fHmhecOtWrViwIABRR5vxIgRjB07lvj4eAB27dpV4k2mF3N9MzIyeO2115g9ezYHDhwgKyvL7T3n/1wUaNCgAYGBgW7bvLy8iIyMJDU11bWta9euVK5cmVdeeYWtW7dy0003ccMNN9C4ceNip7iEh4cD56bFiMiVoRAtIuXaPffcw8cff4zZbGbixIm8+OKLPPHEE7z//vvFvuf06dN4e3sXmlf6R2fOnKF9+/YcPXqUzp07ExMTQ2hoKBaLhS1btjBnzhzy8vJc7Qtu+Kpateql+ebOqwNg3rx5zJs3r9h2fwxyERERbqOeBQpGLf94gxqcG03/oypVqvDNN9+4wrcn/Pz8yM3NLbFNq1atmDJlCrGxsYwYMcK1xF2B5cuXFxqhHjZsWLEh+q677mLChAmuGwy9vb258847iz2/p9fXZrNx4403smnTJlq3bs3dd99NhQoV8PLycs1vP//nokBwcHCRx/Xy8sLhcLheh4SEsHbtWp555hl+/PFH5s+fD0D16tV56qmneOihhwodo+CXO39//2LrF5FLS9M5RKTcateunStAA/zrX/+idevWfPDBB8TFxRX7vgMHDlCjRo0L3rj2f//3fxw9epQXX3yRVatW8c477/Diiy/y3HPPFXmjXMFNiidOnLj4b6oIBeHrnXfewTCMYr+GDRvm9r5Tp07hdDoLHa/g6XZF/RKxbNky1/GSk5N5+eWXSUhI4J577rmo2itWrOgKqSXp2LEjcXFxhISEcN9997mtcvHcc88V+l5LWomiQoUK9O/fn+nTpzN9+nQGDBhAhQoVim3v6fWdM2cOmzZtYuTIkWzatIkPPviAl156ieeee67YBwB5qkaNGkydOpWUlBQ2b97Mq6++itPpZPTo0UWurlJwjStWrHhJzi8iF6YQLSLlVoUKFdxGWq1WK59//jk+Pj6MGDGiyJHWffv2cebMGdq1a3fB4x84cACA/v37F9r3888/F9rWoUMHABYvXlzq76E0ClaFKJieUFr5+flFvqeg9qLmbZ+vYsWKTJ
w4kRYtWnDgwAG3KQel1bx5c3Jzczl69OgF27Zv356ffvqJ0NBQRo0axQcffODx+QqMGDGCjIwMMjIyGDFiRIltPb2
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"kmeans = KMeans(n_clusters=3, random_state=42)\n",
|
|||
|
"data['Cluster_KMeans'] = kmeans.fit_predict(features_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация кластеров неиерархически\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"sns.scatterplot(x='Age', y='Income', hue='Cluster_KMeans', data=data, palette='viridis', s=50)\n",
|
|||
|
"plt.title('Кластеры (K-Means)', fontsize=14)\n",
|
|||
|
"plt.xlabel('Age')\n",
|
|||
|
"plt.ylabel('Income')\n",
|
|||
|
"plt.legend(title='Кластер')\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"5. Оценим качество кластеризации с помощью коэффициента силуэта."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 16,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Коэффициент силуэта (K-Means): 0.374\n",
|
|||
|
"Коэффициент силуэта (Иерархическая кластеризация): 0.339\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"silhouette_kmeans = silhouette_score(features_scaled, data['Cluster_KMeans'])\n",
|
|||
|
"\n",
|
|||
|
"silhouette_hierarchical = silhouette_score(features_scaled, data['Cluster_Hierarchical'])\n",
|
|||
|
"\n",
|
|||
|
"print(f\"Коэффициент силуэта (K-Means): {silhouette_kmeans:.3f}\")\n",
|
|||
|
"print(f\"Коэффициент силуэта (Иерархическая кластеризация): {silhouette_hierarchical:.3f}\")\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "miivenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|