AIM-PIbd-31-Medvedkov-A-D/Lab_4/lab4.ipynb

317 lines
125 KiB
Plaintext
Raw Normal View History

2024-12-20 14:14:59 +04:00
{
"cells": [
{
"cell_type": "markdown",
"id": "e7893b9e",
"metadata": {},
"source": [
"# Лабораторная работа: Методы искусственного интеллекта\n",
"## Задача кластеризации продуктов с использованием cuML\n",
"### Вариант: Продукты\n",
"В данной работе используется библиотека cuML для GPU-ускоренного анализа данных. Цель: провести кластеризацию продуктов на основе их характеристик."
]
},
{
"cell_type": "markdown",
"id": "e3834005",
"metadata": {},
"source": [
"### Загрузка и исследование данных"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "5530d138",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'cudf.core.dataframe.DataFrame'>\n",
"RangeIndex: 162313 entries, 0 to 162312\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype\n",
"--- ------ -------------- -----\n",
" 0 category 162313 non-null object\n",
" 1 sub_category 162313 non-null object\n",
" 2 href 162313 non-null object\n",
" 3 items 162280 non-null object\n",
" 4 price 162282 non-null float64\n",
"dtypes: float64(1), object(4)\n",
"memory usage: 28.9+ MB\n",
"None\n",
" category sub_category \\\n",
"0 Groceries Fruits & Vegetables \n",
"1 Groceries Fruits & Vegetables \n",
"2 Groceries Fruits & Vegetables \n",
"3 Groceries Fruits & Vegetables \n",
"4 Groceries Fruits & Vegetables \n",
"\n",
" href \\\n",
"0 https://www.jiomart.com/c/groceries/fruits-veg... \n",
"1 https://www.jiomart.com/c/groceries/fruits-veg... \n",
"2 https://www.jiomart.com/c/groceries/fruits-veg... \n",
"3 https://www.jiomart.com/c/groceries/fruits-veg... \n",
"4 https://www.jiomart.com/c/groceries/fruits-veg... \n",
"\n",
" items price \n",
"0 Fresh Dates (Pack) (Approx 450 g - 500 g) 109.0 \n",
"1 Tender Coconut Cling Wrapped (1 pc) (Approx 90... 49.0 \n",
"2 Mosambi 1 kg 69.0 \n",
"3 Orange Imported 1 kg 125.0 \n",
"4 Banana Robusta 6 pcs (Box) (Approx 800 g - 110... 44.0 \n"
]
}
],
"source": [
"import cudf\n",
"import cuml\n",
"from cuml.preprocessing import LabelEncoder\n",
"from cuml.decomposition import PCA\n",
"from cuml.cluster import KMeans\n",
"import cupy as cp\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Загрузка данных\n",
"df = cudf.read_csv('/mnt/d/AIM-PIbd-31-Medvedkov-A-D//data/jio_mart_items.csv')\n",
"print(df.info())\n",
"print(df.head())"
]
},
{
"cell_type": "markdown",
"id": "49112908",
"metadata": {},
"source": [
"### Предварительная обработка данных"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "1e3ef9fa",
"metadata": {},
"outputs": [],
"source": [
"# Обработка пропущенных значений\n",
"df = df.dropna()\n",
"\n",
"# Кодирование категориального признака 'items'\n",
"label_encoder = LabelEncoder()\n",
"df['items_encoded'] = label_encoder.fit_transform(df['items'])\n",
"\n",
"# Нормализация числовых признаков\n",
"numeric_features = ['items_encoded', 'price']\n",
"df_scaled = df[numeric_features].astype('float32')\n",
"\n",
"# Преобразование данных в формат cupy\n",
"X = cp.asarray(df_scaled.values)"
]
},
{
"cell_type": "markdown",
"id": "ff5f1f8f",
"metadata": {},
"source": [
"### Понижение размерности и визуализация данных"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "e15c80bb",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABYO0lEQVR4nO3dd1xT5+IG8CeABJAhqIADFbeIe2KH2mLRWiudlp8trtrq1VutXltpqziq2OFtrXX3KlYcvVqrdYHWUbVinVTBPXEwVDRhyMz7+8NLSkxCEgwZh+f7+eTT5uTN4T3HjCfvOjIhhAARERGRRDhYuwJERERE5sRwQ0RERJLCcENERESSwnBDREREksJwQ0RERJLCcENERESSwnBDREREksJwQ0RERJLCcENERESSwnBDREREksJwQzYnNjYWMplM4+br64vevXtjx44d1q4eEVWyXr16abz/fXx80KVLFyxfvhwqlUqr/L59+/Dqq6/C398fzs7O8PX1xYABA7Bx40ad+z979ixkMhlcXFzw4MGDSj4asgaGG7JZM2bMwKpVq/Djjz/io48+wp07d/Diiy9i69at1q4aEVWy+vXrY9WqVVi1ahWmTJmC4uJijBgxAp988olGuejoaPTu3RvJycl4//33sXjxYkyaNAk5OTl47bXXsGbNGq19x8XFwd/fHwCwYcMGixwPWZaMF84kWxMbG4thw4bh6NGj6Ny5s3r7/fv34efnhzfeeAOrV6+2Yg2JqDL16tULd+/eRXJysnpbXl4eWrRogfv37+P+/fuoVq0aNmzYgDfeeAOvv/461qxZg2rVqmnsJyEhAUVFRXjppZfU24QQaNy4MV599VVcvXoV9+/fx969ey12bGQZbLkhu1GjRg24urrCyclJve3atWuQyWSIjY3VKDtmzBjIZDIMHTpUvW3jxo3o2rUrfHx84OrqipYtW+KLL75Aab7fu3cvZDIZfvnlF62/vWbNGshkMiQmJgIATp06haFDh6Jx48ZwcXGBv78/hg8fjnv37umse6NGjbS62mQyGfbt26dRpmx9AWD9+vWQyWRo1KiRetv58+fx3HPPwd/fH3K5HAEBARg1ahSysrLUZQoLCzF16lR06tQJXl5eqF69Op555hmtD/HS8/f1119r1Tk4OBi9evXS2NarVy+tbUePHlUfT1k5OTmYOHEiGjdujGrVqmkc9927d3Wep8fpOmePn7fKONZ9+/ZBJpPp/FXv7u6u8e9U2o167Ngxvcfx+HkbMmQIXFxccPbsWY1yYWFh8Pb2xu3bt/Xuq/Q49N0e//fJzMzEiBEj4OfnBxcXF7Rr1w4rV67U2q9KpcK8efPQpk0buLi4oHbt2ujbt6/WcenqNtb1dx88eIDx48cjICAAcrkcTZs2xRdffKGzW8kYbm5u6N69O3Jzc3Hnzh0AwJQpU+Dj44Ply5drBRvg0fksG2wA4I8//sC1a9fw1ltv4a233sL+/ftx8+bNCtWJbJeT4SJE1qFQKHD37l0IIZCZmYn58+cjJycHb7/9drnPu3TpEpYtW6a1XalUolu3bhgyZAiqVauG+Ph4TJ48GU5OTpg4cSJ69eqFgIAArF69Gq+88orGc1evXo0mTZogJCQEALBr1y5cuXIFw4YNg7+/P1JSUrB06VKkpKTg8OHDWl/0APDMM8/gvffeA/Coz3/27NnlHkdxcTE+/fRTre25ubmoX78+BgwYAE9PTyQnJ2PBggW4desWtmzZoj7WH374ARERERg5ciSys7Pxn//8B2FhYThy5Ajat29f7t82xccff6xz+6RJk7B48WKMGDECTz31FKpVq4aNGzfqDI/l6dOnDyIjIwE8ClLfffedxuOWPFZzmTdvHvbs2YMhQ4YgMTERjo6OWLJkCXbu3IlVq1ahbt26BvcRERGBF198UWNbVFSUxv2HDx+iV69euHTpEsaOHYvAwECsX78eQ4cOxYMHDzBu3Dh12REjRiA2Nhb9+vXDu+++i+LiYhw4cACHDx/WaEEt9c0336BWrVoAgFmzZmk8lpeXh549e+LWrVt4//330aBBAxw6dAhRUVFIS0vDt99+a+yp0nDlyhU4OjqiRo0auHjxIs6dO4fhw4fDw8PD6H2Uvpe7dOmC4OBguLm5Ye3atZg0aVKF6kQ2ShDZmBUrVggAWje5XC5iY2M1yl69elUAECtWrFBve/PNN0VwcLAICAgQQ4YMKfdvBQUFiZdeekl9PyoqSsjlcvHgwQP1tszMTOHk5CSio6PV2/Ly8rT2tXbtWgFA7N+/X+uxevXqiWHDhqnv7927VwAQe/fuVW9r2LChRn0XLlwo5HK56N27t2jYsGG5x/GPf/xDuLu7q+8XFxeLgoICjTL3798Xfn5+Yvjw4eptpefvq6++0tpn69atRc+ePTW29ezZU2Pb9u3bBQDRt29f8fjHSZ06dURYWJjGtujoaAFA3Llzp9zjEUKIwsJCAUCMHTtWvW39+vVa560yjrX032f9+vVaZatXr67x71T6ej169KjeY3n8vAkhREJCggAgPv/8c3HlyhXh7u4uwsPD9e6jIsfx7bffCgAiLi5Ova2wsFCEhIQId3d3oVQqhRBC7NmzRwAQH3zwgdY+VSqVxv1ly5YJAOL69et6j2/mzJmievXq4sKFCxrPnTx5snB0dBSpqanlHmPPnj1Fy5YtxZ07d8SdO3fE2bNnxQcffCAAiAEDBgghhNi8ebMAIL755pty91VWYWGhqFmzpvj000/V2/7v//5PtGvXzuh9kH1gtxTZrAULFmDXrl3YtWsX4uLi0Lt3b7z77rt6Z0AAwPHjx7F+/XrExMTAwUH3y/vu3bu4efMmYmNjcenSJTz77LPqxyIjI1FQUKDRHfHTTz+huLhYo8XI1dVV/f/5+fm4e/cuunfvDgA4ceKE1t8sLCyEXC43+tjz8vIwY8YMjB07Fg0aNNBZRqFQICMjA7t378a2bds0jsPR0RHOzs4AHnU3ZGVlobi4GJ07d9ZZv4oQQiAqKgqvvfYaunXrpvV4dnY2atasWeH95+fnAwBcXFzKLWfqsebl5eHu3bsat5KSEp37zs7O1iqrT2lLY3Z2tlHH98ILL+D999/HjBkz8Oqrr8LFxQVLliwx6rnG2r59O/z9/REREaHeVq1aNXzwwQfIycnB77//DgD4+eefIZPJEB0drbWPx1shCwsLAaDc1/P69evxzDPPwNvbW+PchYaGoqSkBPv37zdY93PnzqF27dqoXbs2WrVqhfnz56N///5Yvnw5gEctdgBMarXZsWMH7t27p3E+IiIi8NdffyElJcXo/ZDtq9LhZv/+/RgwYADq1q0LmUyGTZs2mbwPIQS+/vprNG/eHHK5HPXq1dNqoqWK6dq1K0JDQxEaGorBgwdj27ZtCAoKwtixY9UfsI+bPHkynnnmGa1+9lL5+fmoXbs2AgICMHz4cEyaNEmjObply5bo0qWLxoDl1atXo3v37mjatKl6W1ZWFsaNGwc/Pz+4urqidu3aCAwMBPDoS+5xCoUC7u7uRh/7v//9b+Tn52vNDCkrLCwM/v7+CA0NRatWrfDTTz9pPL5y5Uq0bdsWLi4uqFmzJmrXro1t27bprF9FrF69GikpKXq710JCQvDLL79gw4YNSEtLw927d5GXl2f0/kuDhJeXl8GyphxrdHS0+kuz9Hbu3Dmd+x0+fLhW2dzcXJ1lQ0NDUbt2bXh6esLb2xv/+Mc/9JYt9fXXX8PHxwdJSUn47rvv4Ovra/BYTXH9+nU0a9ZMK+i3atVK/TgAXL58GXXr1oWPj4/BfZZOnS7v9Xzx4kXEx8drnbvQ0FAAj8YBGdKoUSPs2rULv/32Gw4ePIj09HRs3bpV3RXm6ekJAEaHSeDRLKnAwEDI5XJcunQJly5dQpMmTeDm5sZJChJTpcfc5Obmol27dhg+fDheffXVCu1j3Lhx2LlzJ77++mu0adMGWVlZGgM7yXwcHBzQu3dvzJs3DxcvXkTr1q01Ht+5cyd+++039aBfXZydnbFr1y7k5eXhwIED+OKLLxAQEID3339fXSYyMhLjxo3
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Применение PCA для понижения размерности\n",
"pca = PCA(n_components=2)\n",
"reduced_data = pca.fit_transform(X)\n",
"\n",
"# Преобразуем данные из cupy в numpy\n",
"reduced_data_np = reduced_data.get()\n",
"\n",
"# Визуализация данных\n",
"plt.scatter(reduced_data_np[:, 0], reduced_data_np[:, 1])\n",
"plt.title('Визуализация данных после PCA')\n",
"plt.xlabel('PC1')\n",
"plt.ylabel('PC2')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "f2eef505",
"metadata": {},
"source": [
"### Выбор оптимального количества кластеров"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "f72195d2",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Оценка числа кластеров: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 9/9 [06:06<00:00, 40.73s/it]\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABIQAAAHWCAYAAAAGrFJtAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACVW0lEQVR4nOzdd3hUZfrG8fvMZJJJhxDS6DWQgIAgCqjgilIUYd21YEFQ0UVQV1xdsKHubxe7rorYcW0raxd1URZBLCgCotJ7T6WkkTpzfn+EGRhSCCHJmUm+n+uaK5kz58w8J1Hm5J73fV7DNE1TAAAAAAAAaDJsVhcAAAAAAACAhkUgBAAAAAAA0MQQCAEAAAAAADQxBEIAAAAAAABNDIEQAAAAAABAE0MgBAAAAAAA0MQQCAEAAAAAADQxBEIAAAAAAABNDIEQAAAAAABAE0MgBCDgPPvsszp48KD3/lNPPaWCggLrCgIAAACAAEMgBPiB1157TYZhaPny5RUee+mll2QYhsaMGSOXy2VBdf5n3rx5uv/++7Vr1y699dZbuvfeexUaGmp1WQAA4AT42/XPhRdeqPbt25/wcVOmTJFhGHVfEADUsyCrCwBQtQ8//FCTJk3SWWedpXfeeUd2u93qkvzCXXfdpYsuukj//Oc/ZbPZ9Pjjj8tmI98GAKAx4PoHABoGgRDgpxYvXqyxY8cqJSVF8+bNk9PptLokvzF48GDt2LFD69atU5s2bdS6dWurSwIAAHWA6x8AaDh8pA74oVWrVmn06NFKTEzUF198oejoaJ/HhwwZoh49emjFihUaOHCgQkND1aFDBz3//PM++y1evFiGYWjx4sU+2y+44AIZhqH7779fknT//ffLMIxqb0c/x48//qjhw4crOjpaYWFhGjx4sL777juf1/A85/r163XppZcqKipKLVq00K233qqioiKffY+uxePRRx+VYRgaMmRIpefTrFkzDRgwQK1bt65wPlWp6c/j6Pqzs7N99l2+fLkMw9Brr73ms339+vX64x//qJiYGDmdTvXr10+ffPKJzz6eofFLlizRjTfeqBYtWigqKkrjxo3TgQMHfPZt3769LrzwwgrnUNmwdE+tR8vPz1dCQkKF8x0yZIjPz1SSfvrpJ+/vGQAAqxzv+keS3n33XfXt21ehoaGKjY3VVVddpT179ngf37Nnj8aOHatWrVopJCREHTt21J133qm8vLwKz/XGG2+oTZs2atasmWbOnOndPnfuXCUlJSk2NlYPP/xwheO++OILde3aVREREbrllltkmqak8uuMTp06KSoqSlOnTvWZ6lZf1yDbt2+v9Lpk8uTJMgxD48eP99l+8OBB/fnPf1abNm0UEhKizp076+GHH5bb7a7wnI899liFc+/Ro4f3OsJzTtXdjndtVlRUpPvvv19du3aV0+lUYmKiLr74Ym3ZsqVW5yeVX+tUVovnOWbMmCGHw6GsrKwKx95www1q1qyZioqK9M0332jo0KGKjY1VaGio+vTpo9mzZ3t/39W91tE3jzlz5uh3v/ud4uLiFBISopSUFM2ePbvanw9Q3xghBPiZLVu2aPjw4QoJCdEXX3yhxMTESvc7cOCARo4cqUsvvVRjx47Vf/7zH02aNEnBwcG69tprq3z+JUuW6PPPP/fZdvHFF6tz587e+7fddpu6d++uG264wbute/fukqSvvvpKI0aMUN++fTVjxgzZbDbvG9w333yj/v37+zz3pZdeqvbt22vmzJn64Ycf9PTTT+vAgQN6/fXXq6zx4MGDPhdm1ansfE7EyR4vSWvWrNGgQYPUqlUrTZs2TeHh4frPf/6jMWPG6P3339fvf/97n/2nTJmiZs2a6f7779eGDRs0e/Zs7dixw3thVRcef/xxZWRk1Gjfv/71r3XymgAA1FZNrn9ee+01TZgwQaeddppmzpypjIwM/fOf/9R3332nn3/+Wc2aNdOWLVuUkZGhm2++Wc2bN9eaNWv09NNPa+HChfr222+9PQe/++47XXPNNRo4cKDGjh2rN954Q1u3blVhYaEefPBB3XXXXfryyy81bdo0tW3bVmPHjpUkbd26VWPGjFHnzp31j3/8Q/Pnz/f2QJo8ebJuvvlm/fzzz3ryySfVsmVLTZ8+vcpzrotrkMps3rxZL730UoXthw4d0uDBg7Vnzx7deOONatu2rb7//ntNnz5daWlpeuqpp07odbp376433njDe//FF1/UunXr9OSTT3q3nXLKKVUe73K5dOGFF2rhwoW6/PLLdeuttyovL08LFizQ6tWr1alTpxM6v6N169ZNd999tyQpOztbt912m/exq6++Wg8++KDmzp2rKVOmeLeXlJTovffe0x/+8Ac5nU59//33iouL0z333CO73a6vv/5aN910k3799VdvkHP33Xfr+uuv93mdG264QWeddVaFmmbPnq3U1FRddNFFCgoK0rx583TTTTfJ7XZr8uTJ1Z4PUG9MAJabM2eOKcn89NNPzU6dOpmSzPPPP7/K/QcPHmxKMh9//HHvtuLiYrN3795mXFycWVJSYpqmaS5atMiUZC5atMi73+mnn26OGDHClGTOmDGj0udv166dec0111TY7na7zS5dupjDhg0z3W63d/uhQ4fMDh06mOedd55324wZM0xJ5kUXXeTzHDfddJMpyfzll1+8246t5c477zTj4uLMvn37moMHD/Zur+351OZ4T/1ZWVk+z/HTTz+Zksw5c+Z4t5177rlmz549zaKiIp+f1cCBA80uXbp4t3l+z3379vX+jkzTNB955BFTkvnxxx97t7Vr18684IILKpzD5MmTzWP/6fbU6pGZmWlGRkZ6z+vo8x08eLDPz/Tzzz83JZnDhw+v8LwAANSnE7n+KSkpMePi4swePXqYhYWF3u2ffvqpKcm87777qnydBQsWmJLMBx980LvtoosuMjt06OB9787LyzM7dOhghoWFmVu3bjVNs/y9fNCgQWavXr28x91yyy1mZGSkmZ2dbZqmaZaWlppnnHGGKcn88ccfvfuNHTvWjIuL8z5/fV2DbNu2rcK2Sy+91OzRo4fZpk0bn+u5v/3tb2Z4eLi5ceNGn+edNm2aabfbzZ07d/o856OPPlrhZ5mamupzHXG0a665xmzXrl2lj1Xm1VdfNSWZTzzxRIXHPNeZJ3J+HoMGDTLPOecc7/3KnmPAgAHm6aef7nPcBx98UOF3dKy7777blGQuWbKkwmOVvc7RDh06VGHbsGHDzI4dO1b5ekB9Y8oY4EfGjx+vXbt26YorrtCXX36pd999t8p9g4KCdOONN3rvBwcH68Ybb1RmZqZWrFhR6TEffPCBfvrpJz300EO1qm/VqlXatGmTrrjiCu3bt0/Z2dnKzs5WQUGBzj33XC1ZssRnyLGkCp943HzzzZJU5Sdie/bs0TPPPKN7771XERER1dZzsudTk+P379/vPc/s7Gzl5ORUePyrr77SpZdeqry8PO9++/bt07Bhw7Rp0yafoexS+XBkh8PhvT9p0iQFBQXV2aeEf/vb3xQdHa1bbrml2v1M09T06dP1hz/8QaeffnqdvDYAACeqJtc/y5cvV2Zmpm666SafvkIXXHCBunXrps8++8y7rbS01Oe9u3fv3urXr5/P8y5cuFAjR45USEiIJCkiIkIpKSlq2bKlOnToIEneVc5++eUX7du3z3vc2WefrRYtWkgqvx7r27evJPmMkr744ouVmZmp1atXV3rOJ3sNU5UVK1bo3Xff1cyZMyssuPHuu+/qrLPOUvPmzX1+PkOHDpXL5dKSJUt89j906JDPftnZ2XW64tv777+v2NhY77Xh0aoaMV3d+XmUlJR4f69VGTdunH788Ufv1DRJeuutt9SmTRsNHjzYu+3Yn8HEiRPlcDiqvUavytEr4ubk5Cg7O1uDBw/W1q1bK1xfAg2lSQdCS5Ys0ahRo5SUlCTDMPTRRx+d0PFFRUUaP368evbsqaCgII0ZM6ba/b/77jsFBQWpd+/eta4Zjdv+/fv15ptv6l//+pd69+6tW2+9tco3iKSkJIWHh/ts69q1q6Ty+dbHcrlcuuu
"text/plain": [
"<Figure size 1400x500 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Оценка инерции и коэффициента силуэта\n",
"from cuml.metrics.cluster import silhouette_score\n",
"from tqdm import tqdm # Импорт библиотеки для отображения прогресса\n",
"\n",
"# Оценка инерции и коэффициента силуэта\n",
"inertia = []\n",
"silhouette_scores = []\n",
"k_range = range(2, 11)\n",
"\n",
"# tqdm для отображения прогресса\n",
"for k in tqdm(k_range, desc=\"Оценка числа кластеров\"):\n",
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
" kmeans.fit(reduced_data)\n",
" inertia.append(kmeans.inertia_)\n",
" silhouette_scores.append(silhouette_score(reduced_data, kmeans.labels_))\n",
"\n",
"# Построение графиков\n",
"plt.figure(figsize=(14, 5))\n",
"\n",
"# График инерции\n",
"plt.subplot(1, 2, 1)\n",
"plt.plot(k_range, inertia, marker='o')\n",
"plt.title('Критерий инерции')\n",
"plt.xlabel('Число кластеров')\n",
"plt.ylabel('Инерция')\n",
"\n",
"# График коэффициента силуэта\n",
"plt.subplot(1, 2, 2)\n",
"plt.plot(k_range, silhouette_scores, marker='o')\n",
"plt.title('Коэффициент силуэта')\n",
"plt.xlabel('Число кластеров')\n",
"plt.ylabel('Силуэт')\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "180e85ac",
"metadata": {},
"source": [
"### Кластерный анализ"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "dd573024",
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAHHCAYAAABDUnkqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABpZUlEQVR4nO3dd1gUV9sG8HuWsoAUsVBUFBV7xV7eWBKMGpNIotFoEmNiTDSaaDSNFOuXYCyJxho1tthL1Fixa4xYULEXrFgoKmWpC+yc7w9kZYWFRWEXhvt3XXvpzpyZfWbY3Xn2nDPnSEIIASIiIiKFUFk6ACIiIqLCxOSGiIiIFIXJDRERESkKkxsiIiJSFCY3REREpChMboiIiEhRmNwQERGRojC5ISIiIkVhckNERESKwuSGiEqdxMREuLm5YcWKFZYOpVh5++230adPH0uHQfTcmNwQFQNLliyBJEkICQnJsW7BggWQJAn+/v7Q6XQWiE55ZsyYAScnJ7z99tv6ZePGjYMkSXj48KFB2Tt37qBmzZooV64cTp06BQAYOHAgJEmCs7MzUlJScuw/LCwMkiRBkiRMnTq1aA+mEH3zzTfYsGEDzpw5Y+lQiJ4LkxuiYmzjxo0YOnQoXnjhBaxevRpWVlaWDqnES09Px4wZM/DRRx/lez7v3buHzp07IyYmBrt370azZs3066ytrZGcnIwtW7bk2G7FihWws7Mr9NiLmq+vL1q0aIFp06ZZOhSi58LkhqiYOnDgAPr164f69etjy5YtJfJiWRxt3boVDx48yLf55f79++jcuTMePXqE3bt3o3nz5gbr1Wo1XnrpJaxatSrHtitXrkSPHj0KNW5z6dOnD/7++28kJiZaOhSiZ8bkhqgYCg0NRc+ePeHp6YmgoCC4uLjkKHPr1i1908fTj+ymTp2Kdu3aoXz58rC3t0fz5s2xfv36XF93+fLlaNWqFRwcHODq6ooOHTpg165dAABvb2+jrydJEry9vfX7kWUZ06dPR4MGDWBnZwd3d3d88skniI2NNXg9b29vvPrqq9i1axeaNm0KOzs71K9fH3///bdBubya7bJ06tQJnTp1yuu0AgA2bdoEb29v1KxZ02iZiIgIdO7cGdHR0di1axdatGiRa7n+/ftjx44diIuL0y87ceIEwsLC0L9//1y3iYuLw8iRI+Hl5QW1Wg0fHx/88ssvkGXZoJypfzdJkjB8+HBs2rQJDRs2hFqtRoMGDbBz506DcgkJCRg5ciS8vb2hVqvh5uaGLl266JvasnTp0gVJSUnYvXu30fNDVNwxuSEqZq5fv45u3bpBrVYjKCgInp6eeZb/+OOP8ddff+Gvv/7CG2+8kWP9jBkz4OvriwkTJuDnn3+GtbU13nrrLWzbts2g3Pjx4/Hee+/BxsYGEyZMwPjx4+Hl5YV9+/YBAKZPn65/ne+++w4A8N133+mXTZ8+Xb+vTz75BF999RXat2+PGTNm4IMPPsCKFSvQtWtXpKenG7xuWFgY+vbti+7duyMwMFAfX1FdXI8cOWLQvPS0qKgovPjii4iMjERQUBBatmxptOybb74JSZIMkrGVK1eibt26ub5GcnIyOnbsiOXLl2PAgAH4/fff0b59ewQEBGDUqFEGZU39uwHA4cOH8emnn+Ltt9/G5MmTkZqail69euHRo0f6MkOGDMHcuXPRq1cvzJkzB19++SXs7e1x6dIlg33Vr18f9vb2+O+//4weN1GxJ4jI4hYvXiwAiK1bt4qaNWsKAOLll1/Oc5uwsDABQCxdulS/bOzYseLpj3VycrLB87S0NNGwYUPx4osvGuxLpVKJN954Q+h0OoPysizneO39+/cLAGL//v051v37778CgFixYoXB8p07d+ZYXq1aNQFAbNiwQb8sPj5eeHp6Cl9fX/2yrPNz4sSJ3E6FEEKIjh07io4dOxpdL4QQ6enpQpIkMXr06Bzrss5dtWrVhLOzswgODja6n/fff1+UKVNGCCFE7969xUsvvSSEEEKn0wkPDw8xfvx4cfPmTQFATJkyRb/dxIkTRZkyZcTVq1cN9vftt98KKysrER4erl9myt9NCCEACFtbW3Ht2jX9sjNnzggAYubMmfplLi4uYtiwYUaPKbvatWuL7t27m1SWqDhizQ1RMTJw4EDcuXMH/fv3x65du7Bu3TqjZdPS0gBk9v3Ii729vf7/sbGxiI+PxwsvvGDQHLFp0ybIsowxY8ZApTL8Wni6mSs/69atg4uLC7p06YKHDx/qH82bN4ejoyP2799vUL5SpUoGNU7Ozs4YMGAATp8+jcjISIOy8fHxePjwIRISEgoUU5aYmBgIIeDq6mq0TFRUFBwdHfOtMcvSv39/HDhwAJGRkdi3bx8iIyONNkmtW7cOL7zwAlxdXQ3OjZ+fH3Q6HQ4dOqQva8rfLYufn59BM1vjxo3h7OyMGzdu6JeVLVsWx44dw/379/M9pqz4iEqqUp3cHDp0CK+99hoqVaoESZKwadOmAu9DCIGpU6eidu3aUKvVqFy5Mn766afCD5ZKhZiYGCxfvhxLly5F06ZNMWLECMTHx+daNqufh6OjY5773Lp1K9q0aQM7OzuUK1cOFStWxNy5cw32e/36dahUKtSvX/+5jyEsLAzx8fFwc3NDxYoVDR6JiYmIjo42KO/j45MjgapduzaAzH5F2fn5+aFixYpwdnaGq6srPv30UyQlJRU4RiGE0XXLly9HTEwMunTpkiPW3LzyyitwcnLCmjVrsGLFCrRs2RI+Pj65lg0LC8POnTtznBc/Pz8AMHg9U/5uWapWrZpjmaurq0Efp8mTJ+P8+fPw8vJCq1atMG7cOIPkJzshRIGTWqLixNrSAVhSUlISmjRpgg8//BBvvvnmM+1jxIgR2LVrF6ZOnYpGjRohJiYGMTExhRwplRZTpkzBW2+9BQCYP38+2rRpg4CAAMyZMydH2axaDQ8PD6P7+/fff/H666+jQ4cOmDNnDjw9PWFjY4PFixdj5cqVRXIMsiznOUBexYoVn3nfs2fPRu3ataHVanHgwAH9GDK5nZ/clCtXDpIk5ejYnF3Hjh2xdu1avPnmm+jatSsOHDiQa4fuLGq1Gm+++SaWLl2KGzduYNy4cUbLyrKMLl264Ouvv851fVZSV9C/m7Fb2rMncX369MELL7yAjRs3YteuXZgyZQp++eUX/P333+jevbvBdrGxsahVq5bR4yAq7kp1ctO9e/ccH+rstFotvv/+e6xatQpxcXFo2LAhfvnlF/0dGZcuXcLcuXNx/vx51KlTBwBQvXp1c4ROCtWhQwf9/1u2bIlhw4Zh9uzZGDBgANq0aWNQ9uLFi5AkSf/ey82GDRtgZ2eHoKAgg+arxYsXG5SrWbMmZFnGxYsX0bRp0+c6hpo1a2LPnj1o3769QdOKMdeuXctRU3D16lUAMLgDCwBatWqlv3OpR48eOHPmTI67gvJibW2NmjVr4ubNm3mWe+2117Bo0SK8//77+ru58jqW/v37Y9GiRVCpVAYDAz6tZs2aSExM1NfUGGPq362gPD098emnn+LTTz9FdHQ0mjVrhp9++sngezAjIwN37tzB66+//lyvRWRJpbpZKj/Dhw9HcHAwVq9ejbNnz+Ktt95Ct27dEBYWBgDYsmULatSoga1bt6J69erw9vbGRx99xJobKjQ//fQTPD098fHHHyMjI0O/PCMjAxs2bECrVq3ybJaysrKCJEkGIxvfunUrRxOsv78/VCoVJkyYkOOW5LyacHLTp08f6HQ6TJw4Mce6jIwMg9umgczxZDZu3Kh/rtFosGzZMjRt2jTPWikgsyakoAMbtm3bNs9byrO89957mD59Og4fPoxevXrluMsru86dO2PixImYNWtWnjH36dMHwcHBCAoKyrEuLi5O/zc29e9mKp1Ol6M5y83NDZUqVYJWqzVYfvHiRaSmpqJdu3bP9FpExUGprrnJS3h4OBYvXozw8HBUqlQJAPDll19i586dWLx4MX7++WfcuHEDt2/fxrp167Bs2TLodDp88cUX6N27t/72WaLn4eTkhJkzZ+LNN9/EtGnT8M0332D
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Кластеризация с использованием KMeans\n",
"optimal_k = 4 # Выбираем на основе графиков\n",
"kmeans = KMeans(n_clusters=optimal_k, random_state=42)\n",
"labels = kmeans.fit_predict(reduced_data)\n",
"\n",
"# Преобразуем данные из cupy в numpy\n",
"reduced_data_np = reduced_data.get()\n",
"labels_np = labels.get()\n",
"\n",
"# Визуализация кластеров\n",
"plt.scatter(reduced_data_np[:, 0], reduced_data_np[:, 1], c=labels_np, cmap='viridis')\n",
"plt.title('Кластеры (KMeans)')\n",
"plt.xlabel('PC1')\n",
"plt.ylabel('PC2')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"id": "407d268e",
"metadata": {},
"source": [
"### Оценка качества кластеризации"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "d00795e2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Силуэт для кластеризации: 0.58\n"
]
}
],
"source": [
"# Оценка коэффициента силуэта\n",
"silhouette = silhouette_score(reduced_data, labels)\n",
"print(f'Силуэт для кластеризации: {silhouette:.2f}')"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}