238 lines
301 KiB
Plaintext
238 lines
301 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0wAAAIjCAYAAAAwSJuMAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5wU5f3A8c/MbLnee+F6o3dFOoKAFaNiiQV7b7EkmuQXjYlGY28kGgPEYIwFNIo0aUqTetSDq1zvvW6Zmd8fBwvrFfb0Dg543r7uJbvzzMyzu3e7+53n+3wfSdd1HUEQBEEQBEEQBKED+XR3QBAEQRAEQRAEob8SAZMgCIIgCIIgCEIXRMAkCIIgCIIgCILQBREwCYIgCIIgCIIgdEEETIIgCIIgCIIgCF0QAZMgCIIgCIIgCEIXRMAkCIIgCIIgCILQBREwCYIgCIIgCIIgdEEETIIgCIIgCIIgCF0QAZMgCIIgCIIgCEIXRMAkCIIgCIIgCILQBREwCYIgnME+++wzJEnq9Gfw4MGnu3tCH3vmmWecXnMPDw8GDhzI7373OxoaGjq0z8nJ4e677yY+Ph43Nzd8fHwYP348b7zxBq2trR3aq6pKREQEkiSxfPnyU/GQBEEQ+h3D6e6AIAiC8PM9/fTTpKWlOW7/+c9/Po29EU61+fPn4+XlRVNTE6tWreLPf/4za9euZdOmTUiSBMCyZcu45pprMJvN3HzzzQwePBir1crGjRt54oknOHDgAO+9957TcdeuXUtpaSmxsbEsXryY2bNnn46HJwiCcFqJgEkQBOEsMGPGDKZMmeK4/Y9//IOqqqrT1yHhlLr66qsJCgoC4J577uGqq65iyZIlbN26lXHjxpGXl8d1111HTEwMa9euJTw83LHv/fffT3Z2NsuWLetw3H//+9+MHDmSW265haeffprm5mY8PT1P2eMSBEHoD0RKniAIwhnMarUCIMsnfzuvqanh8ccfZ8iQIXh5eeHj48Ps2bPZs2ePU7v169cjSRKfffZZh2N4eXkxb948x+2FCxciSRI7duxwaldVVYUkSTzzzDOO+46lj3UXyMXGxjodH6Curo5HHnmE6OhozGYziYmJvPjii2iadtLHHBsb22XK4rGRl2PsdjvPPfccCQkJmM1mYmNjefrpp7FYLB2Ou3z5ciZPnoy3tzc+Pj6MGTOGjz76yKnNkSNHXDqvpmm8/vrrDBo0CDc3N0JDQ7n77rupra096ePryrRp0wDIy8sD4KWXXqKpqYkPPvjAKVg6JjExkYcfftjpvtbWVpYuXcp1113H3LlzaW1t5csvv/zJfRIEQThTiREmQRCEM9ixgMlsNp+0bW5uLl988QXXXHMNcXFxlJeX8/e//53Jkydz8OBBIiIi+rq7PdbS0sLkyZMpLi7m7rvvZsCAAWzevJmnnnqK0tJSXn/99ZMeY/jw4Tz22GNO9/3rX/9i9erVTvfdcccdLFq0iKuvvprHHnuMH374gRdeeIGMjAyWLl3qaLdw4UJuu+02Bg0axFNPPYWfnx+7d+9mxYoV3HDDDR3Of9dddzFx4kQAlixZ4nQsgLvvvpuFCxdy66238tBDD5GXl8fbb7/N7t272bRpE0aj0dWnyyEnJweAwMBAAL766ivi4+O54IILXD7G//73P5qamrjuuusICwtjypQpLF68uNPHKAiCcDYTAZMgCMIZrL6+HgB3d/eTth0yZAiZmZlOo1E33XQTqampfPDBB/z+97/vs37+VK+++io5OTns3r2bpKQkoD3AiIiI4K9//SuPPfYY0dHR3R4jMjKSG2+80em+rVu3OgVMe/bsYdGiRdxxxx28//77ANx3332EhITw8ssvs27dOqZOnUp9fT0PPfQQY8eOZf369bi5uTmOoeu60znsdjsA48ePd5w/OzvbKWDauHEj//jHPzoEIlOnTmXWrFl8+umnLgUoNTU1AI45TO+++y6hoaFMnDiRhoYGiouLueKKK056nBP9+9//5oILLnA8v9dddx333XcflZWVBAcH9+hYgiAIZzKRkicIgnAGq66uBnDpC6zZbHYES6qqUl1djZeXFykpKezatatD+8bGRqqqqpx+ulJfX+/U7tgX+M7U1NRQVVVFc3PzSfv86aefMnHiRPz9/Z2OP336dFRV5bvvvjvpMVzxzTffAPCrX/3K6f5jI1PH5vesXr2axsZGfvOb3zgFS0CHVDtXRv8+/fRTfH19mTFjhtPjGzVqFF5eXqxbt86l/qekpBAcHExcXBx33303iYmJLFu2DA8PD0e1PG9vb5eOBe2/VytXruT666933HfVVVchSRKffPKJy8cRBEE4G4gRJkEQhDNYfn4+BoPBpYBJ0zTeeOMN3n33XfLy8lBV1bHtWOrWiW677TaX+zF9+nSX26akpDj+HRISwp133smzzz6Loigd2mZlZbF3794uH19FRYXL5+1Ofn4+siyTmJjodH9YWBh+fn7k5+cDx1PdXCnZXldXB7TP++pKVlYW9fX1hISEdLrd1cf3+eef4+Pjg9FoJCoqioSEBMc2Hx8foD0AdtV///tfbDYbI0aMIDs723H/eeedx+LFi7n//vtdPpYgCMKZTgRMgiAIZ7DDhw8THx+PwXDyt/Pnn3+e3//+99x2220899xzBAQEIMsyjzzySKcFFP7v//7PMffmmMsuu6zTY7/zzjskJyc7bjc0NHDVVVd12vbYl/uWlhaWLl3Kn//8Z3x8fHjyySc7tNU0jRkzZnS6DXA6Z2/48SjRz1FWVga0B11d0TSNkJAQFi9e3Ol2V1PfJk2a5KiS92M+Pj5ERESwf/9+l44FOPozfvz4Trfn5uYSHx/v8vEEQRDOZCJgEgRBOENZLBbS09OZM2eOS+0/++wzpk6dygcffOB0f11dXadftocMGdJh5KizUSCAsWPHMnr0aMft7tL3Tvxyf/nll7Np0yZWrFjRaVCUkJBAU1NTj0awfoqYmBg0TSMrK8tpPavy8nLq6uqIiYlx9Adg//79HUajfuzgwYNIkuQ0ovZjCQkJfPvtt4wfP96leWg/1aWXXsp7773Hli1bGDduXLdt8/Ly2Lx5Mw888ACTJ0922qZpGjfddBMfffQRv/vd7/qsv4IgCP2JmMMkCIJwhvroo4+wWCxceOGFLrVXFKVDYYJPP/2U4uLivuieS3RdR9f1LgOxuXPnsmXLFlauXNlhW11dnaOwws918cUXA3Souvfqq68CcMkllwBw0UUX4e3tzQsvvEBbW5tT2xOfW7vdzueff87YsWO7TcmbO3cuqqry3HPPddhmt9sdaX0/15NPPomnpyd33HEH5eXlHbbn5OTwxhtvAMdHl5588kmuvvpqp5+5c+cyefLkLkfEBEEQzkZihEkQBOEM09zczFtvvcUf//hHRxD073//26lNeXk5TU1N/Pvf/2bGjBmEhoZy6aWX8sc//pFbb72VCy64gH379rF48eJTnlq1du1ap5S87OxsHnnkkU7bPvHEE/zvf//j0ksvZd68eYwaNYrm5mb27dvHZ599xpEjR7pMReuJYcOGccstt/Dee+9RV1fH5MmT2bZtG4sWLWLOnDlMnToVaE9ve+2117jjjjsYM2YMN9xwA/7+/uzZs4eWlhYWLVrEt99+y+9//3v27t3LV1991e15J0+ezN13380LL7xAeno6F110EUajkaysLD799FPeeOMNrr766p/9+BISEvjoo4+49tprSUtL4+abb2bw4MFYrVY2b97Mp59+6lj/avHixQwfPrzL6oOXX345Dz74ILt27WLkyJE/u2+CIAj9nQiYBEEQzjCVlZU89dRTjtt33313l21vuukm1q1bR2hoKE8//TTNzc189NFH/Pe//2XkyJEsW7aM3/zmN6ei2w7XXnst0F4KPS4ujtdee63LIgIeHh5s2LCB559/nk8//ZR//etf+Pj4kJyczLPPPouvr2+v9esf//gH8fHxLFy4kKVLlxIWFsZTTz3FH/7wB6d2t99+OyEhIfzlL3/hueeew2g0kpqayqOPPgq0r19kMpn45ptvmDlz5knP+7e//Y1
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"from sklearn.decomposition import PCA\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"\n",
|
|||
|
"# Создание датафрейма\n",
|
|||
|
"data = pd.read_csv(\"car_price_prediction.csv\")\n",
|
|||
|
"\n",
|
|||
|
"df = pd.DataFrame(data).head(200)\n",
|
|||
|
"\n",
|
|||
|
"# Замена '-' на NaN и обработка числовых данных\n",
|
|||
|
"df.replace(\"-\", np.nan, inplace=True)\n",
|
|||
|
"df[\"Levy\"] = pd.to_numeric(df[\"Levy\"], errors=\"coerce\").fillna(0)\n",
|
|||
|
"df[\"Mileage\"] = pd.to_numeric(df[\"Mileage\"], errors=\"coerce\").fillna(0)\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование категориальных переменных в числовые\n",
|
|||
|
"categorical_columns = [\n",
|
|||
|
" \"Manufacturer\",\n",
|
|||
|
" \"Category\",\n",
|
|||
|
" \"Leather interior\",\n",
|
|||
|
" \"Fuel type\",\n",
|
|||
|
" \"Gear box type\",\n",
|
|||
|
" \"Drive wheels\",\n",
|
|||
|
" \"Wheel\",\n",
|
|||
|
" \"Color\",\n",
|
|||
|
"]\n",
|
|||
|
"encoder = OneHotEncoder()\n",
|
|||
|
"encoded_categorical = encoder.fit_transform(df[categorical_columns]).toarray()\n",
|
|||
|
"\n",
|
|||
|
"# Нормализация числовых данных\n",
|
|||
|
"numerical_columns = [\n",
|
|||
|
" \"Price\",\n",
|
|||
|
" \"Levy\",\n",
|
|||
|
" \"Prod. year\",\n",
|
|||
|
" \"Engine volume\",\n",
|
|||
|
" \"Mileage\",\n",
|
|||
|
" \"Cylinders\",\n",
|
|||
|
" \"Airbags\",\n",
|
|||
|
"]\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"scaled_numerical = scaler.fit_transform(df[numerical_columns])\n",
|
|||
|
"\n",
|
|||
|
"# Объединение данных\n",
|
|||
|
"features = np.hstack([scaled_numerical, encoded_categorical])\n",
|
|||
|
"\n",
|
|||
|
"# Снижение размерности с помощью PCA\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"pca_result = pca.fit_transform(features)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов PCA\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=pca_result[:, 0], y=pca_result[:, 1], hue=df[\"Category\"], palette=\"Set2\", s=100\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Данные после PCA\")\n",
|
|||
|
"plt.xlabel(\"PCA Component 1\")\n",
|
|||
|
"plt.ylabel(\"PCA Component 2\")\n",
|
|||
|
"plt.legend(title=\"Category\", loc=\"upper right\")\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1sAAAHWCAYAAACBjZMqAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAACB7klEQVR4nOzdeVhUZf8G8PvMMMOw4yAwIIu4gwsqJuK+IIhGbtVPU9PULNPMJStb3FpM87Xc0ixTy6U9SysDV1wQFcV9Q1FcWFRk3wZmfn8YkxOgiBzOMNyf6+J6nXOemfM9X3mNm/Oc5wh6vV4PIiIiIiIiqlIyqQsgIiIiIiIyRwxbREREREREImDYIiIiIiIiEgHDFhERERERkQgYtoiIiIiIiETAsEVERERERCQChi0iIiIiIiIRMGwRERERERGJgGGLiIiIiIhIBAxbREREREREImDYIiIiIiIiEgHDFhERVbm1a9dCEAQIgoB9+/aV2q/X6+Hp6QlBEPDkk09KUCEREZH4GLaIiEg0KpUKGzduLLV9z549uH79OiwtLSWoioiIqHowbBERkWj69u2LH3/8EUVFRUbbN27ciICAAGg0GokqIyIiEh/DFhERiWbo0KG4c+cOIiMjDdsKCwvx008/4bnnnivzPTqdDp999hmaN28OlUoFV1dXvPTSS7h7965hTP369Q3TFMv6ql+/vmFsTk4Opk2bBk9PT1haWqJp06ZYuHAh9Hp9qWPv3r273M+sqFGjRpX5/tmzZxuN27lzJ7p06QIbGxs4Ojqif//+OHv2rNGY2bNnlzr2rl27YGlpiZdfftlozIO+du/ebXj/ihUr0KJFC1hbWxuN+emnnyp8jkREVDEWUhdARETmq379+ggKCsKmTZsQFhYGAPjrr7+QkZGBIUOGYMmSJaXe89JLL2Ht2rV44YUXMGnSJCQkJGDZsmU4duwY9u/fD4VCgc8++wzZ2dkAgLNnz+Kjjz7C22+/DV9fXwCAra0tgHv3hj311FPYtWsXxowZg9atW+Pvv//G9OnTcePGDXz66adl1j1p0iQ88cQTAIBvvvnGKCxWRN26dY0+e8SIEUb7t2/fjrCwMDRo0ACzZ89GXl4eli5dik6dOuHo0aNGYfF+x48fx4ABA9C3b18sX74cADBo0CA0atTIMGbKlCnw9fXFuHHjDNtK+vL999/jlVdeQffu3fHqq6/CxsbG0D8iIhKBnoiIqIqtWbNGD0B/+PBh/bJly/R2dnb63NxcvV6v1z/zzDP6Hj166PV6vd7b21vfr18/w/v27t2rB6DfsGGD0edt27atzO16vV6/a9cuPQD9rl27Su3bvHmzHoD+gw8+MNr+9NNP6wVB0MfHxxttj4iI0APQ//TTT4ZtEyZM0D/Kfy6HDRum9/HxMdoGQD9r1izD69atW+tdXFz0d+7cMWw7fvy4XiaT6Z9//nnDtlmzZhmOfeXKFb2bm5u+c+fO+ry8vHKP7+3trR85cmSZ+4YOHap3dHQ0en9J/3788ccKnyMREVUMpxESEZGonn32WeTl5WHr1q3IysrC1q1by51C+OOPP8LBwQG9e/fG7du3DV8BAQGwtbXFrl27HunYf/75J+RyOSZNmmS0fdq0adDr9fjrr7+Mtufn5wO4t7BHZRUWFj5w4Y+kpCTExcVh1KhRUKvVhu2tWrVC79698eeff5Z6z507dxAaGgo7Ozv8/vvvla4vKysL1tbWj3V+RERUcQxbREQkKmdnZwQHB2Pjxo345ZdfUFxcjKeffrrMsRcvXkRGRgZcXFzg7Oxs9JWdnY3U1NRHOvbVq1fh7u4OOzs7o+0l0+quXr1qtP327dsAAAcHh0c6zv3S09MN0xjLqwkAmjZtWmqfr68vbt++jZycHKPtTz75JM6fP4/09PQy7zWrqKCgINy8eROzZ89GYmIibt++jYyMjEp/HhERPRjv2SIiItE999xzePHFF5GcnIywsDA4OjqWOU6n08HFxQUbNmwoc7+zs7OIVQJXrlwBgHLvmaqI5ORkeHt7V01B/zh37hz++usvPPvss5g2bRrWrFlTqc+ZMmUKzp8/j/fffx9z5syp0hqJiKg0XtkiIiLRDRw4EDKZDAcPHix3CiEANGzYEHfu3EGnTp0QHBxc6svf3/+Rjuvt7Y2bN28iKyvLaPu5c+cM++935MgRaDQaeHh4PNJxSmi1WsTHxxuunJVXEwCcP3++1L5z586hbt26sLGxMdr++++/o0+fPpg3bx7Wrl2LHTt2VKo+KysrfPnll2jevDk6d+6MyMhILFy4sFKfRURED8ewRUREorO1tcWKFSswe/ZshIeHlzvu2WefRXFxMd5///1S+4qKipCenv5Ix+3bty+Ki4uxbNkyo+2ffvopBEEwrJAI3LsvateuXXjqqace6Rj3++2335CXl4eePXuWO8bNzQ2tW7fGunXrjM7n1KlTiIiIQN++fUu9p0uXLgCAV155BR07dsRLL72EvLy8StU4Y8YMJCYmYv369QgODkZAQEClPoeIiB6O0wiJiKhajBw58qFjunXrhpdeegnz5s1DXFwcQkJCoFAocPHiRfz4449YvHhxufd7lSU8PBw9evTAO++8gytXrsDf3x8RERH47bffMHnyZDRs2BAAEB0djbfeegt5eXlwdnbG+vXrDZ9x4cIFAMD69esxcODAUledACA3NxezZs3C559/jo4dOyIkJOSBdX3yyScICwtDUFAQxowZY1j63cHBodTzuO4nCAK++uortG7dGrNmzcKCBQsq3Avg3pLzn376Kb799tsqn+pIRESlMWwREZFJWblyJQICAvDFF1/g7bffhoWFBerXr4/hw4ejU6dOj/RZMpkMv//+O2bOnInvv/8ea9asQf369fHJJ59g2rRphnFffPEFoqKiAAAffvhhmZ81YsQIJCQklBm27t69i++//x7jxo3DnDlzIJM9eOJIcHAwtm3bhlmzZmHmzJlQKBTo1q0b5s+fDx8fnwe+19fXF++88w7ef/99DB06FG3atHlYGwDcu3I3cuRIDBkyBMOGDavQe4iI6PEI+sdZ1oiIiMgMjBo1CgCwdu3acscIgoCEhITHWjyDiIhqF96zRUREREREJAJOIyQiolqvY8eODx0zbNiwBz4/i4iI6L84jZCIiIiIiEgEnEZIREREREQkAoYtIiIiIiIiEfCerQrS6XS4efMm7OzsIAiC1OUQEREREZFE9Ho9srKy4O7u/sDHfTBsVdDNmzfh6ekpdRlERERERGQirl27Bg8Pj3L3M2xVkJ2dHYB7DbW3t5e0Fq1Wi4iICISEhEChUEhaizlif8XF/oqL/RUX+ysu9ldc7K+42F9xmVp/MzMz4enpacgI5WHYqqCSqYP29vYmEbasra1hb29vEt9s5ob9FRf7Ky72V1zsr7jYX3Gxv+Jif8Vlqv192O1FXCCDiIiIiIhIBAxbREREREREImDYIiIiIiIiEgHv2SIiIiIiMnHFxcXQarVSlyEZrVYLCwsL5Ofno7i4WPTjyeVyWFhYPPYjnxi2iIiIiIhMWHZ2Nq5fvw69Xi91KZLR6/XQaDS4du1atT3z1traGm5ublAqlZX+DIYtIiIiIiITVVxcjOvXr8Pa2hrOzs7VFjRMjU6nQ3Z2NmxtbR/4EOGqoNfrUVhYiFu3biEhIQGNGzeu9DEZtoiIiIiITJRWq4Ver4ezszOsrKykLkcyOp0OhYWFUKlUooctALCysoJCocDVq1cNx60MLpBBRERERGTiausVLSlVRahj2CIiIiIiIhIBw1YNU6zTIyYhDbG3BcQkpKFYV3tvlCQiIiIiMmW8Z6sG2XYqCXO2nEFSRj4AOb65eARuDirMCvdDnxZuUpdHRERERCaqWKfHoYQ0pGblw8VOhfY+ashl5jk1cffu3ejRowfu3r0LR0dHSWth2Kohtp1Kwvj1R/Hf61jJGfkYv/4oVgxvy8BFRERERKUY/8L+HrF/YT9q1Cikp6dj8+bNonx+ie7du6N169b47LPPDNs6duyIpKQkODg4iHrsiuA0whqgWKfHnC1nSgUtAIZtc7a
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x500 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAHWCAYAAACbsXOkAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAACItUlEQVR4nOzdd3hUVf7H8fekN3pIIQRCkS5dqvQAizQFC1gorrg/AQFjWdAVsNFEFiwL6lpWF1fsXaSHKtUgvQnSE1AgkEDa3N8flwzEBExgJndm8nk9T56ZuffMnc+chJDv3HPPsRmGYSAiIiIiIiLXxcfqACIiIiIiIt5AxZWIiIiIiIgTqLgSERERERFxAhVXIiIiIiIiTqDiSkRERERExAlUXImIiIiIiDiBiisREREREREnUHElIiIiIiLiBCquREREREREnEDFlYiIiIiIiBOouBIRcVPvvvsuNpuNDRs25Nv35ptvYrPZuPXWW8nJySmWPL169SIuLq7Izxs5ciQ2m835gURERNyMiisREQ/z+eef89BDD9GuXTs+/PBDfH19rY4kIiIiqLgSEfEoy5YtY+DAgdSrV4+vv/6aoKAgqyOJiIjIRSquREQ8RFJSEn379iU6OpoffviBMmXK5Gvz8ccf06xZM4KDgwkPD+fee+/lyJEjjv1Hjhxh4MCBxMTEEBgYSPXq1XniiSc4e/ZsvmO9//77xMbGUrZsWSZPnuzYPm/ePCpVqkR4eDhTp07N97wffviBWrVqERYWxqhRozAMAzALwxo1alC6dGkSEhLyDGdctmwZNpuNZcuW5TlWz549sdlsTJw40bFt4sSJ2Gw2Tp48mafthg0bsNlsvPvuu45tBw4cyLcNYMSIEdhsNoYMGZJn++nTpxkzZgyxsbEEBgZSs2ZNpk6dit1uz3fM6dOn53vvDRo0oGPHjnne09W+Ln9fBblw4QITJ06kVq1aBAUFER0dTb9+/di3b981vT+Ajh07Fpgl9xgTJkzA39+fEydO5Hvugw8+SNmyZblw4QIrVqwgPj6e8PBwgoODadKkCbNnz3Z8v6/2Wpd/5XrnnXfo3LkzERERBAYGUq9ePWbPnn3V/hERcTd+VgcQEZE/t2/fPv7yl78QGBjIDz/8QHR0dL427777LkOHDuWmm25i8uTJJCcnM2vWLFatWsVPP/1E2bJl2bdvH8nJyTz88MOUK1eObdu28fLLL7N48WJWrlxJcHAwAKtWrWLw4MG0adOGgQMH8v777/PLL79w/vx5nn32WZ588kkWLFjA2LFjqVKlCgMHDgTgl19+4dZbb6VmzZpMmjSJ+fPnO64ZGzFiBA8//DA//fQT//znP6lYsSLjxo274ntevnw53333ndP7cu/evbz55pv5tqenp9OhQweOHDnC3/72N6pUqcLq1asZN24cx44dY+bMmUV6nbp16/L+++87Hr/xxhvs2LGDf/7zn45tDRs2vOLzc3Jy6NWrF4sXL2bAgAGMHj2as2fPsnDhQrZu3UqNGjWK9P4uV6dOHZ566ikATp48ySOPPOLYd9999/Hss88yb948Ro4c6diemZnJJ598Qv/+/QkKCmL16tVERETwj3/8A19fXxITExk+fDg///yzoyh66qmneOCBB/K8zoMPPki7du3yZZo9ezb169enT58++Pn58fXXXzN8+HDsdjsjRoy46vsREXEbhoiIuKV33nnHAIxvvvnGqFGjhgEY3bp1K7BtZmamERERYTRo0MA4f/68Y/s333xjAMb48eOv+DoLFy40AOPZZ591bOvTp49RrVo148KFC4ZhGMbZs2eNatWqGSEhIcYvv/xiGIZh2O12o23btkajRo0czxs1apRRqlQp4+TJk4ZhGEZWVpbRqlUrAzDWrl3raDdw4EAjIiLCcfylS5cagLF06VJHm5YtWxo9evQwAGPChAmO7RMmTDAA48SJE3nex/r16w3AeOeddxzb9u/fn2/bnXfeaTRo0MCIjY01Bg8e7Nj+3HPPGaGhocbu3bvzHHfs2LGGr6+vcfDgwTzHfPHFF/P1Zf369Y0OHTrk224YhjF48GCjatWqBe4ryNtvv20AxowZM/Lts9vtRX5/udq2bWt06tTJ8bigY7Ru3dpo2bJlnud99tln+b5Hf/TUU08ZgLF8+fJ8+wp6nculp6fn29a9e3ejevXqV3w9ERF3o2GBIiJubsiQIRw6dIi7776bBQsW8PHHH+drs2HDBlJSUhg+fHie67B69uxJnTp1+Pbbbx3bsrKyOHnypOOrcePGNG/ePM9xFy9ezC233EJgYCAAYWFh1KtXj4oVK1KtWjUAx2yFmzdv5rfffnM8r3379lSoUAEAPz8/mjVrBkCLFi0cx+/Xrx8pKSls3bq1wPf82WefsX79eqZMmXJNfXYlGzdu5OOPP2by5Mn4+OT9L/Djjz+mXbt2lCtXLk//xMfHk5OTw/Lly/O0T09Pz9Pu5MmTTp258dNPPyU8PJyHH344374rzb54tfeXKzMz0/F9vZJBgwaxdu1ax/BDgLlz5xIbG0uHDh0c2/7YB8OGDcPf37/An9E/k3vWFODMmTOcPHmSDh068Msvv3DmzJkiH09ExAoqrkRE3Nzvv//Of//7X/7zn//QuHFjRo8ene+PzV9//RWA2rVr53t+nTp1HPvBHPJXsWLFPF8bNmxg7969AJw6dYq0tDRiYmL+NFtum0OHDjlur+V5l8vJyeHJJ5/knnvuueqwuWsxduxY2rVrR69evfLt27NnD/Pnz8/XN/Hx8QCkpKTkaT9hwoR8bXfu3Om0rPv27aN27dr4+RV+BP/V3l+u06dPExYWdtXj3HXXXQQGBjJ37lzALHa++eYb7rnnnjyF3bRp0/K8/7i4OLKyshw/S0WxatUq4uPjCQ0NpWzZslSsWJEnn3zS8foiIp5A11yJiLi5F198kTvuuAMwr9tp1aoV48aN41//+tc1Ha9Ro0YsXLgwz7bJkyezZs0awJxEoajOnz9/Tc/Nfd7l3nrrLQ4cOMAPP/xQ5BxXs2DBAhYtWuR4n39kt9vp2rUrTzzxRIH7a9Wqlefxgw8+6Pi+5Bo2bJhzwl6DP3t/uY4fP0737t2v2qZcuXL06tWLuXPnMn78eD755BMyMjK4995787QbNGgQN998c55tAwYMKHL2ffv20aVLF+rUqcOMGTOIjY0lICCA7777jn/+8595JhQREXFnKq5ERNxc+/btHfdvuukmRowYwWuvvcagQYNo1aoVAFWrVgVg165ddO7cOc/zd+3a5dgP5h/OuWdjciUkJDgmSAgPD8ff35+jR4/+abbcmQgrVaoEQHR09DU9L1d6ejrPPPMMw4cPz5P5ehmGwdixY7ntttscffZHNWrU4Ny5c/n65kpuuOGGfG1DQ0OvO+vledauXUtWVhb+/v5XbVuY9wdw+PBhzp49S926df/09QcNGkTfvn1Zv349c+fOpUmTJtSvXz9Pm+rVq1O9enXH45MnT/L7779fcbKNK/n666/JyMjgq6++okqVKo7tS5cuLdJxRESspmGBIiIe5oUXXiA6OpoHH3yQ7OxsAJo3b05ERARz5swhIyPD0fb7779nx44d9OzZE6DAa4K+/vprtmzZQr9+/QDw9/enVatWfPfdd2RmZgJw7tw5tm/fzokTJzhw4ABg/kH/5ZdfUqVKFUch1L59e5YvX87vv//ueL2NGzcCsG7dOsdrfvHFFwQHB9O8efM8WWbNmkVaWppjJjtn+fDDD/n555/zTCn/R3feeSdr1qwp8IzZ6dOnHX1dXPr378/Jkyd59dVX8+0zLpvuHAr3/nLbAfkK8IL06NHDMd1+YmJivrNWBf0sTZ48GcMwHD9LhZW7EPbl7+vMmTO88847RTqOiIjVdOZKRMTDlCpVildeeYV+/frx0ksv8fe//x1/f3+mTp3K0KFD6dChAwMHDnRMxR4XF+eYanvFihWMHTuWPn36UKFCBdatW8d//vMf6tWrx+OPP+54jSeffJIePXoQHx/PgAEDeO+99zh37hy+vr706tWLhx56iAULFrBy5UrmzJnjeN5jjz3
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x500 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import KMeans\n",
|
|||
|
"from sklearn.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"# Диапазон количества кластеров для проверки\n",
|
|||
|
"cluster_range = range(2, 11)\n",
|
|||
|
"inertia = []\n",
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"\n",
|
|||
|
"# Вычисление инерции и коэффициента силуэта для каждого количества кластеров\n",
|
|||
|
"for k in cluster_range:\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" labels = kmeans.fit_predict(features)\n",
|
|||
|
" inertia.append(kmeans.inertia_)\n",
|
|||
|
" silhouette_scores.append(silhouette_score(features, labels))\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация метрики инерции (Elbow Method)\n",
|
|||
|
"plt.figure(figsize=(10, 5))\n",
|
|||
|
"plt.plot(cluster_range, inertia, marker=\"o\", linestyle=\"--\", label=\"Inertia\")\n",
|
|||
|
"plt.title(\"Метод локтя\")\n",
|
|||
|
"plt.xlabel(\"Number of Clusters (k)\")\n",
|
|||
|
"plt.ylabel(\"Inertia\")\n",
|
|||
|
"plt.xticks(cluster_range)\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация коэффициента силуэта\n",
|
|||
|
"plt.figure(figsize=(10, 5))\n",
|
|||
|
"plt.plot(\n",
|
|||
|
" cluster_range,\n",
|
|||
|
" silhouette_scores,\n",
|
|||
|
" marker=\"o\",\n",
|
|||
|
" linestyle=\"--\",\n",
|
|||
|
" color=\"orange\",\n",
|
|||
|
" label=\"Silhouette Score\",\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Коэффициент силуэта\")\n",
|
|||
|
"plt.xlabel(\"Number of Clusters (k)\")\n",
|
|||
|
"plt.ylabel(\"Silhouette Score\")\n",
|
|||
|
"plt.xticks(cluster_range)\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"plt.legend()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA94AAAHqCAYAAADyGZa5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3yV5fn48c/9nJG9dyBkQSDsjQzZIKJWcM+vaG2to2pba/X7q61tHe1XbW3V2m2tq+Le7D1khD0TIIGQhOw9znie+/fHQwIh6xxIgJD7/Xr50pxzPysmuc69rktIKSWKoiiKoiiKoiiKonQJ7ULfgKIoiqIoiqIoiqJcylTHW1EURVEURVEURVG6kOp4K4qiKIqiKIqiKEoXUh1vRVEURVEURVEURelCquOtKIqiKIqiKIqiKF1IdbwVRVEURVEURVEUpQupjreiKIqiKIqiKIqidCHV8VYURVEURVEURVGULqQ63oqiKIqiKIqiKIrShVTHW1GUS96XX37Jjh07mr7+9NNP2bt374W7IUVRFEXpQVQcVhTV8VYuYv/+978RQrB169ZW3586dSqDBw8+z3eldEe7d+/mkUceISsri2+//ZYf/OAHVFdXX+jbUhRF6XHai+0LFixACKFi+yVIxWFFUR1vRVF6gHvvvZf8/HzS0tIYP348EydO5LLLLrvQt6UoiqKcdOjQId5+++0LfRtKF1FxWFHAeqFvQFEUpatFRUWxZ88e9uzZg7+/P+np6Rf6lhRFUZTTPPvss9hsNvr27Xuhb0XpAioOK4qa8VYuQW+//TajRo3Cz8+P8PBwbrnlFnJzc5u1mTp1KlOnTm322pYtWxBCIIQ463MOHjyYjIwMJkyYgJ+fH8nJyfzlL39p1s7pdPKLX/yCUaNGERISQkBAAJdffjkrV65s1i4nJwchBC+++CJ/+MMfSExMxM/PjylTprBnz56mdkVFRURFRTF16lSklE2vHzp0iICAAG6++Wavn3vVqlUIIVi1alWztgsWLCApKanZa4Zh8PLLLzNo0CB8fX2JiYnhvvvuo7y8vMX38ZtvvmHKlCkEBQURHBzMmDFjePfdd9u9v2effRZN0zps19b/PyEETz/9ND4+PowaNYr09HReeOEFhBAtztGaxnO+/PLLLd4bMGAAQggeeuihZq9XVFTw6KOPkpCQgI+PD3379uV3v/sdhmEAp/7ftvfPggULms535MgRbrzxRsLDw/H39+eyyy7jq6++avV+n3766VbP58mzKoqiXAiHDx/m7bff5r777iM2NrbVNioOm1Qcbk7FYaU7UTPeykWvsrKSkpKSFq+7XK4Wrz377LM89dRT3HTTTdx7770UFxfzyiuvMHnyZLZv305oaGib1/nZz37W6uvenLO8vJy5c+dy0003ceutt7Jw4ULuv/9+7HY799xzDwBVVVX84x//4NZbb+V73/se1dXV/POf/+SKK65g8+bNDB8+vNn1//Of/1BdXc2DDz5IQ0MDf/zjH5k+fTq7d+8mJiaG6OhoXn/9dW688UZeeeUVHn74YQzDYMGCBQQFBfHnP/+53e9vW8/tqfvuu49///vf3H333Tz88MNkZ2fz6quvsn37dtavX4/NZgPMfX333HMPgwYN4sknnyQ0NJTt27ezaNEibrvttlbP/cYbb/Dzn/+cl156qc023j5HRUUFzz//vFfP6OvryxtvvMGjjz7a9NqGDRs4evRoi7Z1dXVMmTKFvLw87rvvPvr06cOGDRt48sknKSgo4OWXXyYqKoq33nqr6ZiPP/6YTz75pNlrqampABQWFjJhwgTq6up4+OGHiYiI4M033+Q73/kOH374IfPnz2/1nl9//XUCAwMBePLJJ716XkVRlPPpmWeewWq18rOf/Yxbb721xfsqDrdPxeHmVBxWLlpSUS5Sb7zxhgTa/WfQoEFN7XNycqTFYpHPPvtss/Ps3r1bWq3WZq9PmTJFTpkypenrr7/+WgJyzpw58vRfC2/PCciXXnqp6TWHwyGHDx8uo6OjpdPplFJK6Xa7pcPhaHa+8vJyGRMTI++5556m17KzsyUg/fz85PHjx5te37RpkwTkj370o2bnuPXWW6W/v7/MzMyUL7zwggTkp59+2qyNp8+9evVqCcgVK1Y0O/6uu+6SiYmJTV+vXbtWAvKdd95p1m7RokXNXq+oqJBBQUFy3Lhxsr6+vllbwzBavb+vvvpKWq1W+ZOf/ESeydPnkFJKQP7yl79s+vrxxx+X0dHRctSoUc3O0RZA3nDDDdJqtcqtW7c2vf7d735X3nbbbRKQDz74YNPrv/nNb2RAQIDMzMxsdp4nnnhCWiwWeezYsRbX+OUvf9nivhs9+uijEpBr165teq26ulomJyfLpKQkqet6s/b/+7//KwFZUlLS9NqgQYM8elZFUZSu1hjbt2zZIqWU8vDhw9JqtcqHH35YSmn+fT+X2K7isEnFYRWHlYuPWmquXPRee+01li5d2uKfoUOHNmv38ccfYxgGN910EyUlJU3/xMbG0q9fvxZLyBpJKXnyySe5/vrrGTdu3Dmd02q1ct999zV9bbfbue+++ygqKiIjIwMAi8WC3W4HzOVhZWVluN1uRo8ezbZt21rc37x58+jVq1fT12PHjmXcuHF8/fXXzdq9+uqrhISEcMMNN/DUU09x5513cu2117b5fW3vuaOjowE4fvx4m8cDfPDBB4SEhDBr1qxm359Ro0YRGBjY9P1ZunQp1dXVPPHEE/j6+jY7R2tL+zdv3sxNN93E9ddfzwsvvNDuPbT3HGfKy8vjlVde4amnnmoahfZETEwMV111FW+88QZgjqYvXLiQu+++u0XbDz74gMsvv5ywsLBm35OZM2ei6zpr1qzx+LoAX3/9NWPHjmXSpElNrwUGBvL973+fnJwc9u3b16x9Q0MDQIvvs6IoysWocbb7iSeeaPV9FYdVHAYVh5VLg1pqrlz0xo4dy+jRo1u83vgHtVFWVhZSSvr169fqeRqXWp3pnXfeYe/evSxcuLDZ/qWzOWd8fDwBAQHNXktLSwPM/USNGTzffPNNXnrpJQ4cONBsyXxycnKLa7R27bS0NBYuXNjstfDwcP70pz9x4403EhMTw5/+9KdW77lRe8+dkpJCbGwsL774IsOGDSM+Ph4Ah8PRrF1WVhaVlZVNHxDOVFRUBJj79wCPSsTk5eVx1VVXUVtbS2lpaasfCDx9jjP98pe/JD4+nvvuu48PP/yww3s53d13383dd9/NSy+9xAcffEBYWBjTp09v0S4rK4tdu3YRFRXV6nkavyeeOnr0aKsfZBoT0xw9erTZ97WkpASbzYa/v79X11EURTnfjhw5wltvvcUDDzxAXFxcq21UHFZxuJGKw0p3pzreyiXDMAyEEHzzzTdYLJYW77c2sup0Onnqqaf47ne/2xSYz/WcHXn77bdZsGAB8+bN46c//SnR0dFYLBaef/75psB4thYvXgyYe9yOHz/e5p72jp7bbrfz97//ndtuu41hw4Y1ey8xMbHpvw3DIDo6mnfeeafV67QV9Npz6NAhRo4cyR/+8AfuvPNO3nzzTe66666zeo7T7d+/n3//+9+8/fbbbQ7CtOeqq67Cbrfz6aef8sYbb3DXXXehaS0XDRmGwaxZs3j88cdbPU9H93mucnJy6NOnT4cflBRFUS60Z599tmlvd1tUHFZxuJGKw0p3pzreyiUjNTUVKSXJycke/1H985//TFFREU8//XSnnDM/P5/a2tpmo+2ZmZkATVlIP/zwQ1JSUvj444+b/VH+5S9/2eo5s7KyWryWmZnZIqvpokWL+Mc//sHjjz/OO++8w1133cWmTZuwWlv+mnf03ABXX301eXl57Nq1i/r6egBeeOEFDh482NQmNTWVZcuWMXHiRPz8/No8V2OCkj179nRYKiYuLo6vv/6amJgYPvvsM37yk58wd+7cVj88ePIcjZ588kmGDx/eLLusN6xWK3feeSfPPvsse/fu5V//+ler7VJTU6mpqWHmzJlndZ0zJSYmNvueNzpw4EDT+43cbjc7d+5kzpw5nXJtRVGUrpK
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x500 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.cluster import KMeans, AgglomerativeClustering\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"\n",
|
|||
|
"# Выбор оптимального количества кластеров (например, k=3, исходя из анализа)\n",
|
|||
|
"optimal_k = 3\n",
|
|||
|
"\n",
|
|||
|
"# 1. Неиерархический метод: K-Means\n",
|
|||
|
"kmeans = KMeans(n_clusters=optimal_k, random_state=42)\n",
|
|||
|
"kmeans_labels = kmeans.fit_predict(features)\n",
|
|||
|
"\n",
|
|||
|
"# 2. Иерархический метод: Agglomerative Clustering\n",
|
|||
|
"agglo = AgglomerativeClustering(n_clusters=optimal_k)\n",
|
|||
|
"agglo_labels = agglo.fit_predict(features)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов K-Means\n",
|
|||
|
"plt.figure(figsize=(10, 5))\n",
|
|||
|
"plt.subplot(1, 2, 1)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=pca_result[:, 0], y=pca_result[:, 1], hue=kmeans_labels, palette=\"Set2\", s=100\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Неиерархический метод\")\n",
|
|||
|
"plt.xlabel(\"PCA Component 1\")\n",
|
|||
|
"plt.ylabel(\"PCA Component 2\")\n",
|
|||
|
"plt.legend(title=\"Cluster\", loc=\"upper right\")\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов Agglomerative Clustering\n",
|
|||
|
"plt.subplot(1, 2, 2)\n",
|
|||
|
"sns.scatterplot(\n",
|
|||
|
" x=pca_result[:, 0], y=pca_result[:, 1], hue=agglo_labels, palette=\"Set2\", s=100\n",
|
|||
|
")\n",
|
|||
|
"plt.title(\"Иерархический метод\")\n",
|
|||
|
"plt.xlabel(\"PCA Component 1\")\n",
|
|||
|
"plt.ylabel(\"PCA Component 2\")\n",
|
|||
|
"plt.legend(title=\"Cluster\", loc=\"upper right\")\n",
|
|||
|
"plt.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.5"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|