677 lines
324 KiB
Plaintext
677 lines
324 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Цель работы\n",
|
|||
|
"Мы будем кластеризовать автомобили по их характеристикам, чтобы выделить группы автомобилей с похожими свойствами. Это может быть полезно, например, автосалонам или производителям для сегментации рынка."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Загрузим датасет"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>ID</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>Levy</th>\n",
|
|||
|
" <th>Manufacturer</th>\n",
|
|||
|
" <th>Model</th>\n",
|
|||
|
" <th>Prod. year</th>\n",
|
|||
|
" <th>Category</th>\n",
|
|||
|
" <th>Leather interior</th>\n",
|
|||
|
" <th>Fuel type</th>\n",
|
|||
|
" <th>Engine volume</th>\n",
|
|||
|
" <th>Mileage</th>\n",
|
|||
|
" <th>Cylinders</th>\n",
|
|||
|
" <th>Gear box type</th>\n",
|
|||
|
" <th>Drive wheels</th>\n",
|
|||
|
" <th>Doors</th>\n",
|
|||
|
" <th>Wheel</th>\n",
|
|||
|
" <th>Color</th>\n",
|
|||
|
" <th>Airbags</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>45654403</td>\n",
|
|||
|
" <td>13328</td>\n",
|
|||
|
" <td>1399</td>\n",
|
|||
|
" <td>LEXUS</td>\n",
|
|||
|
" <td>RX 450</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Hybrid</td>\n",
|
|||
|
" <td>3.5</td>\n",
|
|||
|
" <td>186005 km</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>4x4</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Silver</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>44731507</td>\n",
|
|||
|
" <td>16621</td>\n",
|
|||
|
" <td>1018</td>\n",
|
|||
|
" <td>CHEVROLET</td>\n",
|
|||
|
" <td>Equinox</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>192000 km</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>Tiptronic</td>\n",
|
|||
|
" <td>4x4</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>45774419</td>\n",
|
|||
|
" <td>8467</td>\n",
|
|||
|
" <td>-</td>\n",
|
|||
|
" <td>HONDA</td>\n",
|
|||
|
" <td>FIT</td>\n",
|
|||
|
" <td>2006</td>\n",
|
|||
|
" <td>Hatchback</td>\n",
|
|||
|
" <td>No</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" <td>200000 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Variator</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Right-hand drive</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>45769185</td>\n",
|
|||
|
" <td>3607</td>\n",
|
|||
|
" <td>862</td>\n",
|
|||
|
" <td>FORD</td>\n",
|
|||
|
" <td>Escape</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Hybrid</td>\n",
|
|||
|
" <td>2.5</td>\n",
|
|||
|
" <td>168966 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>4x4</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>45809263</td>\n",
|
|||
|
" <td>11726</td>\n",
|
|||
|
" <td>446</td>\n",
|
|||
|
" <td>HONDA</td>\n",
|
|||
|
" <td>FIT</td>\n",
|
|||
|
" <td>2014</td>\n",
|
|||
|
" <td>Hatchback</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>1.3</td>\n",
|
|||
|
" <td>91901 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Silver</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>19232</th>\n",
|
|||
|
" <td>45798355</td>\n",
|
|||
|
" <td>8467</td>\n",
|
|||
|
" <td>-</td>\n",
|
|||
|
" <td>MERCEDES-BENZ</td>\n",
|
|||
|
" <td>CLK 200</td>\n",
|
|||
|
" <td>1999</td>\n",
|
|||
|
" <td>Coupe</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>CNG</td>\n",
|
|||
|
" <td>2.0 Turbo</td>\n",
|
|||
|
" <td>300000 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Manual</td>\n",
|
|||
|
" <td>Rear</td>\n",
|
|||
|
" <td>02-Mar</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Silver</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>19233</th>\n",
|
|||
|
" <td>45778856</td>\n",
|
|||
|
" <td>15681</td>\n",
|
|||
|
" <td>831</td>\n",
|
|||
|
" <td>HYUNDAI</td>\n",
|
|||
|
" <td>Sonata</td>\n",
|
|||
|
" <td>2011</td>\n",
|
|||
|
" <td>Sedan</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Petrol</td>\n",
|
|||
|
" <td>2.4</td>\n",
|
|||
|
" <td>161600 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Tiptronic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Red</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>19234</th>\n",
|
|||
|
" <td>45804997</td>\n",
|
|||
|
" <td>26108</td>\n",
|
|||
|
" <td>836</td>\n",
|
|||
|
" <td>HYUNDAI</td>\n",
|
|||
|
" <td>Tucson</td>\n",
|
|||
|
" <td>2010</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Diesel</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>116365 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Grey</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>19235</th>\n",
|
|||
|
" <td>45793526</td>\n",
|
|||
|
" <td>5331</td>\n",
|
|||
|
" <td>1288</td>\n",
|
|||
|
" <td>CHEVROLET</td>\n",
|
|||
|
" <td>Captiva</td>\n",
|
|||
|
" <td>2007</td>\n",
|
|||
|
" <td>Jeep</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Diesel</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>51258 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>Black</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>19236</th>\n",
|
|||
|
" <td>45813273</td>\n",
|
|||
|
" <td>470</td>\n",
|
|||
|
" <td>753</td>\n",
|
|||
|
" <td>HYUNDAI</td>\n",
|
|||
|
" <td>Sonata</td>\n",
|
|||
|
" <td>2012</td>\n",
|
|||
|
" <td>Sedan</td>\n",
|
|||
|
" <td>Yes</td>\n",
|
|||
|
" <td>Hybrid</td>\n",
|
|||
|
" <td>2.4</td>\n",
|
|||
|
" <td>186923 km</td>\n",
|
|||
|
" <td>4.0</td>\n",
|
|||
|
" <td>Automatic</td>\n",
|
|||
|
" <td>Front</td>\n",
|
|||
|
" <td>04-May</td>\n",
|
|||
|
" <td>Left wheel</td>\n",
|
|||
|
" <td>White</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>19237 rows × 18 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" ID Price Levy Manufacturer Model Prod. year Category \\\n",
|
|||
|
"0 45654403 13328 1399 LEXUS RX 450 2010 Jeep \n",
|
|||
|
"1 44731507 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
|
|||
|
"2 45774419 8467 - HONDA FIT 2006 Hatchback \n",
|
|||
|
"3 45769185 3607 862 FORD Escape 2011 Jeep \n",
|
|||
|
"4 45809263 11726 446 HONDA FIT 2014 Hatchback \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"19232 45798355 8467 - MERCEDES-BENZ CLK 200 1999 Coupe \n",
|
|||
|
"19233 45778856 15681 831 HYUNDAI Sonata 2011 Sedan \n",
|
|||
|
"19234 45804997 26108 836 HYUNDAI Tucson 2010 Jeep \n",
|
|||
|
"19235 45793526 5331 1288 CHEVROLET Captiva 2007 Jeep \n",
|
|||
|
"19236 45813273 470 753 HYUNDAI Sonata 2012 Sedan \n",
|
|||
|
"\n",
|
|||
|
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
|
|||
|
"0 Yes Hybrid 3.5 186005 km 6.0 \n",
|
|||
|
"1 No Petrol 3 192000 km 6.0 \n",
|
|||
|
"2 No Petrol 1.3 200000 km 4.0 \n",
|
|||
|
"3 Yes Hybrid 2.5 168966 km 4.0 \n",
|
|||
|
"4 Yes Petrol 1.3 91901 km 4.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"19232 Yes CNG 2.0 Turbo 300000 km 4.0 \n",
|
|||
|
"19233 Yes Petrol 2.4 161600 km 4.0 \n",
|
|||
|
"19234 Yes Diesel 2 116365 km 4.0 \n",
|
|||
|
"19235 Yes Diesel 2 51258 km 4.0 \n",
|
|||
|
"19236 Yes Hybrid 2.4 186923 km 4.0 \n",
|
|||
|
"\n",
|
|||
|
" Gear box type Drive wheels Doors Wheel Color Airbags \n",
|
|||
|
"0 Automatic 4x4 04-May Left wheel Silver 12 \n",
|
|||
|
"1 Tiptronic 4x4 04-May Left wheel Black 8 \n",
|
|||
|
"2 Variator Front 04-May Right-hand drive Black 2 \n",
|
|||
|
"3 Automatic 4x4 04-May Left wheel White 0 \n",
|
|||
|
"4 Automatic Front 04-May Left wheel Silver 4 \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"19232 Manual Rear 02-Mar Left wheel Silver 5 \n",
|
|||
|
"19233 Tiptronic Front 04-May Left wheel Red 8 \n",
|
|||
|
"19234 Automatic Front 04-May Left wheel Grey 4 \n",
|
|||
|
"19235 Automatic Front 04-May Left wheel Black 4 \n",
|
|||
|
"19236 Automatic Front 04-May Left wheel White 12 \n",
|
|||
|
"\n",
|
|||
|
"[19237 rows x 18 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"df = pd.read_csv(\"/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/static/csv/car_price_prediction.csv\", sep=\",\")\n",
|
|||
|
"df\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Предобработка данных\n",
|
|||
|
"Мы удалим неинформативные столбцы, такие как ID, преобразуем категориальные переменные в числовые (one-hot encoding), а также стандартизируем данные (приведём каждый признак к нулевому среднему и единичной дисперсии) для дальнейшего анализа."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"ename": "UnsupportedCUDAError",
|
|||
|
"evalue": "A GPU with NVIDIA Volta™ (Compute Capability 7.0) or newer architecture is required.\nDetected GPU 0: NVIDIA GeForce GTX 1060 6GB\u0000 \nDetected Compute Capability: 6.1",
|
|||
|
"output_type": "error",
|
|||
|
"traceback": [
|
|||
|
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
|
|||
|
"\u001b[0;31mUnsupportedCUDAError\u001b[0m Traceback (most recent call last)",
|
|||
|
"Cell \u001b[0;32mIn[2], line 8\u001b[0m\n\u001b[1;32m 5\u001b[0m df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mget_dummies(df, drop_first\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 7\u001b[0m \u001b[38;5;66;03m# Нормализация числовых данных\u001b[39;00m\n\u001b[0;32m----> 8\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpreprocessing\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m StandardScaler\n\u001b[1;32m 10\u001b[0m scaler \u001b[38;5;241m=\u001b[39m StandardScaler()\n\u001b[1;32m 11\u001b[0m df_scaled \u001b[38;5;241m=\u001b[39m scaler\u001b[38;5;241m.\u001b[39mfit_transform(df)\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/__init__.py:17\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# Copyright (c) 2022-2023, NVIDIA CORPORATION.\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m---> 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Base, UniversalBase\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mavailable_devices\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_cuda_available\n\u001b[1;32m 20\u001b[0m \u001b[38;5;66;03m# GPU only packages\u001b[39;00m\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/__init__.py:18\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# Copyright (c) 2019-2023, NVIDIA CORPORATION.\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mavailable_devices\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_cuda_available\n\u001b[0;32m---> 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbase_helpers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BaseMetaClass, _tags_class_and_instance\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi_decorators\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 20\u001b[0m _deprecate_pos_args,\n\u001b[1;32m 21\u001b[0m api_base_fit_transform,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 33\u001b[0m exit_internal_api,\n\u001b[1;32m 34\u001b[0m )\n\u001b[1;32m 35\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi_context_managers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 36\u001b[0m 
in_internal_api,\n\u001b[1;32m 37\u001b[0m set_api_output_dtype,\n\u001b[1;32m 38\u001b[0m set_api_output_type,\n\u001b[1;32m 39\u001b[0m )\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/base_helpers.py:20\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01minspect\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Parameter, signature\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi_decorators\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m (\n\u001b[1;32m 21\u001b[0m api_base_return_generic,\n\u001b[1;32m 22\u001b[0m api_base_return_array,\n\u001b[1;32m 23\u001b[0m api_base_return_sparse_array,\n\u001b[1;32m 24\u001b[0m api_base_return_any,\n\u001b[1;32m 25\u001b[0m api_return_any,\n\u001b[1;32m 26\u001b[0m _deprecate_pos_args,\n\u001b[1;32m 27\u001b[0m )\n\u001b[1;32m 28\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marray\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CumlArray\n\u001b[1;32m 29\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marray_sparse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SparseCumlArray\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/api_decorators.py:24\u001b[0m\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;66;03m# TODO: Try to resolve circular import that makes this necessary:\u001b[39;00m\n\u001b[0;32m---> 24\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m input_utils \u001b[38;5;28;01mas\u001b[39;00m iu\n\u001b[1;32m 25\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi_context_managers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BaseReturnAnyCM\n\u001b[1;32m 26\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mapi_context_managers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m BaseReturnArrayCM\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/input_utils.py:20\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcollections\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m namedtuple\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtyping\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Literal\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marray\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m CumlArray\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01marray_sparse\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m SparseCumlArray\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mglobal_settings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GlobalSettings\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/array.py:21\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01moperator\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpickle\u001b[39;00m\n\u001b[0;32m---> 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mglobal_settings\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GlobalSettings\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mlogger\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m debug\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmem_type\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MemoryType, MemoryTypeError\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/global_settings.py:20\u001b[0m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mthreading\u001b[39;00m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mavailable_devices\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m is_cuda_available\n\u001b[0;32m---> 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdevice_type\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m DeviceType\n\u001b[1;32m 21\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmem_type\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MemoryType\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msafe_imports\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m cpu_only_import, gpu_only_import\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/device_type.py:19\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 2\u001b[0m \u001b[38;5;66;03m# Copyright (c) 2022-2023, NVIDIA CORPORATION.\u001b[39;00m\n\u001b[1;32m 3\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 14\u001b[0m \u001b[38;5;66;03m# limitations under the License.\u001b[39;00m\n\u001b[1;32m 15\u001b[0m \u001b[38;5;66;03m#\u001b[39;00m\n\u001b[1;32m 18\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01menum\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m Enum, auto\n\u001b[0;32m---> 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mmem_type\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m MemoryType\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mclass\u001b[39;00m \u001b[38;5;21;01mDeviceTypeError\u001b[39;00m(\u001b[38;5;167;01mException\u001b[39;00m):\n\u001b[1;32m 23\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"An exception thrown to indicate bad device type selection\"\"\"\u001b[39;00m\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/mem_type.py:22\u001b[0m\n\u001b[1;32m 19\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mdevice_support\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m GPU_ENABLED\n\u001b[1;32m 20\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcuml\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01minternals\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01msafe_imports\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m cpu_only_import, gpu_only_import\n\u001b[0;32m---> 22\u001b[0m cudf \u001b[38;5;241m=\u001b[39m \u001b[43mgpu_only_import\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcudf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 23\u001b[0m cp \u001b[38;5;241m=\u001b[39m gpu_only_import(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcupy\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 24\u001b[0m cpx_sparse \u001b[38;5;241m=\u001b[39m gpu_only_import(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcupyx.scipy.sparse\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cuml/internals/safe_imports.py:362\u001b[0m, in \u001b[0;36mgpu_only_import\u001b[0;34m(module, alt)\u001b[0m\n\u001b[1;32m 336\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"A function used to import modules required only in GPU installs\u001b[39;00m\n\u001b[1;32m 337\u001b[0m \n\u001b[1;32m 338\u001b[0m \u001b[38;5;124;03mThis function will attempt to import a module with the given name, but it\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 359\u001b[0m \u001b[38;5;124;03m UnavailableMeta.\u001b[39;00m\n\u001b[1;32m 360\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 361\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m GPU_ENABLED:\n\u001b[0;32m--> 362\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mimportlib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mimport_module\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodule\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 363\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 364\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m safe_import(\n\u001b[1;32m 365\u001b[0m module,\n\u001b[1;32m 366\u001b[0m msg\u001b[38;5;241m=\u001b[39m\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmodule\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m is not installed in non GPU-enabled installations\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 367\u001b[0m alt\u001b[38;5;241m=\u001b[39malt,\n\u001b[1;32m 368\u001b[0m )\n",
|
|||
|
"File \u001b[0;32m/usr/lib/python3.12/importlib/__init__.py:90\u001b[0m, in \u001b[0;36mimport_module\u001b[0;34m(name, package)\u001b[0m\n\u001b[1;32m 88\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n\u001b[1;32m 89\u001b[0m level \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m---> 90\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_bootstrap\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_gcd_import\u001b[49m\u001b[43m(\u001b[49m\u001b[43mname\u001b[49m\u001b[43m[\u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m:\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpackage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlevel\u001b[49m\u001b[43m)\u001b[49m\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cudf/__init__.py:20\u001b[0m\n\u001b[1;32m 17\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mcudf\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mutils\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mgpu_utils\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m validate_setup\n\u001b[1;32m 19\u001b[0m _setup_numba()\n\u001b[0;32m---> 20\u001b[0m \u001b[43mvalidate_setup\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 22\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mcupy\u001b[39;00m\n\u001b[1;32m 23\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mnumba\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m config \u001b[38;5;28;01mas\u001b[39;00m numba_config, cuda\n",
|
|||
|
"File \u001b[0;32m/mnt/d/AIMLabs/AIM-PIbd-31-Kouvshinoff-T-A/wslenv/lib/python3.12/site-packages/cudf/utils/gpu_utils.py:89\u001b[0m, in \u001b[0;36mvalidate_setup\u001b[0;34m()\u001b[0m\n\u001b[1;32m 85\u001b[0m device_name \u001b[38;5;241m=\u001b[39m deviceGetName(\u001b[38;5;241m0\u001b[39m)\n\u001b[1;32m 86\u001b[0m minor_version \u001b[38;5;241m=\u001b[39m getDeviceAttribute(\n\u001b[1;32m 87\u001b[0m cudaDeviceAttr\u001b[38;5;241m.\u001b[39mcudaDevAttrComputeCapabilityMinor, \u001b[38;5;241m0\u001b[39m\n\u001b[1;32m 88\u001b[0m )\n\u001b[0;32m---> 89\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedCUDAError(\n\u001b[1;32m 90\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mA GPU with NVIDIA Volta™ (Compute Capability 7.0) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 91\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mor newer architecture is required.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 92\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDetected GPU 0: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdevice_name\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 93\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDetected Compute Capability: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mmajor_version\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mminor_version\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 94\u001b[0m )\n\u001b[1;32m 96\u001b[0m cuda_runtime_version \u001b[38;5;241m=\u001b[39m runtimeGetVersion()\n\u001b[1;32m 98\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cuda_runtime_version \u001b[38;5;241m<\u001b[39m \u001b[38;5;241m11000\u001b[39m:\n\u001b[1;32m 99\u001b[0m \u001b[38;5;66;03m# Require CUDA Runtime version 11.0 or greater.\u001b[39;00m\n",
|
|||
|
"\u001b[0;31mUnsupportedCUDAError\u001b[0m: A GPU with NVIDIA Volta™ (Compute Capability 7.0) or newer architecture is required.\nDetected GPU 0: NVIDIA GeForce GTX 1060 6GB\u0000 \nDetected Compute Capability: 6.1"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Удаляем неинформативный столбец ID\n",
|
|||
|
"df = df.drop(columns=[\"ID\"])\n",
|
|||
|
"\n",
|
|||
|
"# Преобразование категориальных данных в числовые с помощью one-hot encoding\n",
|
|||
|
"df = pd.get_dummies(df, drop_first=True)\n",
|
|||
|
"\n",
|
|||
|
"# Нормализация числовых данных\n",
|
|||
|
"from cuml.preprocessing import StandardScaler\n",
|
|||
|
"\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"df_scaled = scaler.fit_transform(df)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Визуализация данных с помощью PCA (снижение размерности)\n",
|
|||
|
"Для визуализации мы применим метод PCA, который уменьшит количество измерений до двух, сохраняя при этом максимальное количество информации. \n",
|
|||
|
"Ключевые термины:\n",
|
|||
|
"- PCA (Principal Component Analysis) — метод снижения размерности, который находит новые оси в данных, вдоль которых разброс максимален, и проецирует данные на эти оси.\n",
|
|||
|
"- Снижение размерности — процесс упрощения данных за счёт уменьшения числа признаков."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAroAAAIjCAYAAADslLiSAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOydd3gU1f7GP7O76b3QE2roXYoFFLyASrEDwlXBjmK59t7LRS9Xf3ZFr1IUBQERG11RFASE0EsIBEghIT0km82WOb8/vrMJgdBVJJzP8+RJMuXMmdn27jvv+R5DKaXQaDQajUaj0WhqGbZT3QGNRqPRaDQajebPQAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAtdjUaj0Wg0Gk2tRAvdM5RJkyZhGEblT3BwMK1ateKuu+4iJyfnkO1zcnJ48MEHadOmDaGhoYSFhdGtWzdefPFFioqKajxGz549MQyD995776T7e8MNN1Trr8PhIDExkREjRrB58+aTbl+j0Wg0Gk3tw3GqO6A5tTz//PM0a9YMl8vFL7/8wnvvvcf333/Pxo0bCQ0NBWDVqlUMGjSI0tJSrrvuOrp16wbA77//zssvv8zPP//MggULqrW7fft2Vq1aRdOmTZk6dSp33HHHSfc1KCiI//3vfwB4vV527NjB+++/z7x589i8eTMNGzY86WNoNBqNRqOpPWihe4YzcOBAunfvDsAtt9xCXFwcr732GnPmzGHkyJEUFRVx5ZVXYrfbSU5Opk2bNtX2f+mll/jwww8PaffTTz+lbt26vPrqqwwdOpRdu3bRtGnTk+qrw+Hguuuuq7bsnHPOYciQIXz33XfceuutJ9W+RqPRaDSa2oWOLmiq8Y9//AOAtLQ0ACZMmEBmZiavvfbaISIXoF69ejz55JOHLP/ss88YOnQoQ4YMISoqis8+++yQbZxOJ1u3biUvL++E+1u/fn1ARLAffyxj165dlctM06RTp04YhsGkSZMqlz/77LO0a9eO8PBwIiMjOeecc/jqq68q1z/zzDMEBASQm5t7yLFvu+02oqOjcblcAMyZM4fBgwfTsGFDgoKCaNGiBS+88AI+n++QfXft2lUtinHgz8HbHNhfgDvvvBPDMLjhhhsql3355Zf07NmT2NhYQkJCaNOmDa+88gpKqcptdu/ezdixY2ndujUhISHExcUxbNiwatfpwOv3+++/V1uel5eHYRg8++yz1ZbXtGz8+PEYhkHfvn2rLd+5cyfDhg2jYcOG2Gy2ynPu0KHDIdeoJpYsWXLU6/Znneuzzz6LYRiHPF9///33Qx6nG264gfDw8COey4Htl5eX06ZNG9q0aUN5eXnlNgUFBTRo0IDzzjuvxufRwedxuJ+DH5/k5GQGDhxIZGQk4eHh9OvXj99+++2QdouKirjvvvto2rQpQUFBJCQkMGrUqEOuwcHRosMdd+vWrQwdOpTY2FiCg4Pp3r07X3/99RGvkx/TNHnjjTfo2LEjwcHB1KlTh0suuaTaY2cYBnfdddch+w4ZMqTaF+3jeW1NmDABm83GjBkzjrh/SkoKsbGx/POf/6zWZlFREffeey+JiYkEBQWRlJTEK6+8gmmah7T33//+95C+d+jQodrryP8aWLJkSbXtBg8eXOM1//HHHzn//POJiYmp9tjUdJ0OxH+c6dOn8/jjj1O/fn3CwsK47LLLSE9Pr7bt0qVLGTZsGI0bNyYoKIjExETuu+++as9lOPr7LUDfvn0xDIMrrrjikD6NGTOmxvcL0zR5/fXXad++PcHBwdSrV48xY8ZQWFhYbbumTZsyZMgQFixYQJcuXQgODqZdu3Z8+eWX1bY73vfAP+L1dKT3toNfT/73ogMpLS2lfv36NT43znS0o6upxo4dOw
CIi4sD4OuvvyYkJIShQ4cecxsrVqwgNTWViRMnEhgYyFVXXcXUqVN5/PHHq223cuVKLrzwQp555plD3jgOh/8D1ufzsXPnTh555BHi4uIYMmTIEff75JNP2LBhwyHLy8rKuPLKK2natCnl5eVMmjSJq6++muXLl9OzZ0+uv/56nn/+eaZPn17tg8HtdjNz5kyuvvpqgoODAXlzDA8P5/777yc8PJwffviBp59+mpKSEsaPH19jv2677TbOP/98QMTq7Nmzj3geqampNTroJSUlnH322YwePZqAgADmzZvHo48+isPh4IEHHgAkgrJs2TJGjBhBQkICu3bt4r333qNv375s3ry5MqpyshQVFTFu3LhDlvt8Pi677DJ2797NvffeS6tWrTAMg5deeum4j3HPPffQo0cPAKZMmcLChQurrf+rzvWPIiQkhMmTJ9OrVy+eeOIJXnvtNUCEV3FxMZMmTcJutx+1HX8UyU9paekhsaFNmzZx/vnnExkZycMPP0xAQAATJkygb9++/PTTT5x99tmV+55//vls2bKFm266ibPOOou8vDy+/vprMjIyiI+Pr9ZufHw8//d//1f5//XXX3/IcXv16kWjRo149NFHCQsL44svvuCKK65g1qxZXHnllUc8t5tvvplJkyYxcOBAbrnlFrxeL0uXLuW3336rvCt1MhzutTVmzBi2bdvG6NGjadq0aeXz7kAKCgoYMmQIbdu2ZeLEiZXLnU4nffr0ITMzkzFjxtC4cWOWLVvGY489xt69e3n99ddPut8AP//8M99///0hy9PS0hg8eDANGjTg6aefpk6dOsChj82ReOmllzAMg0ceeYR9+/bx+uuv079/f9auXUtISAgAM2bMwOl0cscddxAXF8fKlSt56623yMjIqPYF4Wjvt36Cg4P57rvv2LdvH3Xr1gXky+D06dMr328PZMyYMUyaNIkbb7yRe+65h7S0NN5++22Sk5P59ddfCQgIqNx2+/btXHPNNdx+++2MHj2aiRMnMmzYMObNm8eAAQOO+br4+aNeT23btuWTTz6pbPeDDz5gy5Yt1V5TnTp1Omw/Xn311RrH12gApTkjmThxogLUokWLVG5urkpPT1fTpk1TcXFxKiQkRGVkZCillIqJiVGdO3c+rrbvuusulZiYqEzTVEoptWDBAgWo5OTkatv9+OOPClDPPPPMUdscPXq0Ag75adSokVq9enWN55aWlqaUUsrlcqnGjRurgQMHKkBNnDjxsMfZt2+fAtR///vfymXnnnuuOvvss6tt9+WXXypA/fjjj5XLnE7nIe2NGTNGhYaGKpfLVW359u3bFaAmT55cueyZZ55RB74k09LSDunv8OHDVYcOHVRiYqIaPXr0Yc9DKaXatWunhgwZcsT+LV++XAFqypQplcv812/VqlXVts3Nza3x8Tp42cMPP6zq1q2runXrpvr06VO5fNu2bQpQ48aNq7Z/nz59VPv27Y94Ln78z6WZM2dWLrvzzjvVwW9lf8a5+h+f3NzcatuuWrXqkMdp9OjRKiws7IjnUtO1fOyxx5TNZlM///yzmjFjhgLU66+/fsR2jvc8rrjiChUYGKh27NhRuSwrK0tFRESoCy64oHLZ008/rQD15ZdfHnI8/2vbz7XXXquaNWt2xPPr16+f6tixY7XXgmma6rzzzlMtW7Y84vn98MMPClD33HPPEfsCqDvvvPOQbQYPHqyaNGlS+f/xvrZ8Pp+69NJLVYMGDVR6enq1/d1ut+rbt69q1qyZ2rdvX7X9XnjhBRUWFqZSUlKqLX/00UeV3W5Xe/bsqdaf8ePHH9L39u3bV3sd+d83D3zvOfvssyvf3w685hMmTFCAWr58ebU2D3edDsR/nEaNGqmSkpLK5V988YUC1BtvvFG5rKbX27hx45RhGGr37t2HPUZN77f+94NOnTpVW/7JJ5+ohIQEdf7551d7v1i6dKkC1NSpU6u1PW/evEOWN2nSRAFq1qxZlcuKi4
tVgwYNVNeuXSuXnerXk1LyHnLgc/ZADv6s2Ldvn4qIiKh8Dhz43NAopaMLZzj9+/enTp06lRUMwsPDmT17No0aNQL
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Импортируем PCA и визуализируем данные\n",
|
|||
|
"from cuml.decomposition import PCA\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"# Применяем PCA для снижения размерности до 2\n",
|
|||
|
"pca = PCA(n_components=2)\n",
|
|||
|
"df_pca = pca.fit_transform(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(df_pca[:, 0], df_pca[:, 1], c='blue', edgecolor='k', alpha=0.6)\n",
|
|||
|
"plt.title(\"PCA: Визуализация данных после снижения размерности\")\n",
|
|||
|
"plt.xlabel(\"Главная компонента 1\")\n",
|
|||
|
"plt.ylabel(\"Главная компонента 2\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Количество кластеров\n",
|
|||
|
"Количество кластеров напрямую влияет на результаты кластеризации, так как оно определяет, сколько групп или сегментов будет выделено в данных. Оптимальный выбор количества кластеров важен, чтобы обеспечить баланс между точностью кластеризации и интерпретируемостью результатов. \n",
|
|||
|
"# Зачем выбирать количество кластеров?\n",
|
|||
|
"## Оптимальная сегментация данных\n",
|
|||
|
"Разное количество кластеров может приводить к слишком мелкому делению (много мелких кластеров) или слишком крупному (слишком обобщённые кластеры).\n",
|
|||
|
"-Слишком мало кластеров: важные различия в данных могут быть упущены.\n",
|
|||
|
"-Слишком много кластеров: анализ становится сложным, и кластеры могут быть избыточно раздроблены.\n",
|
|||
|
"## Интерпретируемость результатов\n",
|
|||
|
"Оптимальное количество кластеров делает результаты понятными и полезными. Например, выделение 3-5 кластеров может быть удобно для анализа, тогда как 15-20 кластеров усложнят интерпретацию.\n",
|
|||
|
"## Избежание переобучения или недообучения\n",
|
|||
|
"Количество кластеров влияет на обобщающую способность модели. Слишком большое количество кластеров может привести к переобучению (модель подстраивается под шум), а слишком малое — к упрощению и игнорированию важных данных.\n",
|
|||
|
"## Практическая применимость\n",
|
|||
|
"В бизнес-задачах обычно требуется понятное разделение данных. Например, если мы сегментируем клиентов, 3-5 кластеров проще использовать для таргетинга, чем 20."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Определение оптимального количества кластеров\n",
|
|||
|
"Для выбора количества кластеров мы применим: \n",
|
|||
|
"- Метод локтя — измеряет инерцию (размерность ошибок внутри кластеров).\n",
|
|||
|
"- Коэффициент силуэта — показывает, насколько хорошо объекты распределены между кластерами.\n",
|
|||
|
" \n",
|
|||
|
"Ключевые термины: \n",
|
|||
|
"- Инерция — сумма квадратов расстояний от точек до центроидов их кластеров. Чем меньше, тем лучше.\n",
|
|||
|
"- Коэффициент силуэта — оценивает плотность внутри кластеров и разницу между ними (от -1 до 1)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAs0AAAIjCAYAAAD4ASZzAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACLxUlEQVR4nOzdd1hTZ/8G8DuBMGSKooCAICBuUSkCTuqus63bVty+1Q5HW0v9WbW+FltbtVprVdxatdZRtdaBs1ZkiKg4GMpWVES2hJCc3x+UvEZAhkISuD/Xlesy5zzn5HsekuPN4TlPRIIgCCAiIiIiojKJ1V0AEREREZGmY2gmIiIiIioHQzMRERERUTkYmomIiIiIysHQTERERERUDoZmIiIiIqJyMDQTEREREZWDoZmIiIiIqBy66i6AiIheXUFBAdLT06FQKGBjY6PucoiIah1eaSYi0lJhYWEYO3YsGjZsCH19fVhbW+Pdd99Vd1lERLUSQzPVOVu3boVIJIJIJMLFixdLrBcEAXZ2dhCJRBg0aJAaKiQq3x9//IGuXbvi1q1bWLp0KU6dOoVTp05h/fr16i6NiKhW4vAMqrMMDAzw66+/omvXrirLz58/j+TkZOjr66upMqKXS09Px5QpU9CvXz/s27cPenp66i6JiKjW45VmqrPeeust7Nu3D4WFhSrLf/31V3Tq1AlWVlZqqozo5bZs2YL8/Hxs3bqVgZmIqIYwNFOdNWbMGDx58gSnTp1SLisoKMDvv/+OsWPHlrqNQqHAqlWr0Lp1axgYGKBx48aYPn06nj59qmzj4OCgHP5R2sPBwUHZNjc3F3PnzoWdnR309fXh6uqK77//HoIglHjtc+fOlbnPipowYUKp2y9atEil3ZkzZ9CtWzcYGRnB3NwcQ4cOxe3bt1XaLFq0qMRrnz17Fvr6+vjPf/6j0uZlj3Pnzim3X7duHdq0aYN69eqptPn9998rdHw9e/as0PEBqsN0nn/07NlTpd3Vq1fRv39/WFpaqrQrb+hOfHy8SnuJRAIHBwd89tlnKCgoKFFHWFjYS4/r+bouX74MNzc3fPPNN8r3jouLC5YtWwaFQqGybWFhIZYsWQInJyfo6+vDwcEBX375JaRSqUo7BwcHDBo0CCdPnoSbmxsMDAzQqlUrHDhwQKVdeno6Pv30U7Rt2xbGxsYwNTXFgAEDcO3atZf2R7EXfx6FhYV46623YGFhgVu3blW6bqDsz8bzn7XiNs+/3wBg4MCBJWp6sb+B//08t27dqrL8zp07GD58OCwsLGBgYAB3d3ccPny4RI0ZGRmYPXs2HBwcoK+vD1tbW4wfPx5paWkv/Wy/+B5+8TNlYmICDw8PHDp0SOX1/v77b4wYMQL29vbQ19eHnZ0dZs+ejWfPnpWo7UXF78n4+Hjlsps3b6J+/foYNGhQiQsNZX3unu+rytRz584djBw5EpaWljA0NISrqyvmz59f6vGXd04JDg5G//79YWZmhnr16qFHjx74559/VF6veJ/Fr2tqaooGDRrgk08+QX5+vkrbynyeiusRi8WwsrLCqFGjkJiYWG7/k+bi8AyqsxwcHODl5YXdu3djwIABAIC//voLmZmZGD16NFavXl1im+nTp2Pr1q2YOHEiPv74Y8TFxeGnn37C1atX8c8//0AikWDVqlXIyckBANy+fRvffPMNvvzyS7Rs2RIAYGxsDKBo7PSQIUNw9uxZTJ48GW5ubjhx4gQ+++wzpKSkYOXKlaXW/fHHH+ONN94AAGzfvl0l9FdEw4YNVfb9/vvvq6wPDAzEgAED0KxZMyxatAjPnj3DmjVr0KVLF4SHh6sEkeddu3YNw4YNw1tvvYW1a9cCAN555x04Ozsr28yePRstW7bEtGnTlMuK+2Xv3r2YMWMGevbsiY8++ghGRkbK/qsMW1tb+Pv7AwBycnLwwQcfvLT9ypUr0bBhQwDA0qVLVdZlZmZiwIABEAQBc+bMgZ
2dnfI4KmratGno1q0bpFIpTpw4ge+//x4GBgZYsmRJZQ5LxZMnT3Dx4kVcvHgRkyZNQqdOnXD69Gn4+fkhPj4ev/zyi7LtlClTsG3bNgwfPhxz585FcHAw/P39cfv2bRw8eFBlvzExMRg1ahT+85//wNfXF1u2bMGIESNw/Phx9OnTBwBw7949HDp0CCNGjICjoyMePnyI9evXo0ePHrh161alZ+6YMmUKzp07h1OnTqFVq1ZVqrvY85+zDRs2lBtQLly4gGPHjlWq3ufdvHkTXbp0QZMmTfDFF1/AyMgIv/32G4YNG4b9+/fj7bffBlD0PuzWrRtu376NSZMmoWPHjkhLS8Phw4eRnJyMli1bYseOHcr9btiwAbdv31b5nLZr107ltYvbp6Wl4eeff8aIESMQGRkJV1dXAMC+ffuQl5eHDz74AA0aNEBISAjWrFmD5ORk7Nu3r1LHmZSUhP79+6NFixb47bffoKtbMjq0aNFCGWzT0tJKfEYqWs/169fRrVs3SCQSTJs2DQ4ODrh79y6OHDmCpUuXVuqccubMGQwYMACdOnXCwoULIRaLsWXLFrz55pv4+++/4eHhoVLjyJEj4eDgAH9/f1y+fBmrV6/G06dPsX37dmWbyrwvu3XrhmnTpkGhUCAyMhKrVq3C/fv38ffff1eq/0mDCER1zJYtWwQAQmhoqPDTTz8JJiYmQl5eniAIgjBixAjBx8dHEARBaNq0qTBw4EDldn///bcAQNi1a5fK/o4fP17qckEQhLNnzwoAhLNnz5ZYd+jQIQGA8N///ldl+fDhwwWRSCTExsaqLD958qQAQPj999+Vy2bOnClU5mM8btw4wdHRUWUZAGHhwoXK525ubkKjRo2EJ0+eKJddu3ZNEIvFwvjx45XLFi5cqHzt+Ph4wdraWujatavw7NmzMl+/adOmgq+vb6nrxowZI5ibm6tsX9x/+/btq9DxeXt7C23atFE+f/z4cYnjK7Zx40YBgJCQkKBc1qNHD6FHjx7K5ydOnBAACLt37y5xHM+/N0oTFxcnABC2bNmistzGxkZ46623lM+ffz+W5cW6evToIQAQFi1apNJuwoQJAgDhxo0bgiAIQkREhABAmDJlikq7Tz/9VAAgnDlzRuWYAAj79+9XLsvMzBSsra2FDh06KJfl5+cLcrm8xLHq6+sLX3/9dZnHUOz5n4efn5+go6MjHDp0SKVNZeoWBEE4deqUAEA4f/68cpmvr6/QtGlT5fPSPoudO3cWBgwYUOI94uPjI3Tv3r3EMb748+zVq5fQtm1bIT8/X7lMoVAI3t7egouLi3LZV199JQAQDhw4UKI/FApFiWUv1v685z93xYrPDb/99ptyWfE57Xn+/v6CSCRSec+Xpvg9GRcXJ6SnpwutWrUSXF1dhbS0tFLbd+nSRXneFITS+6qi9XTv3l0wMTEpUWNp/SQIZZ9TFAqF4OLiIvTr109l27y8PMHR0VHo06ePcllxnw4ZMkRlHzNmzBAACNeuXRMEofKfpxfrGjt2rFCvXr1Sj4O0A4dnUJ02cuRIPHv2DEePHkV2djaOHj1a5tCMffv2wczMDH369EFaWpry0alTJxgbG+Ps2bOVeu1jx45BR0cHH3/8scryuXPnQhAE/PXXXyrLi/9MaGBgUKnXeV5BQcFLb3B88OABIiIiMGHCBFhYWCiXt2vXDn369Cn1qtyTJ0/Qr18/mJiY4PDhw1WuLzs7G/Xq1Xul48vPz6/w9sVDJF7WH9nZ2QCABg0aVLmmnJwcpKWlISUlBRs2bEBqaip69epVol1mZibS0tKUr1keHR2dElfz5s6dCwD4888/AUD585ozZ85L2xWzsbFRXh0FAFNTU4wfPx5Xr15FamoqgKL+EouL/uuQy+V48uQJjI2N4erqivDw8ArVDgA//fQT/P39sXr1agwdOlRlXWXrrsjP8kUHDhxAaGgoli1bVmJdo0aNkJyc/NLt09PTcebMGYwcORLZ2d
nK80Hx5yEmJgYpKSkAgP3796N9+/YqfVusMsOrnlf8erdv38Yvv/wCIyMjeHp6KtcbGhoq/52bm4u0tDR4e3tDEAR
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 800x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Метод локтя\n",
|
|||
|
"from cuml.cluster import KMeans\n",
|
|||
|
"\n",
|
|||
|
"border_l = 2\n",
|
|||
|
"border_r = 5\n",
|
|||
|
"\n",
|
|||
|
"inertia = []\n",
|
|||
|
"for k in range(border_l, border_r):\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" kmeans.fit(df_scaled)\n",
|
|||
|
" inertia.append(kmeans.inertia_)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация метода локтя\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.plot(range(border_l, border_r), inertia, marker='o')\n",
|
|||
|
"plt.title('Метод локтя для выбора количества кластеров')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Инерция')\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Коэффициент силуэта\n",
|
|||
|
"from cuml.metrics import silhouette_score\n",
|
|||
|
"\n",
|
|||
|
"silhouette_scores = []\n",
|
|||
|
"for k in range(border_l, border_r):\n",
|
|||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
|||
|
" kmeans.fit(df_scaled)\n",
|
|||
|
" score = silhouette_score(df_scaled, kmeans.labels_)\n",
|
|||
|
" silhouette_scores.append(score)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация коэффициента силуэта\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.plot(range(border_l, border_r), silhouette_scores, marker='o')\n",
|
|||
|
"plt.title('Коэффициент силуэта для различных кластеров')\n",
|
|||
|
"plt.xlabel('Количество кластеров')\n",
|
|||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Кластеризация с помощью K-means\n",
|
|||
|
"После выбора оптимального числа кластеров (например, 3), мы применим K-means для кластеризации и визуализируем результаты. \n",
|
|||
|
"Ключевой термин:\n",
|
|||
|
"- K-means — алгоритм кластеризации, который группирует данные вокруг центров (центроидов) кластеров."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Кластеризация с помощью K-means\n",
|
|||
|
"optimal_clusters = 3\n",
|
|||
|
"kmeans = KMeans(n_clusters=optimal_clusters, random_state=42)\n",
|
|||
|
"df['Cluster'] = kmeans.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация кластеров с использованием PCA\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(df_pca[:, 0], df_pca[:, 1], c=df['Cluster'], cmap='viridis', edgecolor='k', alpha=0.6)\n",
|
|||
|
"plt.title(\"Кластеры, определенные K-means (PCA)\")\n",
|
|||
|
"plt.xlabel(\"Главная компонента 1\")\n",
|
|||
|
"plt.ylabel(\"Главная компонента 2\")\n",
|
|||
|
"plt.colorbar(label='Кластер')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Иерархическая кластеризация\n",
|
|||
|
"Применяем иерархическую кластеризацию для сравнения. Также строим дендрограмму. \n",
|
|||
|
"Ключевой термин:\n",
|
|||
|
"- Иерархическая кластеризация — метод, который строит древовидную структуру кластеров (дендрограмму)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from cuml.cluster import AgglomerativeClustering\n",
|
|||
|
"from scipy.cluster.hierarchy import dendrogram\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"# Применение иерархической кластеризации\n",
|
|||
|
"hierarchical = AgglomerativeClustering(n_clusters=optimal_clusters)\n",
|
|||
|
"df['Hierarchical Cluster'] = hierarchical.fit_predict(df_scaled)\n",
|
|||
|
"\n",
|
|||
|
"# Функция для получения матрицы linkage\n",
|
|||
|
"def get_linkage_matrix(model: AgglomerativeClustering) -> np.ndarray:\n",
|
|||
|
" counts = np.zeros(model.children_.shape[0]) # type: ignore\n",
|
|||
|
" n_samples = len(model.labels_)\n",
|
|||
|
" for i, merge in enumerate(model.children_): # type: ignore\n",
|
|||
|
" current_count = 0\n",
|
|||
|
" for child_idx in merge:\n",
|
|||
|
" if child_idx < n_samples:\n",
|
|||
|
" current_count += 1\n",
|
|||
|
" else:\n",
|
|||
|
" current_count += counts[child_idx - n_samples]\n",
|
|||
|
" counts[i] = current_count\n",
|
|||
|
"\n",
|
|||
|
" return np.column_stack([model.children_, model.distances_, counts]).astype(float)\n",
|
|||
|
"\n",
|
|||
|
"# Построение дендрограммы\n",
|
|||
|
"linkage_matrix = get_linkage_matrix(hierarchical)\n",
|
|||
|
"plt.figure(figsize=(12, 8))\n",
|
|||
|
"dendrogram(linkage_matrix)\n",
|
|||
|
"plt.title(\"Дендограмма, восстановленная из модели AgglomerativeClustering\")\n",
|
|||
|
"plt.xlabel(\"Индексы объектов\")\n",
|
|||
|
"plt.ylabel(\"Евклидово расстояние\")\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация кластеров\n",
|
|||
|
"plt.figure(figsize=(8, 6))\n",
|
|||
|
"plt.scatter(df_pca[:, 0], df_pca[:, 1], c=df['Hierarchical Cluster'], cmap='viridis', edgecolor='k', alpha=0.6)\n",
|
|||
|
"plt.title(\"Кластеры, определенные иерархической кластеризацией (PCA)\")\n",
|
|||
|
"plt.xlabel(\"Главная компонента 1\")\n",
|
|||
|
"plt.ylabel(\"Главная компонента 2\")\n",
|
|||
|
"plt.colorbar(label='Кластер')\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"# Оценка качества кластеризации\n",
|
|||
|
"Оценим качество кластеров, сравнив коэффициенты силуэта для двух методов."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"# Оценка качества\n",
|
|||
|
"silhouette_kmeans = silhouette_score(df_scaled, df['Cluster'])\n",
|
|||
|
"silhouette_hierarchical = silhouette_score(df_scaled, df['Hierarchical Cluster'])\n",
|
|||
|
"\n",
|
|||
|
"print(f\"Коэффициент силуэта для K-means: {silhouette_kmeans:.4f}\")\n",
|
|||
|
"print(f\"Коэффициент силуэта для иерархической кластеризации: {silhouette_hierarchical:.4f}\")"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3 (ipykernel)",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.3"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|