2024-11-23 11:55:22 +04:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['HeartDisease', 'BMI', 'Smoking', 'AlcoholDrinking', 'Stroke',\n",
" 'PhysicalHealth', 'MentalHealth', 'DiffWalking', 'Sex', 'AgeCategory',\n",
" 'Race', 'Diabetic', 'PhysicalActivity', 'GenHealth', 'SleepTime',\n",
" 'Asthma', 'KidneyDisease', 'SkinCancer'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>HeartDisease</th>\n",
" <th>BMI</th>\n",
" <th>Smoking</th>\n",
" <th>AlcoholDrinking</th>\n",
" <th>Stroke</th>\n",
" <th>PhysicalHealth</th>\n",
" <th>MentalHealth</th>\n",
" <th>DiffWalking</th>\n",
" <th>Sex</th>\n",
" <th>AgeCategory</th>\n",
" <th>Race</th>\n",
" <th>Diabetic</th>\n",
" <th>PhysicalActivity</th>\n",
" <th>GenHealth</th>\n",
" <th>SleepTime</th>\n",
" <th>Asthma</th>\n",
" <th>KidneyDisease</th>\n",
" <th>SkinCancer</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>No</td>\n",
" <td>16.60</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>3.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>55-59</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>5.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>No</td>\n",
" <td>20.34</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>7.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>No</td>\n",
" <td>26.58</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>20.0</td>\n",
" <td>30.0</td>\n",
" <td>No</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>8.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>No</td>\n",
" <td>24.21</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>6.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>No</td>\n",
" <td>23.71</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>28.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>40-44</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Very good</td>\n",
" <td>8.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>Yes</td>\n",
" <td>28.87</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>6.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>75-79</td>\n",
" <td>Black</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>12.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>No</td>\n",
" <td>21.63</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>15.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>70-74</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Fair</td>\n",
" <td>4.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>No</td>\n",
" <td>31.64</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>5.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>Good</td>\n",
" <td>9.0</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>No</td>\n",
" <td>26.45</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>No</td>\n",
" <td>Female</td>\n",
" <td>80 or older</td>\n",
" <td>White</td>\n",
" <td>No, borderline diabetes</td>\n",
" <td>No</td>\n",
" <td>Fair</td>\n",
" <td>5.0</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>No</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>No</td>\n",
" <td>40.69</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>0.0</td>\n",
" <td>0.0</td>\n",
" <td>Yes</td>\n",
" <td>Male</td>\n",
" <td>65-69</td>\n",
" <td>White</td>\n",
" <td>No</td>\n",
" <td>Yes</td>\n",
" <td>Good</td>\n",
" <td>10.0</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" <td>No</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" HeartDisease BMI Smoking AlcoholDrinking Stroke PhysicalHealth \\\n",
"0 No 16.60 Yes No No 3.0 \n",
"1 No 20.34 No No Yes 0.0 \n",
"2 No 26.58 Yes No No 20.0 \n",
"3 No 24.21 No No No 0.0 \n",
"4 No 23.71 No No No 28.0 \n",
"5 Yes 28.87 Yes No No 6.0 \n",
"6 No 21.63 No No No 15.0 \n",
"7 No 31.64 Yes No No 5.0 \n",
"8 No 26.45 No No No 0.0 \n",
"9 No 40.69 No No No 0.0 \n",
"\n",
" MentalHealth DiffWalking Sex AgeCategory Race \\\n",
"0 30.0 No Female 55-59 White \n",
"1 0.0 No Female 80 or older White \n",
"2 30.0 No Male 65-69 White \n",
"3 0.0 No Female 75-79 White \n",
"4 0.0 Yes Female 40-44 White \n",
"5 0.0 Yes Female 75-79 Black \n",
"6 0.0 No Female 70-74 White \n",
"7 0.0 Yes Female 80 or older White \n",
"8 0.0 No Female 80 or older White \n",
"9 0.0 Yes Male 65-69 White \n",
"\n",
" Diabetic PhysicalActivity GenHealth SleepTime Asthma \\\n",
"0 Yes Yes Very good 5.0 Yes \n",
"1 No Yes Very good 7.0 No \n",
"2 Yes Yes Fair 8.0 Yes \n",
"3 No No Good 6.0 No \n",
"4 No Yes Very good 8.0 No \n",
"5 No No Fair 12.0 No \n",
"6 No Yes Fair 4.0 Yes \n",
"7 Yes No Good 9.0 Yes \n",
"8 No, borderline diabetes No Fair 5.0 No \n",
"9 No Yes Good 10.0 No \n",
"\n",
" KidneyDisease SkinCancer \n",
"0 No Yes \n",
"1 No No \n",
"2 No No \n",
"3 No Yes \n",
"4 No No \n",
"5 No No \n",
"6 No Yes \n",
"7 No No \n",
"8 Yes No \n",
"9 No No "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd \n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\")\n",
"print(df.columns)\n",
"\n",
"display(df.head(10))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Сегментация пациентов по рискам сердечно-сосудистых заболеваний\n",
"\n",
"Цель: Определить группы пациентов с различными уровнями риска развития сердечно-сосудистых заболеваний на основе их демографических данных, образа жизни и состояния здоровья.\n",
"\n",
"Пример:\n",
"\n",
"Кластер 1: Пациенты с высоким риском (курение, высокий ИМТ, низкая физическая активность).\n",
"\n",
"Кластер 2: Пациенты со средним риском (умеренное курение, средний ИМТ, средняя физическая активность).\n",
"\n",
"Кластер 3: Пациенты с низким риском (отсутствие вредных привычек, нормальный ИМТ, высокая физическая активность).\n",
"\n",
"Бизнес-применение:\n",
"\n",
"Разработка персонализированных программ профилактики и лечения для каждой группы пациентов.\n",
"\n",
"Таргетированная реклама медицинских услуг и продуктов для улучшения образа жизни."
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 71,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"## почистила данные чуть-чуть\n",
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
2024-12-11 20:59:23 +04:00
"df = df.dropna()\n",
"from scipy import stats\n",
"\n",
"# Вычисление z-score для числовых столбцов\n",
"z_scores = stats.zscore(df[['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']])\n",
"abs_z_scores = abs(z_scores)\n",
"filtered_entries = (abs_z_scores < 3).all(axis=1)\n",
"df = df[filtered_entries]\n",
"from sklearn.preprocessing import LabelEncoder\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"# Инициализация LabelEncoder\n",
"label_encoders = {}\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"# Кодирование категориальных переменных\n",
"for column in df.select_dtypes(include=['object']).columns:\n",
" le = LabelEncoder()\n",
" df[column] = le.fit_transform(df[column])\n",
" label_encoders[column] = le\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"from imblearn.over_sampling import RandomOverSampler\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"# Инициализация RandomOverSampler\n",
"ros = RandomOverSampler(random_state=42)\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"# Выбор признаков и целевой переменной\n",
"X = df.drop('HeartDisease', axis=1)\n",
"y = df['HeartDisease']\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"# Применение RandomOverSampler\n",
"X_resampled, y_resampled = ros.fit_resample(X, y)\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"# Создание нового датасета с балансированными данными\n",
"df_balanced = X_resampled.copy()\n",
"df_balanced['HeartDisease'] = y_resampled\n",
2024-11-23 11:55:22 +04:00
"\n",
2024-12-11 20:59:23 +04:00
"df = df_balanced\n"
2024-11-23 11:55:22 +04:00
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 72,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [],
"source": [
"## Standardization of the numeric features\n",
"# The original cell used StandardScaler, df_encoded and numeric_features without\n",
"# importing or defining them, so it raised NameError on a fresh kernel.\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"# Same numeric columns that the z-score outlier filter in the previous cell uses\n",
"numeric_features = ['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']\n",
"\n",
"# Scale a copy: `df` itself is plotted and clustered by the following cells,\n",
"# so it is left untouched here.\n",
"df_encoded = df.copy()\n",
"scaler = StandardScaler()\n",
"df_encoded[numeric_features] = scaler.fit_transform(df_encoded[numeric_features])"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 73,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-12-11 20:59:23 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjQAAASlCAYAAAAPuDBdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd5gT1f7H8XeS3WRrtlfaUqWDAiIiCAoidkXBDraLBbvX9vPaFdu194oFFRFsXMWKolIUVGx0lr691ySbzO+PlUDYZGm7myx8Xs+TB3bOZOabySQ5c75zzjEZhmEgIiIiIiIiIiIiIiISwszBDkBERERERERERERERGRXlNAQEREREREREREREZGQp4SGiIiIiIiIiIiIiIiEPCU0REREREREREREREQk5CmhISIiIiIiIiIiIiIiIU8JDRERERERERERERERCXlKaIiIiIiIiIiIiIiISMhTQkNEREREREREREREREKeEhoiIiIiIiIiIiIiIhLylNAQEZG9MmnSJGJiYlp0n9OmTcNkMrF+/fpm2f6dd96JyWRqlm3vC5PJxJ133rnb606ZMqV5AxIRERGRVmfSpElkZWUFOwwf69evx2QyMW3atN1e95FHHmn+wJpZVlYWkyZNCnYYeyRUr5VE5MCjhIaIyC5sa0Tf8ZGamsrIkSP57LPPGqy/bZ2LL77Y7/b+7//+z7tOYWGhd3kwEgT+TJo0yee12u12+vXrx3//+18cDkeww2txWVlZnHDCCX7Lvv32W0wmE++//36LxrRgwQLuvPNOSktLW3S/IiIiIgeSHa8DfvjhhwblhmHQrl07TCZTwPpiU/n777+58847m/zGnsZuhtn2+pcsWdKk+9yVTz/9dLdv5gk1f/zxB6effjodOnQgIiKCNm3aMHr0aJ566qlgh+ZXVlZWg2tdf4/dSTiJiLSUsGAHICLSWtx999107NgRwzDIy8tj2rRpHHfccXzyyScNLmAiIiKYNWsWzz77LFar1afsnXfeISIigtra2pYMf4/YbDZefvllAEpLS5k1axY33HADP//8M++++27Q4jrvvPM488wzsdlsQYshFCxYsIC77rqLSZMmER8fH+xwRERERPZrERERvP322xxxxBE+y7/77js2b97cInXTv//+m7vuuosRI0aEXC+Lpvbpp5/yzDPPtLqkxoIFCxg5ciTt27fnkksuIT09nU2bNrFo0SKeeOIJrrzyymCH2MDjjz9OZWWl9+9PP/2Ud955h8cee4zk5GTv8sMPP5xzzz2Xm2++ORhhioj4UEJDRGQ3jR07loEDB3r/vuiii0hLS+Odd95pkNA49thj+fjjj/nss884+eSTvcsXLFhAdnY248aNY9asWS0W+54KCwvj3HPP9f59+eWXM3jwYGbMmMGjjz5KZmZmUOKyWCxYLJag7FtEREREDkzHHXccM2fO5MknnyQsbHszyttvv82AAQN8el3Lgeu+++4jLi6On3/+ucFNR/n5+cEJahdOOeUUn79zc3N55513OOWUU/wmznY8/0VEgkVDTomI7KX4+HgiIyP9VuratGnD8OHDefvtt32WT58+nT59+tC7d++92ucjjzyCyWRiw4YNDcpuueUWrFYrJSUlAKxevZpx48aRnp5OREQEbdu25cwzz6SsrGyP92s2mxkxYgRAg27uW7Zs4ZRTTiEmJoaUlBRuuOEG3G43UN8NPysryyeps01tbS1xcXFMnjzZu+ypp56iV69eREVFkZCQwMCBA32OYaA5ND777DOOPPJIYmNjsdvtDBo0yOd533//PWeccQbt27fHZrPRrl07rr32Wmpqavb4WOyOLVu2cOGFF5KWlobNZqNXr168+uqrPus4nU5uv/12BgwYQFxcHNHR0QwbNox58+Y1uu0777yTf//73wB07NjR2w1852Py4Ycf0rt3b+/+586d26SvUURERORAcdZZZ1FUVMSXX37pXeZ0Onn//f
c5++yz/T7H4/Hw+OOP06tXLyIiIkhLS2Py5Mneuvo224Y3/eGHHzj00EOJiIigU6dOvPHGG951pk2bxhlnnAHAyJEjvfW/b7/9FoCPPvqI448/nszMTGw2G507d+aee+7x1smb2ooVKzj99NNJTEwkIiKCgQMH8vHHH/usU1xczA033ECfPn2IiYnBbrczduxYli1b1ui2J02axDPPPAPgM+TRzl588UU6d+6MzWZj0KBB/Pzzz41ud8mSJZhMJl5//fUGZZ9//jkmk4k5c+YAUFFRwTXXXENWVhY2m43U1FRGjx7NL7/80ug+1q5dS69evfz2oE5NTW30uVDfM/6aa66hXbt22Gw2unTpwoMPPojH4/FZb0/PrS+++IL+/fsTERFBz549mT179i5j8cffHBrbhiybOXMmPXv2JDIykiFDhvDHH38A8MILL9ClSxciIiIYMWKE3yHTFi9ezLHHHktcXBxRUVEceeSR/Pjjj3sVo4gcGJRaFRHZTWVlZRQWFmIYBvn5+Tz11FNUVlb69GTY0dlnn83VV19NZWUlMTEx1NXVMXPmTK677rq9Hm5q/Pjx3Hjjjbz33nveRu1t3nvvPY455hgSEhJwOp2MGTMGh8PBlVdeSXp6Olu2bGHOnDmUlpYSFxe3x/teu3YtAElJSd5lbrebMWPGMHjwYB555BG++uor/vvf/9K5c2cuu+wyTCYT5557Lg899BDFxcUkJiZ6n/vJJ59QXl7uPX4vvfQSV111FaeffjpXX301tbW1/P777yxevDjghSLUX+BdeOGF9OrVi1tuuYX4+Hh+/fVX5s6d633ezJkzqa6u5rLLLiMpKYmffvqJp556is2bNzNz5sxdvnaXy+X3zjt/yaG8vDwOO+wwb+U+JSWFzz77jIsuuojy8nKuueYaAMrLy3n55Zc566yzuOSSS6ioqOCVV15hzJgx/PTTT/Tv399vLKeddhqrVq1q0BU8JSXFu84PP/zA7Nmzufzyy4mNjeXJJ59k3LhxbNy40ef9ExEREZFdy8rKYsiQIbzzzjuMHTsWqL+hpqysjDPPPJMnn3yywXMmT57MtGnTuOCCC7jqqqvIzs7m6aef5tdff+XHH38kPDzcu+6aNWs4/fTTueiii5g4cSKvvvoqkyZNYsCAAfTq1Yvhw4dz1VVX8eSTT3LrrbfSo0cPAO+/06ZNIyYmhuuuu46YmBi++eYbbr/9dsrLy3n44Yd3+fpqa2v91nV3HIpom7/++ouhQ4fSpk0bbr75ZqKjo3nvvfc45ZRTmDVrFqeeeioA69at48MPP+SMM86gY8eO5OXl8cILL3DkkUfy999/B+zxPXnyZLZu3cqXX37Jm2++6Xedt99+m4qKCiZPnozJZOKhhx7itNNOY926dT7HdUcDBw6kU6dOvPfee0ycONGnbMaMGSQkJDBmzBgALr30Ut5//32mTJlCz549KSoq4ocffmD58uUccsghAY9jhw4dWLhwIX/++ece38BWXV3NkUceyZYtW5g8eTLt27dnwYIF3HLLLeTk5PD444/7HKPdPbdWr17NhAkTuPTSS5k4cSKvvfYaZ5xxBnPnzmX06NF7FGMg33//PR9//DFXXHEFAFOnTuWEE07gxhtv5Nlnn+Xyyy+npKSEhx56iAsvvJBvvvnG+9xvvvmGsWPHMmDAAO644w7MZjOvvfYaRx11FN9//z2HHnpok8QoIvsZQ0REGvXaa68ZQIOHzWYzpk2b1mB9wLjiiiuM4uJiw2q1Gm+++aZhGIbxv//9zzCZTMb69euNO+64wwCMgoIC7/MmTpxoREdH7zKeIUOGGAMGDPBZ9tNPPxmA8cYbbxiGYRi//vqrARgzZ87c49e7LY6CggKjoKDAWLNmjXH//fcbJpPJ6Nu3r896gHH33Xf7PP/ggw/2iW/lypUGYDz33HM+65100klGVlaW4fF4DMMwjJNPPtno1atXo7Ftey+ys7MNwzCM0tJSIzY21hg8eL
BRU1Pjs+627RqGYVRXVzfY1tSpUw2TyWRs2LDBu2zb+7KjDh06+H3/d3zseJwvuugiIyMjwygsLPTZzplnnmnExcV
2024-11-23 11:55:22 +04:00
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"\n",
"# Scatter plots of numeric feature pairs, coloured by the HeartDisease column.\n",
"# The four panels were copy-pasted stanzas; they are now driven by one list,\n",
"# so adding or reordering a pair is a one-line change.\n",
"panel_pairs = [\n",
"    ('BMI', 'PhysicalHealth'),\n",
"    ('MentalHealth', 'SleepTime'),\n",
"    ('PhysicalHealth', 'SleepTime'),\n",
"    ('BMI', 'MentalHealth'),\n",
"]\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"for panel_no, (x_col, y_col) in enumerate(panel_pairs, start=1):\n",
"    # One panel per pair on a 2x2 grid; the title is '<x> vs <y>' as before\n",
"    plt.subplot(2, 2, panel_no)\n",
"    sns.scatterplot(x=x_col, y=y_col, hue='HeartDisease', data=df)\n",
"    plt.title(f'{x_col} vs {y_col}')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 74,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-12-11 20:59:23 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjQAAASlCAYAAAAPuDBdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXwTdf7H8Xdpm6ZXQmkE5EZbD7BVFl1XwSrrweK6LlgPPEFZf6634sHhfXB5rYoHHuu1CioCrrreJ4d4o+C6anFZEBCwpU3apml65PeH22xD0yRN08kkfT0fjz6U+c7M9/P9zjeTmflkZlJ8Pp9PAAAAAAAAAAAAJtYj3gEAAAAAAAAAAACEQ0IDAAAAAAAAAACYHgkNAAAAAAAAAABgeiQ0AAAAAAAAAACA6ZHQAAAAAAAAAAAApkdCAwAAAAAAAAAAmB4JDQAAAAAAAAAAYHokNAAAAAAAAAAAgOmR0AAAAAAAAAAAAKZHQgMAEJXJkycrJyfH0DqfeOIJpaSk6D//+U+XrP/GG29USkpKl6y7M1JSUnTjjTdGPO9FF13UtQEBAAAg4UyePFlDhgyJdxgB/vOf/yglJUVPPPFExPPecccdXR9YFxsyZIgmT54c7zA6xKznSgC6HxIaABBGy0X01n+9e/fWmDFj9Nprr7WZv2WeP/3pT0HXd8011/jnKS8v90+PR4IgmMmTJwe01Wazaf/999edd96p+vr6eIdnuCFDhui4444LWvb+++8rJSVFL7zwgqExffjhh7rxxhtVVVVlaL0AAADdSevzgJUrV7Yp9/l8GjhwoFJSUto9XoyVb775RjfeeGPMf9gT6scwLe3/7LPPYlpnOK+++mrEP+Yxm3Xr1unEE0/U4MGDZbVa1b9/fx199NGaP39+vEMLasiQIW3OdYP9RZJwAgCjpMU7AABIFDfffLOGDh0qn8+n7du364knntCxxx6rl19+uc0JjNVq1ZIlS/TAAw/IYrEElC1atEhWq1Uej8fI8DskIyNDjz76qCSpqqpKS5Ys0ZVXXqlPP/1Uzz77bNziOvPMMzVx4kRlZGTELQYz+PDDD3XTTTdp8uTJ6tmzZ7zDAQAASGpWq1ULFy7U6NGjA6Z/8MEH2rx5syHHpt98841uuukmHXHEEaa7yyLWXn31Vd1///0Jl9T48MMPNWbMGA0aNEjnnnuu+vbtqx9//FEfffSR7rnnHl188cXxDrGNu+++WzU1Nf5/v/rqq1q0aJH+8pe/yOFw+KcfeuihOuOMMzR9+vR4hAkAAUhoAECExo0bpwMPPND/7ylTpqhPnz5atGhRm4TG7373O7300kt67bXX9Mc//tE//cMPP9SGDRtUWlqqJUuWGBZ7R6WlpemMM87w//uCCy7QwQcfrOeee0533XWX+vXrF5e4UlNTlZqaGpe6AQAA0D0de+yxWrx4se69916lpf3vMsrChQs1cuTIgLuu0X3NmjVLdrtdn376aZsfHe3YsSM+QYUxfvz4gH9v27ZNixYt0vjx44MmzlqPfwCIFx45BQBR6tmzpzIzM4Me1PXv318lJSVauHBhwPRnnnlGRUVF2m+//aKq84477lBKSoo2btzYpmzGjBmyWCyqrKyUJJWVlam0tFR9+/aV1WrVgAEDNHHiRDmdzg7X26NHDx1xxBGS1OY29y1btmj8+PHKycnRbrvtpiuvvFJNTU2SfrkNf8iQIQFJnRYej0d2u13nnXeef9r8+fM1fPhwZWVlKS8vTwceeGBAH7b3Do3XXntNhx9+uHJzc2Wz2XTQQQcFLLdixQqddNJJGjRokDIyMjRw4EBdfvnlqqur63BfRGLLli0655xz1KdPH2VkZGj48OF67LHHAubxer26/vrrNXLkSNntdmVnZ+uwww7Te++9F3LdN954o6666ipJ0tChQ/23ge/aJy+++KL2228/f/2vv/56TNsIAADQXZx66qmqqKjQW2+95Z/m9Xr1wg
sv6LTTTgu6THNzs+6++24NHz5cVqtVffr00Xnnnec/Vm/R8njTlStX6te//rWsVqv22GMPPfXUU/55nnjiCZ100kmSpDFjxviP/95//31J0t///nf9/ve/V79+/ZSRkaE999xTt9xyi/+YPNa+/fZbnXjiierVq5esVqsOPPBAvfTSSwHz7Ny5U1deeaWKioqUk5Mjm82mcePG6auvvgq57smTJ+v++++XpIBHHu3q4Ycf1p577qmMjAwddNBB+vTTT0Ou97PPPlNKSoqefPLJNmVvvPGGUlJS9Morr0iSqqurddlll2nIkCHKyMhQ7969dfTRR+uLL74IWccPP/yg4cOHB72Dunfv3iGXlX65M/6yyy7TwIEDlZGRoYKCAs2bN0/Nzc0B83V0bL355ps64IADZLVaNWzYMC1dujRsLMEEe4dGyyPLFi9erGHDhikzM1OHHHKI1q1bJ0l66KGHVFBQIKvVqiOOOCLoI9M+/vhj/e53v5PdbldWVpYOP/xwrVq1KqoYAXQPpFYBIEJOp1Pl5eXy+XzasWOH5s+fr5qamoA7GVo77bTTdOmll6qmpkY5OTlqbGzU4sWLNXXq1KgfN3XyySfr6quv1vPPP++/qN3i+eef1zHHHKO8vDx5vV6NHTtW9fX1uvjii9W3b19t2bJFr7zyiqqqqmS32ztc9w8//CBJys/P909ramrS2LFjdfDBB+uOO+7Q22+/rTvvvFN77rmnzj//fKWkpOiMM87Qbbfdpp07d6pXr17+ZV9++WW5XC5//z3yyCO65JJLdOKJJ+rSSy+Vx+PR2rVr9fHHH7d7oij9coJ3zjnnaPjw4ZoxY4Z69uypNWvW6PXXX/cvt3jxYrndbp1//vnKz8/XJ598ovnz52vz5s1avHhx2LY3NDQE/eVdsOTQ9u3b9Zvf/MZ/cL/bbrvptdde05QpU+RyuXTZZZdJklwulx599FGdeuqpOvfcc1VdXa2//vWvGjt2rD755BMdcMABQWM54YQT9P3337e5FXy33Xbzz7Ny5UotXbpUF1xwgXJzc3XvvfeqtLRUmzZtCth+AAAACG/IkCE65JBDtGjRIo0bN07SLz+ocTqdmjhxou699942y5x33nl64okndPbZZ+uSSy7Rhg0bdN9992nNmjVatWqV0tPT/fOuX79eJ554oqZMmaJJkybpscce0+TJkzVy5EgNHz5cJSUluuSSS3Tvvfdq5syZ2nfffSXJ/98nnnhCOTk5mjp1qnJycvTuu+/q+uuvl8vl0u233x62fR6PJ+ixbutHEbX45z//qVGjRql///6aPn26srOz9fzzz2v8+PFasmSJJkyYIEn697//rRdffFEnnXSShg4dqu3bt+uhhx7S4Ycfrm+++abdO77PO+88bd26VW+99Zb+9re/BZ1n4cKFqq6u1nnnnaeUlBTddtttOuGEE/Tvf/87oF9bO/DAA7XHHnvo+eef16RJkwLKnnvuOeXl5Wns2LGSpD//+c964YUXdNFFF2nYsGGqqKjQypUr9a9//Uu/+tWv2u3HwYMHa/Xq1fr66687/AM2t9utww8/XFu2bNF5552nQYMG6cMPP9SMGTP0008/6e677w7oo0jHVllZmU455RT9+c9/1qRJk/T444/rpJNO0uuvv66jjz66QzG2Z8WKFXrppZd04YUXSpLmzJmj4447TldffbUeeOABXXDBBaqsrNRtt92mc845R++++65/2XfffVfjxo3TyJEjdcMNN6hHjx56/PHH9dvf/lYrVqzQr3/965jECCDJ+AAAIT3++OM+SW3+MjIyfE888USb+SX5LrzwQt/OnTt9FovF97e//c3n8/l8//jHP3wpKSm+//znP74bbrjBJ8n3888/+5ebNGmSLzs7O2w8hxxyiG/kyJEB0z755BOfJN9TTz3l8/l8vjVr1vgk+RYvXtzh9rbE8fPPP/t+/vln3/r1632zZ8/2paSk+IqLiwPmk+S7+eabA5YfMW
JEQHzfffedT5LvwQcfDJjv+OOP9w0ZMsTX3Nzs8/l8vj/+8Y++4cOHh4ytZVts2LDB5/P5fFVVVb7c3FzfwQcf7Ku
2024-11-23 11:55:22 +04:00
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Scatter plots of the same numeric feature pairs without class colouring.\n",
"# The four panels were copy-pasted stanzas; they are now driven by one list.\n",
"panel_pairs = [\n",
"    ('BMI', 'PhysicalHealth'),\n",
"    ('MentalHealth', 'SleepTime'),\n",
"    ('PhysicalHealth', 'SleepTime'),\n",
"    ('BMI', 'MentalHealth'),\n",
"]\n",
"\n",
"plt.figure(figsize=(16, 12))\n",
"for panel_no, (x_col, y_col) in enumerate(panel_pairs, start=1):\n",
"    # One panel per pair on a 2x2 grid; the title is '<x> vs <y>' as before\n",
"    plt.subplot(2, 2, panel_no)\n",
"    sns.scatterplot(x=x_col, y=y_col, data=df)\n",
"    plt.title(f'{x_col} vs {y_col}')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 75,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-12-11 20:59:23 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAJhCAYAAACHJrtbAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACSh0lEQVR4nOzdeXxU5d3///dMZiaZ7PsGYd9XERRQQBAUUVxa1GpdcKn683apWq3ar7VWe1e7arUu1WpRq7dbW60LKopIq2AVpeACbmwSEtaEJXtyfn/E63BmMpPMJIEEzuv5eOQBmZw55zrXOec61+dcy/FYlmUJAAAAAFzC29UJAAAAAID9iSAIAAAAgKsQBAEAAABwFYIgAAAAAK5CEAQAAADAVQiCAAAAALgKQRAAAAAAVyEIAgAAAOAqBEEAAAAAXIUgCADQrfTp00fnnXdeVycDAHAQIwgCABeaN2+ePB6P/ZOUlKTi4mLNnDlTd999t3bt2tXVSQQAYJ/xdXUCAABd59Zbb1Xfvn1VX1+vsrIyLVq0SFdddZV+//vf65///KdGjRrV1UkEAKDTEQQBgIvNmjVL48aNs3+/8cYbtXDhQs2ePVsnnXSSPvvsMwWDwS5MYWR79uxRSkrKftlWTU2NAoGAvF46TwDAwYISHQAQ4uijj9ZPf/pTrVu3Tn/961/tz1etWqVTTz1V2dnZSkpK0rhx4/TPf/4z5Lumm90777yja665Rnl5eUpJSdF3vvMdbdmyJWRZy7L0i1/8Qj179lRycrKmTZumTz75pEV6zDrffvtt/c///I/y8/PVs2dP++/33Xefhg8frsTERBUXF+uyyy5TRUVFi/Xce++96tevn4LBoA4//HD961//0tSpUzV16lR7mUWLFsnj8eipp57STTfdpB49eig5OVk7d+7U9u3bde2112rkyJFKTU1Venq6Zs2apf/+978h2zHreOaZZ/Tzn/9cPXr0UFpamk499VRVVlaqtrZWV111lfLz85Wamqrzzz9ftbW18RwiAEAH0RIEAGjhnHPO0U9+8hO9/vrruuiii/TJJ5/oyCOPVI8ePXTDDTcoJSVFzzzzjE455RT97W9/03e+852Q719xxRXKysrSz372M61du1Z33XWXLr/8cj399NP2MjfffLN+8Ytf6Pjjj9fxxx+vDz/8UMcee6zq6uoipul//ud/lJeXp5tvvll79uyRJN1yyy36+c9/rhkzZujSSy/V6tWrdf/99+v999/XO++8I7/fL0m6//77dfnll2vy5Mm6+uqrtXbtWp1yyinKysoKCaiM2267TYFAQNdee61qa2sVCAT06aef6vnnn9dpp52mvn37qry8XH/605901FFH6dNPP1VxcXHIOm6//XYFg0HdcMMN+vLLL3XPPffI7/fL6/Vqx44duuWWW7R06VLNmzdPffv21c0339yhYwYAiIMFAHCdv/zlL5Yk6/3334+6TEZGhjVmzBjLsixr+vTp1siRI62amhr7701NTdYRRxxhDRw4sMV6Z8yYYTU1NdmfX3311VZCQoJVUVFhWZZlbd682QoEAtYJJ5wQstxPfvITS5I1d+7cFuucNGmS1dDQYH9u1nHsscdajY2N9ud//OMfLUnWI488YlmWZdXW1lo5OTnWYYcdZtXX19vLzZs3z5JkHXXUUfZnb731liXJ6tevn1VVVRWSHzU1NSHbsSzLWrNmjZWYmGjdeuutLdYxYsQIq66uzv78zDPPtDwejzVr1qyQdUycONHq3bu3BQDYf+gOBwCIKDU1Vbt27dL27du1cOFCnX766dq1a5e2bt2qrVu3atu2bZo5c6a++OILbdy4MeS7F198sTwej/375MmT1djYqHXr1kmS3njjDdXV1emKK64IWe6qq66Kmp6LLrpICQkJ9u9mHVdddVXIeJ2LLrpI6enpevnllyVJH3zwgbZt26aLLrpIPt/eDhBnnXWWsrKyIm5r7ty5Lc
ZCJSYm2ttpbGzUtm3blJqaqsGDB+vDDz9ssY5zzz3XbomSpPHjx8uyLF1wwQUhy40fP14bNmxQQ0ND1H0HAHQuusMBACLavXu38vPz9eWXX8qyLP30pz/VT3/604jLbt68WT169LB/79WrV8jfTbCxY8cOSbKDoYEDB4Ysl5eXFzUw6du3b8jvZh2DBw8O+TwQCKhfv372382/AwYMCFnO5/OpT58+MW1LkpqamvSHP/xB9913n9asWaPGxkb7bzk5OS2WD8+DjIwMSVJJSUmLz5uamlRZWRlxPQCAzkcQBABo4ZtvvlFlZaUGDBigpqYmSdK1116rmTNnRlw+PMBwttg4WZbV7jTtz1nqIm3rl7/8pX7605/qggsu0G233abs7Gx5vV5dddVVdh45RcuDfZE3AID4EAQBAFp4/PHHJUkzZ85Uv379JEl+v18zZszolPX37t1bkvTFF1/Y65ekLVu22K1Fsa5j9erVIeuoq6vTmjVr7LSa5b788ktNmzbNXq6hoUFr166N+V1Izz33nKZNm6aHH3445POKigrl5ubGtA4AQPfAmCAAQIiFCxfqtttuU9++fXXWWWcpPz9fU6dO1Z/+9Cdt2rSpxfLhU1/HYsaMGfL7/brnnntCWkDuuuuuuNYRCAR09913h6zj4YcfVmVlpU444QRJ0rhx45STk6OHHnooZNzNE088EXPAJTW34IS31jz77LMtxkMBALo/WoIAwMXmz5+vVatWqaGhQeXl5Vq4cKEWLFig3r1765///KeSkpIkNb9jZ9KkSRo5cqQuuugi9evXT+Xl5VqyZIm++eabFu/KaUteXp6uvfZa3X777Zo9e7aOP/54ffTRR5o/f37MrSp5eXm68cYb9fOf/1zHHXecTjrpJK1evVr33XefDjvsMJ199tmSmscI3XLLLbriiit09NFH6/TTT9fatWs1b9489e/fP2RihtbMnj1bt956q84//3wdccQRWrlypZ544omQVigAwIGBIAgAXMy8myYQCCg7O1sjR47UXXfdpfPPP19paWn2csOGDdMHH3ygn//855o3b562bdum/Px8jRkzpt3vt/nFL36hpKQkPfDAA3rrrbc0fvx4vf7663YLTixuueUW5eXl6Y9//KOuvvpqZWdn6+KLL9Yvf/nLkJnZLr/8clmWpd/97ne69tprNXr0aP3zn//UlVdeaQd6bfnJT36iPXv26Mknn9TTTz+tQw89VC+//LJuuOGGuPcdANC1PBYjMQEALtTU1KS8vDx997vf1UMPPdTVyQEA7EeMCQIAHPRqampajOd57LHHtH37dk2dOrVrEgUA6DK0BAEADnqLFi3S1VdfrdNOO005OTn68MMP9fDDD2vo0KFatmyZAoFAVycRALAfMSYIAHDQ69Onj0pKSnT33Xdr+/btys7O1rnnnqs77riDAAgAXIiWIAAAAACuwpggAAAAAK5CEAQAAADAVeIaE3TLLbfo5z//echngwcP1qpVqyQ1z77zox/9SE899ZRqa2s1c+ZM3XfffSooKLCXX79+vS699FK99dZbSk1N1dy5c3X77bfL54s9KU1NTSotLVVaWlrML7kDAAAAcPCxLEu7du1ScXGxvN7Y2njinhhh+PDheuONN/auwBG8XH311Xr55Zf17LPPKiMjQ5dffrm++93v6p133pEkNTY26oQTTlBhYaHeffddbdq0Seeee678fr9++ctfxpyG0tJSlZSUxJt0AAAAAAepDRs2qGfPnjEtG9fECLfccouef/55LV++vMXfKisrlZeXpyeffFKnnnqqJGnVqlUaOnSolixZogkTJmj+/PmaPXu2SktL7dahBx54QNdff722bNkS8ww9lZWVyszM1IYNG5Senh5r8gEAAAAcZHbu3KmSkhJVVFQoIyMjpu/E3RL0xRdfqLi4WElJSZo4caJuv/129erVS8uWLVN9fb1mzJhhLztkyBD16tXLDoKWLFmikSNHhnSPmzlzpi
699FJ98sknGjNmTMRt1tbWqra21v59165dkqT09HSCIAAAAABxDZOJa2KE8ePHa968eXr11Vd1//33a82aNZo8ebJ
2024-11-23 11:55:22 +04:00
"text/plain": [
"<Figure size 1000x700 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import AgglomerativeClustering\n",
"from scipy.cluster.hierarchy import dendrogram, linkage\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# Выбор признаков для кластеризации\n",
"X = df[['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']]\n",
"\n",
"# Применение агломеративной кластеризации\n",
"agg_clustering = AgglomerativeClustering(n_clusters=3)\n",
"clusters = agg_clustering.fit_predict(X)\n",
"\n",
"# Добавление результатов кластеризации в датасет\n",
"df['Cluster'] = clusters\n",
"\n",
"# Визуализация дендрограммы\n",
"linked = linkage(X, 'ward')\n",
"\n",
"plt.figure(figsize=(10, 7))\n",
"dendrogram(linked,\n",
" orientation='top',\n",
" distance_sort='descending',\n",
" show_leaf_counts=True)\n",
"plt.title('Dendrogram')\n",
"plt.show()"
]
},
{
"cell_type": "code",
2024-12-11 20:59:23 +04:00
"execution_count": 76,
2024-11-23 11:55:22 +04:00
"metadata": {},
"outputs": [
{
"data": {
2024-12-11 20:59:23 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAAlWCAYAAAASq1EUAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gVVf7H8ffMvemVACH03nvvRREQASkiICrYUcRd17Kuv1WxrmtfV9e2rmABpQgqdqSoSJEiSO+9EyAJ6ffe8/vjkpiQe5MAISHh83qePJCZM3O+M3fu5Jz5zpyxjDEGERERERERERERERGRi5xd0gGIiIiIiIiIiIiIiIgUhpIaIiIiIiIiIiIiIiJSKiipISIiIiIiIiIiIiIipYKSGiIiIiIiIiIiIiIiUiooqSEiIiIiIiIiIiIiIqWCkhoiIiIiIiIiIiIiIlIqKKkhIiIiIiIiIiIiIiKlgpIaIiIiIiIiIiIiIiJSKiipISIiIiIiIiIiIiIipYKSGiJSZtSqVYubbrqppMMoEc8//zyNGjXC4/GUdCgXtYULF2JZFjNnzizWei3L4vHHH78g6961axeWZTF58uQLsv5zddNNN1GrVq1Clw0PDy+wXHx8PGFhYXz99dfnGZ2IiIhI6fL4449jWRbHjh0rsnWeTXutMPbu3UtwcDC//PJLka2zrKpVqxYDBw4s1jqL+vM+U69evejVq9cFW/+5yOr/LVy4sNBlC9NXHDVqFCNGjCiCCEXkXCmpISIXve3btzNu3Djq1KlDcHAwkZGRdO3alVdffZXU1NRiiSElJYXHH3+8UI2h4paYmMhzzz3HQw89hG3/cVq3LCvXT1hYGE2aNOHpp58mJSUl1zpuuukmLMsiMjLS5z7dunVr9npefPHF7OkllSQ4U1YcWT8BAQHUqVOHMWPGsGPHjhKNrSQU1OktiU5UUXyHypcvz2233cajjz5adIGJiIhIqXNmO9ffT0m33Xv16kWzZs1KNIbi9OSTT9KxY0e6du2aPS2rn5H143Q6qV69OqNGjWLDhg25ls/Zpv/oo4981tG1a1csy8qzX0uifetLrVq1cm1vbGws3bt3Z/bs2SUdWrHLuvkqZ/8xpwuRqCuMqVOn8q9//eu81vHQQw/x6aefsmbNmqIJSkTOmrOkAxARyc9XX33FtddeS1BQEGPGjKFZs2ZkZGSwaNEiHnzwQdavX88777xzweNISUnhiSeeALjo7j557733cLlcXHfddXnm9enThzFjxgBw6tQpfv75Zx599FHWrFnDjBkzcpV1Op2kpKQwZ86cPHedTJkyheDgYNLS0i7chhSBP/3pT7Rv357MzExWrVrFO++8w1dffcXatWupUqVKicWVmpqK03lp/8ktqu/QnXfeyb///W/mz5/P5ZdfXkTRiYiISGny4Ycf5vr9gw8+YO7cuXmmN27cuDjDuqQdPXqU999/n/fffz/PvKCgIN59910AXC4X27dv56233uLbb79lw4YNedrpwcHBTJ06lRtuuCHX9F27drF48WKCg4Mv3IYUgVatWnH//fcDcODAAd5++22GDRvGm2++yZ133llicf33v//Vk/14kxrr1q3j3nvvPed1tG7dmnbt2vHSSy/xwQcfFF1wIlJol/YVFhG5qO3cuZNRo0ZRs2ZN5s+fT+XKlbPn3X333Wzbto2vvvqqBCM8f8nJyYSFhZ3XOiZNmsTVV1/ts3HfoEGDXJ2BO++8k4yMDGbNmkVaWlquZYKCgujatSsff/xxnqTG1KlTGTBgAJ9++ul5xXqhde/eneHDhwNw880306BBA/70pz/x/vvv8/DDD5dYXBd7x6s0ady4Mc2aNWPy5MlKaoiIiFyizrzYvXTpUubOnZtn+plSUlIIDQ29kKFdsj766COcTieDBg3KM8/pdOb5bDp16sTAgQP56quvuP3223PNu+qqq/jiiy84duwYFSpUyJ4+depUKlWqRP
369Tlx4sSF2ZAiULVq1VzbO2bMGOrVq8crr7xSokmNgICAEqu7LBoxYgQTJ07kjTfeKNRQuiJStDT8lIhctJ5//nlOnTrF//73v1wJjSz16tXjz3/+s9/lsx5nPdPkyZOxLItdu3ZlT1uxYgX9+vWjQoUKhISEULt2bW655RbAe0dQxYoVAXjiiSeyHyXO+Y6ETZs2MXz4cGJiYggODqZdu3Z88cUXPuv98ccfGT9+PLGxsVSrVg2ApKQk7r33XmrVqkVQUBCxsbH06dOHVatW5buPdu7cye+//84VV1yRb7mc4uLish/9PtPo0aP55ptvOHnyZPa05cuXs3XrVkaPHl3oOnI6fPgwTqcz+y79nDZv3oxlWbz++usAZGZm8sQTT1C/fn2Cg4MpX7483bp1Y+7cuedUd9ZF7507d+aa7vF4eOaZZ6hWrRrBwcH07t2bbdu2Zc+fOHEiAQEBHD16NM8677jjDqKjo7OfWsnv2Mni650a+/fv59Zbb6VKlSoEBQVRu3Zt7rrrLjIyMgA4fvw4DzzwAM2bNyc8PJzIyEj69+9/wR5x9ng8/Otf/6Jp06YEBwdTqVIlxo0bl6fD+PnnnzNgwIDsuOvWrctTTz2F2+32u+7CfIfAu0+GDBlCeHg4FStW5IEHHvC53j59+jBnzhyMMee/4SIiIlImZQ39tHLlSnr06EFoaCj/93//B/h/35mvd/SdPHmSe++9l+rVqxMUFES9evV47rnniuyO999//52bbrope6jduLg4brnlFuLj432WP3bsGCNGjCAyMpLy5cvz5z//2efT1B999BFt27YlJCSEmJgYRo0axd69ewuM55NPPqFt27ZEREQQGRlJ8+bNefXVVwtc7rPPPqNjx46FvrgbFxcH4LNPMnjwYIKCgvI8WT516lRGjBiBw+EoVB1nGjhwIHXq1PE5r3PnzrRr1y7797lz59KtWzeio6MJDw+nYcOG2cfP2YqLi6Nx48Z5+iQAixYtokOHDgQHB1OnTp1cd/3v2LEDy7J45ZVX8iy3ePFiLMvi448/BgrXn/T1Tg2Px8Orr75K8+bNCQ4OpmLFilx55ZWsWLEiu8ykSZO4/PLLiY2NJSgoiCZNmvDmm2+e074ojGXLlnHllVcSFRVFaGgoPXv2zPOelt27dzN+/HgaNmxISEgI5cuX59prr83Vx/elV69efPXVV+zevTu7T+Jrn+TXV8zSp08fkpOTz7mvKiLnR09qiMhFa86cOdSpU4cuXbpc0HqOHDlC3759qVixIn/729+Ijo5m165dzJo1C4CKFSvy5ptvctdddzF06FCGDRsGQIsWLQBYv349Xbt2pWrVqvztb38jLCyM6dOnM2TIED799FOGDh2aq77x48dTsWJFHnvsMZKTkwHvExQzZ85kwoQJNGnShPj4eBYtWsTGjRtp06aN39gXL14M4LdMWlpa9hilycnJ/PLLL7z//vuMHj3aZwdi2LBh3HnnncyaNSv7wvzUqVNp1KhRvnHkp1KlSvTs2ZPp06czceLEXPOmTZuGw+Hg2muvBbyJqGeffZbbbruNDh06kJiYyIoVK1i1ahV9+vQ567q3b98OeN/FkNM///lPbNvmgQceICEhgeeff57rr7+eZcuWAXDjjTfy5JNPMm3aNCZMmJC9XEZGBjNnzuSaa64hODi4wGPHnwMHDtChQwdOnjzJHXfcQaNGjdi/fz8zZ84kJSWFwMBAduzYwWeffca1115L7dq1OXz4MG+//TY9e/b0+Zi+L8ePH/c53VcnfNy4cUyePJmbb76ZP/3pT+zcuZPXX3+d3377jV9++SX7zq7JkycTHh7OfffdR3h4OPPnz+exxx4jMTGRF154wWd9BX2HANxuN/369aNjx468+OKL/PDDD7z00kvUrVuXu+66K9f62rZtyyuvvML69esvqXGqRURE5OzEx8fTv39/Ro0axQ033EClSpXOavmUlBR69uzJ/v37GTduHDVq1GDx4s
U8/PDDHDx48LzH5QfvxfMdO3Zw8803ExcXlz287vr161m6dGmem7RGjBhBrVq1ePbZZ1m6dCn//ve/OXHiRK6L4c8
2024-11-23 11:55:22 +04:00
"text/plain": [
"<Figure size 1600x2400 with 8 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"from sklearn.preprocessing import LabelEncoder\n",
2024-12-11 20:59:23 +04:00
"from sklearn.cluster import MiniBatchKMeans\n",
2024-11-23 11:55:22 +04:00
"import matplotlib.pyplot as plt\n",
"\n",
2024-12-11 20:59:23 +04:00
"# Определение функции draw_data_2d\n",
"def draw_data_2d(data, feature_x, feature_y, labels, subplot):\n",
" subplot.scatter(data.iloc[:, feature_x], data.iloc[:, feature_y], c=labels, cmap='viridis')\n",
" subplot.set_xlabel(data.columns[feature_x])\n",
" subplot.set_ylabel(data.columns[feature_y])\n",
"\n",
2024-11-23 11:55:22 +04:00
"# Load the data (only the first 1000 rows are kept)\n",
2024-12-11 20:59:23 +04:00
"df = pd.read_csv(\"..//static//csv//heart_2020_cleaned.csv\").head(1000)\n",
2024-11-23 11:55:22 +04:00
"\n",
"# Fitted LabelEncoder per encoded column, keyed by column name\n",
"label_encoders = {}\n",
"\n",
"# Encode the categorical (object-dtype) columns in place\n",
"for column in df.select_dtypes(include=['object']).columns:\n",
" le = LabelEncoder()\n",
" df[column] = le.fit_transform(df[column])\n",
" label_encoders[column] = le\n",
"\n",
"# Select the features used for clustering\n",
"X = df[['BMI', 'PhysicalHealth', 'MentalHealth', 'SleepTime']]\n",
"\n",
2024-12-11 20:59:23 +04:00
"# Apply MiniBatchKMeans\n",
"kmeans = MiniBatchKMeans(n_clusters=3, random_state=0, batch_size=100)\n",
"result = kmeans.fit_predict(X)\n",
2024-11-23 11:55:22 +04:00
"\n",
"# Plot the clustering results next to the true labels\n",
"plt.figure(figsize=(16, 24))\n",
"\n",
"# Clusters for features 0 and 1\n",
"plt.subplot(4, 2, 1)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 0, 1, result, plt.subplot(4, 2, 1))\n",
2024-11-23 11:55:22 +04:00
"plt.title('Clusters (BMI vs PhysicalHealth)')\n",
"\n",
"# True labels for features 0 and 1\n",
"plt.subplot(4, 2, 2)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 0, 1, df['HeartDisease'], plt.subplot(4, 2, 2))\n",
2024-11-23 11:55:22 +04:00
"plt.title('True Labels (BMI vs PhysicalHealth)')\n",
"\n",
"# Clusters for features 2 and 3\n",
"plt.subplot(4, 2, 3)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 2, 3, result, plt.subplot(4, 2, 3))\n",
2024-11-23 11:55:22 +04:00
"plt.title('Clusters (MentalHealth vs SleepTime)')\n",
"\n",
"# True labels for features 2 and 3\n",
"plt.subplot(4, 2, 4)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 2, 3, df['HeartDisease'], plt.subplot(4, 2, 4))\n",
2024-11-23 11:55:22 +04:00
"plt.title('True Labels (MentalHealth vs SleepTime)')\n",
"\n",
"# Clusters for features 0 and 2\n",
"plt.subplot(4, 2, 5)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 0, 2, result, plt.subplot(4, 2, 5))\n",
2024-11-23 11:55:22 +04:00
"plt.title('Clusters (BMI vs MentalHealth)')\n",
"\n",
"# True labels for features 0 and 2\n",
"plt.subplot(4, 2, 6)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 0, 2, df['HeartDisease'], plt.subplot(4, 2, 6))\n",
2024-11-23 11:55:22 +04:00
"plt.title('True Labels (BMI vs MentalHealth)')\n",
"\n",
"# Clusters for features 1 and 3\n",
"plt.subplot(4, 2, 7)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 1, 3, result, plt.subplot(4, 2, 7))\n",
2024-11-23 11:55:22 +04:00
"plt.title('Clusters (PhysicalHealth vs SleepTime)')\n",
"\n",
"# True labels for features 1 and 3\n",
"plt.subplot(4, 2, 8)\n",
2024-12-11 20:59:23 +04:00
"draw_data_2d(X, 1, 3, df['HeartDisease'], plt.subplot(4, 2, 8))\n",
2024-11-23 11:55:22 +04:00
"plt.title('True Labels (PhysicalHealth vs SleepTime)')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
2024-12-11 20:59:23 +04:00
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cluster 1 (120):\n",
"4, 6, 10, 26, 32, 44, 49, 54, 57, 65, 66, 71, 76, 80, 81, 90, 97, 105, 119, 121, 130, 155, 157, 166, 206, 218, 219, 239, 240, 247, 263, 267, 278, 279, 281, 284, 288, 293, 297, 306, 315, 317, 323, 326, 333, 335, 343, 346, 355, 378, 398, 402, 406, 407, 410, 418, 431, 443, 447, 453, 455, 469, 499, 516, 539, 543, 544, 545, 551, 559, 570, 575, 581, 586, 597, 601, 602, 605, 609, 615, 616, 618, 619, 622, 629, 647, 650, 653, 659, 668, 680, 686, 689, 703, 718, 725, 728, 731, 732, 743, 749, 760, 789, 818, 833, 840, 857, 866, 888, 906, 915, 930, 939, 945, 948, 964, 969, 976, 990, 998\n",
"--------\n",
"Cluster 2 (796):\n",
"1, 3, 5, 7, 8, 9, 11, 12, 13, 15, 16, 18, 19, 20, 21, 22, 24, 27, 28, 29, 30, 31, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 45, 46, 47, 48, 50, 51, 52, 53, 55, 56, 58, 59, 60, 61, 62, 63, 64, 67, 68, 69, 70, 72, 73, 74, 75, 77, 79, 82, 83, 85, 86, 87, 88, 89, 91, 92, 93, 94, 95, 96, 98, 99, 100, 102, 103, 104, 107, 108, 109, 110, 111, 112, 113, 114, 116, 117, 118, 120, 122, 123, 124, 125, 127, 128, 129, 131, 132, 133, 134, 135, 137, 140, 141, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 158, 159, 160, 161, 163, 164, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 189, 190, 191, 192, 193, 194, 196, 197, 198, 199, 201, 202, 203, 204, 205, 207, 208, 209, 210, 211, 212, 213, 214, 215, 217, 220, 221, 222, 223, 224, 225, 226, 227, 228, 230, 232, 233, 234, 235, 236, 237, 238, 241, 243, 244, 245, 246, 248, 249, 251, 252, 253, 255, 256, 257, 258, 259, 260, 261, 262, 264, 265, 268, 269, 271, 274, 275, 276, 277, 280, 282, 283, 285, 286, 287, 289, 291, 292, 294, 295, 298, 299, 300, 301, 302, 303, 304, 305, 307, 308, 309, 310, 311, 312, 313, 314, 316, 318, 319, 320, 321, 322, 324, 325, 327, 328, 329, 330, 331, 332, 334, 337, 338, 339, 340, 341, 342, 344, 345, 347, 348, 349, 351, 352, 353, 354, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 379, 380, 381, 382, 383, 385, 386, 387, 389, 390, 391, 392, 393, 394, 395, 396, 397, 399, 400, 403, 405, 408, 409, 411, 412, 413, 414, 415, 416, 417, 419, 420, 421, 422, 423, 424, 425, 426, 427, 428, 429, 432, 433, 434, 435, 436, 437, 438, 439, 440, 441, 442, 444, 445, 446, 448, 449, 450, 452, 454, 456, 457, 458, 460, 461, 462, 463, 464, 465, 466, 467, 468, 470, 471, 472, 473, 474, 475, 476, 477, 478, 479, 480, 481, 482, 483, 484, 485, 486, 487, 488, 489, 491, 492, 493, 494, 495, 496, 497, 498, 500, 501, 502, 505, 506, 507, 508, 509, 510, 512, 513, 514, 515, 517, 518, 519, 520, 521, 522, 523, 524, 525, 526, 
527, 528, 529, 530, 531, 532, 533, 534, 535, 536, 537, 540, 541, 542, 546, 547, 548, 549, 550, 552, 554, 555, 556, 557, 558, 560, 561, 562, 563, 565, 566, 567, 569, 571, 573, 574, 576, 577, 578, 579, 580, 582, 583, 584, 585, 587, 588, 589, 590, 591, 592, 593, 594, 595, 596, 598, 599, 600, 604, 607, 608, 610, 611, 612, 613, 614, 617, 620, 621, 623, 624, 626, 627, 628, 630, 631, 632, 633, 634, 635, 637, 638, 639, 640, 641, 642, 643, 644, 645, 646, 648, 649, 651, 652, 654, 655, 656, 658, 660, 661, 662, 663, 665, 666, 667, 669, 670, 671, 672, 673, 674, 675, 676, 677, 678, 679, 681, 682, 683, 684, 685, 687, 688, 690, 691, 692, 693, 694, 695, 696, 697, 698, 699, 701, 702, 704, 705, 706, 707, 708, 709, 710, 711, 712, 713, 714, 715, 716, 717, 719, 720, 721, 722, 723, 724, 726, 729, 730, 733, 734, 735, 736, 737, 739, 740, 741, 742, 744, 745, 746, 748, 751, 752, 753, 754, 755, 756, 757, 759, 761, 762, 763, 764, 765, 766, 767, 768, 769, 770, 771, 772, 773, 775, 776, 777, 778, 779, 780, 781, 782, 784, 785, 786, 787, 788, 790, 791, 792, 793, 794, 795, 796, 797, 798, 799, 800, 802, 803, 804, 805, 806, 807, 808, 809, 810, 811, 812, 813, 814, 815, 816, 817, 819, 820, 821, 822, 823, 824, 825, 826, 827, 828, 829, 830, 831, 832, 834, 835, 836, 837, 839, 841, 842, 843, 844, 845, 846, 847, 848, 849, 850, 851, 852, 853, 854, 855, 856, 858, 859, 860, 861, 862, 863, 864, 865, 867, 869, 870, 871, 872, 873, 874, 875, 877, 878, 879, 880, 881, 882, 883, 884, 885, 886, 887, 889, 890, 892, 893, 894, 895, 898, 899, 900, 901, 902, 903, 904, 905, 907, 909, 910, 911, 912, 913, 914, 916, 917, 918, 919, 920, 921, 922, 923, 924, 925, 926, 927, 928, 929, 931, 933, 934, 935, 936, 938, 940, 941, 942, 943, 944, 946, 947, 949, 950, 951, 952, 953, 954, 955, 956, 959, 961, 962, 963, 965, 967, 970, 971, 972, 973, 974, 975, 978, 979, 980, 981, 982, 983, 984, 985, 986, 988, 989, 991, 992, 993, 994, 996, 997, 999\n",
"--------\n",
"Cluster 3 (84):\n",
"0, 2, 14, 17, 23, 25, 78, 84, 101, 106, 115, 126, 136, 138, 139, 142, 156, 162, 165, 188, 195, 200, 216, 229, 231, 242, 250, 254, 266, 270, 272, 273, 290, 296, 336, 350, 384, 388, 401, 404, 430, 451, 459, 490, 503, 504, 511, 538, 553, 564, 568, 572, 603, 606, 625, 636, 657, 664, 700, 727, 738, 747, 750, 758, 774, 783, 801, 838, 868, 876, 891, 896, 897, 908, 932, 937, 957, 958, 960, 966, 968, 977, 987, 995\n",
"--------\n"
]
},
{
"data": {
"text/plain": [
"array([[31.04766667, 24.575 , 2.90833333, 7.21666667],\n",
" [28.68121859, 1.00125628, 0.97361809, 7.33165829],\n",
" [30.42404762, 10.45238095, 26.0952381 , 6.42857143]])"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"0 0\n",
"1 0\n",
"2 0\n",
"3 0\n",
"4 0\n",
" ..\n",
"995 0\n",
"996 0\n",
"997 1\n",
"998 0\n",
"999 0\n",
"Name: HeartDisease, Length: 1000, dtype: int64"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjUAAASmCAYAAABm7inNAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3gUVdvH8d/MbjpptNB7k46IIKh0ARUFO2JBsbyKFWw8dkVRUR8siI+9iw0bIgpIESlKBxGk91ACSUjP7sz7RyAQkk02yWY3Cd/Pda1kZ86ec8/s7Doz955zDNu2bQEAAAAAAAAAAJRzZqADAAAAAAAAAAAA8AZJDQAAAAAAAAAAUCGQ1AAAAAAAAAAAABUCSQ0AAAAAAAAAAFAhkNQAAAAAAAAAAAAVAkkNAAAAAAAAAABQIZDUAAAAAAAAAAAAFQJJDQAAAAAAAAAAUCE4Ax1AWbMsS3v27FFkZKQMwwh0OAAAAECFYtu2jhw5ojp16sg0+U1UYbj2AAAAAErO22uPSp/U2LNnj+rXrx/oMAAAAIAKbefOnapXr16gwyjXuPYAAAAASq+oa49Kn9SIjIyUlLMjoqKiAhwNAAAAULEkJyerfv36uefV8IxrDwAAAKDkvL32qPRJjWPdvqOioriwAAAAAEqI4ZSKxrUHAAAAUHpFXXswKC4AAAAAAAAAAKgQSGoAAAAAAAAAAIAKgaQGAAAAAAAAAACoEEhqAAAAAAAAAACACoGkBgAAAAAAAAAAqBBIagAAAAAAAAAAgAqBpAYAAAAAAAAAAKgQSGoAAAAAAAAAAIAKgaQGAAAAAAAAAACoEEhqAAAAAAAAAACACoGkBgAAAAAAAAAAqBBIagAAAAAAAAAAgAqBpAYAAAAAAAAAAKgQSGoAAAAAAAAAAIAKgaQGAAAAAAAAAACoEEhqAAAAAAAAAACACsEZ6AAQGLb7oJTxg2z3HhlmVSn0QiXsr6JPnv5af/60XC6XWw1b19NtL1+vJu0b5Xt9anKa5nz+h3b8s0thVUJ1zmXd1Kxj47xt2LZW/LZWy35ZKZfLrfDIMKWnZOjwvkSlH8mQ2+WWK9ulBq3q6YwBHXTGwI7asmq7FkxdovSUDDVsXU+9h52t8MgwSZLb7dZfP6/Uqrl/y7Ztte/ZWl0vOF0b/tqsN+55X/Fb9yskPETdLjxdkTERSk1OV+0mceo7/BxFxqRLqf+TslfKllPzpzXQu08mKy05SyHhIerQs40at41U30sTVDUuQ4ajvhQ2OGffFGDj8i36/ZvFykjNVKM29dW0UyMtmbZcRw4fVp0GB9T7MrciY6M097va+vHttUpN3K/qcSnqP0zKzGqlgwdaat+2g9q346BO65ymbv0PKSIqRD9/Ymr1ohDF1ozVwBv76KwLa8mRPV07/l6oZb/t18Kfq6p6vbq6+cXRqlanbm482/7eqblfzNeOtcuUknRYVWKClJ0dJSs7TVkZ6arfIkqn9++nsJgG+mfeR6pec51ku/TvmmjN/yFCtuVQdI0o1Wkap1qNaiomLlqZaVnKSM1Q3Wa11ebslvpz+gol7k9SzfrV1efKuoqNWSDZR2Q4GklhF2n3phTN+fwPJR86krvfo6tH5dlve7fs02+fLdDOf/fo8N5E2YYty2WpYet6OnNQJ50xsKMcDkee11iu/dq95i3t+meZ9u+2tHFdG3Ufeom6XXi6TPN4Xta2kqX0H+TO3Kzt6xO1dnGanMY2ORyWDiXUV/1216v70LOVsPuQZn+6QIfiD6t63WrqO/xsVa9brTgfnzJj27Y2/LVJf3z3lzLTMtWkfUP1vLK7wiJCZdu2Vs5Zq6UzVsrtcqvlmc119iVnKig4KNBh5/Pv8i364NEpSthzSLFxMbru8cvV+qyWBZZ1Zbu06Iel+vuP9TIdpjr1a6/O/dvneW9PlrD3sGZ/Ml8bl2/Rofgk1W5SU6d1bZ7n+8LfPH0/nXw8F+bw/iT99u
nvit+2X9HVo9R7WA/VbVa7WHHY2etkZ/wq2akynE2k0MEyzCrF3Zxicbvd+nP6Cq2et06S1L5na515fqc8235gV4Jmf/q7EvYcUrXaseoz/BzVrF+95G263Fr041KtXbBehmGoU992OmNAh0KPGwAAAF/ZtHKrfv96sdJTMlS/VV31ufpsRUSFBzosAABOGYZt23agGp88ebImT56sbdu2SZLatGmjxx57TIMGDZIkZWRkaMyYMZoyZYoyMzM1YMAAvfHGG4qLi/O6jeTkZEVHRyspKUlRUVFFv6CSs21bSn1LdspESbZyOuvYsm23fvq4qiY9XE+W28jzmo692+iFWY/LMHKWz5nyh14a+YYyM7LkdDpk27bcLktdBnXSI1PuVXhkmPbvOKCHLxyvbWt3yuE05XZZRcYWEhaszPQsOZymDMOQy+VWSFiI7n/vdjVq10CPDn5Oe7fskyMo50aZO9stZ5BDrmx3gfU5nKYsy5bDId306C4NvemgbFuyrJxNHzusiVb9UUXSse21ZZjSsLsO6Lr798kwTBmRD8mIuC63ztTkND19xcta9usqOZymZBhyH23fNCXTYcntMmSYtoJDbGWkOY7uZx1tx5Zh2LLtnDZNh2S5DXXunaSdG0O1f1eIHE5bbrck21Bc/Uw98d5W1W+eJYfDlmVLHz5fS19OqqlL72qnG8Y/pOevf12/f734aDvGCf/mvuu57cfWyNYzn21RwxYZit8RpCdHNtGOf0MLeI2ObpMhy8p5vWEacjhMud1uGYata8fs17C7D8qVbWni/fU166sYmQ5TpmnI7bbkcJi6+YVrdcndF8jtcuv1u97VtP/NzBvSSeo0jdPTP45Vg1Z1Zdu2Uva8rFD7LcmwZbkNmaYt05R+/LCavvxfe73w6+Oq16KO7LRvZCc/IdvOktslOZy2DCPnvXa7pKBgKSnBoefuaK7l80Jy47TclmxJwx4aqhFPX5V7jAdC8qEjeuqyF7Vq7rrjn4Fst8Iiw3T7f0fo21ena8vq7XI4HZKRc/zH1IjSE98+oDbdC04Y+JtlWRrT63GtXbA+37pmnRrrtSXPyuk8nkv/d9lmPXbx80rYc1jOIIds5WxXvZZ19My0sarTtFaeOmzb1idPfa2Pn/5KtpX/IAoJD9F9796mXlf28Pm2FWb7P7sK/H6Ka1hDT//4kBq3bVBkHV+9+IPe/c9nsqycz45l2bLclgbd1Fd3TbpJzqDCf4NgW6myk+6VMudKcijn8+yWFCIj+hkZYYNLu5kF2r5upx6+cLz2bTuQd9sb1dC4H8eqYet6eu/hz/XFC9/JMIzc7xTbsnX5fRdp5Piri52I2Lxqmx4Z/JwO7krI02adZrX0zLSxqteijs+3EzjVcT7tPfYVULmlp6Rr3FUT9ef05XmvW0ODNfrt/1Ofq88JdIgAAFRo3p5PBzSp8eOPP8rhcKh58+aybVsffvihJkyYoBUrVqhNmza67bbb9NNPP+mDDz5QdHS07rjjDpmmqT/++MPrNriwyMtOmyI7+bEC11mW9O1b1fXWU3XzrTv3sm569MsxWvHbGj3Y/2kVdNiYDlOdz+ugx78eo5vbjdH+HQe8SmYU6ug9+iqxEUpLTpflLnl9D76+XX0uSZQkffxinD55OU4F3ciXpFse361Lbz2YE0L0SzLCBsu2bT004GmtnPN3qeIomH10W/PGYzpsRUa79fa89Yqudjx5M/G+uvr5s2pq2jFOW1YfKPAGb0FMh63wSLde+/lfPXRFUx3YE5wviVUco57Zpa3rQ/Xzp9VkWwXX89DHd2nTii365r/TVNS3jWEaiqkZrXf//q9CzalypI8rsJxtSV9NrqFv3m6lj9ZdppCsu2XbUmE5CbdbcmUZuv28Ftq1OTTf+pufv0ZX3H9x4QGWEdu2dc/Zj2j9n5s8Hlumw8y3zjQNBYUG661VL+ZLAATCYxc/r0U/LvW4vt05p+nleU9Jkg7uTtBNbUcrPSUj/3
Y5TFWrE6t3//6vwqoc73kx9ZWfNPneDwqNwTAMTZj9uDr0alPyDSmG5ENHNLL1vUpOOFLgdkTGRuidv/+rmBrRHuu
"text/plain": [
"<Figure size 1600x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"import matplotlib.pyplot as plt\n",
"\n",
"def run_kmeans(data, n_clusters, random_state):\n",
"    \"\"\"Fit K-means on ``data`` and return ``(labels, centers)``.\n",
"\n",
"    ``labels`` is the result of ``fit_predict(data)``; ``centers`` is the fitted\n",
"    model's ``cluster_centers_``.\n",
"    \"\"\"\n",
"    model = KMeans(n_clusters=n_clusters, random_state=random_state)\n",
"    assignments = model.fit_predict(data)\n",
"    return assignments, model.cluster_centers_\n",
"\n",
"def print_cluster_result(data, n_clusters, labels):\n",
"    \"\"\"Print, for every cluster id in ``range(n_clusters)``, the positions assigned to it.\n",
"\n",
"    Clusters are numbered from 1 in the output and the listed values are\n",
"    positions within ``labels``. ``data`` is accepted but not used.\n",
"    \"\"\"\n",
"    for cluster_id in range(n_clusters):\n",
"        members = []\n",
"        for position, assigned in enumerate(labels):\n",
"            if assigned == cluster_id:\n",
"                members.append(str(position))\n",
"        print(f\"Cluster {cluster_id + 1} ({len(members)}):\")\n",
"        print(\", \".join(members))\n",
"        print(\"--------\")\n",
"\n",
"def draw_cluster_results(data, feature_x, feature_y, labels, centers, subplot):\n",
"    \"\"\"Scatter two columns of ``data`` coloured by ``labels`` and mark the cluster centres.\n",
"\n",
"    ``feature_x``/``feature_y`` are column positions used both for ``data``\n",
"    (via ``iloc``) and for ``centers`` (via ``centers[:, i]``). Centres are drawn\n",
"    as red crosses; axis labels are the names of the selected columns.\n",
"    \"\"\"\n",
"    points_x = data.iloc[:, feature_x]\n",
"    points_y = data.iloc[:, feature_y]\n",
"    subplot.scatter(points_x, points_y, c=labels, cmap='viridis')\n",
"    center_style = dict(marker='x', s=200, linewidths=3, color='r')\n",
"    subplot.scatter(centers[:, feature_x], centers[:, feature_y], **center_style)\n",
"    subplot.set_xlabel(data.columns[feature_x])\n",
"    subplot.set_ylabel(data.columns[feature_y])\n",
"\n",
"# Apply K-means (X and df are created in the MiniBatchKMeans cell above)\n",
"random_state = 9\n",
"n_clusters = 3\n",
"labels, centers = run_kmeans(X, n_clusters, random_state)\n",
"print_cluster_result(X, n_clusters, labels)\n",
"display(centers)\n",
"display(df['HeartDisease'])\n",
"\n",
"# Plot the clustering results for four feature pairs\n",
"plt.figure(figsize=(16, 12))\n",
"draw_cluster_results(X, 0, 1, labels, centers, plt.subplot(2, 2, 1))\n",
"draw_cluster_results(X, 2, 3, labels, centers, plt.subplot(2, 2, 2))\n",
"draw_cluster_results(X, 0, 2, labels, centers, plt.subplot(2, 2, 3))\n",
"draw_cluster_results(X, 1, 3, labels, centers, plt.subplot(2, 2, 4))\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ 9.31321501 26.77646868]\n",
" [ -6.91783938 1.05243249]\n",
" [ 25.54665344 17.10779298]\n",
" ...\n",
" [ -5.04837552 -0.86215887]\n",
" [ 22.80197652 -17.26780336]\n",
" [ -6.07756748 -0.15069417]]\n"
]
}
],
"source": [
"from sklearn.decomposition import PCA\n",
"\n",
"# Reduce the data to 2 components with PCA\n",
"reduced_data = PCA(n_components=2).fit_transform(X)\n",
"\n",
"# Print the reduced data\n",
"print(reduced_data)\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 90,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjQAAAJOCAYAAAAQ6+4mAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hT1R8G8Pfem+7dQsteZQnIXmWDDNmylwxlbxRQQZShgIggiICCUGTKRvbeiOwpsvcso3sn9/z+qM2P0KZNaJs07ft5Hh7pvecmb0Jt78n3DEkIIUBERERERERERERERJSJydYOQERERERERERERERElBoWNIiIiIiIiIiIiIiIKNNjQYOIiIiIiIiIiIiIiDI9FjSIiIiIiIiIiIiIiCjTY0GDiIiIiIiIiIiIiIgyPRY0iIiIiIiIiIiIiIgo02NBg4iIiIiIiIiIiIiIMj0WNIiIiIiIiIiIiIiIKNNjQYOIiIiIiIiIiIiIiDI9FjSIiLKhNWvWwNvbGxEREen2mPXq1UO9evVMblumTJl0e25KP4UKFUKLFi3S9TElScKECRPMuqZz587o2LFjuuYgIiIiosyF/RLbIkkShgwZkm6Pd/fuXUiShCVLlph1XfXq1fHZZ5+lWw4isi0saBBRprdkyRJIkqT/4+joiOLFi2PIkCF49uxZkvbPnj3DqFGjULJkSTg7O8PFxQWVKlXCt99+i5CQkGSfo2rVqpAkCfPnzzc5V+LNV+IfOzs75MiRAzVq1MDYsWNx//79t33JePz4MSZMmIDz58+/9WMYo9PpMH78eAwdOhSurq4AgFKlSqFcuXJJ2m7cuBGSJKFu3bpJzi1evBiSJGH37t3JPk9Gvoa02L59u0kfrr/5fWfsT6FChTI8c0oSvw9/+OEHq+ZIb59//jnWr1+PCxcuWDsKEREREQD2S9KbLfdLJkyYYFJfwdTCSkY5ePAgJEnCunXrrJojvX3++eeYO3cunj59au0oRGQFGmsHICIy1aRJk1C4cGHExMTg6NGjmD9/PrZv347Lly/D2dkZAHDq1Ck0a9YMERER+PDDD1GpUiUAwOnTp/Hdd9/h8OHDSW50b9y4gVOnTqFQoUJYsWIFBg4caFauLl26oFmzZlBVFcHBwTh16hRmzZqF2bNnY9GiRejcubPZr/Xx48eYOHEiChUqhPLly5t9fUq2bNmCa9euoV+/fvpjtWrVwqJFixAaGgoPDw/98WPHjkGj0eDUqVOIj4+HnZ2dwTlFURAQEAAASd7XjHwNabF9+3bMnTs31aJGnTp1sGzZMoNjffr0QdWqVQ3eu8TOF6WvChUqoHLlypgxYwaWLl1q7ThEREREeuyXpA9b7pe0bdsWRYsW1X8dERGBgQMHok2bNmjbtq3+uJ+fX7o8Hxlq3bo13N3dMW/ePEyaNMnacYjIwljQICKb0bRpU1SuXBlAwgfLPj4+mDlzJv7880906dIFISEhaNOmDRRFwblz51CyZEmD6ydPnoyFCxcmedzly5fD19cXM2bMQPv27XH37l2zRt1XrFgRH374ocGxe/fuoXHjxujZsyfeeeedZEcZWUtgYCBq1qyJvHnz6o/VqlULCxcuxF9//YWmTZvqjx87dgwdO3bEypUrcebMGVSvXl1/7ujRoyhbtizc3NwAAPb29pZ7ERZQpEgRFClSxODYgAEDUKRIkST/3q/TarVQVTXLvR/W0LFjR4wfPx7z5s1j4YiIiIgyDfZL0oct90vKli2LsmXL6r9+8eIFBg4ciLJly6bYV4iJiYG9vT1kmQumpIUsy2jfvj2WLl2KiRMnQpIka0ciIgviT1AislkNGjQAANy5cwcA8Ouvv+LRo0eYOXNmkk4DkDA6Zty4cUmOr1y5Eu3bt0eLFi3g4eGBlStXpjlbwYIFsWTJEsTFxeH777/XH3/16hVGjRqFd999F66urnB3d0fTpk0NltU5ePAgqlSpAgD46KOP9N
OVE9cVPXLkCDp06IACBQrAwcEB+fPnxyeffILo6OhUc8XExGDnzp1o2LChwfFatWoBSOgovN727NmzaNu2LYoUKWJw7vnz57h+/br+OsBwrdrUXkOiK1euoH79+nB2dkbevHkN3qtEQUFB6N27N/z8/ODo6Ihy5crh999/N2iTOJX64MGDBsffXJO1V69emDt3LgAYTAV/W68v9zRr1iz4+/vDwcEBV65c0S9JcPfuXZOynjhxAu+//z48PDzg7OyMunXrGrznaRUYGIgGDRrA19cXDg4OKFWqVIpLGezevRvly5eHo6MjSpUqhQ0bNiRpExISghEjRiB//vxwcHBA0aJFMW3aNKiqmmKW8PBwjBgxAoUKFYKDgwN8fX3RqFEjnD171qBdo0aNEBkZiT179rzdiyYiIiKyAPZLske/xFyJ9/1//PEHxo0bh7x588LZ2RlhYWH6JaveZKwPsWPHDtSuXRsuLi5wc3ND8+bN8c8//6Q5Y6IffvgBNWrUgI+PD5ycnFCpUqUUl6lasWIFSpQoAUdHR1SqVAmHDx9O0ubRo0f4+OOP4efnBwcHB5QuXRqLFy9ONcvTp0/x0UcfIV++fHBwcEDu3LnRunXrJO9Jo0aNcO/evUy3xDERZTzO0CAim3Xr1i0AgI+PDwBg8+bNcHJyQvv27U1+jBMnTuDmzZsIDAyEvb092rZtixUrVmDs2LFpzhcQEAB/f3+DD2Nv376NTZs2oUOHDihcuDCePXuGX3/9FXXr1sWVK1eQJ08evPPOO5g0aRK+/vpr9OvXD7Vr1wYA1KhRAwCwdu1aREVFYeDAgfDx8cHJkycxZ84cPHz4EGvXrk0x05kzZxAXF4eKFSsaHC9SpAjy5MmDo0eP6o+dOnUKcXFxqFGjBmrUqIFjx45h5MiRAIC//voLAAw6Dq9L7TUAQHBwMN5//320bdsWHTt2xLp16/D555/j3Xff1Y/Gio6ORr169XDz5k0MGTIEhQsXxtq1a9GrVy+EhIRg+PDhqf9DvKZ///54/Pgx9uzZk2Q5qbQIDAxETEwM+vXrBwcHB3h7e5t1/f79+9G0aVNUqlQJ48ePhyzL+gLEkSNHULVq1TRnnD9/PkqXLo1WrVpBo9Fgy5YtGDRoEFRVxeDBgw3a3rhxA506dcKAAQPQs2dPBAYGokOHDti5cycaNWoEAIiKikLdunXx6NEj9O/fHwUKFMBff/2FMWPG4MmTJ5g1a5bRLAMGDMC6deswZMgQlCpVCi9fvsTRo0fx77//GnxvlipVCk5OTjh27BjatGmT5veAiIiIKCOwX5L1+yVp8c0338De3h6jRo1CbGys2TNIli1bhp49e6JJkyaYNm0aoqKiMH/+fNSqVQvnzp1Llz39Zs+ejVatWqFbt26Ii4vDH3/8gQ4dOmDr1q1o3ry5QdtDhw5h9erVGDZsGBwcHDBv3jy8//77OHnypH6D9WfPnqF69er6TcRz5syJHTt2oHfv3ggLC8OIESOMZmnXrh3++ecfDB06FIUKFUJQUBD27NmD+/fvG7zWxGXcjh07hgoVKqT5PSAiGyKIiDK5wMBAAUDs3btXPH/+XDx48ED88ccfwsfHRzg5OYmHDx8KIYTw8vIS5cqVM+uxhwwZIvLnzy9UVRVCCLF7924BQJw7dy7Va+/cuSMAiOnTpxtt07p1awFAhIaGCiGEiImJETqdLsnjODg4iEmTJumPnTp1SgAQgYGBSR4zKioqybGpU6cKSZLEvXv3Usz822+/CQDi0qVLSc516NBBODk5ibi4OP1jFi5cWAghxLx584Svr6++7ahRowQA8ejRI/2xunXrirp165r0GurWrSsAiKVLl+qPxcbGily5col27drpj82aNUsAEMuXL9cfi4uLEwEBAcLV1VWEhYUJIYQ4cOCAACAOHDhg8DyJ/0avZxg8eLB4219/Li4uomfPnkke393dXQQFBRm0Tfy+vXPnjsHxN7OqqiqKFSsmmjRpov
8+FCLh37lw4cKiUaNGKWYy5fsw8fHe1KRJE1GkSBGDYwULFhQAxPr16/XHQkNDRe7cuUWFChX0x7755hvh4uIirl+
"text/plain": [
"<Figure size 1600x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"# Plot the data after dimensionality reduction\n",
"plt.figure(figsize=(16, 6))\n",
"\n",
"# Plot the data without the true labels\n",
"draw_data_2d(\n",
" pd.DataFrame({\"Column1\": reduced_data[:, 0], \"Column2\": reduced_data[:, 1]}),\n",
" 0,\n",
" 1,\n",
" labels=[0] * len(reduced_data), # dummy labels: every point gets the same value\n",
" subplot=plt.subplot(1, 2, 1),\n",
")\n",
"plt.title('PCA Data (Without True Labels)')\n",
"\n",
"# Plot the data coloured by the true labels\n",
"draw_data_2d(\n",
" pd.DataFrame({\"Column1\": reduced_data[:, 0], \"Column2\": reduced_data[:, 1]}),\n",
" 0,\n",
" 1,\n",
" df['HeartDisease'],\n",
" plt.subplot(1, 2, 2),\n",
")\n",
"plt.title('PCA Data (With True Labels)')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1UAAAIjCAYAAADr8zGuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU1fn48c+5d9ZkskIW9h3ZVxVBARUERcEquFRt3VqrdalLq/XXRWm11mrV1m9dqq3WvSru1l1QEFxABWTfZCes2TPbvef3x5kEQgIkkGQSeN6v17wgc+/c+9zJJJlnnnOeo7TWGiGEEEIIIYQQB8VKdgBCCCGEEEII0ZJJUiWEEEIIIYQQh0CSKiGEEEIIIYQ4BJJUCSGEEEIIIcQhkKRKCCGEEEIIIQ6BJFVCCCGEEEIIcQgkqRJCCCGEEEKIQyBJlRBCCCGEEEIcAkmqhBBCCCGEEOIQSFIlhBCHmRNPPJETTzwx2WEclBkzZqCUYsaMGUmL4S9/+Qu9evXCdd2knP+SSy6hc+fOSTl3U3jkkUfo2LEjkUikUc+jlOL222+v877XXHNNo8YjhDi8SVIlhGhyTz75JEop5s6dW+3+oqIijj32WAKBAO+++26Somu+CgoK+OUvf0mvXr1ISUkhNTWVoUOHcscdd1BYWNhkcfzpT3/itddea7LzNaXi4mLuvvtubrnlFizL/IncsWMH99xzD6NGjSInJ4fMzEyOO+44/vvf/9b72FOnTmXgwIGEQiGCwSD9+vXjlltuYdOmTY1xObV66KGHePLJJ5vsfHu75JJLiEajPProo0163tmzZ3P77bc3+M/K999/j1Kq6mbbNh07duSss87i22+/rbF/OBzm/vvvZ9iwYWRkZBAIBOjZsyfXXHMNy5cvr/UcN998M0opzjvvvAaNXQjRcDzJDkAIIcC84Rw3bhwLFizg1Vdf5dRTT012SM3KV199xYQJEygtLeWiiy5i6NChAMydO5c///nPfPrpp7z//vtNEsuf/vQnpkyZwg9+8IMGP/aoUaOoqKjA5/M1+LHr4t///jfxeJwf/vCHVffNmTOH3/zmN0yYMIHf/va3eDwepk2bxvnnn8/ixYuZOnXqAY+7evVqxo4dy7p16zjnnHO44oor8Pl8LFiwgH/961+8+uqr+3xD3dAeeughWrduzSWXXNIk59tbIBDg4osv5r777uPaa69FKdUo56moqMDj2f02Z/bs2UydOpVLLrmEzMzMBj/fD3/4QyZMmIDjOCxZsoSHH36Yd955h88//5xBgwYBsH37dk499VTmzZvHGWecwQUXXEAoFGLZsmW88MIL/POf/yQajVY7rtaa559/ns6dO/Pmm29SUlJCWlpag8cvhDg0klQJIZKupKSE8ePH8+233/LKK69w2mmnJTukZqWwsJCzzjoL27b55ptv6NWrV7Xtd955J4899liSomsY4XAYn8+HZVkEAoGkxfHEE08wadKkajH07duXFStW0KlTp6r7fv7znzN27Fjuvvtubr75ZlJTU/d5zHg8ztlnn01BQQEzZszghBNOqLb9zjvv5O677274i2lC8Xgc13XrnAyfe+65/OUvf2H69OmcfPLJjRJTU7+OhgwZwkUXXVT19fHHH8+kSZN4+OGHq6pyl1xyCd988w0vv/wykydPrvb4P/7xj/zmN7+pcdwZM2awYcMGPv74Y8aPH88rr7zCxRdf3LgXI4SoNxn+J4RIqtLSUk499VS+/vprpk2bxumnn77f/W+//XaUUixfvpyLLrqIjIwMcnJy+N3vfofWmvXr13PmmWeSnp5Ofn4+f/3rX2scIxKJcNttt9G9e3f8fj8dOnTg5ptvrjHH44knnuDkk08mNzcXv99Pnz59ePjhh2scr3PnzpxxxhnMmjWravhi165deeqpp6rtF4vFmDp1Kj169CAQCNCqVStOOOEEPvjgg/1e86OPPsrGjRu57777aiRUAH
l5efz2t7/d5+Mrh1t+//331e6vbf7SihUrmDx5Mvn5+QQCAdq3b8/5559PUVERYOaelJWV8Z///KdquNOeFY+NGzdy2WWXkZeXh9/vp2/fvvz73/+u9bwvvPACv/3tb2nXrh0pKSkUFxfXGtOJJ55Iv379WLx4MSeddBIpKSm0a9eOv/zlLzWude3atUyaNInU1FRyc3O54YYbeO+99+o0T2vNmjUsWLCAsWPHVru/S5cu1RKqyufhBz/4AZFIhNWrV+/3uNOmTWP+/Pn85je/qZFQAaSnp3PnnXfu8/H7mmdWOexsz6F8W7Zs4dJLL6V9+/b4/X7atGnDmWeeWfW979y5M4sWLeKTTz6p+v7tOf+usLCQ66+/ng4dOuD3++nevTt33313tflllee99957eeCBB+jWrRt+v5/FixcD8OCDD9K3b19SUlLIysri6KOP5rnnnqsW+9ChQ8nOzub111/f73P397//Hdu2qw3Z++tf/4pSihtvvLHqPsdxSEtL45Zbbqm6b885Vbfffju/+tWvAPP9rLz2vX8mXnvtNfr161f12j2UYciVyeKaNWsA+OKLL3j77be5/PLLayRUAH6/n3vvvbfG/c8++yx9+vThpJNOYuzYsTz77LMHHZMQovFIpUoIkTRlZWWcdtppfPXVV7z88succcYZdX7seeedR+/evfnzn//M22+/zR133EF2djaPPvooJ598MnfffTfPPvssv/zlLznmmGMYNWoUAK7rMmnSJGbNmsUVV1xB7969WbhwIffffz/Lly+vNlfo4Ycfpm/fvkyaNAmPx8Obb77Jz3/+c1zX5eqrr64Wz8qVK5kyZQqXX345F198Mf/+97+55JJLGDp0KH379gXMG7u77rqLn/zkJxx77LEUFxczd+5cvv76a0455ZR9Xusbb7xBMBhkypQp9Xh26y8ajTJ+/HgikQjXXnst+fn5bNy4kbfeeovCwkIyMjJ4+umnq+K/4oorAOjWrRtg5nwdd9xxVZP+c3JyeOedd7j88sspLi7m+uuvr3a+P/7xj/h8Pn75y18SiUT2W+XYtWsXp556KmeffTbnnnsuL7/8Mrfccgv9+/evqmyWlZVx8skns3nzZn7xi1+Qn5/Pc889x/Tp0+t0/bNnzwZMxaEutmzZAkDr1q33u98bb7wBwI9+9KM6HfdQTJ48mUWLFnHttdfSuXNntm7dygcffMC6devo3LkzDzzwANdeey2hUKiqKpKXlwdAeXk5o0ePZuPGjfzsZz+jY8eOzJ49m1tvvZXNmzfzwAMPVDvXE088QTgc5oorrsDv95Odnc1jjz3Gddddx5QpU/jFL35BOBxmwYIFfPHFF1xwwQXVHj9kyBA+++yz/V7PyJEjcV2XWbNmVf1+mDlzJpZlMXPmzKr9vvnmG0pLS6t+zvd29tlns3z5cp5//nnuv//+qu9ZTk5O1T6zZs3ilVde4ec//zlpaWn8/e9/Z/Lkyaxbt45WrVrV4dmvbtWqVQBVjz2Y10EkEmHatGncdNNNgBlieOmll7Jlyxby8/PrHZMQohFpIYRoYk888YQGdKdOnbTX69WvvfZanR972223aUBfccUVVffF43Hdvn17rZTSf/7zn6vu37Vrlw4Gg/riiy+uuu/pp5/WlmXpmTNnVjvuI488ogH92WefVd1XXl5e4/zjx4/XXbt2rXZfp06dNKA//fTTqvu2bt2q/X6/vummm6ruGzhwoD799NPrfK2VsrKy9MCBA+u8/+jRo/Xo0aOrvq58vtesWVNtv+nTp2tAT58+XWut9TfffKMB/dJLL+33+KmpqdWe00qXX365btOmjd6+fXu1+88//3ydkZFR9XxWnrdr1641nuO9Y6q8HkA/9dRTVfdFIhGdn5+vJ0+eXHXfX//6Vw1Uez1VVFToXr161ThmbX77299qQJeUlOx3P6213rFjh87NzdUjR4484L6DBw/WGRkZB9yv0sUXX6w7depU9XVtz4nWWq
9Zs0YD+oknntBam9c7oO+55579Hr9v377VXh+V/vjHP+rU1FS9fPnyavf/+te/1rZt63Xr1lU7b3p6ut66dWu1fc8
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# fit_kmeans: build and fit a KMeans model\n",
"def fit_kmeans(data, n_clusters, random_state):\n",
"    \"\"\"Create a ``KMeans`` model with the given settings, fit it on ``data`` and return it.\"\"\"\n",
"    model = KMeans(n_clusters=n_clusters, random_state=random_state)\n",
"    model.fit(data)\n",
"    return model\n",
"\n",
"# draw_clusters: scatter the 2-D points and mark the cluster centres\n",
"def draw_clusters(data, kmeans):\n",
"    \"\"\"Plot 2-D ``data`` coloured by ``kmeans.labels_`` with the centres as red crosses.\n",
"\n",
"    ``data`` is indexed as ``data[:, 0]`` / ``data[:, 1]``, so it must support\n",
"    2-D array indexing; only the first two columns are drawn. The title reports\n",
"    the number of fitted centres rather than a hard-coded count, so the helper\n",
"    stays correct for any number of clusters.\n",
"    \"\"\"\n",
"    labels = kmeans.labels_\n",
"    centers = kmeans.cluster_centers_\n",
"\n",
"    plt.figure(figsize=(10, 6))\n",
"    plt.scatter(data[:, 0], data[:, 1], c=labels, cmap='viridis', alpha=0.6)\n",
"    plt.scatter(centers[:, 0], centers[:, 1], marker='x', s=200, linewidths=3, color='r')\n",
"    plt.xlabel('Principal Component 1')\n",
"    plt.ylabel('Principal Component 2')\n",
"    # One row of cluster_centers_ per cluster -> the real cluster count\n",
"    plt.title(f'K-means Clustering ({len(centers)} Clusters) with PCA')\n",
"    plt.show()\n",
"\n",
"# Apply K-means with two clusters to the PCA-reduced data\n",
"# (random_state is the value assigned in the earlier K-means cell)\n",
"kmeans = fit_kmeans(reduced_data, 2, random_state)\n",
"draw_clusters(reduced_data, kmeans)"
]
},
{
"cell_type": "code",
"execution_count": 95,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1UAAAIjCAYAAADr8zGuAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3wc1dXw8d/M9tUW9WKru3fjboptwAVML4GEkNBCgEBCEtLfBEJCQspDyZNCCaEkMfAAAQIhdEyzMbj3blmyet/V7mrr3PcP2cJCspGbVrLP9/NxgqaeWa1258y991xNKaUQQgghhBBCCHFY9GQHIIQQQgghhBADmSRVQgghhBBCCHEEJKkSQgghhBBCiCMgSZUQQgghhBBCHAFJqoQQQgghhBDiCEhSJYQQQgghhBBHQJIqIYQQQgghhDgCklQJIYQQQgghxBGQpEoIIYQQQgghjoAkVUIIcQKZM2cOc+bMSXYYh+Xdd99F0zTefffdpMXwu9/9jpEjR2IYRtJi6A+Ki4u5+uqrkx3GAf385z9H07TD3j+ZfycPPvgghYWFRCKRpJxfCHF4JKkSQvQrjz/+OJqmsWLFii7LfT4f06ZNw26389prryUpuv6rrq6O733ve4wcORKn00lKSgqTJ0/mrrvuorW1tc/i+PWvf82LL77YZ+frS36/n9/+9rf88Ic/RNc//frUNK3zn67rDBo0iPnz5/eY/CUSCR577DHmzJlDeno6NpuN4uJirrnmmm7v+X3+8pe/oGka06dPP1aXJo6SUCjEz3/+8yNK/K+++mqi0SgPPfTQ0QtMCHHMmZMdgBBCfB6/38/8+fNZt24dL7zwAmeddVayQ+pXli9fzsKFCwkEAlx55ZVMnjwZgBUrVvCb3/yG999/nzfeeKNPYvn1r3/NpZdeyoUXXnjUjz1r1iza29uxWq1H/di98eijjxKPx/nSl77Ubd28efP46le/ilKKsrIy/vKXv3DGGWfwyiuvcPbZZwPQ3t7OxRdfzGuvvcasWbP4yU9+Qnp6Ort37+aZZ57hiSeeoKKigvz8/C7HXrRoEcXFxXzyySfs2LGDoUOH9sn1ikMXCoW48847AQ67pctut3PVVVdx77338s1vfvOIWtyEEH1HkiohRL/W1tbGggULWLNmDc8//3znDaro0NraykUXXYTJZGL16tWMHDmyy/pf/epX/PWvf01SdEdHOBzGarWi6zp2uz1pcTz22GOcf/75PcYwfPhwrrzyys6fL7roIsaPH8/999/f+Z79/ve/z2uvvcZ9993Ht7/97S7733HHHdx3333djltWVsbSpUt5/vnnueGGG1i0aBF33HHHYV9DMBgkJSXlsPcXfeOyyy7jd7/7HYsXL+aMM85IdjhCiF6Q7n9CiH4rEAhw1llnsWrVKv71r39xzjnnHHT7feMotm3bxpVXXonX6yUrK4uf/exnKKXYs2cPF1xwAR6Ph9zcXO65555ux4hEItxxxx0MHToUm81GQUEBP/jBD7qNb3jsscc444wzyM7OxmazMXr0aB544IFuxysuLubcc8/lww8/7Oy+WFpayt///vcu28ViMe68806GDRuG3W4nIyODU089lTfffPOg1/zQQw9RVVXFvffe2y2hAsjJyeGnP/3pAfff191y9+7dXZb3NH5p+/btXHLJJeTm5mK328nPz+eLX/wiPp8P6OgGFwwGeeKJJzq7w+0/7qaqqoprr72WnJwcbDYbY8aM4dFHH+3xvE8//TQ//elPGTx4ME6nE7/f32NMc+bMYezYsWzatInTTz8dp9PJ4MGD+d3vftftWsvLyzn//PNJSUkhOzub73znO7z++uu9GqdVVlbGunXrmDt37kG322fcuHFkZmZSVlYGQGVlJQ899BDz5s3rllABmEwmvve97/XYSpWWlsY555zDpZdeyqJFi3p1fujoRuZyudi5cycLFy7E7Xbz5S9/GQDDMLj//vsZM2
YMdrudnJwcbrjhBlpaWrocQynFXXfdRX5+Pk6nk9NPP52NGzd2O9eBxjAd6P316quvMnv2bNxuNx6Ph6lTp/Lkk0922ebjjz/mrLPOwuv14nQ6mT17NkuWLOl2jg8//JCpU6dit9sZMmTIIXebe/jhhxkyZAgOh4Np06bxwQcfdNsmGo1y++23M3nyZLxeLykpKZx22mksXry4c5vdu3eTlZUFwJ133tn5N/Dzn/8cgHXr1nH11VdTWlqK3W4nNzeXa6+9lqampm7nmzx5Munp6fz73/8+pGsRQiSPtFQJIfqlYDDI2WefzfLly3nuuec499xze73v5ZdfzqhRo/jNb37DK6+8wl133UV6ejoPPfQQZ5xxBr/97W9ZtGgR3/ve95g6dSqzZs0COm40zz//fD788EO+/vWvM2rUKNavX899993Htm3buowVeuCBBxgzZgznn38+ZrOZl19+mW984xsYhsHNN9/cJZ4dO3Zw6aWXct1113HVVVfx6KOPcvXVVzN58mTGjBkDdNyU3n333Xzta19j2rRp+P1+VqxYwapVq5g3b94Br/Wll17C4XBw6aWXHsKre+ii0SgLFiwgEonwzW9+k9zcXKqqqvjPf/5Da2srXq+Xf/zjH53xf/3rXwdgyJAhQMeYrxkzZqBpGrfccgtZWVm8+uqrXHfddfj9/m6Jxi9/+UusVivf+973iEQiB+3y19LSwllnncXFF1/MZZddxnPPPccPf/hDxo0b19lKFAwGOeOMM6ipqeHWW28lNzeXJ598sstN8cEsXboUgEmTJvVq+5aWFlpaWjq76r366qvE43G+8pWv9Gr/fRYtWsTFF1+M1WrlS1/6Eg888ADLly9n6tSpvdo/Ho+zYMECTj31VP7nf/4Hp9MJwA033MDjjz/ONddcw7e+9S3Kysr405/+xOrVq1myZAkWiwWA22+/nbvuuouFCxeycOFCVq1axfz584lGo4d0Hft7/PHHufbaaxkzZgw//vGPSU1NZfXq1bz22mtcccUVALzzzjucffbZTJ48mTvuuANd1zsfZHzwwQdMmzYNgPXr1zN//nyysrL4+c9/Tjwe54477iAnJ6dXsfztb3/jhhtu4OSTT+bb3/42u3bt4vzzzyc9PZ2CgoLO7fx+P4888ghf+tKXuP7662lra+Nvf/sbCxYs4JNPPmHixIlkZWXxwAMPcNNNN3HRRRdx8cUXAzB+/HgA3nzzTXbt2sU111xDbm4uGzdu5OGHH2bjxo0sW7asW1I6adKkHpNIIUQ/pYQQoh957LHHFKCKioqUxWJRL774Yq/3veOOOxSgvv71r3cui8fjKj8/X2mapn7zm990Lm9paVEOh0NdddVVncv+8Y9/KF3X1QcffNDluA8++KAC1JIlSzqXhUKhbudfsGCBKi0t7bKsqKhIAer999/vXFZfX69sNpu67bbbOpdNmDBBnXPOOb2+1n3S0tLUhAkTer397Nmz1ezZszt/3vd6l5WVddlu8eLFClCLFy9WSim1evVqBahnn332oMdPSUnp8pruc91116m8vDzV2NjYZfkXv/hF5fV6O1/PfectLS3t9hp/NqZ91wOov//9753LIpGIys3NVZdccknnsnvuuUcBXd5P7e3tauTIkd2O2ZOf/vSnClBtbW3d1gHquuuuUw0NDaq+vl59/PHH6swzz1SAuueee5RSSn3nO99RgFq9evVBz7O/FStWKEC9+eabSimlDMNQ+fn56tZbb+3V/ldddZUC1I9+9KMuyz/44AMFqEWLFnVZ/tprr3VZXl9fr6xWqzrnnHOUYRid2/3kJz9RQJff876/vc/67PurtbVVud1uNX36dNXe3t5l233nMAxDDRs2TC1YsKDLeUOhkCopKVHz5s3rXHbhhRcqu92uysvLO5dt2rRJmUymHuPZXzQaVdnZ2WrixIkqEol0Ln/44YcV0OXvJB6Pd9lGqY7PkJycHHXttdd2LmtoaFCAuuOOO7qdr6
fPjKeeeqrb58M+X//615XD4TjoNQgh+g/p/ieE6Jfq6uqw2+1dnhb31te+9rXO/zaZTEyZMgWlFNddd13n8tTUVEa
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.cluster import KMeans\n",
"import matplotlib.pyplot as plt\n",
"\n",
"# fit_kmeans: build and fit a KMeans model\n",
"# NOTE(review): same body as the fit_kmeans defined in the previous cell; this redefinition shadows it\n",
"def fit_kmeans(data, n_clusters, random_state):\n",
"    \"\"\"Return a ``KMeans`` model (``n_clusters``, ``random_state``) after fitting it on ``data``.\"\"\"\n",
"    estimator = KMeans(n_clusters=n_clusters, random_state=random_state)\n",
"    estimator.fit(data)\n",
"    return estimator\n",
"\n",
"# draw_clusters: scatter the 2-D points and mark the cluster centres\n",
"# NOTE(review): redefines the draw_clusters from the previous cell (white markers, different title)\n",
"def draw_clusters(data, kmeans):\n",
"    \"\"\"Plot 2-D ``data`` coloured by ``kmeans.labels_`` with the centres as white crosses.\n",
"\n",
"    ``data`` is indexed as ``data[:, 0]`` / ``data[:, 1]``; only the first two\n",
"    columns are drawn.\n",
"    \"\"\"\n",
"    point_colours = kmeans.labels_\n",
"    centroids = kmeans.cluster_centers_\n",
"    centroid_style = dict(marker='x', s=200, linewidths=3, color='white')\n",
"\n",
"    plt.figure(figsize=(10, 6))\n",
"    plt.scatter(data[:, 0], data[:, 1], c=point_colours, cmap='viridis', alpha=0.6)\n",
"    plt.scatter(centroids[:, 0], centroids[:, 1], **centroid_style)\n",
"    plt.xlabel('Principal Component 1')\n",
"    plt.ylabel('Principal Component 2')\n",
"    plt.title('K-means Clustering (PCA-reduced data)')\n",
"    plt.show()\n",
"\n",
"# Apply K-means with three clusters to the PCA-reduced data\n",
"# (random_state is the value assigned in the earlier K-means cell)\n",
"kmeans = fit_kmeans(reduced_data, 3, random_state)\n",
"draw_clusters(reduced_data, kmeans)"
]
},
{
"cell_type": "code",
"execution_count": 96,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAABjQAAASlCAYAAAAPuDBdAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeZxN9R/H8df33Dv7ZjAIka0saUNClhaUKJWklUopWtQv7SXtKe2rFqlIKW1UskWJFmlPIVlS9lnMfu/5/v4YMxkzd+YOM3dmzPv5eNxH5pzvOedzrtP4fs/nuxhrrUVERERERERERERERKQKcyo7ABERERERERERERERkdIooSEiIiIiIiIiIiIiIlWeEhoiIiIiIiIiIiIiIlLlKaEhIiIiIiIiIiIiIiJVnhIaIiIiIiIiIiIiIiJS5SmhISIiIiIiIiIiIiIiVZ4SGiIiIiIiIiIiIiIiUuUpoSEiIiIiIiIiIiIiIlWeEhoiIiIiIiIiIiIiIlLlKaEhIgIcdNBBDBs2rLLDqBTjx4+ndevWuK5b2aFUqqr+DNx5550YY/b6+F69etGrV6/yC6gMnnvuOZo0aUJ2dnalXF9ERESkJsuvR27durXczjls2DAOOuigcjvf+vXriYyMZPHixeV2zupoX+v8Fe2vv/7CGMMrr7yyV8e/8sorGGP466+/yjWuYGzbto2YmBg++uijkF9bRMqXEhoisl9bvXo1I0aMoHnz5kRGRhIfH0+3bt14/PHHyczMDEkMGRkZ3HnnnXz22WchuV5ZpKam8uCDD3LjjTfiOP/9k2CMKfg4jkPDhg3p06dPsffg9/uZNGkSvXr1onbt2kRERHDQQQdx0UUX8e233xZ73WeeeQZjDJ07d66oW5NyUh7P77Bhw8jJyeH5558vv8BERERE9sLu9dySPpVdd+/VqxeHHnpopcYQSnfddRedO3emW7duBduGDRtW6O8kPj6eww8/nAkTJhTbUeb777/n/PPP58ADDyQiIoLatWtz4oknMmnSJPx+f5HyycnJREZGYozht99+q9D7k303depUHnvssb0+vk6dOgwfPpzbb7+9/IISkUrhrewAREQqyqxZszjrrLOIiIjgwgsv5NBDDyUnJ4cvvviCMWPG8MsvvzBx4sQKjyMjI4Nx48YBVFoP+UBefvllfD4f55xzTpF9vXv35sILL8Ray5o1a3jmmWc4/vjjmTVrFieffDIAmZmZnHHGGXzyySf06NGDW265hdq1a/PXX3/x1ltvMXnyZNatW0fjxo0LnXvKlCkcdNBBfP3116xatYqWLVuG5H6l7Mrj+Y2MjGTo0KE88sgjXHXVVVW615mIiIjs31577bVCP7/66qvMmTOnyPY2bdqEMqwabcuWLUyePJnJkycX2RcREcGLL74I5CUg3nnnHa6//nq++eYbpk2bVlDuxRdf5PLLL6d+/fpccMEFtGrVirS0NObNm8cll1zCP//8wy233FLo3NOnT8cYQ4MGDZgyZQr33HNPxd6o7JOpU6fy888/M3r06L0+x+WXX84TTzzB/PnzOf7448svOBEJKSU0RGS/tGbNGoYMGULTpk2ZP38+BxxwQMG+UaNGsWrVKmbNmlWJEe679PR0YmJi9ukckyZN4tRTTyUyMrLIvoMPPpjzzz+/4OfTTz+dww47jMcee6wgoTFmzBg++eQTHn300SIVy7Fjx/Loo48WOe+aNWv48ssvmTFjBiNGjGDKlCmMHTt2r++hPL4HqXiDBw9m/PjxLFiwQI0HERERqTS7128Bli5dypw5c4ps31NGRgbR0dEVGVqN9frrr+P1ehkwYECRfV6vt9DfzciRI+ncuTNvvvkmjzzyCA0bNmTp0qVcfvnldOnShY8++oi4uLiC8qNHj+bbb7/l559/Lva6/fr1o2nTpkydOnWfEhpZWVmEh4cXGvUuVU+bNm049NBDeeWVV9QmEanG9JtWRPZL48ePZ+
fOnbz00kuFkhn5WrZsyTXXXBPw+EBzlxY35+e3335L3759qVu3LlFRUTRr1oyLL74YyJtjNCkpCYBx48YVDJe+8847C45fsWIFgwYNonbt2kRGRtKxY0c++OCDYq+7cOFCRo4cSb169QpGPaSlpTF69GgOOuggIiIiqFevHr179+a7774r8Ttas2YNP/74IyeeeGKJ5fK1b9+eunXrsmbNGgA2bNjA888/T+/evYvtJePxeLj++uuLHZ2RmJjIKaecwqBBg5gyZUpQ14e8YeexsbGsXr2afv36ERcXx3nnnQeA67o89thjtGvXjsjISOrXr8+IESPYsWNHoXNYa7nnnnto3Lgx0dHRHHfccfzyyy9FrlWWZwDg448/pmfPnsTFxREfH0+nTp2YOnVqoTJfffUVJ510EgkJCURHR9OzZ89i5wn+4osv6NSpE5GRkbRo0aLMUzVNnDiRFi1aEBUVxdFHH83nn39epExOTg533HEHHTp0ICEhgZiYGLp3786CBQsKypT2/P74448MGzasYEq3Bg0acPHFF7Nt27Yi1+vQoQO1a9fm/fffL9O9iIiIiIRa/nRPy5Yto0ePHkRHRxf07t+zLp+vuPXYkpOTGT16dMEUSC1btuTBBx8st7XrylIXA9i6dSuDBw8mPj6eOnXqcM0115CVlVWk3Ouvv06HDh2Iioqidu3aDBkyhPXr15caz7Rp0+jQoUNBfbh9+/Y8/vjjpR733nvv0blzZ2JjY0st6zhOwajh/Pp4fj11ypQphZIZ+Tp27Fjk72bdunV8/vnnDBkyhCFDhhR0ugrGZ599hjGGadOmcdttt9GoUSOio6NJTU0FyrfOX9KaFcU9i3///TeXXHIJDRs2JCIigmbNmnHFFVeQk5NTUCbY5zI5OZlhw4aRkJBArVq1GDp0KMnJyUF9RwC//PILxx9/PFFRUTRu3Jh77rmn2Gf//fff55RTTimIuUWLFtx9992Fpgnr1asXs2bNYu3atQVtkvw1XIJp1+yud+/efPjhh1hrg74XEalaNEJDRPZLH374Ic2bN6dr164Vep3NmzfTp08fkpKSuOmmm6hVqxZ//fUXM2bMACApKYlnn32WK664gtNPP50zzjgDgMMOOwzIq+R169aNRo0acdNNNxETE8Nbb73FwIEDeeeddzj99NMLXW/kyJEkJSVxxx13kJ6eDuQNm3377be58soradu2Ldu2beOLL77gt99+46ijjgoYe36FvaQyu9uxYwc7duwomB7q448/xufzccEFF5ThG8tLaJxxxhmEh4dzzjnn8Oyzz/LNN9/QqVOnoI73+Xz07duXY489locffrigp9yIESN45ZVXuOiii7j66qtZs2YNTz31FMuXL2fx4sWEhYUBcMcdd3DPPffQr18/+vXrx3fffUefPn0KVfLL6pVXXuHiiy+mXbt23HzzzdSqVYvly5fzySefcO655wIwf/58Tj75ZDp06MDYsWNxHIdJkyZx/PHH8/nnn3P00UcD8NNPPxU8U3feeSc+n4+xY8dSv379oGJ56aWXGDFiBF27dmX06NH8+eefnHrqqdSuXZsDDzywoFxqaiovvvgi55xzDpdeeilpaWm89NJL9O3bl6+//pojjjii1Od3zpw5/Pnnn1x00UU0aNCgYBq3X375haVLlxZJCB111FE1fqFHERERqR62bdvGySefzJAhQzj//PODrovly8jIoGfPnvz999+MGDGCJk2a8OWXX3LzzTfzzz//7NNaAPnKWhcbPHgwBx10EPfffz9Lly7liSeeYMeOHbz66qsFZe69915uv/12Bg8ezPDhw9myZQtPPvkkPXr0YPny5dSqVStgLOeccw4nnHACDz74IAC//fYbixcvLrEjWW5uLt988w1XXHFF0Pe9evVqIG9NhIyMDObNm0ePHj1o0qRJ0Od44403iImJoX///kRFRdGiRQumTJlSpvbj3XffTXh4ONdffz3Z2dmEh4eHrM5fnI
0bN3L00UeTnJzMZZddRuvWrfn77795++23ycjIIDw8POjn0lrLaaedxhdffMHll19OmzZtePfddxk6dGhQsfz7778
"text/plain": [
"<Figure size 1600x1200 with 3 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"import pandas as pd\n",
"\n",
"# Swap cluster ids 1 and 2 (all other ids are left unchanged)\n",
"# NOTE(review): presumably done so the colours line up with another plot; confirm\n",
"labels = [2 if val == 1 else 1 if val == 2 else val for val in kmeans.labels_]\n",
"\n",
"# Build the two-column frame of PCA components once and reuse it for both panels\n",
"pca_frame = pd.DataFrame({\"Column1\": reduced_data[:, 0], \"Column2\": reduced_data[:, 1]})\n",
"\n",
"# Clusters and true labels side by side. The original third panel repeated the\n",
"# true-label plot with the same data and title, so it has been dropped.\n",
"plt.figure(figsize=(16, 6))\n",
"\n",
"# Clusters on the two PCA components\n",
"draw_data_2d(pca_frame, 0, 1, labels, plt.subplot(1, 2, 1))\n",
"plt.title('Clusters (PCA-reduced data)')\n",
"\n",
"# True labels on the two PCA components\n",
"draw_data_2d(pca_frame, 0, 1, df['HeartDisease'], plt.subplot(1, 2, 2))\n",
"plt.title('True Labels (PCA-reduced data)')\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
2024-11-23 11:55:22 +04:00
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 2
}