371 lines
354 KiB
Plaintext
371 lines
354 KiB
Plaintext
|
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 13,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
||
|
"RangeIndex: 10000 entries, 0 to 9999\n",
|
||
|
"Data columns (total 10 columns):\n",
|
||
|
" # Column Non-Null Count Dtype \n",
|
||
|
"--- ------ -------------- ----- \n",
|
||
|
" 0 Id 10000 non-null object \n",
|
||
|
" 1 Name 10000 non-null object \n",
|
||
|
" 2 Short description 9996 non-null object \n",
|
||
|
" 3 Gender 9927 non-null object \n",
|
||
|
" 4 Country 9721 non-null object \n",
|
||
|
" 5 Occupation 9836 non-null object \n",
|
||
|
" 6 Birth year 10000 non-null int64 \n",
|
||
|
" 7 Death year 9999 non-null float64\n",
|
||
|
" 8 Manner of death 1893 non-null object \n",
|
||
|
" 9 Age of death 9999 non-null float64\n",
|
||
|
"dtypes: float64(2), int64(1), object(7)\n",
|
||
|
"memory usage: 781.4+ KB\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import matplotlib.pyplot as plt\n",
|
||
|
"from sklearn.preprocessing import LabelEncoder\n",
|
||
|
"from sklearn.cluster import AgglomerativeClustering\n",
|
||
|
"from sklearn.cluster import KMeans\n",
|
||
|
"from sklearn.metrics import silhouette_score\n",
|
||
|
"from sklearn.decomposition import PCA\n",
|
||
|
"from sklearn import metrics\n",
|
||
|
"from imblearn.over_sampling import RandomOverSampler\n",
|
||
|
"from imblearn.under_sampling import RandomUnderSampler\n",
|
||
|
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
|
||
|
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
||
|
"from sklearn.compose import ColumnTransformer\n",
|
||
|
"from sklearn.pipeline import Pipeline\n",
|
||
|
"from sklearn.impute import SimpleImputer\n",
|
||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier\n",
|
||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
||
|
"from sklearn.metrics import (\n",
|
||
|
" precision_score, recall_score, accuracy_score, roc_auc_score, f1_score,\n",
|
||
|
" matthews_corrcoef, cohen_kappa_score, confusion_matrix\n",
|
||
|
")\n",
|
||
|
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
|
||
|
"import numpy as np\n",
|
||
|
"import featuretools as ft\n",
|
||
|
"from sklearn.metrics import accuracy_score, classification_report\n",
|
||
|
"\n",
|
||
|
"# Функция для применения oversampling\n",
|
||
|
"def apply_oversampling(X, y):\n",
|
||
|
" oversampler = RandomOverSampler(random_state=42)\n",
|
||
|
" X_resampled, y_resampled = oversampler.fit_resample(X, y)\n",
|
||
|
" return X_resampled, y_resampled\n",
|
||
|
"\n",
|
||
|
"# Функция для применения undersampling\n",
|
||
|
"def apply_undersampling(X, y):\n",
|
||
|
" undersampler = RandomUnderSampler(random_state=42)\n",
|
||
|
" X_resampled, y_resampled = undersampler.fit_resample(X, y)\n",
|
||
|
" return X_resampled, y_resampled\n",
|
||
|
"\n",
|
||
|
"def split_stratified_into_train_val_test(\n",
|
||
|
" df_input,\n",
|
||
|
" stratify_colname=\"y\",\n",
|
||
|
" frac_train=0.6,\n",
|
||
|
" frac_val=0.15,\n",
|
||
|
" frac_test=0.25,\n",
|
||
|
" random_state=None,\n",
|
||
|
"):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Splits a Pandas dataframe into three subsets (train, val, and test)\n",
|
||
|
" following fractional ratios provided by the user, where each subset is\n",
|
||
|
" stratified by the values in a specific column (that is, each subset has\n",
|
||
|
" the same relative frequency of the values in the column). It performs this\n",
|
||
|
" splitting by running train_test_split() twice.\n",
|
||
|
"\n",
|
||
|
" Parameters\n",
|
||
|
" ----------\n",
|
||
|
" df_input : Pandas dataframe\n",
|
||
|
" Input dataframe to be split.\n",
|
||
|
" stratify_colname : str\n",
|
||
|
" The name of the column that will be used for stratification. Usually\n",
|
||
|
" this column would be for the label.\n",
|
||
|
" frac_train : float\n",
|
||
|
" frac_val : float\n",
|
||
|
" frac_test : float\n",
|
||
|
" The ratios with which the dataframe will be split into train, val, and\n",
|
||
|
" test data. The values should be expressed as float fractions and should\n",
|
||
|
" sum to 1.0.\n",
|
||
|
" random_state : int, None, or RandomStateInstance\n",
|
||
|
" Value to be passed to train_test_split().\n",
|
||
|
"\n",
|
||
|
" Returns\n",
|
||
|
" -------\n",
|
||
|
" df_train, df_val, df_test :\n",
|
||
|
" Dataframes containing the three splits.\n",
|
||
|
" \"\"\"\n",
|
||
|
"\n",
|
||
|
" if frac_train + frac_val + frac_test != 1.0:\n",
|
||
|
" raise ValueError(\n",
|
||
|
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
|
||
|
" % (frac_train, frac_val, frac_test)\n",
|
||
|
" )\n",
|
||
|
"\n",
|
||
|
" if stratify_colname not in df_input.columns:\n",
|
||
|
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
|
||
|
"\n",
|
||
|
" X = df_input # Contains all columns.\n",
|
||
|
" y = df_input[\n",
|
||
|
" [stratify_colname]\n",
|
||
|
" ] # Dataframe of just the column on which to stratify.\n",
|
||
|
"\n",
|
||
|
" # Split original dataframe into train and temp dataframes.\n",
|
||
|
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
|
||
|
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
|
||
|
" )\n",
|
||
|
"\n",
|
||
|
" # Split the temp dataframe into val and test dataframes.\n",
|
||
|
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
|
||
|
" df_val, df_test, y_val, y_test = train_test_split(\n",
|
||
|
" df_temp,\n",
|
||
|
" y_temp,\n",
|
||
|
" stratify=y_temp,\n",
|
||
|
" test_size=relative_frac_test,\n",
|
||
|
" random_state=random_state,\n",
|
||
|
" )\n",
|
||
|
"\n",
|
||
|
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
|
||
|
"\n",
|
||
|
" return df_train, df_val, df_test\n",
|
||
|
"\n",
|
||
|
"\n",
|
||
|
"df = pd.read_csv(\"../data/age.csv\", nrows=10000)\n",
|
||
|
"df.info()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "markdown",
|
||
|
"metadata": {},
|
||
|
"source": [
|
||
|
"Вариант: Список людей.\n",
|
||
|
"Бизнес-цель: реклама. Необходимо разбить людей на группы, чтобы показывать им определенную рекламу в приложениях"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 4,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"df.fillna({\"Gender\": \"NaN\", \"Country\": \"NaN\", \"Occupation\" : \"NaN\", \"Manner of death\" : \"NaN\"}, inplace=True)\n",
|
||
|
"df = df.dropna()\n",
|
||
|
"df['Country'] = df['Country'].str.split('; ')\n",
|
||
|
"df = df.explode('Country')\n",
|
||
|
"data = df.copy()\n",
|
||
|
"\n",
|
||
|
"value_counts = data[\"Country\"].value_counts()\n",
|
||
|
"rare = value_counts[value_counts < 100].index\n",
|
||
|
"data = data[~data[\"Country\"].isin(rare)]\n",
|
||
|
"\n",
|
||
|
"data.drop(data[~data['Gender'].isin(['Male', 'Female'])].index, inplace=True)\n",
|
||
|
"\n",
|
||
|
"data1 = data[[\"Country\", \"Age of death\", \"Gender\"]]\n",
|
||
|
"data1 = pd.get_dummies(data1, drop_first=True)\n",
|
||
|
"#data1 = pd.get_dummies(data, columns=['Gender', 'Country', 'Occupation'], drop_first=True)"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 10,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"[[ -1.11861923 -0.74208424]\n",
|
||
|
" [-19.1195007 -0.58172792]\n",
|
||
|
" [-12.11858619 -0.75979619]\n",
|
||
|
" ...\n",
|
||
|
" [ -2.1214623 0.45075518]\n",
|
||
|
" [ 28.87844461 0.50067068]\n",
|
||
|
" [ 5.87851368 0.4636366 ]]\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1kAAAK9CAYAAADWo6YTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3hc5Z33/88pUzTSSLItW7KNjcEGTDEQMCZACEkglDh1QzadkrBsdpfspuzzC9nkSXZTFrJpJCEhIY0UCITkIQUHAoHQWToBA7axQbhKsqwy0vRTfn8caTQjzciyfWxJ9vt1Xb5g5kw5M5Jmzud87/t7G77v+wIAAAAAhMKc7B0AAAAAgP0JIQsAAAAAQkTIAgAAAIAQEbIAAAAAIESELAAAAAAIESELAAAAAEJEyAIAAACAEBGyAAAAACBEhCwAAAAACBEhCwAAAABCRMgCABxQrrvuOhmGUfoXj8d1+OGH67LLLlNnZ2fFbTs7O/Xv//7vWrp0qRKJhOrr63XiiSfqS1/6kvr6+qo+/ooVK2QYhq655pp98GoAAFOR4fu+P9k7AQDAvnLdddfp4osv1he+8AUdcsghyuVyeuCBB/SLX/xCBx98sFavXq1EIqHHHntMb3rTmzQ4OKgPfOADOvHEEyVJjz/+uG688UadeuqpuuOOOyoe+8UXX9Thhx+uRYsWaf78+XrggQcm4yUCACaZPdk7AADAZDjvvPO0fPlySdIll1yiWbNm6Rvf+IZ+//vf67zzztM73vEOWZalp556SkuXLq2475e//GX98Ic/HPOYv/zlLzVnzhx9/etf1/nnn6/29nYtWrRoX7wcAMAUwnBBAAAkveENb5Akvfzyy/rBD36gLVu26Bvf+MaYgCVJra2t+uxnPzvm+htuuEHnn3++3vzmN6upqUk33HDDXt9vAMDUQ8gCAEDShg0bJEmzZs3SH/7wB9XV1en888+f8P0feeQRrV+/Xu9973sVjUb1d3/3d7r++uv31u4CAKYwQhYA4IDU39+v7u5ubd68WTfddJO+8IUvqK6uTm9+85v1wgsv6PDDD1c0Gp3w4/3yl7/UggULdNppp0mS3vOe9+j555/X008/vZdeAQBgqmJOFgDggHTWWWdVXD744IN1/fXXa/78+UqlUkomkxN+LMdxdNNNN+nCCy+UYRiSguGHc+bM0fXXX6/jjz8+zF0HAExxhCwAwAHpu9/9rg4//HDZtq3W1lYdccQRMs1ggEdjY6MGBgYm/Fh33HGHtm/frhUrVmj9+vWl61//+tfrV7/6lb7yla+UHhsAsP8jZAEADkgrVqwodRccbenSpXr66adVKBQmNGRweO7V3//931fdfu+99+r1r3/97u8sAGBaIWQBADDKW97yFj388MP67W9/q/e+973j3jadTuv3v/+93v3ud1dtlPGv//qvuv766wlZAHAAIWQBADDKRz7yEX3nO9/RJz/5SZ144ok6/PDDK7Z3dXXp2muv1Wc/+1ndcsstSqfT+pd/+RedfvrpYx7rjjvu0M0336zvfve7isVi++olAAAmEQPEAQAYZcaMGbrllluUz+d1/PHH69JLL9UPfvAD/eAHP9A//uM/asmSJbr//vslBUMFZ82apVNPPbXqY731rW9VX1+fVq1atS9fAgBgEhGyAACo4uSTT9bq1av1kY98RPfee68+9rGP6ROf+ISeeOIJXX755br55pvV1dWlv/zlL3rTm94ky7KqPs6ZZ56pRCKhX/7yl/v4FQAAJovh+74/2TsBAAAAAPsLKlkAAAAAECJCFgAAAACEiJAFAAAAACEiZAEAAABAiAhZAAAAABAiQhYAAAAAhMie7B2Y6jzP09atW5VMJmUYxmTvDgAAAIBJ4vu+BgYGNG/ePJlm7XoVIWsntm7dqgULFkz2bgAAAACYIjZt2qSDDjqo5nZC1k4kk0lJwRvZ2Ng4yXsDAAAAYLKkUiktWLCglBFqIWTtxPAQwcbGRkIWAAAAgJ1OI6LxBQAAAACEiJAFAAAAACEiZAEAAABAiAhZAAAAABAiQhYAAAAAhIiQBQAAAAAhImQBAAAAQIgIWQAAAAAQIkIWAAAAAISIkAUAAAAAISJkAQAAAECICFkAAAAAECJCFgAAAACEiJAFAAAAACEiZAEAAABAiAhZAAAAABAiQhYAAAAAhIiQBQAAAAAhImQBAAAAQIgIWQAAAAAQInuydwAAAAAAqvE8X1v6skoXHNVHbc1vrpNpGpO9WztFyAIAAAAw5azvGtCfV3dqw/ZB5RxXcdvS4tkNOueYVi2Zk5zs3RsXIQsAAADAlLK+a0A/fbBdPemC5jbFlYjWKVNwtHprv7b2Z3XxaYumdNBiThYAAACAKcPzfP15dad60gUdNqdByXhElmkoGY/osDkN6kkXdMdznfI8f7J3tSZCFgAAAIApY0tfVhu2D2puU1yGUTn/yjAMzW2Ka33XoLb0ZSdpD3eO4YIAAADANJYazOsLtz2vzT05HTQzrs+dd5QaG2KTvVu7LV1wlHNcJaJ1VbfXRS11pnJKF5x9vGcTR8gCAAAApoDnt27XO7/3qHKOFLel3/7zCh01b7YkqaN3UBf/7HF1DRQ0JxnVTy9crrYZDbrwJ4/ovnXdKg2ce1n67RNb9drDW/SzD508aa9lT9RHbcVtS5mCo2Q8MmZ7tuAqZluqj07dKGP4vj91BzNOAalUSk1NTerv71djY+Nk7w4AAACmsVpBavHlq+RWub0laU5jVNtShTHbIqZU9Go/1xnTNGh5nq9r7tmg1Vv7ddichoohg77v68WuQS2b36SPnLF4n7dzn2g2mLrxDwAAAJjCHnppk9537TOlyzdceqxOPXSBJGljd7/ec+2j6s06mlFn68ZLV+j1X3ugIkhlHelN33503OdwpaoBSxo/YEnSfeu6lRrMT7uhg6Zp6JxjWrW1P6sXu4K5WXVRS9mCq239Oc2sj+rso1un9HpZVLJ2gkoWAADA/u26x57Wf/52S+nyf75zvi466XhJ0p+ef1H//PN1pW3fu+Bwvemow7To8lU1Hy8ZszSQr1aX2vfOP3GevvauV032buyW8nWy8k4wRHDJnAadffTkrZM10WxAyNoJQhYAAMD096277tM37xwoXf74G5P6tzNfO25Y2h+8+pCZuvEfT5ns3dhtnudrS19W6YKj+qit+c11k1rBYrggAAAA9ktX3/OgvnZ7X+nyv5/brMted5q+dvtduvqeXOn6y14X17+fe2bVIPXNOwf0zTv374AlSQfNjE/2LuwR0zS0YGZisndjl1HJ2gkqWQAAAPveN++8R9+6K126/G9n1uvjb3zdfl95CpMh6W+fPWvazcmayqhkAQAAYNJ9+vpV+tWzI5ffu0y64v0rJUlf+t0q/eh/R7Zd8mrps29fWTVIfeuutL51FwFrV7z28BYC1iShkrUTVLIAAADG939+tko3vzBy+V1HSl+9sHpYQrgipuR4UvkBvSFN63WypjIaX4SEkAUAAA4kl357le7YOnL57HnStf+6Upd9f5VubR+5/s2LpKs/QpDa28ZbJ2tuY1QP/8cblRrM6wu3Pa/NPTkdNDOuz513FBWsvYSQFRJCFgAA2N98/MerdMuLI5ffcZj0zQ8TmCaLJWnDlStrLlQsSR29g7r4Z4+ra6CgOcmofnrhcrXNaJjcHT8AEbJCQsgCAABT1Xu/vEoPj3Ql1ylJ6VefCeY7ffArq3R/78i202dIv/gUQWpfqLVOVjJm6aZ/PLFmkMLUR8gKCSELAABMpr//wio9mhm5vCIh/fpzhKXJ1n7lSj300ia979pnStfdcOmxOvXQBZKkjd39es+1j6o362hGna0bL12hhS1Nk7W7CAkhKySELAAAEIb3X7FKD/aPXD6tSbr+00HV6Y2Xr1LZ6D0dJunOKwlS+0L7lSt13WNP6z9/u6V03X++c74uOul4SdKfnn9R//zzdaVt37vgcL3pqMP29W5iiiBkhYSQBQAAdsV5l69SWaM9HSlVXMa+137lSn3rrvv0zTtHxlZ+/I1J/duZr53EvcJ0RMgKCSELAIA
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x800 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"pca = PCA(n_components=2)\n",
|
||
|
"data_pca = pca.fit_transform(data1)\n",
|
||
|
"\n",
|
||
|
"print(data_pca)\n",
|
||
|
"\n",
|
||
|
"plt.figure(figsize=(10, 8))\n",
|
||
|
"plt.scatter(data_pca[:, 0], data_pca[:, 1], alpha=0.5)\n",
|
||
|
"plt.title('PCA')\n",
|
||
|
"plt.xlabel('1')\n",
|
||
|
"plt.ylabel('2')\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 12,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA04AAAIjCAYAAAA0vUuxAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABh3ElEQVR4nO3dd3hUZf7+8Xtm0kmBEFKAAAFCM3SlClhQRGVBVnRdWcSCZcFVWV1Fd0V3dfnpfkHWBoIK62LFFewKolIUQUoUUCFAIAgJKUAapM2c3x9JhgTSSXKmvF/XNReZc56Z+cwYSW6e5/kci2EYhgAAAAAA1bKaXQAAAAAAuDqCEwAAAADUguAEAAAAALUgOAEAAABALQhOAAAAAFALghMAAAAA1ILgBAAAAAC1IDgBAAAAQC0ITgAAAABQC4ITAAAAANSC4AQAXmDXrl2aPHmy2rVrJ39/f7Vt21Y33nijdu3aZXZpAAC4BYthGIbZRQAAms57772nG264QeHh4br11lsVFxenAwcO6JVXXlFWVpbeeustXXPNNWaXCQCASyM4AYAH27dvn/r06aMOHTpo3bp1atOmjfNcZmamRowYoUOHDunHH39U586dTawUAADXxlI9APBg//rXv3Ty5EktWrSoUmiSpIiICL300kvKz8/X008/7Tz+2GOPyWKxKDMzs9L4LVu2yGKxaOnSpZWOOxwOzZ8/X+edd54CAgIUFRWlO+64Q8ePH680rlOnTrr66qvPqnHGjBmyWCyVjlksFj322GPO+yUlJbryyisVHh6un376yXl8yZIluuSSSxQZGSl/f3/16tVLCxYsqNNnM3XqVHXq1KnSsUOHDikwMFAWi0UHDhxoUO2StGzZMg0cOFCBgYEKDw/X7373Ox06dKjSmIsuukgJCQnaunWrhg0bpsDAQMXFxWnhwoWVxn399deyWCx69913z3qd4OBgTZ06tdKxTp06nXVs+fLlslgsld7vgQMHqvzvOX36dFkslrOeAwC8HcEJADzYhx9+qE6dOmnEiBFVnh85cqQ6deqkjz/+uMGvcccdd+iBBx7Q8OHD9e9//1s333yzXn/9dY0ZM0bFxcUNft6KbrvtNn399df68MMP1atXL+fxBQsWqGPHjnr44Yc1d+5cxcbG6o9//KNeeOGFBr3Oo48+qoKCgnOq9cknn9SUKVMUHx+vefPm6d5779WaNWs0cuRInThxotLY48eP68orr9TAgQP19NNPq3379rrrrrv06quvnlMNFZWUlOiRRx6p09i9e/dq8eLFjfbaAOBJfMwuAADQNLKzs3XkyBGNHz++xnF9+vTRBx98oNzcXIWEhNTrNTZs2KCXX35Zr7/+un7/+987j1988cW64oortHz58krHG+Lhhx/WsmXL9L///U/Dhw+vdG7t2rUKDAx03p8xY4auuOIKzZs3T9OnT6/X6+zatUuvvfaaxo4dq08//bRBtR48eFCzZ8/WE088oYcffth5fOLEierfv79efPHFSsePHDmiuXPnaubMmZJKQ+jgwYM1a9Ys/eEPf5Cvr2+D6qho8eLFSklJ0cUXX6z9+/fXOPaRRx5R9+7dlZ2dfc6vCwCehhknAPBQubm5klRrGCo/n5OTU+/XWL58ucLCwnTZZZcpMzPTeRs4cKCCg4P11VdfVRpfXFxcaVxmZmaNMzzPP/+85syZo2effbbKAFgxNGVnZyszM1OjRo3S/v376/3L/6xZszRgwABNmjSpyvN1qf29996Tw+HQddddV2lcdHS04uPjz/o8fHx8dMcddzjv+/n56Y477lB6erq2bt1ar/qrcvLkSf3973/XjBkz1KFDhxrHbt26VcuXL9ecOXNktfLrAQCcyatnnNatW6d//etf2rp1q1JTU7VixQpNmDChXs9hGIbmzp2rRYsW6eDBg4qIiNAf//jHOi+LAICmUh6IygNUdeoasKqSlJSk7OxsRUZGVnk+PT290v1Vq1adtdeqOp9++qm2bNkiSTp27FiVY7755hvNnj1bGzdu1MmTJyudy87OVlhYWJ1ea8OGDfrwww+1Zs0apaSkVDmmLrUnJSXJMAzFx8dXef7MGaS2bduqRYsWlY5169ZNUukepCFDhtSp/urMmzdPBQUFevjhh52zWtV56KGHNGLECF199dWaMWPGOb0uAHgirw5O+fn56tu3r2655RZNnDixQc9xzz33aNWqVfq///s/9e7dW8eOHav2BzwANKewsDDFxMToxx9/rHHcjz/+qHbt2ik0NLTer+FwOBQZGanXX3+9yvNnBo3BgwfriSeeqHTs+eef1/vvv3/WYzdv3qxp06apRYsWeuKJJzRp0iR1797deX7fvn269NJL1aNHD82bN0+xsbHy8/PTJ598omeeeUYOh6PO7+PBBx/UmDFjdMkll5zVLKE+tTscDlksFn366aey2WxnPUdwcHCdazpXmZmZ+te//qVZs2YpPDy8xrGrVq3SF198oY0bNzZTdQDgfrw6OI0dO1Zjx46t9nxhYaEeeeQRvfnmmzpx4oQSEhL01FNP6aKLLpIk/fzzz1qwYIF27tzp/GEeFxfXHKUDQJ1cffXVWrx4sTZs2KALL7zwrPPr16/XgQMHKi0Xq48uXbroiy++0PDhwystm6tORESERo8eXenYypUrqxx72WWXacGCBSooKNDKlSt1++23OzvMSaWNLwoLC/XBBx9UWoZ25nK42qxcuVIbN27Utm3bzrn2Ll26yDAMxcXFOWeOanLkyBHl5+dXmnXas2ePJJ3V8a++nnjiCYWEhOiee+6pcZxhGHrooYd0zTXXnPMMFwB4MhYx12DGjBnauHGj3nrrLf3444+aNGmSrrjiCiUlJUkq/aHduXNnffTRR4qLi1OnTp102223MeMEwGU88MADCgwM1B133KGsrKxK544dO6Y777xTQUFBeuCBBxr0/Nddd53sdrv+8Y9/nHWupKTkrC5y9TFs2DDZbDa1aNFCCxcu1Lp16yp1fCuf0al4OcLs7GwtWbKkzq9ht9v18MMP6/e//7369evX4FrLTZw4UTabTY8//rjOvEyiYRhn/TcoKSnRSy+95LxfVFSkl156SW3atNHAgQMbXMeBAwe0YMECPfbYY7UG2vKfcXPmzGnw6wGAN/DqGaeapKSkaMmSJUpJSVHbtm0lSffff78+++wzLVmyRP/85z+1f/9+HTx4UMuXL9drr70mu92u++67T9dee62+/PJLk98BAEjx8fH6z3/+oxtvvFG9e/fWrbfeqri4OB04cECvvPKKMjMz9eabb6pLly5nPfbLL7+stHyv/B+NduzYoR07dqh3794aNWqU7rjjDs2ZM0eJiYm6/PLL5evrq6SkJC1fvlz//ve/de21157z+xgzZowmT56sv/zlLxo3bpxiYmJ0+eWXy8/PT+PGjdMdd9yhvLw8LV68WJGRkUpNTa3T8/7666/O5X2NoUuXLnriiSc0a9YsHThwQBMmTFBISIiSk5O1YsUK3X777br//vud49u2baunnnpKBw4cULdu3fT2228rMTFRixYtOms/VGJi4llL/ex2uw4fPqy1a9dq1KhRzuNr165Vz549dfPNN9da86pVqzRt2rRKyyABAGcjOFVjx44dstvtZy21KCwsVOvWrSWVrmUvLCzUa6+95hz3yiuvaODAgdq9ezc/hAC4hEmTJqlHjx6aM2eOMyy1bt1aF198sR5++GElJCRU+bjrr7++yuPz5s1TVlaWcy/QwoULNXDgQL300kt6+OGH5ePjo06dOmny5MlntQ8/F/Pnz9fnn3+u6dOn67333lP37t317rvv6q9//avuv/9+RUdH66677lKbNm10yy231Pl577rrrnNeFlfRQw89pG7duumZZ57R448/LkmKjY3V5Zdfrt/85jeVxrZq1Ur/+c9/dPfdd2vx4sWKiorS888/r2nTpp31vE8++WSVr/fFF18oKSmp0gV7Jemf//xnlfuszhQYGFjpYsMAgKpZjDPXEngpi8VSqave22+/rRtvvFG7du066wdPcHCwoqOjNXv2bP3zn/+sdIHHU6d
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x600 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
},
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIjCAYAAADvBuGTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACL30lEQVR4nOzdeVxU5eIG8OfMMDCsI7JvsiqCG4qKaGoqbplatpi5ZWllahllV+umqZW3NCvT1PxleMPKFte6ueGeCwqa4gqKgMgqO8g2c35/oJMjoAwwHJbn+/nMJznnzJlnBjUf3ve8RxBFUQQRERERERHViUzqAERERERERM0ByxUREREREVE9YLkiIiIiIiKqByxXRERERERE9YDlioiIiIiIqB6wXBEREREREdUDlisiIiIiIqJ6wHJFRERERERUD1iuiIiIiIiI6gHLFRERERERUT1guSKiFu/8+fOYMGECXFxcYGJiAmdnZ4wfPx7nz5+XOlqLN3PmTAiCoPfzli1bBkEQcP369foPRUREVA2WKyJq0TZv3oxu3bohIiICU6ZMwddff42XXnoJ+/fvR7du3bBlyxapIxIREVETYSR1ACIiqVy9ehUTJ06El5cXDh06BDs7O+2+N954A3379sXEiRNx9uxZeHl5SZiUiIiImgKOXBFRi7V06VIUFRXhm2++0SlWAGBra4u1a9eisLAQn376qXb7Bx98AEEQkJmZqXP8qVOnIAgCwsLCdLZrNBp88cUX6NChA5RKJRwcHPDKK68gOztb5zgPDw88/vjjlTJWNS1OEAR88MEH2q/Ly8vx2GOPoXXr1rhw4YJ2+3fffYeBAwfC3t4eJiYm8Pf3x+rVq2v02bzwwgvw8PDQ2RYeHg6ZTIb//Oc/Otv37duHvn37wtzcHK1atcLo0aNx8eJF7f78/HxMnToV7u7uMDExgaurK1599VWkpaVVet1du3ahXbt2sLCwwOuvvw5RFAEABw4cgLe3N6ysrBAaGgq1Wq3zvKioKHTt2hVmZmYYN24ciouLAQAxMTHo3LkzzM3NMWnSJBQVFWmfc/369Sq/ZzNmzIAgCHjhhRe028LCwiAIAk6dOqVzbGZmZqXvB1D5ewRU/H4TBAGPPvqozvaSkhIsWLAAPj4+MDExgZubG9555x2UlJRUOufMmTMrfWaPP/649nt19z096HH3fWVlZeHtt99Gp06dYGFhASsrKwwfPhx///13pdeoTnh4OHr27AkzMzNYW1ujX79+2L17t3a/h4eHzucIAL/88gsEQaj0+wv4589XdZn3798PQRCqHFH+4YcfIAgCjh07huTkZIwbN0471dfLywvvvPMO8vPzH/pa9z4OHDgAADh8+DCeeeYZtGnTRvs9evPNN3H79u0af1ZE1DJw5IqIWqwdO3bAw8MDffv2rXJ/v3794OHhgT/++KPWr/HKK68gLCwMU6ZMweuvv474+HisXLkSp0+fxl9//QWFQlHrc981depUHDhwAHv27IG/v792++rVq9GhQweMGjUKRkZG2LFjB1577TVoNBrMmDFDr9fYvXs3XnzxRcycORNz587Vbt+7dy+GDx8OLy8vfPDBB7h9+za++uor9OnTB9HR0fDw8EBWVhbOnj2LqVOnwtHREXFxcVizZg127tyJyMhI2NvbAwCuXbuGJ554Aj4+Pvj444+xc+dObZmZMWMGZs2ahdOnT+Pzzz+HnZ0d5s2bBwDIy8vDsGHDYGpqig8//BCnT5/GihUrAFSU01dffRWpqalYsWIFTE1NsXbt2mrfZ1xcHNatW6fXZ1MTOTk5WLJkSaXtGo0Go0aNwpEjR/Dyyy/Dz88P586dw+eff44rV65g69ater2OnZ0dvv/+e+3XmzdvxpYtW3S2eXt7A6j4vLdu3YpnnnkGnp6eSEtLw9q1a9G/f39cuHABzs7OD3ythQsX4oMPPkDv3r2xaNEiGBsb48SJE9i3bx+GDBlS5XPKy8vx3nvvPfR93Jv3zTff1P760UcfhZubGzZu3Ignn3xS5zkbN26Et7c3goODcejQIaSlpWHWrFmwtrbG+fPnsWLFCkRERODIkSMwNTXFmDFj4OPjo/M6fn5+ePnll7Xb/Pz8AFQUwqKiIkyfPh02NjaIjIzEV199hRs3buCXX3556PshohZEJCJqgXJyckQA4ujRox943KhRo0QAYl5eniiKorhgwQIRgJiRkaFz3MmTJ0UA4nfffafddvjwYRGAuHHjRp1jd+7cWWm7u7u7OGLEiEqvP2PGDPH+v6oBiAsWLBBFURTnzZsnyuVycevWrZWeW1RUVGnb0KFDRS8vrwe+Z1EUxcmTJ4vu7u6iKIriqVOnRAsLC/GZZ54R1Wq1znEBAQGivb29eOvWLe22v//+W5TJZOKkSZOqPX9MTIxoYmIivvjii9ptr7/+umhpaSlmZmaKoiiKZWVlYq9evUQA4okTJ7THjRs3TrS3txeLi4tFURTF5cuXi4IgiJcuXdIe8/TTT4sAxE2bNmm3zZs3TzQxMRFTU1NFURTF+Pj4St+zZ599VuzYsaPo5uYmTp48Wbv9u+++EwGIJ0+e1HkfGRkZOt+Pu+7f9s4774j29vZiYGCg2L9/f+3277//XpTJZOLhw4d1nr9mzRoRgPjXX3/pnHPGjBmVPssRI0Zov1f3u/v7tSrFxcWVvp/x8fGiiYmJuGjRoiqfc1dsbKwok8nEJ598stI5NBqN9tfu7u46n+PXX38tmpiYiAMGDKgy83vvvScKgqCz7f5z3P0+5uTkaLelp6eLRkZGlb4P99qzZ48IoNr3dv/r3KuqP0tLliwRBUEQExISqn1NImp5OC2QiFqku9ODLC0tH3jc3f15eXl6v8Yvv/wClUqFwYMHIzMzU/sIDAyEhYUF9u/fr3N8WVmZznGZmZna6W1VWblyJZYsWYIVK1Zg9OjRlfabmppqf52bm4vMzEz0798f165dQ25ubo3ew7Vr1zBixAgEBATg+++/h0z2z/82UlJScObMGbzwwgto3bq1dnvnzp0xePBg/O9//9Nu02g0Ou/LwcEBjz32GH777TdoNBoAQEREBPr16wcbGxsAgJGREQIDAwEAPXv21J5rzJgxSE9PR0xMjPZ5/v7+8PX11R4TFBRU5fNKSkpw5MiRKt9rVFQUfvnlFyxZskTnfdZVcnIyvvrqK7z//vuwsLDQ2ffLL7/Az88P7du31/l8Bg4cCACVfo8UFxdX+j1SVlZWq1wmJiba96lWq3Hr1i1YWFjA19cX0dHRD3zu1q1bodFoMH/+/EqfVXWrOxYVFWHRokWYOXMm2rRpU+UxpaWlMDExeeBrT5o0CSUlJfj111+12zZt2oTy8nJMmDBBu+3+P08BAQHo3r17rUaa7v2zVFhYiMzMTPTu3RuiKOL06dN6n4+Imi9OCySiFuluabr3Goyq1LSEVSU2Nha5ubnaaW/3S09P1/l69+7dla79qs6ff/6pnTKXlZVV5TF//fUXFixYgGPHjulcawRUlC2VSvXA1ygsLMTQoUORlpYGGxubSv9oTkhIAACdUnOXn58fdu3ahcLCQpibmyMxMRGenp5Vvk5mZibs7e2RlJSEPn36PDATALi4uAAAkpKSEBgYiKSkJO22mj6vKnPnzkXfvn3x+OOPV3ltU20tWLAAzs7OeOWVV3QKAVDxe+TixYvVft/v/z3y7bff4ttvv610nLu7u965NBoNvvzyS3z99deIj4/XuY7tbsGtztWrVyGTyXSmoT7M8uXLUVxcjHfffRehoaFVHpOTk1OpgN6vffv26NGjBzZu3IiXXnoJQMWUwF69eulM8/vrr78wYMCASs+/tyjVVGJiIubPn4/t27dXul6ypj+oIKKWgeWKiFoklUoFJycnnD179oHHnT17Fi4uLrCystL7NTQaDezt7bFx48Yq99//D+qgoCB8+OGHOttWrlyJbdu2VXpuZGQkpk2bBnNzc3z44Yd45plndErO1atXMWjQILRv3x7Lly+Hm5sbjI2N8b///Q+ff/65drToQTIzM2Fubo4dO3bgiSeewJIlS7BgwYKavPV
|
||
|
"text/plain": [
|
||
|
"<Figure size 1000x600 with 1 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Оценка инерции\n",
|
||
|
"inertia = []\n",
|
||
|
"for k in range(1, 11):\n",
|
||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
||
|
" kmeans.fit(data_pca)\n",
|
||
|
" inertia.append(kmeans.inertia_)\n",
|
||
|
"\n",
|
||
|
"plt.figure(figsize=(10, 6))\n",
|
||
|
"plt.plot(range(1, 11), inertia, marker='o')\n",
|
||
|
"plt.title('Оценка инерции')\n",
|
||
|
"plt.xlabel('Количество кластеров')\n",
|
||
|
"plt.ylabel('Инерция')\n",
|
||
|
"plt.show()\n",
|
||
|
"\n",
|
||
|
"# Оценка коэффициента силуэта\n",
|
||
|
"silhouette_scores = []\n",
|
||
|
"for k in range(2, 11):\n",
|
||
|
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
|
||
|
" kmeans.fit(data_pca)\n",
|
||
|
" score = silhouette_score(data_pca, kmeans.labels_)\n",
|
||
|
" silhouette_scores.append(score)\n",
|
||
|
"\n",
|
||
|
"plt.figure(figsize=(10, 6))\n",
|
||
|
"plt.plot(range(2, 11), silhouette_scores, marker='o')\n",
|
||
|
"plt.title('Оценка коэффициента силуэта')\n",
|
||
|
"plt.xlabel('Количество кластеров')\n",
|
||
|
"plt.ylabel('Коэффициент силуэта')\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 20,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"data": {
|
||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABN0AAAIjCAYAAAA3Gm3YAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdZ3gc5fX38e/MbFXvkuXeC25gm2I6mGp6CCUQWjoBkhCSAE+AQEJJIYEEEkr4QwImgAmEXg2mmmJccMNVrrJk9bZ9Z54XK8uWJdmWWUmW/ftcly6k2SlnVjI6OnPf5zYcx3EQERERERERERGRpDF7OgAREREREREREZF9jYpuIiIiIiIiIiIiSaaim4iIiIiIiIiISJKp6CYiIiIiIiIiIpJkKrqJiIiIiIiIiIgkmYpuIiIiIiIiIiIiSaaim4iIiIiIiIiISJKp6CYiIiIiIiIiIpJkKrqJiIiIiIiIiIgkmYpuItIr/eY3v8EwDCorK3s6FBERERGRfdaHH37I7NmzW76ePXs2H330Uc8FJNKLqOgmIiIiIiKyH/v73/+OYRgccsghPR2K7IU2bNjAlVdeyaJFi1i0aBFXXnklGzZs6OmwRHoFFd1ERERERET2YzNmzGDQoEF89tlnrFq1qqfDkb3MOeecQ3p6OuPHj2f8+PFkZWVxzjnn9HRYIr2Cim4iIiIiIiL7qZKSEj7++GP+/Oc/k5+fz4wZM3o6JNnLeL1ePv74YxYsWMCCBQv44IMP8Hg8PR2WSK+gopuI7DPWrVvHsGHDGDt2LOXl5R3uN2jQIE477TRmz57N5MmT8fv9jBs3rqVXxXPPPce4cePw+XxMmjSJ+fPntznHV199xbnnnktOTg4+n4/Jkyfz4osvttqnurqa6667jnHjxpGWlkZGRgannHIKCxcubLXf7NmzMQyDZ555httvv51+/frh8/k4/vjj2zxtXrlyJd/4xjcoKirC5/PRr18/LrjgAurq6vbwXRMREZH92YwZM8jOzmb69Omce+65bYpua9euxTCMdj+eeOKJVvsec8wx7e732GOPtdpv5syZTJo0Cb/fT15eHhdffDGbNm1qtc9ll12GYRhMnDixTcx33nknhmGQlpbW5rUnnnii5dw5OTlccMEFbaZCHnPMMYwdO5YvvviCqVOn4vf7GTx4MA888ECr/SKRCDfffDOTJk0iMzOT1NRUjjzySN59993den+2flx22WUAPPbYYxiGwdq1a1uOt22b8ePHt3mftt7/1o/s7GyOOeYYPvjgg1Yxbs1rd3TVVVdhGEarbYZhcNVVV7XZd6v24hs0aBCXXXYZlmUxYcIEJkyYwHPPPYdhGAwaNKjDc4lIgqunAxARSYbVq1dz3HHHkZOTw1tvvUVeXt5O91+1ahXf+ta3+MEPfsDFF1/Mn/70J04//XQeeOABbrzxRq688kogkdSdd955LF++HNNMPKdYsmQJhx9+OH379uX6668nNTWVZ555hrPOOov//ve/nH322QCsWbOG//3vf3zzm99k8ODBlJeX8+CDD3L00UezdOlSiouLW8V01113YZom1113HXV1dfzhD3/goosu4tNPPwUSid9JJ51EOBzm6quvpqioiE2bNvHyyy9TW1tLZmZmst9WERER2cfNmDGDc845B4/Hw4UXXsg//vEPPv/8c6ZMmdJqvwsvvJBTTz211bbDDz+8zflGjRrF//t//w+AyspKfvazn7V6/bHHHuPyyy9nypQp3HnnnZSXl3Pvvffy0UcfMX/+fLKyslr2dblcLFmyhPnz53PggQe2OofP52tz7dtvv52bbrqJ8847j+9+97tUVFTwt7/9jaOOOqrNuWtqajj11FM577zzuPDCC3nmmWf40Y9+hMfj4YorrgCgvr6ef/7zn1x44YV873vfo6GhgUceeYSTTjqJzz77jIkTJ5Kfn8/jjz/ect7nnnuO559/vtW2oUOHdvT28/jjj7No0aJ2X8vLy+Mvf/kLABs3buTee+/l1FNPZcOGDa3upTvFYrGW76+I7AZHRKQXuuWWWxzAqaiocJYtW+YUFxc7U6ZMcaqrq3d57MCBAx3A+fjjj1u2vfHGGw7g+P1+Z926dS3bH3zwQQdw3n333ZZtxx9/vDNu3DgnFAq1bLNt25k6daozfPjwlm2hUMiJx+Otrl1SUuJ4vV7ntttua9n27rvvOoAzevRoJxwOt2y/9957HcBZtGiR4ziOM3/+fAdwZs6cuRvvkIiIiMjOzZ071wGct956y3GcRD7Tr18/5yc/+UnLPiUlJQ7g/PGPf9zl+Q4//HDn2GOPbXPso48+6jiO40QiEaegoMAZO3asEwwGW/Z7+eWXHcC5+eabW7ZdeumlTmpqqnP66ac7V111Vcv2Dz74wPH7/c5ZZ53lpKamtmxfu3atY1mWc/vtt7eKadGiRY7L5Wq1/eijj3YA5+67727ZFg6HnYkTJzoFBQVOJBJxHMdxYrFYq9zMcRynpqbGKSwsdK644op234OtOWp7Hn30UQdwSkpKHMdJ5IoDBgxwTjnllFbv09b7HzhwYKvjH3roIQdwPvvss5ZtAwcOdKZPn97mWj/+8Y/bxAE4P/7xj9uNrb34tp7/0ksvbfn673//u+P1ep1jjz22TXwi0paml4pIr7Z48WKOPvpoBg0axNtvv012dvZuHTdmzBgOO+ywlq+3rtZ13HHHMWDAgDbb16xZAySmjL7zzjucd955NDQ0UFlZSWVlJVVVVZx00kmsXLmyZXqE1+ttGR0Xj8epqqoiLS2NkSNHMm/evDYxXX755a36Yxx55JGtrr11JNsbb7xBIBDYrfsUERER6ciMGTMoLCzk2GOPBRLTD88//3yeeuop4vF4p88XiUTwer0dvj537ly2bNnClVde2Wqk2vTp0xk1ahSvvPJKm2OuuOIKnnzyScLhMACPPvoo55xzTpsR/s899xy2bXPeeee15GeVlZUUFRUxfPjwVlNCITGK7gc/+EHL1x6Phx/84Ads2bKFL774AgDLslpyM9u2qa6uJhaLMXny5HZzuc66//77qaqq4pZbbmn3ddu2W+5jwYIF/Pvf/6ZPnz6MHj261X7RaLTVPVdWVhIKhdo9ZygUasldbdvuVLyBQIDbbruNq666qlW+LCIdU9FNRHq1008/nfT0dN544w0yMjJatjc2NlJWVtbyUVFR0eq4HROFrYlb//79291eU1MDJKalOo7DTTfdRH5+fquPrQnTli1bgESi9Je//IXhw4fj9XrJy8sjPz+fL7/8st0ebDvGtLWAuPXagwcP5tprr+Wf//wneXl5nHTSSdx///3q5yYiIiKdFo/Heeqppzj22GMpKSlh1apVrFq1ikMOOYTy8nJmzZrV6XPW1ta222dtq3Xr1gEwcuTINq+NGjWq5fXtTZ8+HZfLxQsvvEBTUxPPPPMMl19+eZv9Vq5cieM4DB8+vE2OtmzZspb8bKvi4mJSU1NbbRsxYgRAq55m//rXvxg/fjw+n4/c3Fzy8/N55ZVXvnb+VVdXxx133MG1115LYWFhu/ts2LCh5R4OPPBAVq9ezX//+9827/Gbb77Z5p4feeSRds/5yCOPkJ+fT15eHn6/n6OOOoq5c+fuVsx//vOfCYVC3HjjjZ27WZH9mHq6iUiv9o1vfIN//etfzJgxo9XTyj/96U/ceuutLV8PHDiwVQJlWVa75+tou+M4AC1PBK+77jpOOumkdvcdNmwYAHfccQc33XQTV1xxBb/97W/JycnBNE1++tOftvtkcVfXBrj77ru57LLLeOGFF3jzzTe55ppruPPOO/nkk0/o169fu8eLiIiI7Oidd95h8+bNPPXUUzz11FNtXp8xYwYnnnhip85ZVlbWYX60p9xuNxdffDGPPvoogUCA3NxcjjvuuFY90yCRoxmGwWuvvdZuTrWzYmBHnnjiCS677DLOOussfvGLX1BQUIBlWdx5552sXr16j+8J4Pe//z2mafKLX/yCqqqqdvcpLCxsWayirq6O//u//+Pkk0/mww8/ZNy4cS37HXLIIfzud79rdex9993HCy+80Oa
|
||
|
"text/plain": [
|
||
|
"<Figure size 1500x600 with 2 Axes>"
|
||
|
]
|
||
|
},
|
||
|
"metadata": {},
|
||
|
"output_type": "display_data"
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Неиерархический алгоритм: k-means\n",
|
||
|
"optimal_k = 3 # Предположим, что оптимальное количество кластеров равно 3\n",
|
||
|
"kmeans = KMeans(n_clusters=optimal_k, random_state=42)\n",
|
||
|
"kmeans_labels = kmeans.fit_predict(data_pca)\n",
|
||
|
"\n",
|
||
|
"# Иерархический алгоритм: агломеративный\n",
|
||
|
"agglomerative = AgglomerativeClustering(n_clusters=optimal_k)\n",
|
||
|
"agglomerative_labels = agglomerative.fit_predict(data_pca)\n",
|
||
|
"\n",
|
||
|
"# Визуализация результатов\n",
|
||
|
"plt.figure(figsize=(15, 6))\n",
|
||
|
"\n",
|
||
|
"plt.subplot(1, 2, 1)\n",
|
||
|
"plt.scatter(data_pca[:, 0], data_pca[:, 1], c=kmeans_labels, cmap='viridis', alpha=0.5)\n",
|
||
|
"plt.title('k-means')\n",
|
||
|
"plt.xlabel('Главная компонента 1')\n",
|
||
|
"plt.ylabel('Главная компонента 2')\n",
|
||
|
"\n",
|
||
|
"plt.subplot(1, 2, 2)\n",
|
||
|
"plt.scatter(data_pca[:, 0], data_pca[:, 1], c=agglomerative_labels, cmap='viridis', alpha=0.5)\n",
|
||
|
"plt.title('Агломеративный')\n",
|
||
|
"plt.xlabel('Главная компонента 1')\n",
|
||
|
"plt.ylabel('Главная компонента 2')\n",
|
||
|
"\n",
|
||
|
"plt.show()"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"k-means - Инерция: 442804.1358263728, Коэффициент силуэта: 0.5444824210491334\n",
|
||
|
"Агломеративный - Коэффициент силуэта: 0.5158480408075595\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"# Оценка качества k-means\n",
|
||
|
"kmeans_inertia = kmeans.inertia_\n",
|
||
|
"kmeans_silhouette = silhouette_score(data_pca, kmeans_labels)\n",
|
||
|
"\n",
|
||
|
"# Оценка качества агломеративной кластеризации\n",
|
||
|
"agglomerative_silhouette = silhouette_score(data_pca, agglomerative_labels)\n",
|
||
|
"\n",
|
||
|
"print(f'k-means - Инерция: {kmeans_inertia}, Коэффициент силуэта: {kmeans_silhouette}')\n",
|
||
|
"print(f'Агломеративный - Коэффициент силуэта: {agglomerative_silhouette}')"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "aimvenv",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.12.5"
|
||
|
}
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|