714 lines
332 KiB
Plaintext
Raw Permalink Normal View History

2024-12-20 16:38:38 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вариант: Экономика стран"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 369 entries, 0 to 368\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 stock index 369 non-null object \n",
" 1 country 369 non-null object \n",
" 2 year 369 non-null float64\n",
" 3 index price 317 non-null float64\n",
" 4 log_indexprice 369 non-null float64\n",
" 5 inflationrate 326 non-null float64\n",
" 6 oil prices 369 non-null float64\n",
" 7 exchange_rate 367 non-null float64\n",
" 8 gdppercent 350 non-null float64\n",
" 9 percapitaincome 368 non-null float64\n",
" 10 unemploymentrate 348 non-null float64\n",
" 11 manufacturingoutput 278 non-null float64\n",
" 12 tradebalance 365 non-null float64\n",
" 13 USTreasury 369 non-null float64\n",
"dtypes: float64(12), object(2)\n",
"memory usage: 40.5+ KB\n"
]
}
],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.preprocessing import LabelEncoder\n",
"from sklearn.cluster import AgglomerativeClustering\n",
"from sklearn.cluster import KMeans\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.decomposition import PCA\n",
"from sklearn import metrics\n",
"from scipy.cluster.hierarchy import dendrogram, linkage, fcluster\n",
"from imblearn.over_sampling import RandomOverSampler\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.metrics import ConfusionMatrixDisplay\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier\n",
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
"from sklearn.metrics import (\n",
" precision_score, recall_score, accuracy_score, roc_auc_score, f1_score,\n",
" matthews_corrcoef, cohen_kappa_score, confusion_matrix\n",
")\n",
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
"import numpy as np\n",
"import featuretools as ft\n",
"from sklearn.metrics import accuracy_score, classification_report\n",
"\n",
"# Функция для применения oversampling\n",
"def apply_oversampling(X, y):\n",
" oversampler = RandomOverSampler(random_state=42)\n",
" X_resampled, y_resampled = oversampler.fit_resample(X, y)\n",
" return X_resampled, y_resampled\n",
"\n",
"# Функция для применения undersampling\n",
"def apply_undersampling(X, y):\n",
" undersampler = RandomUnderSampler(random_state=42)\n",
" X_resampled, y_resampled = undersampler.fit_resample(X, y)\n",
" return X_resampled, y_resampled\n",
"\n",
"def split_stratified_into_train_val_test(\n",
" df_input,\n",
" stratify_colname=\"y\",\n",
" frac_train=0.6,\n",
" frac_val=0.15,\n",
" frac_test=0.25,\n",
" random_state=None,\n",
"):\n",
" \"\"\"\n",
" Splits a Pandas dataframe into three subsets (train, val, and test)\n",
" following fractional ratios provided by the user, where each subset is\n",
" stratified by the values in a specific column (that is, each subset has\n",
" the same relative frequency of the values in the column). It performs this\n",
" splitting by running train_test_split() twice.\n",
"\n",
" Parameters\n",
" ----------\n",
" df_input : Pandas dataframe\n",
" Input dataframe to be split.\n",
" stratify_colname : str\n",
" The name of the column that will be used for stratification. Usually\n",
" this column would be for the label.\n",
" frac_train : float\n",
" frac_val : float\n",
" frac_test : float\n",
" The ratios with which the dataframe will be split into train, val, and\n",
" test data. The values should be expressed as float fractions and should\n",
" sum to 1.0.\n",
" random_state : int, None, or RandomStateInstance\n",
" Value to be passed to train_test_split().\n",
"\n",
" Returns\n",
" -------\n",
" df_train, df_val, df_test :\n",
" Dataframes containing the three splits.\n",
" \"\"\"\n",
"\n",
" if frac_train + frac_val + frac_test != 1.0:\n",
" raise ValueError(\n",
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
" % (frac_train, frac_val, frac_test)\n",
" )\n",
"\n",
" if stratify_colname not in df_input.columns:\n",
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
" X = df_input # Contains all columns.\n",
" y = df_input[\n",
" [stratify_colname]\n",
" ] # Dataframe of just the column on which to stratify.\n",
"\n",
" # Split original dataframe into train and temp dataframes.\n",
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
" )\n",
"\n",
" # Split the temp dataframe into val and test dataframes.\n",
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
" df_val, df_test, y_val, y_test = train_test_split(\n",
" df_temp,\n",
" y_temp,\n",
" stratify=y_temp,\n",
" test_size=relative_frac_test,\n",
" random_state=random_state,\n",
" )\n",
"\n",
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
" return df_train, df_val, df_test\n",
"\n",
"\n",
"df = pd.read_csv(\"../data/Economic.csv\")\n",
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Бизнес-цель: сегментировать страны на основе экономических показателей для определения схожих групп стран и последующего анализа каждой группы."
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Данные содержат текстовые значения. Убедитесь, что только числовые данные используются для анализа.\n",
"Исходный размер датасета: 369\n",
"Очищенный размер датасета: 219\n"
]
}
],
"source": [
"data = df.copy()\n",
"data_cleaned = df.dropna()\n",
"\n",
"if not np.issubdtype(data_cleaned.dtypes.iloc[1], np.number):\n",
" print(\"Данные содержат текстовые значения. Убедитесь, что только числовые данные используются для анализа.\")\n",
" cleaned_data = data_cleaned.select_dtypes(include=[np.number])\n",
"\n",
"print(f\"Исходный размер датасета: {df.shape[0]}\")\n",
"print(f\"Очищенный размер датасета: {data_cleaned.shape[0]}\")\n",
"\n",
"data1 = pd.get_dummies(data_cleaned, columns=['country'], drop_first=True)"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['stock index', 'year', 'index price', 'log_indexprice', 'inflationrate',\n",
" 'oil prices', 'exchange_rate', 'gdppercent', 'percapitaincome',\n",
" 'unemploymentrate', 'manufacturingoutput', 'tradebalance', 'USTreasury',\n",
" 'country_France', 'country_Germany', 'country_Hong Kong',\n",
" 'country_India', 'country_Japan', 'country_Spain',\n",
" 'country_United Kingdom', 'country_United States of America'],\n",
" dtype='object')\n",
"stock index 0\n",
"year 0\n",
"index price 0\n",
"log_indexprice 0\n",
"inflationrate 0\n",
"oil prices 0\n",
"exchange_rate 0\n",
"gdppercent 0\n",
"percapitaincome 0\n",
"unemploymentrate 0\n",
"manufacturingoutput 0\n",
"tradebalance 0\n",
"USTreasury 0\n",
"country_France 0\n",
"country_Germany 0\n",
"country_Hong Kong 0\n",
"country_India 0\n",
"country_Japan 0\n",
"country_Spain 0\n",
"country_United Kingdom 0\n",
"country_United States of America 0\n",
"dtype: int64\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 219 entries, 10 to 367\n",
"Data columns (total 20 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 year 219 non-null float64\n",
" 1 index price 219 non-null float64\n",
" 2 log_indexprice 219 non-null float64\n",
" 3 inflationrate 219 non-null float64\n",
" 4 oil prices 219 non-null float64\n",
" 5 exchange_rate 219 non-null float64\n",
" 6 gdppercent 219 non-null float64\n",
" 7 percapitaincome 219 non-null float64\n",
" 8 unemploymentrate 219 non-null float64\n",
" 9 manufacturingoutput 219 non-null float64\n",
" 10 tradebalance 219 non-null float64\n",
" 11 USTreasury 219 non-null float64\n",
" 12 country_France 219 non-null bool \n",
" 13 country_Germany 219 non-null bool \n",
" 14 country_Hong Kong 219 non-null bool \n",
" 15 country_India 219 non-null bool \n",
" 16 country_Japan 219 non-null bool \n",
" 17 country_Spain 219 non-null bool \n",
" 18 country_United Kingdom 219 non-null bool \n",
" 19 country_United States of America 219 non-null bool \n",
"dtypes: bool(8), float64(12)\n",
"memory usage: 24.0 KB\n"
]
}
],
"source": [
"print(data1.columns)\n",
"print(data1.isnull().sum())\n",
"data2 = data1.drop(['stock index'], axis = 1)\n",
"data2.info()"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[[ -175.11730098 -8125.91541511]\n",
" [ 1359.15430649 -7976.83866444]\n",
" [ 2330.12842909 -7886.62363629]\n",
" [ 3638.58468227 -7885.49439407]\n",
" [ 4638.69365316 -7738.97353013]\n",
" [ 5921.57827905 -7533.60974468]\n",
" [ 7420.03426153 -7269.3752748 ]\n",
" [ 8823.38406687 -6981.62904291]\n",
" [ 10508.03737208 -6093.36153206]\n",
" [ 12357.01890855 -5087.16328305]\n",
" [ 13108.1576432 -6856.08050495]\n",
" [ 13984.76420629 -7375.23274241]\n",
" [ 15460.23414568 -7307.74768791]\n",
" [ 17685.72825995 -7028.50596781]\n",
" [ 20090.21526684 -6980.26253392]\n",
" [ 22277.89668445 -6874.90441336]\n",
" [ 23962.33912597 -6605.01263864]\n",
" [ 24446.64571772 -6845.1952535 ]\n",
" [ 25871.01649677 -6556.01887636]\n",
" [ 27597.98078369 -6314.0423994 ]\n",
" [ 29116.38461377 -5778.79720501]\n",
" [ 31080.78776834 -4997.72117578]\n",
" [ 34067.26159619 -4464.16933609]\n",
" [ 36188.91962302 -3273.16517374]\n",
" [ 39173.66351113 -2162.39563038]\n",
" [ 41402.60872282 -1707.33871091]\n",
" [ -4920.26128698 -6264.09433006]\n",
" [ -4107.33309919 -5936.17563897]\n",
" [ -5591.9683601 -4970.64964709]\n",
" [ -4284.01907339 -5358.91854133]\n",
" [ -851.8323232 -4827.08675426]\n",
" [ 369.10904029 -4430.5671039 ]\n",
" [ 2778.18409795 -3478.03512091]\n",
" [ 4317.56600567 -2772.06424501]\n",
" [ 4775.4295974 -1736.3805783 ]\n",
" [ 4243.84270452 -2429.91924498]\n",
" [ 3781.33828765 -3422.66358249]\n",
" [ 6009.39340531 -4759.76856132]\n",
" [ 10424.87883391 -4341.57001167]\n",
" [ 16256.5577047 -4160.49272194]\n",
" [ 18000.35381788 -3402.49405369]\n",
" [ 20556.36944866 -2868.79479514]\n",
" [ 26534.29033635 -2792.87710749]\n",
" [ 14799.28968455 -3522.03466609]\n",
" [ 15612.92493655 -3056.85953865]\n",
" [ 18114.06439047 -3451.64906502]\n",
" [ 18523.60322894 -3137.05524082]\n",
" [ 19497.88792183 -2311.83435969]\n",
" [ 23542.33878862 -2603.44082121]\n",
" [ 17156.41944477 -1855.11336425]\n",
" [ 16429.21734114 -1290.51296177]\n",
" [ 19092.49490231 -2321.82962639]\n",
" [ 18472.52923296 -1490.57697639]\n",
" [ 16375.47555831 -2516.33360096]\n",
" [-23669.6107605 -6856.0257174 ]\n",
" [-23678.77675534 -5288.17745923]\n",
" [-23675.29226616 -4557.37607353]\n",
" [-23614.87756504 -3978.16434834]\n",
" [-23608.70369197 -4795.08252579]\n",
" [-23583.41444535 -4821.11540317]\n",
" [-23553.11851947 -4248.00207401]\n",
" [-23571.19840587 -4851.19514063]\n",
" [-23490.20520886 -2902.51879176]\n",
" [-23516.8183412 -3935.81513361]\n",
" [-23526.75429279 -4645.50718999]\n",
" [-23504.72354379 -4531.19715956]\n",
" [-23363.19551277 -2072.91896281]\n",
" [-23261.92618182 -1311.98677798]\n",
" [-23100.48320798 1479.32071988]\n",
" [-22891.64152145 5863.3059029 ]\n",
" [-22497.55043967 12353.76223468]\n",
" [-22810.24606437 1719.80676484]\n",
" [-22498.91033109 9530.51743361]\n",
" [-22162.14605795 12565.71566922]\n",
" [-22196.83923531 7511.01242735]\n",
" [-22104.88476049 11481.38894531]\n",
" [-22052.6046314 13224.64916476]\n",
" [-21760.03118667 19546.72612234]\n",
" [-21765.08108727 18164.36724379]\n",
" [-21624.70393447 18669.39338085]\n",
" [-21178.78608654 26088.80277007]\n",
" [-21109.11279888 28098.84971046]\n",
" [-20866.86464368 33279.39393894]\n",
" [-20893.51896865 39780.06254589]\n",
" [-14423.1457247 -1282.18684029]\n",
" [-13509.2003846 -655.76348716]\n",
" [-14296.15572734 -755.96193511]\n",
" [-13413.69439115 639.71572953]\n",
" [-12809.29884004 2375.19409329]\n",
" [-12158.52959914 4348.17715487]\n",
" [ -6522.44157462 8033.5362674 ]\n",
" [ 1704.70099935 18471.27282034]\n",
" [ 1655.56058562 25476.71405478]\n",
" [ 2080.3623691 20850.54443176]\n",
" [ 5485.77754575 15616.59302296]\n",
" [ 7819.78947585 9366.29074685]\n",
" [ 12111.55694469 10242.68841252]\n",
" [ 15650.51181433 10984.00736822]\n",
" [ 11433.19601313 9557.94037665]\n",
" [ 8234.20852471 6601.43492437]\n",
" [ 14908.39218447 8212.21166551]\n",
" [ 15582.44630853 3213.09254948]\n",
" [ 17022.83119681 354.98642592]\n",
" [ 20691.76843817 917.30352414]\n",
" [ 10957.19855236 10377.0947459 ]\n",
" [ 2024.91216378 4271.80296384]\n",
" [ 1577.53768178 7262.90870814]\n",
" [ 923.30323464 3888.13700397]\n",
" [ 177.88519727 1781.94675355]\n",
" [ 2920.96982312 4376.20609959]\n",
" [ 4532.07472006 5747.27132176]\n",
" [ 6973.62358473 8398.96029259]\n",
" [ 8070.77035599 14953.33034889]\n",
" [ 7169.62495215 11882.96240347]\n",
" [ 8957.83841078 9430.82539374]\n",
" [ 11632.4269169 12493.44565284]\n",
" [ 13222.91784378 12528.81546068]\n",
" [ 14875.88602712 11756.77156727]\n",
" [ 16838.60967462 13672.27804775]\n",
" [ 18971.65627197 14248.82756954]\n",
" [ 20293.03610972 15071.21521838]\n",
" [ 22656.89308108 12373.65973606]\n",
" [ 25164.7880265 17249.89060492]\n",
" [ 25038.51948028 19604.32898337]\n",
" [-22521.51647532 -6475.6488675 ]\n",
" [-22287.15333873 -6806.44040052]\n",
" [-21928.10308673 -6275.45882841]\n",
" [-21264.78463135 -3653.78528522]\n",
" [-20531.00831341 -5095.02609565]\n",
" [-20172.14558233 -5282.85209575]\n",
" [-19454.7374316 -5247.74237248]\n",
" [-18394.16059996 -5440.24957641]\n",
" [-17708.66878744 -5890.61030489]\n",
" [-16977.19895608 -5942.37154953]\n",
" [-16348.31897325 -5865.65397511]\n",
" [-15924.38194624 -4498.96322609]\n",
" [-15860.82110486 -5179.59873881]\n",
" [-15124.35357677 -4924.63281227]\n",
" [-14037.81314916 -5297.21871369]\n",
" [-13797.61834264 -5294.07509501]\n",
" [-13510.87104888 -5120.83918239]\n",
" [-12087.007858 -6638.79003576]\n",
" [-10512.58841775 -6714.07353917]\n",
" [-10962.01711103 -5979.83585746]\n",
" [-10171.64621343 -6161.24259993]\n",
" [ -7826.84801297 -6077.1684473 ]\n",
" [ -8407.60947409 -5426.43845034]\n",
" [-10173.15559564 -4042.87459349]\n",
" [ -9985.51047281 -3269.68499945]\n",
" [ 2842.53923731 -1659.64644782]\n",
" [ -210.01838598 -2102.09484993]\n",
" [ -311.0406242 -3373.08487142]\n",
" [ 6335.70060224 -4747.4495941 ]\n",
" [ 10138.40650878 -4559.05113457]\n",
" [ 10581.93058082 -3418.56352793]\n",
" [ 12446.58907491 -2280.15757639]\n",
" [ 17768.95074632 -953.007551 ]\n",
" [ 21653.34615304 -4315.39855815]\n",
" [ 17670.70161619 -2103.24709782]\n",
" [ 22775.09988097 -3257.17802075]\n",
" [ 24196.31624589 612.69839918]\n",
" [ 17303.57975039 1736.2821211 ]\n",
" [ 20809.83874293 3816.64931923]\n",
" [ 24003.4803278 1371.27597591]\n",
" [ 22733.16145132 4096.77689857]\n",
" [ 22002.33518214 4587.07826629]\n",
" [-22527.05006009 -7381.85374308]\n",
" [-22612.5967173 -7288.92543614]\n",
" [-22552.96608493 -7003.45088673]\n",
" [-21964.59843912 -6545.73342438]\n",
" [-21554.53662783 -6960.49818877]\n",
" [-21329.32661106 -6392.47113847]\n",
" [-21315.93410183 -5965.58018732]\n",
" [-20704.3226911 -6474.22835092]\n",
" [-20715.47475303 -6217.16016385]\n",
" [-20387.16579041 -6133.85309515]\n",
" [-20594.13107878 -5717.76992433]\n",
" [-20434.51858193 -6109.14576725]\n",
" [-19917.99313807 -6132.23909419]\n",
" [-19908.15323203 -5688.67632044]\n",
" [-20292.77585504 -4994.97321597]\n",
" [-20154.66080781 -4054.69817659]\n",
" [ 764.91853433 -2600.79072445]\n",
" [ -1496.89053334 -2571.96787734]\n",
" [ -1498.61057888 -3874.0102535 ]\n",
" [ 298.9782736 -5483.28126325]\n",
" [ 5650.9006054 -5132.90198485]\n",
" [ 9825.18879465 -4981.65429224]\n",
" [ 10818.74371737 -4113.98676633]\n",
" [ 12541.14641327 -3333.40223951]\n",
" [ 17627.97654687 -3397.70050337]\n",
" [ 16697.34233815 -5182.04865092]\n",
" [ 19848.87251026 -5911.89362344]\n",
" [ 18636.58173303 -4742.88385811]\n",
" [ 19099.76425172 -4778.5442203 ]\n",
" [ 14841.29503493 -3624.0984356 ]\n",
" [ 17590.64652191 -4279.85387892]\n",
" [ 16457.40721459 -3001.71826466]\n",
" [-23875.42539397 -4266.38628493]\n",
" [-23830.7846687 -2743.19109952]\n",
" [-23782.78332812 -643.28924727]\n",
" [-23709.95844212 1936.59508244]\n",
" [ -8040.24745841 3320.76379591]\n",
" [ -9078.32541232 816.39865948]\n",
" [ -8478.58542565 87.95138959]\n",
" [ -6804.24833781 -2318.23626326]\n",
" [ -2356.71756272 -736.88616465]\n",
" [ 1073.78794125 514.96450948]\n",
" [ 2639.31978814 2126.46003002]\n",
" [ 4689.57935304 5484.86147412]\n",
" [ 8917.53112497 6407.45915828]\n",
" [ 11676.61475759 345.44254243]\n",
" [ 7828.232595 -180.49504026]\n",
" [ 4463.92176667 -489.00527974]\n",
" [ 5255.16254195 1239.45767925]\n",
" [ 1920.63195671 956.24415893]\n",
" [ 4360.91701569 1390.62029681]\n",
" [ 6538.93427087 -172.17625484]\n",
" [ 5742.17430744 858.73932849]]\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\mitat\\AppData\\Local\\Temp\\ipykernel_19512\\3444879312.py:7: UserWarning: No data for colormapping provided via 'c'. Parameters 'cmap' will be ignored\n",
" plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.7, cmap='viridis')\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA28AAAIjCAYAAACUIiNfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACRAElEQVR4nOzdeXxU9b0//tc5Z/bMkgSYxEgIICCrolEx+G3VK5W22NYWK/Zaxa3VXvQWsS6o1WrLpT/b69K6tdfbgle9UtpqVRRLcbstiDaKArIKhs0skGQmM5NZzvL7Y5ghk0ySmWS2M/N6Ph55KDOfzJxZcs7n/fm8P++PoGmaBiIiIiIiIipoYr4PgIiIiIiIiAbH4I2IiIiIiEgHGLwRERERERHpAIM3IiIiIiIiHWDwRkREREREpAMM3oiIiIiIiHSAwRsREREREZEOMHgjIiIiIiLSAQZvREREREREOsDgjYiIiIiISAcYvBEREREREekAgzciIsqIP/7xjxAEIenP9OnT8314lGU/+clPEj5zm82GqVOn4u6774bX6+3T/tNPP8X111+P8ePHw2KxwOl04pxzzsEjjzyC7u7uPu0VRUFNTQ0EQcBrr72Wi5dERFRwDPk+ACIiKi533nknpkyZEv/3smXL8ng0lGtPPPEE7HY7fD4f/vrXv2LZsmV444038I9//AOCIAAA1qxZg29/+9swm8248sorMX36dITDYfz973/Hrbfeim3btuG3v/1twuO+8cYb+PzzzzF27Fg8++yz+MpXvpKPl0dElFcM3oiIKKO+9KUv4bzzzov/+6mnnsKRI0fyd0CUU5dccglGjhwJALjhhhswf/58/PnPf8a7776LhoYG7Nu3D5dddhnq6urwxhtv4IQTToj/7qJFi7Bnzx6sWbOmz+M+88wzOP3007Fw4ULceeed8Pv9KCsry9nrIiIqBEybJCKijAiHwwAAURz80tLe3o4f/ehHmDFjBux2O5xOJ77yla/go48+Smj31ltvQRAE/PGPf+zzGHa7HVdddVX83ytWrIAgCPjnP/+Z0O7IkSMQBAE/+clP4rfFUvwGCirHjh2b8PgA0NnZicWLF6O2thZmsxkTJkzA//f//X9QVXXQ1zx27Nh+00pjM1Ixsizjpz/9KU466SSYzWaMHTsWd955J0KhUJ/Hfe2113DuuefC4XDA6XTizDPPxHPPPZfQ5rPPPkvpeVVVxcMPP4xp06bBYrGgqqoK119/PTo6OgZ9ff35l3/5FwDAvn37AAAPPPAAfD4f/vu//zshcIuZMGECfvjDHybc1t3djRdeeAGXXXYZLr30UnR3d+Mvf/nLkI+JiEivOPNGREQZEQvezGbzoG337t2LF198Ed/+9rcxbtw4tLS04De/+Q3OPfdcfPLJJ6ipqcn24aYtEAjg3HPPxaFDh3D99ddjzJgx2LBhA5YuXYrPP/8cDz/88KCPMXPmTNxyyy0Jtz399NNYt25dwm3XXXcdVq5ciUsuuQS33HILNm3ahOXLl2P79u144YUX4u1WrFiBa665BtOmTcPSpUtRXl6ODz/8EGvXrsW//uu/9nn+73//+/jCF74AAPjzn/+c8FgAcP3112PFihW4+uqr8e///u/Yt28fHn30UXz44Yf4xz/+AaPRmOrbFffpp58CAEaMGAEAePnllzF+/HjMnj075cd46aWX4PP5cNlll6G6uhrnnXcenn322aSvkYiomDF4IyKijPB4PAAAq9U6aNsZM2Zg165dCbN0V1xxBSZPnoz//u//xo9//OOsHedQPfjgg/j000/x4YcfYuLEiQCiwU5NTQ1+8Ytf4JZbbkFtbe2Aj3HiiSfiu9/9bsJt7777bkLw9tFHH2HlypW47rrr8F//9V8AgH/7t3+D2+3GL3/5S7z55ps4//zz4fF48O///u8466yz8NZbb8FiscQfQ9O0hOeQZRkAcM4558Sff8+ePQnB29///nc89d
RTfYKi888/H1/+8pexevXqlIKl9vZ2AIiveXv88cdRVVWFL3zhC/B6vTh06BC+8Y1vDPo4PT3zzDOYPXt2/P297LLL8G//9m9oa2vDqFGj0nosIiI9Y9okERFlxNGjRwEgpc602WyOB26KouDo0aOw2+04+eST8cEHH/Rp39XVhSNHjiT89Mfj8SS0iwUTybS3t+PIkSPw+/2DHvPq1avxhS98ARUVFQmPP2fOHCiKgnfeeWfQx0jFq6++CgBYsmRJwu2xGbvYerB169ahq6sLd9xxR0LgBqBPOmQqs6KrV6+Gy+XCl770pYTXV19fD7vdjjfffDOl4z/55JMxatQojBs3Dtdffz0mTJiANWvWwGazxatOOhyOlB4LiH6vXn/9dXznO9+J3zZ//nwIgoA//OEPKT8OEVEx4MwbERFlRFNTEwwGQ0rBm6qqeOSRR/D4449j3759UBQlfl8sva6na665JuXjmDNnTsptTz755Pj/u91ufO9738N9990HSZL6tN29ezc+/vjjfl9fa2trys87kKamJoiiiAkTJiTcXl1djfLycjQ1NQE4no6YyjYMnZ2dAKLrBPuze/dueDweuN3upPen+vr+9Kc/wel0wmg0YvTo0TjppJPi9zmdTgDRYDxVq1atQiQSwWmnnYY9e/bEb581axaeffZZLFq0KOXHIiLSOwZvRESUETt37sT48eNhMAx+afmP//gP/PjHP8Y111yDn/70p6isrIQoili8eHHS4h/33HNPfK1WzNe+9rWkj/3YY49h0qRJ8X97vV7Mnz8/adtYoBEIBPDCCy9g2bJlcDqduO222/q0VVUVX/rSl5LeByDhOTOh9+zZcDQ3NwOIBoD9UVUVbrcbzz77bNL7U01P/OIXvxivNtmb0+lETU0Ntm7dmtJjAYgfzznnnJP0/r1792L8+PEpPx4RkZ4xeCMiomELhULYvHkzLr744pTa//GPf8T555+P//7v/064vbOzM2nHf8aMGX1m1JLNjgHAWWedhTPOOCP+74FSLHsGGl//+tfxj3/8A2vXrk0aoJ100knw+XxpzewNRV1dHVRVxe7duxP2y2tpaUFnZyfq6urixwMAW7du7TNL19snn3wCQRASZhp7O+mkk/C3v/0N55xzTkrrFofqoosuwm9/+1ts3LgRDQ0NA7bdt28fNmzYgBtvvBHnnntuwn2qquKKK67Ac889h7vvvjtrx0tEVEi45o2IiIbtueeeQygUwgUXXJBSe0mS+hTVWL16NQ4dOpSNw0uJpmnQNK3foPDSSy/Fxo0b8frrr/e5r7OzM14UZLi++tWvAkCf6pUPPvggAGDevHkAgAsvvBAOhwPLly9HMBhMaNvzvZVlGX/6059w1llnDZg2eemll0JRFPz0pz/tc58sy/HUy+G67bbbUFZWhuuuuw4tLS197v/000/xyCOPADg+63bbbbfhkksuSfi59NJLce655/Y7U0hEVIw480ZEREPm9/vx61//Gvfff388IHvmmWcS2rS0tMDn8+GZZ57Bl770JVRVVeGiiy7C/fffj6uvvhqzZ8/Gli1b8Oyzz+Y8/e2NN95ISJvcs2cPFi9enLTtrbfeipdeegkXXXQRrrrqKtTX18Pv92PLli344x//iM8++6zfdMF0nHrqqVi4cCF++9vforOzE+eeey7ee+89rFy5EhdffDHOP/98ANEUxIceegjXXXcdzjzzTPzrv/4rKioq8NFHHyEQCGDlypX429/+hh//+Mf4+OOP8fLLLw/4vOeeey6uv/56LF++HJs3b8aFF14Io9GI3bt3Y/Xq1XjkkUdwySWXDPv1nXTSSXjuueewYMECTJkyBVdeeSWmT5+OcDiMDRs2YPXq1fH99Z599lnMnDmz3yqeX//613HTTTfhgw8+wOmnnz7sYyMiKnQM3oiIaMja2tqwdOnS+L+vv/76ftteccUVePPNN1FVVYU777wTfr8fzz33HFatWoXTTz8da9aswR
133JGLw45bsGABgOj2BuPGjcNDDz3UbwEMm82Gt99+G//xH/+B1atX4+mnn4bT6cSkSZNw3333weVyZey4nnrqKYw
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"pca = PCA(n_components=2)\n",
"reduced_data = pca.fit_transform(data2)\n",
"\n",
"print(reduced_data)\n",
"\n",
"plt.figure(figsize=(10,6))\n",
"plt.scatter(reduced_data[:, 0], reduced_data[:, 1], alpha=0.7, cmap='viridis')\n",
"plt.title(\"Данные после PCA\")\n",
"plt.xlabel(\"PC1\")\n",
"plt.ylabel(\"PC2\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Выбор количества кластеров"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAIjCAYAAADFthA8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABhOElEQVR4nO3deXhU5cH+8XtmspKV7AEChD1ssrkERBSCFZWitmoRC4hWa3FBrXX7KfBaS7VWafWVur244triVpVFWQRBQIiCQdawCCEJhGSyTpKZ8/sjyUBMgBCSnFm+n+uaq8k5Z87cOVXJzXnO81gMwzAEAAAAAH7CanYAAAAAAGhLlCAAAAAAfoUSBAAAAMCvUIIAAAAA+BVKEAAAAAC/QgkCAAAA4FcoQQAAAAD8CiUIAAAAgF+hBAEAAADwK5QgAAAAAH6FEgQAAADAr1CCAMCPvPLKK7JYLLJYLFq1alWD/YZhKCUlRRaLRZdffrkJCQEAaH2UIADwQyEhIVqwYEGD7StWrNBPP/2k4OBgE1IBANA2KEEA4IcuvfRSvffee6qurq63fcGCBRo6dKiSkpJMSgYAQOujBAGAH5o4caKOHDmiJUuWuLdVVlbq/fff13XXXdfoe1wul+bOnat+/fopJCREiYmJuuWWW3T06FH3MV27dnUPt2vs1bVrV/expaWluueee5SSkqLg4GD17t1bTz75pAzDaPDZy5cvP+E5m2rq1KmNvn/WrFn1jvvyyy81cuRIhYWFKTo6WhMmTNDWrVvrHTNr1qwGn71s2TIFBwfr97//fb1jTvZavny5+/3z5s1T//791a5du3rHvP/++03+GQEATRNgdgAAQNvr2rWr0tPT9dZbb2ncuHGSpM8++0xFRUX6zW9+o3/+858N3nPLLbfolVde0Q033KA77rhD2dnZevbZZ7Vp0yatXr1agYGBmjt3rkpKSiRJW7du1V/+8hc9+OCDSktLkySFh4dLqnn26Je//KWWLVumG2+8UYMGDdKiRYt077336sCBA3r66acbzX3HHXfo7LPPliS99tpr9UpcU8TFxdU7929/+9t6+5cuXapx48apW7dumjVrlsrLy/XMM89oxIgR2rhxY70Sd7zvvvtOV1xxhS699FL97//+ryTpqquuUo8ePdzH3HXXXUpLS9PNN9/s3lZ3Xd555x394Q9/0IUXXqjbb79dYWFh7usHAGgFBgDAb8yfP9+QZKxfv9549tlnjYiICKOsrMwwDMO4+uqrjYsuusgwDMPo0qWLcdlll7nf99VXXxmSjDfffLPe+T7//PNGtxuGYSxbtsyQZCxbtqzBvg8++MCQZPz5z3+ut/3Xv/61YbFYjJ07d9bbvnjxYkOS8f7777u3TZ8+3TidP8YmTZpkpKam1tsmyZg5c6b7+0GDBhkJCQnGkSNH3Nu+++47w2q1GpMnT3Zvmzlzpvuz9+zZYyQnJxvnn3++UV5efsLP79KlizFlypRG902cONGIjo6u9/666/fee+81+WcEADSNzwyHW7lypcaPH68OHTrIYrHogw8+OK33V1RUaOrUqRowYIACAgJ0xRVXNHrc8uXLNWTIEAUHB6tHjx565ZVXzjg7AJjhmmuuUXl5uT755BMVFxfrk08+OeFQuPfee09RUVEaO3asDh8+7H4NHTpU4eHhWrZs2Wl99qeffiqbzaY77rij3vZ77rlHhmHos88+q7e9oqJCUs2EDs1VWVl50gkfcnJylJmZqalTpyomJsa9feDAgRo7dqw+/fTTBu85cuSIfvGLXygiIkIfffRRs/MVFxerXbt2Z/TzAQCazmdKUGlpqc466yz3MITT5XQ6FRoaqjvuuEMZGRmNHpOdna3LLrtMF110kTIzMzVjxgzddNNNWrRo0ZlEBwBTxMfHKyMjQwsWLNB//vMfOZ1O/frXv2702B07dqioqEgJCQmKj4+v9yopKVFeXt5pffbevXvVoUMHRURE1N
teNzxs79699bYfPnxYkhQVFXVan3O8wsJC93C8E2WSpN69ezfYl5aWpsOHD6u0tLTe9ssvv1zbtm1TYWFho88yNVV6eroOHjyoWbNmad++fTp8+LCKioqafT4AwMn5zDNB48aNc49rb4zD4dBDDz2kt956S4WFherfv78ef/xxXXjhhZKksLAwzZs3T5K0evVqFRYWNjjHv/71L6Wmpurvf/+7pJo/FFetWqWnn35av/jFL1r8ZwKA1nbdddfpd7/7nQ4dOqRx48YpOjq60eNcLpcSEhL05ptvNro/Pj6+FVNKe/bskaQTPpPTFIcOHVKXLl1aJlCtH3/8UZ999pmuueYa3XPPPZo/f36zznPXXXdp27ZtevTRRzV79uwWzQgAaMhn7gSdym233aY1a9bo7bff1vfff6+rr75al1xyiXbs2NHkc6xZs6bBXaJf/OIXWrNmTUvHBYA2ceWVV8pqtWrt2rUnHAonSd27d9eRI0c0YsQIZWRkNHidddZZp/W5Xbp00cGDB1VcXFxv+48//ujef7wNGzYoKSlJnTp1Oq3PqVNVVaWdO3e67zSdKJMkbdu2rcG+H3/8UXFxcQoLC6u3/aOPPtIll1yiOXPm6JVXXtEXX3zRrHyhoaF68cUX1a9fP51//vlasmSJnnzyyWadCwBwan5Rgvbt26f58+frvffe08iRI9W9e3f98Y9/1Pnnn39af2t36NAhJSYm1tuWmJgou92u8vLylo4NAK0uPDxc8+bN06xZszR+/PgTHnfNNdfI6XTq0UcfbbCvurq60bvnJ3PppZfK6XTq2Wefrbf96aeflsViqXdn/8iRI1q2bJl++ctfntZnHO/DDz9UeXm5Ro8efcJjkpOTNWjQIL366qv1fp4tW7Zo8eLFuvTSSxu8Z+TIkZKkP/zhDxo+fLhuueWWZv958MADD2jfvn164403lJGRoaFDhzbrPACAU/OZ4XAns3nzZjmdTvXq1avedofDodjYWJNSAYBnmDJlyimPGTVqlG655RbNmTNHmZmZuvjiixUYGKgdO3bovffe0z/+8Y8TPk/UmPHjx+uiiy7SQw89pD179uiss87S4sWL9eGHH2rGjBnq3r27pJo78Pfff7/Ky8sVHx+vN954w32O7du3S5LeeOMNXXnllQ3u0khSWVmZZs6cqeeee07Dhw/XxRdffNJcf/vb3zRu3Dilp6frxhtvdE+RHRUV1WA9oeNZLBa99NJLGjRokGbOnKknnniiyddCqpma++mnn9brr7/e4kP2AAAN+UUJKikpkc1m07fffiubzVZv38kekv25pKQk5ebm1tuWm5uryMhIhYaGtkhWAPBU//rXvzR06FA9//zzevDBBxUQEKCuXbvq+uuv14gRI07rXFarVR999JEeeeQRvfPOO5o/f766du2qv/3tb7rnnnvcxz3//PNauXKlJOmxxx5r9Fy//e1vlZ2d3WgJOnr0qN555x3dfPPNmj17tqzWkw+AyMjI0Oeff66ZM2fqkUceUWBgoEaNGqXHH39cqampJ31vWlqaHnroIT366KOaOHGiBg8efKrLIKnmTteUKVP0m9/8RpMmTWrSewAAZ8ZinMl0Nh7KYrFo4cKF7mmut2/frt69e2vlypXuoQsnM3XqVBUWFjaYZvu+++7Tp59+qs2bN7u3XXfddSooKNDnn3/ekj8CAEA1/z2WdNLlCCwWi7Kzs89o0gQAgH/xmTtBJSUl2rlzp/v77OxsZWZmKiYmRr169dKkSZM0efJk/f3vf9fgwYOVn5+vL774QgMHDtRll10mScrKylJlZaUKCgpUXFyszMxMSdKgQYMkSb///e/17LPP6k9/+pOmTZumL7/8Uu+++67++9//tvWPCwAAAKCZfOZO0PLly3XRRRc12D5lyhS98sorqqqq0p///Ge99tprOnDggOLi4nTeeedp9uzZGjBggKSaqVd/vjaFpHprPyxfvlx33XWXsrKy1KlTJz388MPuv6
kEALSsF154QZJ08803n/CY66+/XnPnzlVcXFxbxQIAeDmfKUEAAAAA0BR+MUU2AAAAANShBAEAAADwK149MYLL5dL
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1cAAAIjCAYAAADvBuGTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACG4UlEQVR4nOzdeVxVdf7H8fflsiMgqwIiAioqrrnvVFa2ak1l/irNKdtrypmmrClbpnGqmcaaGjPbl5nMplJbTLPAtdxCRU0FN2QREGSV7d7z+wOhGDdE4NwLr+fjcR8z3Hvuue8LFbzv+Z7PsRiGYQgAAAAAcE5czA4AAAAAAK0B5QoAAAAAmgDlCgAAAACaAOUKAAAAAJoA5QoAAAAAmgDlCgAAAACaAOUKAAAAAJoA5QoAAAAAmgDlCgAAAACaAOUKAAAAAJoA5QoAHNQ777wji8WijRs3nvDY/PnzZbFYNHHiRNlsthbJc8UVV6hLly5n/bx7771XFoul6QMBAOBgKFcA4GQ+++wz3XXXXRo9erQ++ugjWa1WsyMBAABRrgDAqSQmJmry5Mnq1auXlixZIk9PT7MjAQCA4yhXAOAkkpOTNWHCBIWFhembb76Rv7//CdssXLhQAwcOlJeXl4KDg3XTTTcpIyOj7vGMjAxNnjxZERER8vDwUExMjP74xz+quLj4hH29//77ioyMVPv27TV79uy6+xcsWKDw8HAFBwfrueeeO+F533zzjbp376527drp/vvvl2EYkmqKYWxsrPz8/DRjxox6yxkTExNlsViUmJhYb1+XX365LBaLnnzyybr7nnzySVksFuXl5dXbduPGjbJYLHrnnXfq7tu/f/8J90nSPffcI4vFoltuuaXe/UePHtUDDzygyMhIeXh4qGvXrnruuedkt9tP2Off/va3E9577969lZCQUO89ne726/d1MuXl5XryySfVvXt3eXp6KiwsTNdcc43S0tIa9f4kKSEh4aRZavcxa9Ysubm5KTc394Tn3n777Wrfvr3Ky8u1atUqjRs3TsHBwfLy8tKAAQM0d+7cup/36V7r17dab7/9ti644AKFhobKw8NDvXr10ty5c0/7/QEAR+NqdgAAwJmlpaVp/Pjx8vDw0DfffKOwsLATtnnnnXc0bdo0DR48WLNnz9bhw4f10ksvac2aNfrpp5/Uvn17paWl6fDhw7rvvvsUEBCg7du36+WXX9aKFSu0evVqeXl5SZLWrFmjqVOnasSIEZo8ebLef/997d27V8eOHdPTTz+tRx99VMuWLdMjjzyizp07a/LkyZKkvXv3auLEieratav+8pe/aOnSpXXnjN1zzz2677779NNPP+kf//iHQkJCNHPmzFO+55UrV+qrr75q8u9lamqq5s+ff8L9ZWVlGjt2rDIyMnTHHXeoc+fOWrt2rWbOnKmsrCzNmTPnrF6nZ8+eev/99+u+fv3117Vz50794x//qLuvb9++p3y+zWbTFVdcoRUrVuiGG27Q7373OxUXF2v58uVKSUlRbGzsWb2/X+vRo4cee+wxSVJeXp4efPDBusduvvlmPf3001qwYIHuvffeuvsrKyv1ySef6De/+Y08PT21du1ahYaG6k9/+pOsVquSkpJ09913a+vWrXWl6LHHHtNtt91W73Vuv/12jR49+oRMc+fOVXx8vK666iq5urpqyZIluvvuu2W323XPPfec9v0AgMMwAAAO6e233zYkGV988YURGxtrSDIuvvjik25bWVlphIaGGr179zaOHTtWd/8XX3xhSDKeeOKJU77O8uXLDUnG008/XXffVVddZURHRxvl5eWGYRhGcXGxER0dbXh7ext79+41DMMw7Ha7MXLkSKNfv351z7v//vsNX19fIy8vzzAMw6iqqjKGDRtmSDJ+/PHHuu0mT55shIaG1u3/+++/NyQZ33//fd02Q4cONS699FJDkjFr1qy6+2fNmmVIMnJzc+u9jw0bNhiSjL
fffrvuvn379p1w3/XXX2/07t3biIyMNKZOnVp3/zPPPGP4+PgYu3fvrrffRx55xLBarcbBgwfr7fOFF1444XsZHx9vjB079oT7DcMwpk6dakRFRZ30sZN56623DEnGiy++eMJjdrv9rN9frZEjRxrnn39+3dcn28fw4cONoUOH1nvep59+esLP6H899thjhiRj5cqVJzx2stf5tbKyshPuu+SSS4yYmJhTvh4AOBqWBQKAg7vllluUnp6u//u//9OyZcu0cOHCE7bZuHGjcnJydPfdd9c7D+vyyy9Xjx499OWXX9bdV1VVpby8vLpb//79NWjQoHr7XbFihS677DJ5eHhIktq1a6devXopJCRE0dHRklQ3rXDLli06cuRI3fPGjBmjoKAgSZKrq6sGDhwoSRoyZEjd/q+55hrl5OQoJSXlpO/5008/1YYNG/TXv/61Ud+zU9m0aZMWLlyo2bNny8Wl/q/AhQsXavTo0QoICKj3/Rk3bpxsNptWrlxZb/uysrJ62+Xl5TXp5Mb//ve/Cg4O1n333XfCY6eavni691ersrKy7ud6KlOmTNGPP/5Yt/xQkj788ENFRkZq7Nixdff97/dg+vTpcnNzO+k/o2dSe9RUkgoLC5WXl6exY8dq7969KiwsPOv9AYAZKFcA4ODy8/P1wQcf6N1331X//v31u9/97oQ/Ng8cOCBJiouLO+H5PXr0qHtcqlnyFxISUu+2ceNGpaamSpIKCgpUWlqqiIiIM2ar3SY9Pb3ufxvzvF+z2Wx69NFHdeONN5522VxjPPLIIxo9erSuuOKKEx7bs2ePli5desL3Zty4cZKknJycetvPmjXrhG1//vnnJsualpamuLg4ubo2fAX/6d5fraNHj6pdu3an3c+kSZPk4eGhDz/8UFJN2fniiy9044031it2zz//fL3336VLF1VVVdX9s3Q21qxZo3HjxsnHx0ft27dXSEiIHn300brXBwBnwDlXAODgXnjhBV133XWSas7bGTZsmGbOnKl//etfjdpfv379tHz58nr3zZ49W+vWrZNUM0ThbB07dqxRz6193q+9+eab2r9/v7755puzznE6y5Yt07ffflv3Pv+X3W7XRRddpD/+8Y8nfbx79+71vr799tvrfi61pk+f3jRhG+FM769Wdna2LrnkktNuExAQoCuuuEIffvihnnjiCX3yySeqqKjQTTfdVG+7KVOmaNSoUfXuu+GGG846e1pami688EL16NFDL774oiIjI+Xu7q6vvvpK//jHP+oNFAEAR0a5AgAHN2bMmLr/P3jwYN1zzz169dVXNWXKFA0bNkySFBUVJUnatWuXLrjggnrP37VrV93jUs0fzrVHY2rNmDGjbkBCcHCw3NzclJmZecZstZMIw8PDJUlhYWGNel6tsrIyPfXUU7r77rvrZT5XhmHokUce0dVXX133PftfsbGxKikpOeF7cyrdunU7YVsfH59zzvrrPD/++KOqqqrk5uZ22m0b8v4k6dChQyouLlbPnj3P+PpTpkzRhAkTtGHDBn344YcaMGCA4uPj620TExOjmJiYuq/z8vKUn59/ymEbp7JkyRJVVFRo8eLF6ty5c93933///VntBwDMxrJAAHAyzz77rMLCwnT77berurpakjRo0CCFhobqtddeU0VFRd22X3/9tXbu3KnLL79ckk56TtCSJUu0bds2XXPNNZIkNzc3DRs2TF999ZUqKyslSSUlJdqxY4dyc3O1f/9+STV/0C9atEidO3euK0JjxozRypUrlZ+fX/d6mzZtkiStX7++7jU///xzeXl5adCgQfWyvPTSSyotLa2bZNdUPvroI23durXeSPn/df3112vdunUnPWJ29OjRuu91S/nNb36jvLw8vfLKKyc8Zvxq3LnUsPdXu52kEwr4yVx66aV14/aTkpJOOGp1sn+WZs+eLcMw6v5ZaqjaC2H/+n0VFhbq7bffPqv9AIDZOHIFAE7G19dX//znP3XNNd
fo73//ux5++GG5ubnpueee07Rp0zR27FhNnjy5bhR7ly5d6kZtr1q1So888oiuuuoqBQUFaf369Xr33XfVq1cvPfT
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"inertia = []\n",
"k_values = range(1, 10)\n",
"\n",
"for k in k_values:\n",
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
" kmeans.fit(reduced_data)\n",
" inertia.append(kmeans.inertia_)\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(k_values, inertia, marker='o')\n",
"plt.title(\"Метод локтя\")\n",
"plt.xlabel(\"Количество кластеров\")\n",
"plt.ylabel(\"Инерция\")\n",
"plt.show()\n",
"\n",
"# Коэффициент силуэта для выбора оптимального количества кластеров\n",
"silhouette_scores = []\n",
"\n",
"for k in range(2, 10):\n",
" kmeans = KMeans(n_clusters=k, random_state=42)\n",
" labels = kmeans.fit_predict(reduced_data)\n",
" score = silhouette_score(reduced_data, labels)\n",
" silhouette_scores.append(score)\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.plot(range(2, 10), silhouette_scores, marker='o')\n",
"plt.title(\"Коэффициент силуэта\")\n",
"plt.xlabel(\"Количество кластеров\")\n",
"plt.ylabel(\"Силуэт\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Кластерный анализ"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA28AAAIjCAYAAACUIiNfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOz9d3hc5Z3//z/Pmd7VmyXbcu8VMKYTjE1iUigJpBKSTUgW2BB2E5L95IIk+9nNfpMtIR+SkN38dtkCKZCEhGZCh4AptjEuuFu2Jcvq0ow00tRzfn8ICxTJ4CJrNNLrcV26Luvc95x5n7FszWvu+9y3Ydu2jYiIiIiIiIxpZq4LEBERERERkfem8CYiIiIiIpIHFN5ERERERETygMKbiIiIiIhIHlB4ExERERERyQMKbyIiIiIiInlA4U1ERERERCQPKLyJiIiIiIjkAYU3ERERERGRPKDwJiIiIiIikgcU3kREZETcc889GIbBhg0bBh2PRqOcddZZeL1e1q1b966PNQyDP/3pT0PabdumpqYGwzC4/PLLT0v9o+XAgQMYhsE//dM/DWnbtGkT4XCYGTNm0NzcnIPqRERkLFN4ExGR0yYWi7F69Wq2bNnC7373Oy677LJ37e/1ernvvvuGHH/uuedoaGjA4/GcrlJzbs+ePbz//e8nEAjwxz/+kfLy8lyXJCIiY4zCm4iInBbd3d2sWbOGzZs385vf/Ib3v//97/mYD3zgA9x///1kMplBx++77z6WL19ORUXF6So3p44cOcKaNWtIJBI89thjTJs2LdcliYjIGKTwJiIiI66np4fLLruMTZs28Zvf/Ia1a9ce1+M+/vGP097ezhNPPDFwLJVK8cADD/CJT3xi2MdYlsUPf/hD5s+fj9frpby8nBtuuIHOzs5B/X7/+9+zdu1aqqqq8Hg8TJ8+nb/7u78jm80O6nfRRRexYMEC3nzzTS6++GL8fj+TJk3i+9///pDn/n//7/8xf/58/H4/hYWFnHHGGcOOHL6brq4uLrvsMhobG3nwwQdZsmTJsP2mTp06MLX0nV/PPvvsCV8jwCuvvMIHPvABCgsLCQQCLFq0iDvvvBOAz372s8M+1zu/Dhw4MHCuxx57jPPPP59AIEAoFGLt2rVs37590PN99rOfJRgMsn//ftasWUMgEKCqqorvfve72LY90O/dppUe9e1vfxvDMI7j1RURGV+cuS5ARETGl3g8zvvf/35ee+01HnjggRO6R23q1KmsXLmSX/ziFwMjdY899hjRaJRrr72WH/3oR0Mec8MNN3DPPfdw/fXX81d/9VfU1dVx11138frrr/Piiy/icrmA/vvqgsEgt956K8FgkKeffprbb7+dWCzGD37wg0Hn7Ozs5LLLLuPKK6/kYx/7GA888AC33XYbCxcuHKjr3//93/mrv/orrr76ar7yla+QSCTYsmULr7zyyjGD5p9LJBJ86EMfYtu2bfz617/m4osvftf+559/Pl/84hcB2LFjB//wD/8wqP14r/GJJ57g8ssvp7Kykq985StUVFSwY8cOHn74Yb7yla9www03sGrVqoH+n/70p7niiiu48sorB46VlpYC8D//8z9cd911rFmzhv/v//v/6O3t5ac//SnnnXcer7/+OlOnTh14TDab5bLLLuPss8/m+9//PuvWreOOO+4gk8nw3e9+97heMxGRCc0WEREZAf/5n/9pA/aUKVNsl8tlP/jggyf82Ndee82+66677FAoZPf29tq2bdsf/ehH7Ysvvti2bdueMmWKvXbt2oHHvfDCCzZg33vvvYPOt27duiHHj57vnW644Qbb7/fbiURi4NiFF15oA/Z///d/DxxLJpN2RUWFfdVVVw0c+/CHP2zPnz//uK/xqLq6Ohuw//Ef/9H+8Ic/bAN2aWmpHY1G3/VxkyZNsq+//vqB75955hkbsJ955pkTusZMJmPX1tbaU6ZMsTs7Owf1tSxr2OcG7D
vuuGPI8e7ubrugoMD+whe+MOh4U1OTHYlEBh2/7rrrbMC++eabBz3f2rVrbbfbbbe2ttq2/fbr84Mf/GD4F8K27TvuuMPWWxgRmYg0bVJEREZUc3MzXq+Xmpqak3r8xz72Mfr6+nj44Yfp7u7m4YcfPuZI1v33308kEuHSSy+lra1t4Gv58uUEg0GeeeaZgb4+n2/gz93d3bS1tXH++efT29vLzp07B503GAzyqU99auB7t9vNWWedxf79+weOFRQU0NDQwGuvvXZS1/lP//RP/P73v+czn/kM7e3tfPOb33zX/qlU6j0XbDmea3z99depq6vjlltuoaCgYNDjT3Qq4hNPPEFXVxcf//jHB73+DoeDFStWDHr9j7rpppsGPd9NN91EKpXiySefHNSvt7eXtrY2Ojs7B02rFBGZyBTeRERkRP3sZz/D7XZz2WWXsWvXroHj2WyWpqamQV+pVGrI40tLS1m1ahX33Xcfv/3tb8lms1x99dXDPteePXuIRqOUlZVRWlo66Kunp4eWlpaBvtu3b+eKK64gEokQDocpLS0dCGjRaHTQeaurq4cEmcLCwkH30d12220Eg0HOOussZs6cyY033siLL7543K9TW1sbN910E//1X//FX/7lX3L33Xfz8ssvH7N/NBolGAy+6zmP5xr37dsHwIIFC4671mPZs2cPAO973/uGvP5//OMfB73+AKZpDlmMZdasWQCD7qEDuOOOOygtLaWoqAi/38/atWsHnk9EZKLSPW8iIjKi5s2bx6OPPsoll1zCpZdeyosvvkhNTQ319fXU1tYO6vvMM89w0UUXDTnHJz7xCb7whS/Q1NTE+9///iEjREdZlkVZWRn33nvvsO1H78vq6uriwgsvJBwO893vfpfp06fj9XrZtGkTt912G5ZlDXqcw+EY9nzvHAGaO3cuu3bt4uGHH2bdunX85je/4Sc/+Qm333473/nOd4718gz48Ic/PLBAyN///d/z29/+lhtuuIGNGzfidA7+9dzR0UEqlXrX1TZP9BpHwtFz/s///M+wtf35dZyIL37xi3z0ox8lm82yY8cOvv3tb/ORj3xkyEIoIiITicKbiIiMuLPOOosHH3yQtWvXcumll/LCCy9QUVExaBVJgMWLFw/7+CuuuIIbbriBl19+mV/96lfHfJ7p06fz5JNPcu655w6aMvjnnn32Wdrb2/ntb3/LBRdcMHC8rq7uBK9ssEAgwDXXXMM111xDKpXiyiuv5O///u/55je/idfrfdfHnnfeeZhm/wSYcDjM//t//4+rrrqKf/mXf+HrX//6oL5vvvkm0B8Yj+V4r3H69OkAbNu2bdCiJCfj6LnKysqO61yWZbF///6B0TaA3bt3Awxa2ARg5syZA+dcs2YNvb29/J//8384dOjQKdUsIpLPNG1SREROi0suuYRf/OIX7N27l8suu4xUKsWqVasGfRUWFg772GAwyE9/+lO+/e1v88EPfvCYz/Gxj32MbDbL3/3d3w1py2QydHV1AW+PpL1z5CyVSvGTn/zkpK+vvb190Pdut5t58+Zh2zbpdPqEz3fllVfywQ9+kO985ztDphD+8pe/xO12c9555x3z8cd7jcuWLaO2tpYf/vCHA6/PUSd6b9maNWsIh8P8wz/8w7DX3NraOuTYXXfdNej57rrrLlwuF5dccsm7PtfRUb5jjYqKiEwEGnkTEZHT5oorruDf//3f+dznPseHPvQh1q1b954jUkddd91179nnwgsv5IYbbuB73/semzdvZvXq1bhcLvbs2cP999/PnXfeydVXX80555xDYWEh1113HX/1V3+FYRj8z//8zykthLF69WoqKio499xzKS8vZ8eOHdx1112sXbuWUCh0Uuf88Y9/zLx58/jLv/xLHn30Ufbs2cMdd9zBL37xC77xjW8QDoeP+djjvUbTNPnpT3/KBz/4QZYsWcL1119PZWUlO3fuZPv27Tz++OPHXW84HOanP/0pn/70p1
m2bBnXXnstpaWlHDp0iEceeYRzzz13UFjzer2sW7eO6667jhUrVvDYY4/xyCOP8Ld/+7cDU1yP2rVrF+vWrcOyLN5
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA28AAAIjCAYAAACUIiNfAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdd3Rc1b3+//eZPhpp1KstyXLv3dimGYhjEwwJLZQ0ShJCAvwC3BS4hZKbhJRvbkIuAVIhlxJKAgRwMMVgmh2MK+5dtnrvZer+/aFYICQbF1njkZ7XWlrLOnvPmc+MR9I8s/fZ2zLGGEREREREROSkZot1ASIiIiIiIvLJFN5ERERERETigMKbiIiIiIhIHFB4ExERERERiQMKbyIiIiIiInFA4U1ERERERCQOKLyJiIiIiIjEAYU3ERERERGROKDwJiIiIiIiEgcU3kREREREROKAwpuIyAl0//33Y1kWc+fOjXUpIiIiEucsY4yJdREiIoPVaaedRnl5OcXFxezatYvRo0fHuiQRERGJUxp5ExE5Qfbt28fKlSv5n//5HzIzM3nsscdiXZKIiIjEMYU3EZET5LHHHiM1NZUlS5Zw6aWX9hneiouLsSyrz69HH320R9+zzjqrz34PP/xwj35PP/00s2bNwuv1kpGRwZe+9CXKysp69Ln66quxLIvp06f3qumee+7BsiwSExN7tT366KPd505LS+OKK66gpKSkV52TJ09m7dq1nHrqqXi9XoqKinjwwQd79AsGg9xxxx3MmjWL5ORkfD4fZ5xxBm+88cYRPT8Hv66++moAHn74YSzLori4uPv20WiUqVOn9nqeDj7+g1+pqamcddZZvP322z1qHDFiBOeff36v5+HGG2/Esqwexx566CHOOeccsrKycLvdTJw4kQceeKDXbcPhMD/84Q8ZO3Ysbre7Rx1r1qzp1f+jrr76akaMGNHj2KOPPorNZuMnP/lJr/6Hes4++hwdad0AL730EgsWLCApKQm/38+cOXN4/PHHgUO/Pj/69fG6++u1tGLFCizLYsWKFT2OL1myBMuyuOuuu7qPPfDAA0ybNq37NTdt2jT++Mc/9rjdBx98wNVXX83IkSPxeDzk5ORw7bXXUldX16PfXXfdhWVZ1NbW9ji+Zs2aXq+5g6/lj/+83nDDDT1exwetX7+ec889l8zMzB7PYV+vRxEZOhyxLkBEZLB67LHHuPjii3G5XFx55ZU88MADvP/++8yZM6dX3yuvvJLzzjuvx7HTTjutV7/x48fzH//xHwDU1tZyyy239Gh/+OGHueaaa5gzZw733HMPVVVV3Hvvvbz77rusX7+elJSU7r4Oh4MtW7awfv16ZsyY0eMcHo+n133/6Ec/4r/+67+47LLL+NrXvkZNTQ3/+7//y5lnntnr3A0NDZx33nlcdtllXHnllTz11FN885vfxOVyce211wLQ3NzMH/7wB6688kq+/vWv09LSwh//+EcWL17M6tWrmT59OpmZmTzyyCPd533mmWd49tlnexwbNWpUX08/AI888gibNm3qsy0jI4Nf/vKXAJSWlnLvvfdy3nnnUVJS0uOxHKkHHniASZMm8dnPfhaHw8ELL7zAt771LaLRKDfccEN3v1/84hf813/9FxdddBHf//73cbvdvP322/zud7876vt85ZVXuPbaa7nxxhu57bbb+uxz0UUXcfHFFwP0eT9HWvfDDz/Mtddey6RJk7j99ttJSUlh/fr1LFu2jC984Qv8x3/8B1/72teAD1+b1113HWeccUavmvr7tdSXt956i3/84x+9jre0tLBo0SJGjRqFMYannnqKr33ta6SkpHDJJZcA8Oqrr7J3716uueYacnJy2LJlC7/73e/YsmUL//znP3sF0WO1e/dufv/73/c63tTUxGc+8xmMMdx6663k5+cD9Pp5F5EhyIiISL9bs2aNAcyrr75qjDEmGo2a4cOHm29/+9s9+u3bt8
8A5uc///knnvO0004zZ599dq/bPvTQQ8YYY4LBoMnKyjKTJ082HR0d3f1efPFFA5g77rij+9hVV11lfD6fueCCC8yNN97Yffztt982Xq/XXHjhhcbn83UfLy4uNna73fzoRz/qUdOmTZuMw+HocXzBggUGML/4xS+6jwUCATN9+nSTlZVlgsGgMcaYcDhsAoFAj/M1NDSY7Oxsc+211/b5HNx5553mUH+6HnroIQOYffv2GWOM6ezsNAUFBeYzn/lMj+fp4OMvLCzscfvf/e53BjCrV6/uPlZYWGiWLFnS675uuOGGXnW0t7f36rd48WIzcuTIHsfmz59vJkyYYKLRaK/a33///T4fW191r1mzxiQmJprPf/7zJhKJ9OobCoUMYO6+++5e93PwOTrSuhsbG01SUpKZO3duj9eWMabH4zjo46/NjzoRr6U33njDAOaNN97o7jd37tzu//s777yzVx0HhcNh4/f7e/wc9PWc/OUvfzGAeeutt7qPHXw91tTU9Oj7/vvv93r8fT0nl112mZk8ebLJz883V111Vffxl19+2QDmL3/5S4/zHur1KCJDh6ZNioicAI899hjZ2dmcffbZQNf0tcsvv5wnnniCSCRyTOcMBoO43e5Dtq9Zs4bq6mq+9a1v9Rg5W7JkCePHj2fp0qW9bnPttdfy+OOPEwgEgK4pdBdffDHJyck9+j3zzDNEo1Euu+wyamtru79ycnIYM2ZMj6mO0DWq941vfKP7e5fLxTe+8Q2qq6tZu3YtAHa7HZfLBXRNb6yvryccDjN79mzWrVt3lM9Ob7/5zW+oq6vjzjvv7LM9Go12P44NGzbwf//3f+Tm5jJhwoQe/UKhUI/HXFtbS2dnZ6/zeb3e7n83NTVRW1vLggUL2Lt3L01NTd1tLS0tpKamHtfozd69e1myZAnTp0/nkUcewWbr/ec8GAwCHPY1c6R1v/rqq7S0tHDbbbf1GpU92sdxIl5Lfd3H+++/3+dUUoBIJEJtbS379+/nl7/8Jc3NzT1GCD/6nHR2dlJbW8u8efMA+uW1CbB27Vqefvpp7rnnnl7/fy0tLQCkp6f3y32JyOChaZMiIv0sEonwxBNPcPbZZ7Nv377u43PnzuUXv/gFy5cvZ9GiRUd93sbGRgoLCw/Zvn//fgDGjRvXq238+PG88847vY4vWbIEh8PB3//+d5YsWcJTTz3Fc88912NaIsCuXbswxjBmzJg+79vpdPb4Pi8vD5/P1+PY2LFjga5rfw6+Ef7zn//ML37xC7Zv304oFOruW1RUdMjHeSSampr48Y9/zK233kp2dnaffUpKSsjMzOz+Pjc3l7/97W+9rvV75ZVXevQ7lHfffZc777yTVatW0d7e3queg4F4/vz5/OEPf+C3v/0t559/Pm63m9bW1iN+bG1tbSxevJiqqirS09MPGZ4aGxsB+rx28Wjr3rNnDwCTJ08+4joP5US9lg6KRCL8+7//O1/84heZOnXqIWs4GNJdLhf3338/l112WXd7fX09d999N0888QTV1dU9bvvRIH48brvtNs444wzOP/98brzxxh5ts2fPxul0ctddd5GRkdE9bTIajfbLfYtI/FJ4ExHpZ6+//joVFRU88cQTPPHEE73aH3vssWMKb5WVlSxevLg/SuzmdDr50pe+xEMPPUR7ezvp6emcc845vcJbNBrFsixeeukl7HZ7r/N8UkDoy6OPPsrVV1/NhRdeyHe/+12ysrKw2+3cc8893WHhWP30pz/FZrPx3e9+t9ciEwdlZ2d3LwrT1NTEn/70J84991zeeecdpkyZ0t1v7ty5/PCHP+xx2/vuu4+///3v3d/v2bOHT33qU4wfP57/+Z//IT8/H5fLxT/+8Q9++ctf9njTfc8991BWVsb1119/TI+ttrYWn8/HCy+8wIUXXsg999zT5+hiZWUlADk5OYc819HU3V9OxGvpo/74xz9SXFzMyy+/fMg+BQUF3aOJL774Irfccgv5+f
ndi4FcdtllrFy5ku9+97tMnz6dxMREotEo5557br88J6+88gqvvfYaq1at6rO9sLCQhx56iG9/+9vMnDmzR9uhAqm
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Оптимальное количество кластеров\n",
"optimal_k = 4\n",
"\n",
"# Неиерархическая кластеризация (K-Means)\n",
"kmeans = KMeans(n_clusters=optimal_k, random_state=42)\n",
"labels_kmeans = kmeans.fit_predict(reduced_data)\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=labels_kmeans, cmap='viridis', alpha=0.5)\n",
"plt.title(\"K-Means Кластеры\")\n",
"plt.xlabel(\"PC1\")\n",
"plt.ylabel(\"PC2\")\n",
"plt.show()\n",
"\n",
"# Агломеративная кластеризация\n",
"agglomerative = AgglomerativeClustering(n_clusters=optimal_k)\n",
"agglomerative_labels = agglomerative.fit_predict(reduced_data)\n",
"\n",
"plt.figure(figsize=(10, 6))\n",
"plt.scatter(reduced_data[:, 0], reduced_data[:, 1], c=agglomerative_labels, cmap='viridis', alpha=0.5)\n",
"plt.title(\"Агломеративная кластеризация\")\n",
"plt.xlabel(\"PC1\")\n",
"plt.ylabel(\"PC2\")\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оценка качества решения"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"K-Means - Инерция: 14420617136.370611, Коэффициент силуэта: 0.48226441542888965\n",
"Коэффициент силуэта для иерархической кластеризации: 0.46\n"
]
}
],
"source": [
"# Коэффициент силуэта для K-Means\n",
"kmeans_inertia = kmeans.inertia_\n",
"kmeans_silhouette = silhouette_score(reduced_data, labels_kmeans)\n",
"print(f'K-Means - Инерция: {kmeans_inertia}, Коэффициент силуэта: {kmeans_silhouette}')\n",
"\n",
"# Коэффициент силуэта для иерархической кластеризации\n",
"agglomerative_silhouette = silhouette_score(reduced_data, agglomerative_labels)\n",
"print(f\"Коэффициент силуэта для иерархической кластеризации: {agglomerative_silhouette:.2f}\")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimvenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}