{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Лабораторная 4"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Информация о диабете индейцев Пима"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',\n",
" 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 6 | \n",
" 148 | \n",
" 72 | \n",
" 35 | \n",
" 0 | \n",
" 33.6 | \n",
" 0.627 | \n",
" 50 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 85 | \n",
" 66 | \n",
" 29 | \n",
" 0 | \n",
" 26.6 | \n",
" 0.351 | \n",
" 31 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 8 | \n",
" 183 | \n",
" 64 | \n",
" 0 | \n",
" 0 | \n",
" 23.3 | \n",
" 0.672 | \n",
" 32 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 89 | \n",
" 66 | \n",
" 23 | \n",
" 94 | \n",
" 28.1 | \n",
" 0.167 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 137 | \n",
" 40 | \n",
" 35 | \n",
" 168 | \n",
" 43.1 | \n",
" 2.288 | \n",
" 33 | \n",
" 1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 763 | \n",
" 10 | \n",
" 101 | \n",
" 76 | \n",
" 48 | \n",
" 180 | \n",
" 32.9 | \n",
" 0.171 | \n",
" 63 | \n",
" 0 | \n",
"
\n",
" \n",
" 764 | \n",
" 2 | \n",
" 122 | \n",
" 70 | \n",
" 27 | \n",
" 0 | \n",
" 36.8 | \n",
" 0.340 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 765 | \n",
" 5 | \n",
" 121 | \n",
" 72 | \n",
" 23 | \n",
" 112 | \n",
" 26.2 | \n",
" 0.245 | \n",
" 30 | \n",
" 0 | \n",
"
\n",
" \n",
" 766 | \n",
" 1 | \n",
" 126 | \n",
" 60 | \n",
" 0 | \n",
" 0 | \n",
" 30.1 | \n",
" 0.349 | \n",
" 47 | \n",
" 1 | \n",
"
\n",
" \n",
" 767 | \n",
" 1 | \n",
" 93 | \n",
" 70 | \n",
" 31 | \n",
" 0 | \n",
" 30.4 | \n",
" 0.315 | \n",
" 23 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
768 rows × 9 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"0 6 148 72 35 0 33.6 \n",
"1 1 85 66 29 0 26.6 \n",
"2 8 183 64 0 0 23.3 \n",
"3 1 89 66 23 94 28.1 \n",
"4 0 137 40 35 168 43.1 \n",
".. ... ... ... ... ... ... \n",
"763 10 101 76 48 180 32.9 \n",
"764 2 122 70 27 0 36.8 \n",
"765 5 121 72 23 112 26.2 \n",
"766 1 126 60 0 0 30.1 \n",
"767 1 93 70 31 0 30.4 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"0 0.627 50 1 \n",
"1 0.351 31 0 \n",
"2 0.672 32 1 \n",
"3 0.167 21 0 \n",
"4 2.288 33 1 \n",
".. ... ... ... \n",
"763 0.171 63 0 \n",
"764 0.340 27 0 \n",
"765 0.245 30 0 \n",
"766 0.349 47 1 \n",
"767 0.315 23 0 \n",
"\n",
"[768 rows x 9 columns]"
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import set_config\n",
"\n",
"set_config(transform_output=\"pandas\")\n",
"df = pd.read_csv(\".//scv//diabetes.csv\")\n",
"print(df.columns)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование выборок"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'X_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" 1 | \n",
" 105 | \n",
" 58 | \n",
" 0 | \n",
" 0 | \n",
" 24.3 | \n",
" 0.187 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 69 | \n",
" 4 | \n",
" 146 | \n",
" 85 | \n",
" 27 | \n",
" 100 | \n",
" 28.9 | \n",
" 0.189 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 494 | \n",
" 3 | \n",
" 80 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.174 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 463 | \n",
" 5 | \n",
" 88 | \n",
" 78 | \n",
" 30 | \n",
" 0 | \n",
" 27.6 | \n",
" 0.258 | \n",
" 37 | \n",
" 0 | \n",
"
\n",
" \n",
" 653 | \n",
" 2 | \n",
" 120 | \n",
" 54 | \n",
" 0 | \n",
" 0 | \n",
" 26.8 | \n",
" 0.455 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 322 | \n",
" 0 | \n",
" 124 | \n",
" 70 | \n",
" 20 | \n",
" 0 | \n",
" 27.4 | \n",
" 0.254 | \n",
" 36 | \n",
" 1 | \n",
"
\n",
" \n",
" 109 | \n",
" 0 | \n",
" 95 | \n",
" 85 | \n",
" 25 | \n",
" 36 | \n",
" 37.4 | \n",
" 0.247 | \n",
" 24 | \n",
" 1 | \n",
"
\n",
" \n",
" 27 | \n",
" 1 | \n",
" 97 | \n",
" 66 | \n",
" 15 | \n",
" 140 | \n",
" 23.2 | \n",
" 0.487 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 651 | \n",
" 1 | \n",
" 117 | \n",
" 60 | \n",
" 23 | \n",
" 106 | \n",
" 33.8 | \n",
" 0.466 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 197 | \n",
" 3 | \n",
" 107 | \n",
" 62 | \n",
" 13 | \n",
" 48 | \n",
" 22.9 | \n",
" 0.678 | \n",
" 23 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 9 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"196 1 105 58 0 0 24.3 \n",
"69 4 146 85 27 100 28.9 \n",
"494 3 80 0 0 0 0.0 \n",
"463 5 88 78 30 0 27.6 \n",
"653 2 120 54 0 0 26.8 \n",
".. ... ... ... ... ... ... \n",
"322 0 124 70 20 0 27.4 \n",
"109 0 95 85 25 36 37.4 \n",
"27 1 97 66 15 140 23.2 \n",
"651 1 117 60 23 106 33.8 \n",
"197 3 107 62 13 48 22.9 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"196 0.187 21 0 \n",
"69 0.189 27 0 \n",
"494 0.174 22 0 \n",
"463 0.258 37 0 \n",
"653 0.455 27 0 \n",
".. ... ... ... \n",
"322 0.254 36 1 \n",
"109 0.247 24 1 \n",
"27 0.487 22 0 \n",
"651 0.466 27 0 \n",
"197 0.678 23 1 \n",
"\n",
"[614 rows x 9 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" 0 | \n",
"
\n",
" \n",
" 69 | \n",
" 0 | \n",
"
\n",
" \n",
" 494 | \n",
" 0 | \n",
"
\n",
" \n",
" 463 | \n",
" 0 | \n",
"
\n",
" \n",
" 653 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 322 | \n",
" 1 | \n",
"
\n",
" \n",
" 109 | \n",
" 1 | \n",
"
\n",
" \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 651 | \n",
" 0 | \n",
"
\n",
" \n",
" 197 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 1 columns
\n",
"
"
],
"text/plain": [
" Outcome\n",
"196 0\n",
"69 0\n",
"494 0\n",
"463 0\n",
"653 0\n",
".. ...\n",
"322 1\n",
"109 1\n",
"27 0\n",
"651 0\n",
"197 1\n",
"\n",
"[614 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'X_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 669 | \n",
" 9 | \n",
" 154 | \n",
" 78 | \n",
" 30 | \n",
" 100 | \n",
" 30.9 | \n",
" 0.164 | \n",
" 45 | \n",
" 0 | \n",
"
\n",
" \n",
" 379 | \n",
" 0 | \n",
" 93 | \n",
" 100 | \n",
" 39 | \n",
" 72 | \n",
" 43.4 | \n",
" 1.021 | \n",
" 35 | \n",
" 0 | \n",
"
\n",
" \n",
" 640 | \n",
" 0 | \n",
" 102 | \n",
" 86 | \n",
" 17 | \n",
" 105 | \n",
" 29.3 | \n",
" 0.695 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 658 | \n",
" 11 | \n",
" 127 | \n",
" 106 | \n",
" 0 | \n",
" 0 | \n",
" 39.0 | \n",
" 0.190 | \n",
" 51 | \n",
" 0 | \n",
"
\n",
" \n",
" 304 | \n",
" 3 | \n",
" 150 | \n",
" 76 | \n",
" 0 | \n",
" 0 | \n",
" 21.0 | \n",
" 0.207 | \n",
" 37 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 203 | \n",
" 2 | \n",
" 99 | \n",
" 70 | \n",
" 16 | \n",
" 44 | \n",
" 20.4 | \n",
" 0.235 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 605 | \n",
" 1 | \n",
" 124 | \n",
" 60 | \n",
" 32 | \n",
" 0 | \n",
" 35.8 | \n",
" 0.514 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 561 | \n",
" 0 | \n",
" 198 | \n",
" 66 | \n",
" 32 | \n",
" 274 | \n",
" 41.3 | \n",
" 0.502 | \n",
" 28 | \n",
" 1 | \n",
"
\n",
" \n",
" 280 | \n",
" 0 | \n",
" 146 | \n",
" 70 | \n",
" 0 | \n",
" 0 | \n",
" 37.9 | \n",
" 0.334 | \n",
" 28 | \n",
" 1 | \n",
"
\n",
" \n",
" 103 | \n",
" 1 | \n",
" 81 | \n",
" 72 | \n",
" 18 | \n",
" 40 | \n",
" 26.6 | \n",
" 0.283 | \n",
" 24 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
154 rows × 9 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"669 9 154 78 30 100 30.9 \n",
"379 0 93 100 39 72 43.4 \n",
"640 0 102 86 17 105 29.3 \n",
"658 11 127 106 0 0 39.0 \n",
"304 3 150 76 0 0 21.0 \n",
".. ... ... ... ... ... ... \n",
"203 2 99 70 16 44 20.4 \n",
"605 1 124 60 32 0 35.8 \n",
"561 0 198 66 32 274 41.3 \n",
"280 0 146 70 0 0 37.9 \n",
"103 1 81 72 18 40 26.6 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"669 0.164 45 0 \n",
"379 1.021 35 0 \n",
"640 0.695 27 0 \n",
"658 0.190 51 0 \n",
"304 0.207 37 0 \n",
".. ... ... ... \n",
"203 0.235 27 0 \n",
"605 0.514 21 0 \n",
"561 0.502 28 1 \n",
"280 0.334 28 1 \n",
"103 0.283 24 0 \n",
"\n",
"[154 rows x 9 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 669 | \n",
" 0 | \n",
"
\n",
" \n",
" 379 | \n",
" 0 | \n",
"
\n",
" \n",
" 640 | \n",
" 0 | \n",
"
\n",
" \n",
" 658 | \n",
" 0 | \n",
"
\n",
" \n",
" 304 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 203 | \n",
" 0 | \n",
"
\n",
" \n",
" 605 | \n",
" 0 | \n",
"
\n",
" \n",
" 561 | \n",
" 1 | \n",
"
\n",
" \n",
" 280 | \n",
" 1 | \n",
"
\n",
" \n",
" 103 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
154 rows × 1 columns
\n",
"
"
],
"text/plain": [
" Outcome\n",
"669 0\n",
"379 0\n",
"640 0\n",
"658 0\n",
"304 0\n",
".. ...\n",
"203 0\n",
"605 0\n",
"561 1\n",
"280 1\n",
"103 0\n",
"\n",
"[154 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from typing import Tuple\n",
"import pandas as pd\n",
"from pandas import DataFrame\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def split_stratified_into_train_val_test(\n",
" df_input,\n",
" stratify_colname=\"y\",\n",
" frac_train=0.6,\n",
" frac_val=0.15,\n",
" frac_test=0.25,\n",
" random_state=None,\n",
") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:\n",
" \n",
" if frac_train + frac_val + frac_test != 1.0:\n",
" raise ValueError(\n",
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
" % (frac_train, frac_val, frac_test)\n",
" )\n",
" if stratify_colname not in df_input.columns:\n",
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
" X = df_input # Contains all columns.\n",
" y = df_input[\n",
" [stratify_colname]\n",
" ] # Dataframe of just the column on which to stratify.\n",
" # Split original dataframe into train and temp dataframes.\n",
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
" )\n",
" if frac_val <= 0:\n",
" assert len(df_input) == len(df_train) + len(df_temp)\n",
" return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp\n",
" # Split the temp dataframe into val and test dataframes.\n",
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
" df_val, df_test, y_val, y_test = train_test_split(\n",
" df_temp,\n",
" y_temp,\n",
" stratify=y_temp,\n",
" test_size=relative_frac_test,\n",
" random_state=random_state,\n",
" )\n",
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
" return df_train, df_val, df_test, y_train, y_val, y_test\n",
"\n",
"X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
" df, stratify_colname=\"Outcome\", frac_train=0.80, frac_val=0, frac_test=0.20, random_state=9\n",
")\n",
"\n",
"display(\"X_train\", X_train)\n",
"display(\"y_train\", y_train)\n",
"\n",
"display(\"X_test\", X_test)\n",
"display(\"y_test\", y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Классификация данных"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.discriminant_analysis import StandardScaler\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"\n",
"\n",
"columns_to_drop = [\"Glucose\", \"Age\", \"BloodPressure\", \"Outcome\", \"DiabetesPedigreeFunction\"]\n",
"num_columns = [\n",
" column\n",
" for column in df.columns\n",
" if column not in columns_to_drop and df[column].dtype != \"object\"\n",
"]\n",
"cat_columns = [\n",
" column\n",
" for column in df.columns\n",
" if column not in columns_to_drop and df[column].dtype == \"object\"\n",
"]\n",
"\n",
"num_imputer = SimpleImputer(strategy=\"median\")\n",
"num_scaler = StandardScaler()\n",
"preprocessing_num = Pipeline(\n",
" [\n",
" (\"imputer\", num_imputer),\n",
" (\"scaler\", num_scaler),\n",
" ]\n",
")\n",
"\n",
"cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
"cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n",
"preprocessing_cat = Pipeline(\n",
" [\n",
" (\"imputer\", cat_imputer),\n",
" (\"encoder\", cat_encoder),\n",
" ]\n",
")\n",
"\n",
"features_preprocessing = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"prepocessing_num\", preprocessing_num, num_columns),\n",
" (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
" ],\n",
" remainder=\"passthrough\"\n",
")\n",
"\n",
"drop_columns = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"drop_columns\", \"drop\", columns_to_drop),\n",
" ],\n",
" remainder=\"passthrough\",\n",
")\n",
"\n",
"\n",
"pipeline_end = Pipeline(\n",
" [\n",
" (\"features_preprocessing\", features_preprocessing),\n",
" (\"drop_columns\", drop_columns),\n",
" ]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Проверка работы конвеера"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" -0.838489 | \n",
" -1.297466 | \n",
" -0.688684 | \n",
" -0.946400 | \n",
"
\n",
" \n",
" 69 | \n",
" 0.072181 | \n",
" 0.395520 | \n",
" 0.180416 | \n",
" -0.377190 | \n",
"
\n",
" \n",
" 494 | \n",
" -0.231376 | \n",
" -1.297466 | \n",
" -0.688684 | \n",
" -3.953317 | \n",
"
\n",
" \n",
" 463 | \n",
" 0.375738 | \n",
" 0.583630 | \n",
" -0.688684 | \n",
" -0.538054 | \n",
"
\n",
" \n",
" 653 | \n",
" -0.534932 | \n",
" -1.297466 | \n",
" -0.688684 | \n",
" -0.637047 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 322 | \n",
" -1.142046 | \n",
" -0.043402 | \n",
" -0.688684 | \n",
" -0.562802 | \n",
"
\n",
" \n",
" 109 | \n",
" -1.142046 | \n",
" 0.270114 | \n",
" -0.375808 | \n",
" 0.674613 | \n",
"
\n",
" \n",
" 27 | \n",
" -0.838489 | \n",
" -0.356918 | \n",
" 0.528056 | \n",
" -1.082516 | \n",
"
\n",
" \n",
" 651 | \n",
" -0.838489 | \n",
" 0.144708 | \n",
" 0.232562 | \n",
" 0.229143 | \n",
"
\n",
" \n",
" 197 | \n",
" -0.231376 | \n",
" -0.482325 | \n",
" -0.271516 | \n",
" -1.119638 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 4 columns
\n",
"
"
],
"text/plain": [
" Pregnancies SkinThickness Insulin BMI\n",
"196 -0.838489 -1.297466 -0.688684 -0.946400\n",
"69 0.072181 0.395520 0.180416 -0.377190\n",
"494 -0.231376 -1.297466 -0.688684 -3.953317\n",
"463 0.375738 0.583630 -0.688684 -0.538054\n",
"653 -0.534932 -1.297466 -0.688684 -0.637047\n",
".. ... ... ... ...\n",
"322 -1.142046 -0.043402 -0.688684 -0.562802\n",
"109 -1.142046 0.270114 -0.375808 0.674613\n",
"27 -0.838489 -0.356918 0.528056 -1.082516\n",
"651 -0.838489 0.144708 0.232562 0.229143\n",
"197 -0.231376 -0.482325 -0.271516 -1.119638\n",
"\n",
"[614 rows x 4 columns]"
]
},
"execution_count": 50,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"\n",
"preprocessing_result = pipeline_end.fit_transform(X_train)\n",
"preprocessed_df = pd.DataFrame(\n",
" preprocessing_result,\n",
" columns=pipeline_end.get_feature_names_out(),\n",
")\n",
"\n",
"preprocessed_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование набора моделей для классификации"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree\n",
"\n",
"class_models = {\n",
" \"logistic\": {\"model\": linear_model.LogisticRegression()},\n",
" # \"ridge\": {\"model\": linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n",
" \"ridge\": {\"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=9)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n",
" \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n",
" \"gradient_boosting\": {\n",
" \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n",
" },\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestClassifier(\n",
" max_depth=11, class_weight=\"balanced\", random_state=9\n",
" )\n",
" },\n",
" \"mlp\": {\n",
" \"model\": neural_network.MLPClassifier(\n",
" hidden_layer_sizes=(7,),\n",
" max_iter=500,\n",
" early_stopping=True,\n",
" random_state=9,\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Обучение моделей на обучающем наборе данных и оценка на тестовом"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: logistic\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: naive_bayes\n",
"Model: gradient_boosting\n",
"Model: random_forest\n",
"Model: mlp\n"
]
}
],
"source": [
"from sklearn import metrics\n",
"\n",
"for model_name in class_models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" model = class_models[model_name][\"model\"]\n",
"\n",
" model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
" model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n",
"\n",
" y_train_predict = model_pipeline.predict(X_train)\n",
" y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n",
" y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n",
"\n",
" class_models[model_name][\"pipeline\"] = model_pipeline\n",
" class_models[model_name][\"probs\"] = y_test_probs\n",
" class_models[model_name][\"preds\"] = y_test_predict\n",
"\n",
" class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n",
" y_train, y_train_predict\n",
" )\n",
" class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n",
" y_train, y_train_predict\n",
" )\n",
" class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n",
" y_train, y_train_predict\n",
" )\n",
" class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n",
" y_test, y_test_probs\n",
" )\n",
" class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n",
" class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n",
" class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n",
" y_test, y_test_predict\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сводная таблица оценок качества для использованных моделей классификации\n",
"\n",
"Матрица неточностей"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.metrics import ConfusionMatrixDisplay\n",
"import matplotlib.pyplot as plt\n",
"\n",
"_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)\n",
"for index, key in enumerate(class_models.keys()):\n",
" c_matrix = class_models[key][\"Confusion_matrix\"]\n",
" disp = ConfusionMatrixDisplay(\n",
" confusion_matrix=c_matrix, display_labels=[\"Healthy\", \"Sick\"]\n",
" ).plot(ax=ax.flat[index])\n",
" disp.ax_.set_title(key)\n",
"\n",
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Точность, полнота, верность (аккуратность), F-мера"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Precision_train | \n",
" Precision_test | \n",
" Recall_train | \n",
" Recall_test | \n",
" Accuracy_train | \n",
" Accuracy_test | \n",
" F1_train | \n",
" F1_test | \n",
"
\n",
" \n",
" \n",
" \n",
" naive_bayes | \n",
" 0.564516 | \n",
" 0.628571 | \n",
" 0.327103 | \n",
" 0.407407 | \n",
" 0.677524 | \n",
" 0.707792 | \n",
" 0.414201 | \n",
" 0.494382 | \n",
"
\n",
" \n",
" ridge | \n",
" 0.494382 | \n",
" 0.552632 | \n",
" 0.616822 | \n",
" 0.777778 | \n",
" 0.646580 | \n",
" 0.701299 | \n",
" 0.548857 | \n",
" 0.646154 | \n",
"
\n",
" \n",
" knn | \n",
" 0.670807 | \n",
" 0.551020 | \n",
" 0.504673 | \n",
" 0.500000 | \n",
" 0.741042 | \n",
" 0.681818 | \n",
" 0.576000 | \n",
" 0.524272 | \n",
"
\n",
" \n",
" random_forest | \n",
" 0.955157 | \n",
" 0.535714 | \n",
" 0.995327 | \n",
" 0.555556 | \n",
" 0.982085 | \n",
" 0.675325 | \n",
" 0.974828 | \n",
" 0.545455 | \n",
"
\n",
" \n",
" gradient_boosting | \n",
" 0.920213 | \n",
" 0.525000 | \n",
" 0.808411 | \n",
" 0.388889 | \n",
" 0.908795 | \n",
" 0.662338 | \n",
" 0.860697 | \n",
" 0.446809 | \n",
"
\n",
" \n",
" logistic | \n",
" 0.618644 | \n",
" 0.525000 | \n",
" 0.341121 | \n",
" 0.388889 | \n",
" 0.697068 | \n",
" 0.662338 | \n",
" 0.439759 | \n",
" 0.446809 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 0.718615 | \n",
" 0.459016 | \n",
" 0.775701 | \n",
" 0.518519 | \n",
" 0.815961 | \n",
" 0.616883 | \n",
" 0.746067 | \n",
" 0.486957 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.409195 | \n",
" 0.417391 | \n",
" 0.831776 | \n",
" 0.888889 | \n",
" 0.522801 | \n",
" 0.525974 | \n",
" 0.548536 | \n",
" 0.568047 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
" [\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" \"Accuracy_train\",\n",
" \"Accuracy_test\",\n",
" \"F1_train\",\n",
" \"F1_test\",\n",
" ]\n",
"]\n",
"class_metrics.sort_values(\n",
" by=\"Accuracy_test\", ascending=False\n",
").style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Accuracy_test | \n",
" F1_test | \n",
" ROC_AUC_test | \n",
" Cohen_kappa_test | \n",
" MCC_test | \n",
"
\n",
" \n",
" \n",
" \n",
" ridge | \n",
" 0.701299 | \n",
" 0.646154 | \n",
" 0.767037 | \n",
" 0.400271 | \n",
" 0.417827 | \n",
"
\n",
" \n",
" logistic | \n",
" 0.662338 | \n",
" 0.446809 | \n",
" 0.766296 | \n",
" 0.211501 | \n",
" 0.216434 | \n",
"
\n",
" \n",
" naive_bayes | \n",
" 0.707792 | \n",
" 0.494382 | \n",
" 0.753704 | \n",
" 0.301834 | \n",
" 0.315869 | \n",
"
\n",
" \n",
" knn | \n",
" 0.681818 | \n",
" 0.524272 | \n",
" 0.745556 | \n",
" 0.286093 | \n",
" 0.286855 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.525974 | \n",
" 0.568047 | \n",
" 0.729074 | \n",
" 0.173747 | \n",
" 0.240181 | \n",
"
\n",
" \n",
" random_forest | \n",
" 0.675325 | \n",
" 0.545455 | \n",
" 0.715093 | \n",
" 0.293059 | \n",
" 0.293176 | \n",
"
\n",
" \n",
" gradient_boosting | \n",
" 0.662338 | \n",
" 0.446809 | \n",
" 0.711296 | \n",
" 0.211501 | \n",
" 0.216434 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 0.616883 | \n",
" 0.486957 | \n",
" 0.612870 | \n",
" 0.183061 | \n",
" 0.183927 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 55,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
" [\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" \"ROC_AUC_test\",\n",
" \"Cohen_kappa_test\",\n",
" \"MCC_test\",\n",
" ]\n",
"]\n",
"class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False).style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\n",
" \"ROC_AUC_test\",\n",
" \"MCC_test\",\n",
" \"Cohen_kappa_test\",\n",
" ],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'ridge'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = str(class_metrics.sort_values(by=\"MCC_test\", ascending=False).iloc[0].name)\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод данных с ошибкой предсказания для оценки"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Error items count: 46'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Predicted | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 30 | \n",
" 5 | \n",
" 1 | \n",
" 109 | \n",
" 75 | \n",
" 26 | \n",
" 0 | \n",
" 36.0 | \n",
" 0.546 | \n",
" 60 | \n",
" 0 | \n",
"
\n",
" \n",
" 82 | \n",
" 7 | \n",
" 1 | \n",
" 83 | \n",
" 78 | \n",
" 26 | \n",
" 71 | \n",
" 29.3 | \n",
" 0.767 | \n",
" 36 | \n",
" 0 | \n",
"
\n",
" \n",
" 86 | \n",
" 13 | \n",
" 1 | \n",
" 106 | \n",
" 72 | \n",
" 54 | \n",
" 0 | \n",
" 36.6 | \n",
" 0.178 | \n",
" 45 | \n",
" 0 | \n",
"
\n",
" \n",
" 91 | \n",
" 4 | \n",
" 1 | \n",
" 123 | \n",
" 80 | \n",
" 15 | \n",
" 176 | \n",
" 32.0 | \n",
" 0.443 | \n",
" 34 | \n",
" 0 | \n",
"
\n",
" \n",
" 95 | \n",
" 6 | \n",
" 1 | \n",
" 144 | \n",
" 72 | \n",
" 27 | \n",
" 228 | \n",
" 33.9 | \n",
" 0.255 | \n",
" 40 | \n",
" 0 | \n",
"
\n",
" \n",
" 176 | \n",
" 6 | \n",
" 1 | \n",
" 85 | \n",
" 78 | \n",
" 0 | \n",
" 0 | \n",
" 31.2 | \n",
" 0.382 | \n",
" 42 | \n",
" 0 | \n",
"
\n",
" \n",
" 201 | \n",
" 1 | \n",
" 1 | \n",
" 138 | \n",
" 82 | \n",
" 0 | \n",
" 0 | \n",
" 40.1 | \n",
" 0.236 | \n",
" 28 | \n",
" 0 | \n",
"
\n",
" \n",
" 204 | \n",
" 6 | \n",
" 1 | \n",
" 103 | \n",
" 72 | \n",
" 32 | \n",
" 190 | \n",
" 37.7 | \n",
" 0.324 | \n",
" 55 | \n",
" 0 | \n",
"
\n",
" \n",
" 223 | \n",
" 7 | \n",
" 1 | \n",
" 142 | \n",
" 60 | \n",
" 33 | \n",
" 190 | \n",
" 28.8 | \n",
" 0.687 | \n",
" 61 | \n",
" 0 | \n",
"
\n",
" \n",
" 228 | \n",
" 4 | \n",
" 1 | \n",
" 197 | \n",
" 70 | \n",
" 39 | \n",
" 744 | \n",
" 36.7 | \n",
" 2.329 | \n",
" 31 | \n",
" 0 | \n",
"
\n",
" \n",
" 233 | \n",
" 4 | \n",
" 1 | \n",
" 122 | \n",
" 68 | \n",
" 0 | \n",
" 0 | \n",
" 35.0 | \n",
" 0.394 | \n",
" 29 | \n",
" 0 | \n",
"
\n",
" \n",
" 266 | \n",
" 0 | \n",
" 0 | \n",
" 138 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 36.3 | \n",
" 0.933 | \n",
" 25 | \n",
" 1 | \n",
"
\n",
" \n",
" 274 | \n",
" 13 | \n",
" 1 | \n",
" 106 | \n",
" 70 | \n",
" 0 | \n",
" 0 | \n",
" 34.2 | \n",
" 0.251 | \n",
" 52 | \n",
" 0 | \n",
"
\n",
" \n",
" 280 | \n",
" 0 | \n",
" 0 | \n",
" 146 | \n",
" 70 | \n",
" 0 | \n",
" 0 | \n",
" 37.9 | \n",
" 0.334 | \n",
" 28 | \n",
" 1 | \n",
"
\n",
" \n",
" 282 | \n",
" 7 | \n",
" 1 | \n",
" 133 | \n",
" 88 | \n",
" 15 | \n",
" 155 | \n",
" 32.4 | \n",
" 0.262 | \n",
" 37 | \n",
" 0 | \n",
"
\n",
" \n",
" 302 | \n",
" 5 | \n",
" 1 | \n",
" 77 | \n",
" 82 | \n",
" 41 | \n",
" 42 | \n",
" 35.8 | \n",
" 0.156 | \n",
" 35 | \n",
" 0 | \n",
"
\n",
" \n",
" 309 | \n",
" 2 | \n",
" 0 | \n",
" 124 | \n",
" 68 | \n",
" 28 | \n",
" 205 | \n",
" 32.9 | \n",
" 0.875 | \n",
" 30 | \n",
" 1 | \n",
"
\n",
" \n",
" 335 | \n",
" 0 | \n",
" 1 | \n",
" 165 | \n",
" 76 | \n",
" 43 | \n",
" 255 | \n",
" 47.9 | \n",
" 0.259 | \n",
" 26 | \n",
" 0 | \n",
"
\n",
" \n",
" 358 | \n",
" 12 | \n",
" 1 | \n",
" 88 | \n",
" 74 | \n",
" 40 | \n",
" 54 | \n",
" 35.3 | \n",
" 0.378 | \n",
" 48 | \n",
" 0 | \n",
"
\n",
" \n",
" 364 | \n",
" 4 | \n",
" 1 | \n",
" 147 | \n",
" 74 | \n",
" 25 | \n",
" 293 | \n",
" 34.9 | \n",
" 0.385 | \n",
" 30 | \n",
" 0 | \n",
"
\n",
" \n",
" 379 | \n",
" 0 | \n",
" 1 | \n",
" 93 | \n",
" 100 | \n",
" 39 | \n",
" 72 | \n",
" 43.4 | \n",
" 1.021 | \n",
" 35 | \n",
" 0 | \n",
"
\n",
" \n",
" 397 | \n",
" 0 | \n",
" 0 | \n",
" 131 | \n",
" 66 | \n",
" 40 | \n",
" 0 | \n",
" 34.3 | \n",
" 0.196 | \n",
" 22 | \n",
" 1 | \n",
"
\n",
" \n",
" 405 | \n",
" 2 | \n",
" 1 | \n",
" 123 | \n",
" 48 | \n",
" 32 | \n",
" 165 | \n",
" 42.1 | \n",
" 0.520 | \n",
" 26 | \n",
" 0 | \n",
"
\n",
" \n",
" 406 | \n",
" 4 | \n",
" 0 | \n",
" 115 | \n",
" 72 | \n",
" 0 | \n",
" 0 | \n",
" 28.9 | \n",
" 0.376 | \n",
" 46 | \n",
" 1 | \n",
"
\n",
" \n",
" 442 | \n",
" 4 | \n",
" 1 | \n",
" 117 | \n",
" 64 | \n",
" 27 | \n",
" 120 | \n",
" 33.2 | \n",
" 0.230 | \n",
" 24 | \n",
" 0 | \n",
"
\n",
" \n",
" 486 | \n",
" 1 | \n",
" 1 | \n",
" 139 | \n",
" 62 | \n",
" 41 | \n",
" 480 | \n",
" 40.7 | \n",
" 0.536 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 515 | \n",
" 3 | \n",
" 0 | \n",
" 163 | \n",
" 70 | \n",
" 18 | \n",
" 105 | \n",
" 31.6 | \n",
" 0.268 | \n",
" 28 | \n",
" 1 | \n",
"
\n",
" \n",
" 517 | \n",
" 7 | \n",
" 1 | \n",
" 125 | \n",
" 86 | \n",
" 0 | \n",
" 0 | \n",
" 37.6 | \n",
" 0.304 | \n",
" 51 | \n",
" 0 | \n",
"
\n",
" \n",
" 583 | \n",
" 8 | \n",
" 1 | \n",
" 100 | \n",
" 76 | \n",
" 0 | \n",
" 0 | \n",
" 38.7 | \n",
" 0.190 | \n",
" 42 | \n",
" 0 | \n",
"
\n",
" \n",
" 594 | \n",
" 6 | \n",
" 1 | \n",
" 123 | \n",
" 72 | \n",
" 45 | \n",
" 230 | \n",
" 33.6 | \n",
" 0.733 | \n",
" 34 | \n",
" 0 | \n",
"
\n",
" \n",
" 622 | \n",
" 6 | \n",
" 1 | \n",
" 183 | \n",
" 94 | \n",
" 0 | \n",
" 0 | \n",
" 40.8 | \n",
" 1.461 | \n",
" 45 | \n",
" 0 | \n",
"
\n",
" \n",
" 630 | \n",
" 7 | \n",
" 0 | \n",
" 114 | \n",
" 64 | \n",
" 0 | \n",
" 0 | \n",
" 27.4 | \n",
" 0.732 | \n",
" 34 | \n",
" 1 | \n",
"
\n",
" \n",
" 634 | \n",
" 10 | \n",
" 1 | \n",
" 92 | \n",
" 62 | \n",
" 0 | \n",
" 0 | \n",
" 25.9 | \n",
" 0.167 | \n",
" 31 | \n",
" 0 | \n",
"
\n",
" \n",
" 646 | \n",
" 1 | \n",
" 0 | \n",
" 167 | \n",
" 74 | \n",
" 17 | \n",
" 144 | \n",
" 23.4 | \n",
" 0.447 | \n",
" 33 | \n",
" 1 | \n",
"
\n",
" \n",
" 658 | \n",
" 11 | \n",
" 1 | \n",
" 127 | \n",
" 106 | \n",
" 0 | \n",
" 0 | \n",
" 39.0 | \n",
" 0.190 | \n",
" 51 | \n",
" 0 | \n",
"
\n",
" \n",
" 669 | \n",
" 9 | \n",
" 1 | \n",
" 154 | \n",
" 78 | \n",
" 30 | \n",
" 100 | \n",
" 30.9 | \n",
" 0.164 | \n",
" 45 | \n",
" 0 | \n",
"
\n",
" \n",
" 674 | \n",
" 8 | \n",
" 1 | \n",
" 91 | \n",
" 82 | \n",
" 0 | \n",
" 0 | \n",
" 35.6 | \n",
" 0.587 | \n",
" 68 | \n",
" 0 | \n",
"
\n",
" \n",
" 676 | \n",
" 9 | \n",
" 0 | \n",
" 156 | \n",
" 86 | \n",
" 0 | \n",
" 0 | \n",
" 24.8 | \n",
" 0.230 | \n",
" 53 | \n",
" 1 | \n",
"
\n",
" \n",
" 682 | \n",
" 0 | \n",
" 1 | \n",
" 95 | \n",
" 64 | \n",
" 39 | \n",
" 105 | \n",
" 44.6 | \n",
" 0.366 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 699 | \n",
" 4 | \n",
" 1 | \n",
" 118 | \n",
" 70 | \n",
" 0 | \n",
" 0 | \n",
" 44.5 | \n",
" 0.904 | \n",
" 26 | \n",
" 0 | \n",
"
\n",
" \n",
" 702 | \n",
" 1 | \n",
" 0 | \n",
" 168 | \n",
" 88 | \n",
" 29 | \n",
" 0 | \n",
" 35.0 | \n",
" 0.905 | \n",
" 52 | \n",
" 1 | \n",
"
\n",
" \n",
" 723 | \n",
" 5 | \n",
" 1 | \n",
" 117 | \n",
" 86 | \n",
" 30 | \n",
" 105 | \n",
" 39.1 | \n",
" 0.251 | \n",
" 42 | \n",
" 0 | \n",
"
\n",
" \n",
" 725 | \n",
" 4 | \n",
" 1 | \n",
" 112 | \n",
" 78 | \n",
" 40 | \n",
" 0 | \n",
" 39.4 | \n",
" 0.236 | \n",
" 38 | \n",
" 0 | \n",
"
\n",
" \n",
" 730 | \n",
" 3 | \n",
" 0 | \n",
" 130 | \n",
" 78 | \n",
" 23 | \n",
" 79 | \n",
" 28.4 | \n",
" 0.323 | \n",
" 34 | \n",
" 1 | \n",
"
\n",
" \n",
" 744 | \n",
" 13 | \n",
" 1 | \n",
" 153 | \n",
" 88 | \n",
" 37 | \n",
" 140 | \n",
" 40.6 | \n",
" 1.174 | \n",
" 39 | \n",
" 0 | \n",
"
\n",
" \n",
" 750 | \n",
" 4 | \n",
" 0 | \n",
" 136 | \n",
" 70 | \n",
" 0 | \n",
" 0 | \n",
" 31.2 | \n",
" 1.182 | \n",
" 22 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Predicted Glucose BloodPressure SkinThickness Insulin \\\n",
"30 5 1 109 75 26 0 \n",
"82 7 1 83 78 26 71 \n",
"86 13 1 106 72 54 0 \n",
"91 4 1 123 80 15 176 \n",
"95 6 1 144 72 27 228 \n",
"176 6 1 85 78 0 0 \n",
"201 1 1 138 82 0 0 \n",
"204 6 1 103 72 32 190 \n",
"223 7 1 142 60 33 190 \n",
"228 4 1 197 70 39 744 \n",
"233 4 1 122 68 0 0 \n",
"266 0 0 138 0 0 0 \n",
"274 13 1 106 70 0 0 \n",
"280 0 0 146 70 0 0 \n",
"282 7 1 133 88 15 155 \n",
"302 5 1 77 82 41 42 \n",
"309 2 0 124 68 28 205 \n",
"335 0 1 165 76 43 255 \n",
"358 12 1 88 74 40 54 \n",
"364 4 1 147 74 25 293 \n",
"379 0 1 93 100 39 72 \n",
"397 0 0 131 66 40 0 \n",
"405 2 1 123 48 32 165 \n",
"406 4 0 115 72 0 0 \n",
"442 4 1 117 64 27 120 \n",
"486 1 1 139 62 41 480 \n",
"515 3 0 163 70 18 105 \n",
"517 7 1 125 86 0 0 \n",
"583 8 1 100 76 0 0 \n",
"594 6 1 123 72 45 230 \n",
"622 6 1 183 94 0 0 \n",
"630 7 0 114 64 0 0 \n",
"634 10 1 92 62 0 0 \n",
"646 1 0 167 74 17 144 \n",
"658 11 1 127 106 0 0 \n",
"669 9 1 154 78 30 100 \n",
"674 8 1 91 82 0 0 \n",
"676 9 0 156 86 0 0 \n",
"682 0 1 95 64 39 105 \n",
"699 4 1 118 70 0 0 \n",
"702 1 0 168 88 29 0 \n",
"723 5 1 117 86 30 105 \n",
"725 4 1 112 78 40 0 \n",
"730 3 0 130 78 23 79 \n",
"744 13 1 153 88 37 140 \n",
"750 4 0 136 70 0 0 \n",
"\n",
" BMI DiabetesPedigreeFunction Age Outcome \n",
"30 36.0 0.546 60 0 \n",
"82 29.3 0.767 36 0 \n",
"86 36.6 0.178 45 0 \n",
"91 32.0 0.443 34 0 \n",
"95 33.9 0.255 40 0 \n",
"176 31.2 0.382 42 0 \n",
"201 40.1 0.236 28 0 \n",
"204 37.7 0.324 55 0 \n",
"223 28.8 0.687 61 0 \n",
"228 36.7 2.329 31 0 \n",
"233 35.0 0.394 29 0 \n",
"266 36.3 0.933 25 1 \n",
"274 34.2 0.251 52 0 \n",
"280 37.9 0.334 28 1 \n",
"282 32.4 0.262 37 0 \n",
"302 35.8 0.156 35 0 \n",
"309 32.9 0.875 30 1 \n",
"335 47.9 0.259 26 0 \n",
"358 35.3 0.378 48 0 \n",
"364 34.9 0.385 30 0 \n",
"379 43.4 1.021 35 0 \n",
"397 34.3 0.196 22 1 \n",
"405 42.1 0.520 26 0 \n",
"406 28.9 0.376 46 1 \n",
"442 33.2 0.230 24 0 \n",
"486 40.7 0.536 21 0 \n",
"515 31.6 0.268 28 1 \n",
"517 37.6 0.304 51 0 \n",
"583 38.7 0.190 42 0 \n",
"594 33.6 0.733 34 0 \n",
"622 40.8 1.461 45 0 \n",
"630 27.4 0.732 34 1 \n",
"634 25.9 0.167 31 0 \n",
"646 23.4 0.447 33 1 \n",
"658 39.0 0.190 51 0 \n",
"669 30.9 0.164 45 0 \n",
"674 35.6 0.587 68 0 \n",
"676 24.8 0.230 53 1 \n",
"682 44.6 0.366 22 0 \n",
"699 44.5 0.904 26 0 \n",
"702 35.0 0.905 52 1 \n",
"723 39.1 0.251 42 0 \n",
"725 39.4 0.236 38 0 \n",
"730 28.4 0.323 34 1 \n",
"744 40.6 1.174 39 0 \n",
"750 31.2 1.182 22 1 "
]
},
"execution_count": 57,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preprocessing_result = pipeline_end.transform(X_test)\n",
"preprocessed_df = pd.DataFrame(\n",
" preprocessing_result,\n",
" columns=pipeline_end.get_feature_names_out(),\n",
")\n",
"\n",
"y_pred = class_models[best_model][\"preds\"]\n",
"\n",
"error_index = y_test[y_test[\"Outcome\"] != y_pred].index.tolist()\n",
"display(f\"Error items count: {len(error_index)}\")\n",
"\n",
"error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n",
"error_df = X_test.loc[error_index].copy()\n",
"error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n",
"error_df.sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Пример использования обученной модели (конвейера) для предсказания"
]
},
{
"cell_type": "code",
"execution_count": 58,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 450 | \n",
" 1.0 | \n",
" 82.0 | \n",
" 64.0 | \n",
" 13.0 | \n",
" 95.0 | \n",
" 21.2 | \n",
" 0.415 | \n",
" 23.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"450 1.0 82.0 64.0 13.0 95.0 21.2 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"450 0.415 23.0 0.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
"
\n",
" \n",
" \n",
" \n",
" 450 | \n",
" -0.838489 | \n",
" -0.482325 | \n",
" 0.136961 | \n",
" -1.329999 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies SkinThickness Insulin BMI\n",
"450 -0.838489 -0.482325 0.136961 -1.329999"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'predicted: 0 (proba: [0.81353825 0.18646175])'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'real: 0'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = class_models[best_model][\"pipeline\"]\n",
"\n",
"example_id = 450\n",
"test = pd.DataFrame(X_test.loc[example_id, :]).T\n",
"test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T\n",
"display(test)\n",
"display(test_preprocessed)\n",
"result_proba = model.predict_proba(test)[0]\n",
"result = model.predict(test)[0]\n",
"real = int(y_test.loc[example_id].values[0])\n",
"display(f\"predicted: {result} (proba: {result_proba})\")\n",
"display(f\"real: {real}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Подбор гиперпараметров методом поиска по сетке"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.ensemble import RandomForestClassifier\n",
"import numpy as np\n",
"from sklearn import metrics\n",
"import pandas as pd\n",
"\n",
"\n",
"# Определяем числовые признаки\n",
"numeric_features = X_train.select_dtypes(include=['float64', 'int64']).columns.tolist()\n",
"\n",
"# Установка random_state\n",
"random_state = 9\n",
"\n",
"# Определение трансформера\n",
"pipeline_end = ColumnTransformer([\n",
" ('numeric', StandardScaler(), numeric_features),\n",
" # Добавьте другие трансформеры, если требуется\n",
"])\n",
"\n",
"# Объявление модели\n",
"optimized_model = RandomForestClassifier(\n",
" random_state=random_state,\n",
" criterion=\"gini\",\n",
" max_depth=5,\n",
" max_features=\"sqrt\",\n",
" n_estimators=10,\n",
")\n",
"\n",
"# Создание пайплайна с корректными шагами\n",
"result = {}\n",
"\n",
"# Обучение модели\n",
"result[\"pipeline\"] = Pipeline([\n",
" (\"pipeline\", pipeline_end),\n",
" (\"model\", optimized_model)\n",
"]).fit(X_train, y_train.values.ravel())\n",
"\n",
"# Прогнозирование и расчет метрик\n",
"result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n",
"result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n",
"result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n",
"\n",
"# Метрики для оценки модели\n",
"result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n",
"result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n",
"result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n",
"result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n",
"result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n",
"result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n",
"result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n",
"result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n",
"result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n",
"result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n",
"result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n",
"result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование данных для оценки старой и новой версии модели"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"optimized_model_type = \"random_forest\"\n",
"optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n",
"optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
" data=class_models[optimized_model_type]\n",
")\n",
"optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
" data=result\n",
")\n",
"optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n",
"optimized_metrics = optimized_metrics.set_index(\"Name\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оценка параметров старой и новой модели"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Precision_train | \n",
" Precision_test | \n",
" Recall_train | \n",
" Recall_test | \n",
" Accuracy_train | \n",
" Accuracy_test | \n",
" F1_train | \n",
" F1_test | \n",
"
\n",
" \n",
" Name | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Old | \n",
" 0.955157 | \n",
" 0.535714 | \n",
" 0.995327 | \n",
" 0.555556 | \n",
" 0.982085 | \n",
" 0.675325 | \n",
" 0.974828 | \n",
" 0.545455 | \n",
"
\n",
" \n",
" New | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"optimized_metrics[\n",
" [\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" \"Accuracy_train\",\n",
" \"Accuracy_test\",\n",
" \"F1_train\",\n",
" \"F1_test\",\n",
" ]\n",
"].style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Accuracy_test | \n",
" F1_test | \n",
" ROC_AUC_test | \n",
" Cohen_kappa_test | \n",
" MCC_test | \n",
"
\n",
" \n",
" Name | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Old | \n",
" 0.675325 | \n",
" 0.545455 | \n",
" 0.715093 | \n",
" 0.293059 | \n",
" 0.293176 | \n",
"
\n",
" \n",
" New | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 62,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"optimized_metrics[\n",
" [\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" \"ROC_AUC_test\",\n",
" \"Cohen_kappa_test\",\n",
" \"MCC_test\",\n",
" ]\n",
"].style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\n",
" \"ROC_AUC_test\",\n",
" \"MCC_test\",\n",
" \"Cohen_kappa_test\",\n",
" ],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 63,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"\n",
"\n",
"_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False\n",
")\n",
"\n",
"for index in range(0, len(optimized_metrics)):\n",
" c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n",
" disp = ConfusionMatrixDisplay(\n",
" confusion_matrix=c_matrix, display_labels=[\"Healthy\", \"Sick\"]\n",
" ).plot(ax=ax.flat[index])\n",
"\n",
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n",
"plt.show()\n",
"\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"В желтом квадрате мы видим значение 74, что обозначает количество правильно классифицированных объектов, отнесенных к классу \"Sick\". Это свидетельствует о том, что модель успешно идентифицирует объекты этого класса, минимизируя количество ложных положительных срабатываний.\n",
"\n",
"В зеленом квадрате значение 54 указывает на количество правильно классифицированных объектов, отнесенных к классу \"Healthy\". Это также является показателем хорошей точности модели в определении объектов данного класса."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Определение достижимого уровня качества модели для второй задачи"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Подготовка данных"
]
},
{
"cell_type": "code",
"execution_count": 64,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin \\\n",
"count 768.000000 768.000000 768.000000 768.000000 768.000000 \n",
"mean 3.845052 120.894531 69.105469 20.536458 79.799479 \n",
"std 3.369578 31.972618 19.355807 15.952218 115.244002 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 1.000000 99.000000 62.000000 0.000000 0.000000 \n",
"50% 3.000000 117.000000 72.000000 23.000000 30.500000 \n",
"75% 6.000000 140.250000 80.000000 32.000000 127.250000 \n",
"max 17.000000 199.000000 122.000000 99.000000 846.000000 \n",
"\n",
" BMI DiabetesPedigreeFunction Age Outcome \n",
"count 768.000000 768.000000 768.000000 768.000000 \n",
"mean 31.992578 0.471876 33.240885 0.348958 \n",
"std 7.884160 0.331329 11.760232 0.476951 \n",
"min 0.000000 0.078000 21.000000 0.000000 \n",
"25% 27.300000 0.243750 24.000000 0.000000 \n",
"50% 32.000000 0.372500 29.000000 0.000000 \n",
"75% 36.600000 0.626250 41.000000 1.000000 \n",
"max 67.100000 2.420000 81.000000 1.000000 \n"
]
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import set_config\n",
"\n",
"\n",
"random_state = 9\n",
"set_config(transform_output=\"pandas\")\n",
"df = pd.read_csv(\".//scv//diabetes.csv\")\n",
"print(df.describe())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование выборок"
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'X_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" 1 | \n",
" 105 | \n",
" 58 | \n",
" 0 | \n",
" 0 | \n",
" 24.3 | \n",
" 0.187 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 69 | \n",
" 4 | \n",
" 146 | \n",
" 85 | \n",
" 27 | \n",
" 100 | \n",
" 28.9 | \n",
" 0.189 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 494 | \n",
" 3 | \n",
" 80 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.174 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 463 | \n",
" 5 | \n",
" 88 | \n",
" 78 | \n",
" 30 | \n",
" 0 | \n",
" 27.6 | \n",
" 0.258 | \n",
" 37 | \n",
" 0 | \n",
"
\n",
" \n",
" 653 | \n",
" 2 | \n",
" 120 | \n",
" 54 | \n",
" 0 | \n",
" 0 | \n",
" 26.8 | \n",
" 0.455 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 322 | \n",
" 0 | \n",
" 124 | \n",
" 70 | \n",
" 20 | \n",
" 0 | \n",
" 27.4 | \n",
" 0.254 | \n",
" 36 | \n",
" 1 | \n",
"
\n",
" \n",
" 109 | \n",
" 0 | \n",
" 95 | \n",
" 85 | \n",
" 25 | \n",
" 36 | \n",
" 37.4 | \n",
" 0.247 | \n",
" 24 | \n",
" 1 | \n",
"
\n",
" \n",
" 27 | \n",
" 1 | \n",
" 97 | \n",
" 66 | \n",
" 15 | \n",
" 140 | \n",
" 23.2 | \n",
" 0.487 | \n",
" 22 | \n",
" 0 | \n",
"
\n",
" \n",
" 651 | \n",
" 1 | \n",
" 117 | \n",
" 60 | \n",
" 23 | \n",
" 106 | \n",
" 33.8 | \n",
" 0.466 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 197 | \n",
" 3 | \n",
" 107 | \n",
" 62 | \n",
" 13 | \n",
" 48 | \n",
" 22.9 | \n",
" 0.678 | \n",
" 23 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 9 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"196 1 105 58 0 0 24.3 \n",
"69 4 146 85 27 100 28.9 \n",
"494 3 80 0 0 0 0.0 \n",
"463 5 88 78 30 0 27.6 \n",
"653 2 120 54 0 0 26.8 \n",
".. ... ... ... ... ... ... \n",
"322 0 124 70 20 0 27.4 \n",
"109 0 95 85 25 36 37.4 \n",
"27 1 97 66 15 140 23.2 \n",
"651 1 117 60 23 106 33.8 \n",
"197 3 107 62 13 48 22.9 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"196 0.187 21 0 \n",
"69 0.189 27 0 \n",
"494 0.174 22 0 \n",
"463 0.258 37 0 \n",
"653 0.455 27 0 \n",
".. ... ... ... \n",
"322 0.254 36 1 \n",
"109 0.247 24 1 \n",
"27 0.487 22 0 \n",
"651 0.466 27 0 \n",
"197 0.678 23 1 \n",
"\n",
"[614 rows x 9 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" 0 | \n",
"
\n",
" \n",
" 69 | \n",
" 0 | \n",
"
\n",
" \n",
" 494 | \n",
" 0 | \n",
"
\n",
" \n",
" 463 | \n",
" 0 | \n",
"
\n",
" \n",
" 653 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 322 | \n",
" 1 | \n",
"
\n",
" \n",
" 109 | \n",
" 1 | \n",
"
\n",
" \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 651 | \n",
" 0 | \n",
"
\n",
" \n",
" 197 | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 1 columns
\n",
"
"
],
"text/plain": [
" Outcome\n",
"196 0\n",
"69 0\n",
"494 0\n",
"463 0\n",
"653 0\n",
".. ...\n",
"322 1\n",
"109 1\n",
"27 0\n",
"651 0\n",
"197 1\n",
"\n",
"[614 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'X_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 669 | \n",
" 9 | \n",
" 154 | \n",
" 78 | \n",
" 30 | \n",
" 100 | \n",
" 30.9 | \n",
" 0.164 | \n",
" 45 | \n",
" 0 | \n",
"
\n",
" \n",
" 379 | \n",
" 0 | \n",
" 93 | \n",
" 100 | \n",
" 39 | \n",
" 72 | \n",
" 43.4 | \n",
" 1.021 | \n",
" 35 | \n",
" 0 | \n",
"
\n",
" \n",
" 640 | \n",
" 0 | \n",
" 102 | \n",
" 86 | \n",
" 17 | \n",
" 105 | \n",
" 29.3 | \n",
" 0.695 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 658 | \n",
" 11 | \n",
" 127 | \n",
" 106 | \n",
" 0 | \n",
" 0 | \n",
" 39.0 | \n",
" 0.190 | \n",
" 51 | \n",
" 0 | \n",
"
\n",
" \n",
" 304 | \n",
" 3 | \n",
" 150 | \n",
" 76 | \n",
" 0 | \n",
" 0 | \n",
" 21.0 | \n",
" 0.207 | \n",
" 37 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 203 | \n",
" 2 | \n",
" 99 | \n",
" 70 | \n",
" 16 | \n",
" 44 | \n",
" 20.4 | \n",
" 0.235 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 605 | \n",
" 1 | \n",
" 124 | \n",
" 60 | \n",
" 32 | \n",
" 0 | \n",
" 35.8 | \n",
" 0.514 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 561 | \n",
" 0 | \n",
" 198 | \n",
" 66 | \n",
" 32 | \n",
" 274 | \n",
" 41.3 | \n",
" 0.502 | \n",
" 28 | \n",
" 1 | \n",
"
\n",
" \n",
" 280 | \n",
" 0 | \n",
" 146 | \n",
" 70 | \n",
" 0 | \n",
" 0 | \n",
" 37.9 | \n",
" 0.334 | \n",
" 28 | \n",
" 1 | \n",
"
\n",
" \n",
" 103 | \n",
" 1 | \n",
" 81 | \n",
" 72 | \n",
" 18 | \n",
" 40 | \n",
" 26.6 | \n",
" 0.283 | \n",
" 24 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
154 rows × 9 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"669 9 154 78 30 100 30.9 \n",
"379 0 93 100 39 72 43.4 \n",
"640 0 102 86 17 105 29.3 \n",
"658 11 127 106 0 0 39.0 \n",
"304 3 150 76 0 0 21.0 \n",
".. ... ... ... ... ... ... \n",
"203 2 99 70 16 44 20.4 \n",
"605 1 124 60 32 0 35.8 \n",
"561 0 198 66 32 274 41.3 \n",
"280 0 146 70 0 0 37.9 \n",
"103 1 81 72 18 40 26.6 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"669 0.164 45 0 \n",
"379 1.021 35 0 \n",
"640 0.695 27 0 \n",
"658 0.190 51 0 \n",
"304 0.207 37 0 \n",
".. ... ... ... \n",
"203 0.235 27 0 \n",
"605 0.514 21 0 \n",
"561 0.502 28 1 \n",
"280 0.334 28 1 \n",
"103 0.283 24 0 \n",
"\n",
"[154 rows x 9 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 669 | \n",
" 0 | \n",
"
\n",
" \n",
" 379 | \n",
" 0 | \n",
"
\n",
" \n",
" 640 | \n",
" 0 | \n",
"
\n",
" \n",
" 658 | \n",
" 0 | \n",
"
\n",
" \n",
" 304 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 203 | \n",
" 0 | \n",
"
\n",
" \n",
" 605 | \n",
" 0 | \n",
"
\n",
" \n",
" 561 | \n",
" 1 | \n",
"
\n",
" \n",
" 280 | \n",
" 1 | \n",
"
\n",
" \n",
" 103 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
154 rows × 1 columns
\n",
"
"
],
"text/plain": [
" Outcome\n",
"669 0\n",
"379 0\n",
"640 0\n",
"658 0\n",
"304 0\n",
".. ...\n",
"203 0\n",
"605 0\n",
"561 1\n",
"280 1\n",
"103 0\n",
"\n",
"[154 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from typing import Tuple\n",
"import pandas as pd\n",
"from pandas import DataFrame\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"def split_stratified_into_train_val_test(\n",
" df_input: DataFrame,\n",
" stratify_colname: str = \"y\",\n",
" frac_train: float = 0.6,\n",
" frac_val: float = 0.15,\n",
" frac_test: float = 0.25,\n",
" random_state: int = None,\n",
") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:\n",
" \n",
"\n",
" if not (0 < frac_train < 1) or not (0 <= frac_val <= 1) or not (0 <= frac_test <= 1):\n",
" raise ValueError(\"Fractions must be between 0 and 1 and the sum must equal 1.\")\n",
" \n",
" if not (frac_train + frac_val + frac_test == 1.0):\n",
" raise ValueError(\"fractions %f, %f, %f do not add up to 1.0\" %\n",
" (frac_train, frac_val, frac_test))\n",
"\n",
" if stratify_colname not in df_input.columns:\n",
" raise ValueError(f\"{stratify_colname} is not a column in the DataFrame.\")\n",
"\n",
" X = df_input\n",
" y = df_input[[stratify_colname]]\n",
"\n",
" \n",
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
" )\n",
"\n",
" if frac_val == 0:\n",
" return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp\n",
"\n",
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
"\n",
" df_val, df_test, y_val, y_test = train_test_split(\n",
" df_temp,\n",
" y_temp,\n",
" stratify=y_temp,\n",
" test_size=relative_frac_test,\n",
" random_state=random_state,\n",
" )\n",
"\n",
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
" \n",
" return df_train, df_val, df_test, y_train, y_val, y_test\n",
"\n",
"\n",
"X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
" df, stratify_colname=\"Outcome\", frac_train=0.80, frac_val=0.0, frac_test=0.20, random_state=random_state\n",
")\n",
"\n",
"display(\"X_train\", X_train)\n",
"display(\"y_train\", y_train)\n",
"display(\"X_test\", X_test)\n",
"display(\"y_test\", y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование конвейера для классификации данных"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"from sklearn.base import BaseEstimator, TransformerMixin\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.discriminant_analysis import StandardScaler\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"class DiabetFeatures(BaseEstimator, TransformerMixin):\n",
" def __init__(self):\n",
" pass\n",
" def fit(self, X, y=None):\n",
" return self\n",
" \n",
"\n",
"columns_to_drop = [\"Pregnancies\", \"SkinThickness\", \"Insulin\", \"BMI\"]\n",
"num_columns = [\"Glucose\", \"Age\", \"BloodPressure\", \"Outcome\", \"DiabetesPedigreeFunction\"]\n",
"cat_columns = []\n",
"\n",
"num_imputer = SimpleImputer(strategy=\"median\")\n",
"num_scaler = StandardScaler()\n",
"preprocessing_num = Pipeline(\n",
" [\n",
" (\"imputer\", num_imputer),\n",
" (\"scaler\", num_scaler),\n",
" ]\n",
")\n",
"\n",
"cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
"cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n",
"preprocessing_cat = Pipeline(\n",
" [\n",
" (\"imputer\", cat_imputer),\n",
" (\"encoder\", cat_encoder),\n",
" ]\n",
")\n",
"\n",
"features_preprocessing = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"prepocessing_num\", preprocessing_num, num_columns),\n",
" (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
" ],\n",
" remainder=\"passthrough\"\n",
")\n",
"\n",
"\n",
"drop_columns = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"drop_columns\", \"drop\", columns_to_drop),\n",
" ],\n",
" remainder=\"passthrough\",\n",
")\n",
"\n",
"features_postprocessing = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"prepocessing_cat\", preprocessing_cat, [\"Cabin_type\"]),\n",
" ],\n",
" remainder=\"passthrough\",\n",
")\n",
"\n",
"pipeline_end = Pipeline(\n",
" [\n",
" (\"features_preprocessing\", features_preprocessing),\n",
" (\"drop_columns\", drop_columns),\n",
" ]\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Демонстрация работы конвейера"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Glucose | \n",
" Age | \n",
" BloodPressure | \n",
" Outcome | \n",
" DiabetesPedigreeFunction | \n",
"
\n",
" \n",
" \n",
" \n",
" 196 | \n",
" -0.478144 | \n",
" -1.029257 | \n",
" -0.554050 | \n",
" -0.731437 | \n",
" -0.849205 | \n",
"
\n",
" \n",
" 69 | \n",
" 0.818506 | \n",
" -0.522334 | \n",
" 0.804885 | \n",
" -0.731437 | \n",
" -0.843172 | \n",
"
\n",
" \n",
" 494 | \n",
" -1.268784 | \n",
" -0.944770 | \n",
" -3.473244 | \n",
" -0.731437 | \n",
" -0.888421 | \n",
"
\n",
" \n",
" 463 | \n",
" -1.015779 | \n",
" 0.322537 | \n",
" 0.452568 | \n",
" -0.731437 | \n",
" -0.635028 | \n",
"
\n",
" \n",
" 653 | \n",
" -0.003760 | \n",
" -0.522334 | \n",
" -0.755374 | \n",
" -0.731437 | \n",
" -0.040763 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 322 | \n",
" 0.122742 | \n",
" 0.238050 | \n",
" 0.049921 | \n",
" 1.367172 | \n",
" -0.647095 | \n",
"
\n",
" \n",
" 109 | \n",
" -0.794400 | \n",
" -0.775796 | \n",
" 0.804885 | \n",
" 1.367172 | \n",
" -0.668211 | \n",
"
\n",
" \n",
" 27 | \n",
" -0.731149 | \n",
" -0.944770 | \n",
" -0.151403 | \n",
" -0.731437 | \n",
" 0.055767 | \n",
"
\n",
" \n",
" 651 | \n",
" -0.098637 | \n",
" -0.522334 | \n",
" -0.453388 | \n",
" -0.731437 | \n",
" -0.007581 | \n",
"
\n",
" \n",
" 197 | \n",
" -0.414893 | \n",
" -0.860283 | \n",
" -0.352726 | \n",
" 1.367172 | \n",
" 0.631933 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 5 columns
\n",
"
"
],
"text/plain": [
" Glucose Age BloodPressure Outcome DiabetesPedigreeFunction\n",
"196 -0.478144 -1.029257 -0.554050 -0.731437 -0.849205\n",
"69 0.818506 -0.522334 0.804885 -0.731437 -0.843172\n",
"494 -1.268784 -0.944770 -3.473244 -0.731437 -0.888421\n",
"463 -1.015779 0.322537 0.452568 -0.731437 -0.635028\n",
"653 -0.003760 -0.522334 -0.755374 -0.731437 -0.040763\n",
".. ... ... ... ... ...\n",
"322 0.122742 0.238050 0.049921 1.367172 -0.647095\n",
"109 -0.794400 -0.775796 0.804885 1.367172 -0.668211\n",
"27 -0.731149 -0.944770 -0.151403 -0.731437 0.055767\n",
"651 -0.098637 -0.522334 -0.453388 -0.731437 -0.007581\n",
"197 -0.414893 -0.860283 -0.352726 1.367172 0.631933\n",
"\n",
"[614 rows x 5 columns]"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preprocessing_result = pipeline_end.fit_transform(X_train)\n",
"preprocessed_df = pd.DataFrame(\n",
" preprocessing_result,\n",
" columns=pipeline_end.get_feature_names_out(),\n",
")\n",
"\n",
"preprocessed_df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование набора моделей для классификации"
]
},
{
"cell_type": "code",
"execution_count": 68,
"metadata": {},
"outputs": [],
"source": [
"from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree\n",
"\n",
"class_models = {\n",
" \"logistic\": {\"model\": linear_model.LogisticRegression()},\n",
" \"ridge\": {\"model\": linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n",
" \"ridge\": {\"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n",
" \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n",
" \"gradient_boosting\": {\n",
" \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n",
" },\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestClassifier(\n",
" max_depth=11, class_weight=\"balanced\", random_state=random_state\n",
" )\n",
" },\n",
" \"mlp\": {\n",
" \"model\": neural_network.MLPClassifier(\n",
" hidden_layer_sizes=(7,),\n",
" max_iter=500,\n",
" early_stopping=True,\n",
" random_state=random_state,\n",
" )\n",
" },\n",
"}"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Обучение моделей на обучающем наборе данных и оценка на тестовом¶"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: logistic\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: naive_bayes\n",
"Model: gradient_boosting\n",
"Model: random_forest\n",
"Model: mlp\n"
]
}
],
"source": [
"import numpy as np\n",
"from sklearn import metrics\n",
"\n",
"for model_name in class_models.keys():\n",
" print(f\"Model: {model_name}\")\n",
" model = class_models[model_name][\"model\"]\n",
"\n",
" model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
" model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n",
"\n",
" y_train_predict = model_pipeline.predict(X_train)\n",
" y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n",
" y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n",
"\n",
" class_models[model_name][\"pipeline\"] = model_pipeline\n",
" class_models[model_name][\"probs\"] = y_test_probs\n",
" class_models[model_name][\"preds\"] = y_test_predict\n",
"\n",
" class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n",
" y_train, y_train_predict\n",
" )\n",
" class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n",
" y_train, y_train_predict\n",
" )\n",
" class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n",
" y_train, y_train_predict\n",
" )\n",
" class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n",
" y_test, y_test_probs\n",
" )\n",
" class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n",
" class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n",
" class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n",
" y_test, y_test_predict\n",
" )\n",
" class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n",
" y_test, y_test_predict\n",
" )"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Сводная таблица оценок качества для использованных моделей классификации¶\n",
"\n",
"Матрица неточностей\n"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from sklearn.metrics import ConfusionMatrixDisplay\n",
"import matplotlib.pyplot as plt\n",
"\n",
"_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)\n",
"for index, key in enumerate(class_models.keys()):\n",
" c_matrix = class_models[key][\"Confusion_matrix\"]\n",
" disp = ConfusionMatrixDisplay(\n",
" confusion_matrix=c_matrix, display_labels=[\"Healthy\", \"Sick\"]\n",
" ).plot(ax=ax.flat[index])\n",
" disp.ax_.set_title(key)\n",
"\n",
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Precision_train | \n",
" Precision_test | \n",
" Recall_train | \n",
" Recall_test | \n",
" Accuracy_train | \n",
" Accuracy_test | \n",
" F1_train | \n",
" F1_test | \n",
"
\n",
" \n",
" \n",
" \n",
" logistic | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" ridge | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" naive_bayes | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" random_forest | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" gradient_boosting | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" knn | \n",
" 1.000000 | \n",
" 0.981818 | \n",
" 0.990654 | \n",
" 1.000000 | \n",
" 0.996743 | \n",
" 0.993506 | \n",
" 0.995305 | \n",
" 0.990826 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.729323 | \n",
" 0.685714 | \n",
" 0.453271 | \n",
" 0.444444 | \n",
" 0.750814 | \n",
" 0.733766 | \n",
" 0.559078 | \n",
" 0.539326 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
" [\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" \"Accuracy_train\",\n",
" \"Accuracy_test\",\n",
" \"F1_train\",\n",
" \"F1_test\",\n",
" ]\n",
"]\n",
"class_metrics.sort_values(\n",
" by=\"Accuracy_test\", ascending=False\n",
").style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"Почти все модели, включая логистическую регрессию, ридж-регрессию, KNN, наивный байесовский классификатор, многослойную перцептронную сеть, случайный лес, дерево решений и градиентный бустинг, демонстрируют 100% точность (1.000000) на обучающей выборке. Это указывает на то, что модели смогли подстроиться под обучающие данные, что может указывать на возможное переобучение.\n",
"\n",
"ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса\n"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Accuracy_test | \n",
" F1_test | \n",
" ROC_AUC_test | \n",
" Cohen_kappa_test | \n",
" MCC_test | \n",
"
\n",
" \n",
" \n",
" \n",
" logistic | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" ridge | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" knn | \n",
" 0.993506 | \n",
" 0.990826 | \n",
" 1.000000 | \n",
" 0.985801 | \n",
" 0.985901 | \n",
"
\n",
" \n",
" naive_bayes | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" gradient_boosting | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" random_forest | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.733766 | \n",
" 0.539326 | \n",
" 0.653148 | \n",
" 0.363893 | \n",
" 0.380814 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
" [\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" \"ROC_AUC_test\",\n",
" \"Cohen_kappa_test\",\n",
" \"MCC_test\",\n",
" ]\n",
"]\n",
"class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False).style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\n",
" \"ROC_AUC_test\",\n",
" \"MCC_test\",\n",
" \"Cohen_kappa_test\",\n",
" ],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'logistic'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = str(class_metrics.sort_values(by=\"MCC_test\", ascending=False).iloc[0].name)\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод данных с ошибкой предсказания для оценки"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'Error items count: 0'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Predicted | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"Empty DataFrame\n",
"Columns: [Pregnancies, Predicted, Glucose, BloodPressure, SkinThickness, Insulin, BMI, DiabetesPedigreeFunction, Age, Outcome]\n",
"Index: []"
]
},
"execution_count": 74,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"preprocessing_result = pipeline_end.transform(X_test)\n",
"preprocessed_df = pd.DataFrame(\n",
" preprocessing_result,\n",
" columns=pipeline_end.get_feature_names_out(),\n",
")\n",
"\n",
"y_pred = class_models[best_model][\"preds\"]\n",
"\n",
"error_index = y_test[y_test[\"Outcome\"] != y_pred].index.tolist()\n",
"display(f\"Error items count: {len(error_index)}\")\n",
"\n",
"error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n",
"error_df = X_test.loc[error_index].copy()\n",
"error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n",
"error_df.sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Пример использования обученной модели (конвейера) для предсказания"
]
},
{
"cell_type": "code",
"execution_count": 75,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 555 | \n",
" 7.0 | \n",
" 124.0 | \n",
" 70.0 | \n",
" 33.0 | \n",
" 215.0 | \n",
" 25.5 | \n",
" 0.161 | \n",
" 37.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"555 7.0 124.0 70.0 33.0 215.0 25.5 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"555 0.161 37.0 0.0 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Glucose | \n",
" Age | \n",
" BloodPressure | \n",
" Outcome | \n",
" DiabetesPedigreeFunction | \n",
"
\n",
" \n",
" \n",
" \n",
" 555 | \n",
" 0.122742 | \n",
" 0.322537 | \n",
" 0.049921 | \n",
" -0.731437 | \n",
" -0.927636 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Glucose Age BloodPressure Outcome DiabetesPedigreeFunction\n",
"555 0.122742 0.322537 0.049921 -0.731437 -0.927636"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'predicted: 0 (proba: [0.99431769 0.00568231])'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'real: 0'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"model = class_models[best_model][\"pipeline\"]\n",
"\n",
"example_id = 555\n",
"test = pd.DataFrame(X_test.loc[example_id, :]).T\n",
"test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T\n",
"display(test)\n",
"display(test_preprocessed)\n",
"result_proba = model.predict_proba(test)[0]\n",
"result = model.predict(test)[0]\n",
"real = int(y_test.loc[example_id].values[0])\n",
"display(f\"predicted: {result} (proba: {result_proba})\")\n",
"display(f\"real: {real}\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Подбор гиперпараметров методом поиска по сетке"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"d:\\5_semester\\AIM\\rep\\AIM-PIbd-31-Razubaev-S-M\\.venv\\Lib\\site-packages\\numpy\\ma\\core.py:2881: RuntimeWarning: invalid value encountered in cast\n",
" _data = np.array(data, dtype=dtype, copy=copy,\n"
]
},
{
"data": {
"text/plain": [
"{'model__criterion': 'gini',\n",
" 'model__max_depth': 5,\n",
" 'model__max_features': 'sqrt',\n",
" 'model__n_estimators': 10}"
]
},
"execution_count": 76,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import GridSearchCV\n",
"\n",
"optimized_model_type = \"random_forest\"\n",
"\n",
"random_forest_model = class_models[optimized_model_type][\"pipeline\"]\n",
"\n",
"param_grid = {\n",
" \"model__n_estimators\": [10, 50, 100],\n",
" \"model__max_features\": [\"sqrt\", \"log2\"],\n",
" \"model__max_depth\": [5, 7, 10],\n",
" \"model__criterion\": [\"gini\", \"entropy\"],\n",
"}\n",
"\n",
"gs_optomizer = GridSearchCV(\n",
" estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n",
")\n",
"gs_optomizer.fit(X_train, y_train.values.ravel())\n",
"gs_optomizer.best_params_"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Обучение модели с новыми гиперпараметрами"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"optimized_model = ensemble.RandomForestClassifier(\n",
" random_state=random_state,\n",
" criterion=\"gini\",\n",
" max_depth=5,\n",
" max_features=\"log2\",\n",
" n_estimators=10,\n",
")\n",
"\n",
"result = {}\n",
"\n",
"result[\"pipeline\"] = Pipeline([(\"pipeline\", pipeline_end), (\"model\", optimized_model)]).fit(X_train, y_train.values.ravel())\n",
"result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n",
"result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n",
"result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n",
"\n",
"result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n",
"result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n",
"result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n",
"result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n",
"result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n",
"result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n",
"result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n",
"result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n",
"result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n",
"result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n",
"result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n",
"result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Формирование данных для оценки старой и новой версии модели"
]
},
{
"cell_type": "code",
"execution_count": 78,
"metadata": {},
"outputs": [],
"source": [
"optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n",
"optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
" data=class_models[optimized_model_type]\n",
")\n",
"optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
" data=result\n",
")\n",
"optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n",
"optimized_metrics = optimized_metrics.set_index(\"Name\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оценка параметров старой и новой модели"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Precision_train | \n",
" Precision_test | \n",
" Recall_train | \n",
" Recall_test | \n",
" Accuracy_train | \n",
" Accuracy_test | \n",
" F1_train | \n",
" F1_test | \n",
"
\n",
" \n",
" Name | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Old | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" New | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 79,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"optimized_metrics[\n",
" [\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" \"Accuracy_train\",\n",
" \"Accuracy_test\",\n",
" \"F1_train\",\n",
" \"F1_test\",\n",
" ]\n",
"].style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Precision_train\",\n",
" \"Precision_test\",\n",
" \"Recall_train\",\n",
" \"Recall_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 80,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" Accuracy_test | \n",
" F1_test | \n",
" ROC_AUC_test | \n",
" Cohen_kappa_test | \n",
" MCC_test | \n",
"
\n",
" \n",
" Name | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Old | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
" New | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
" 1.000000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"optimized_metrics[\n",
" [\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" \"ROC_AUC_test\",\n",
" \"Cohen_kappa_test\",\n",
" \"MCC_test\",\n",
" ]\n",
"].style.background_gradient(\n",
" cmap=\"plasma\",\n",
" low=0.3,\n",
" high=1,\n",
" subset=[\n",
" \"ROC_AUC_test\",\n",
" \"MCC_test\",\n",
" \"Cohen_kappa_test\",\n",
" ],\n",
").background_gradient(\n",
" cmap=\"viridis\",\n",
" low=1,\n",
" high=0.3,\n",
" subset=[\n",
" \"Accuracy_test\",\n",
" \"F1_test\",\n",
" ],\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 81,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA3MAAAGxCAYAAADI9u/sAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABF70lEQVR4nO3dfZzNdf7/8ecZYy7MlYuYMZphRGQjpERJrBpdEt9vS9p1EW0xiRL5lotSkd+KVaJSJhtdbZuksttqicgKQy0hFGIQzYwZOxfmnN8fs06dncEc53PmzOfzftxvt88t8zmf85730TTPXp/3xcfl8Xg8AgAAAADYSlioOwAAAAAA8B/FHAAAAADYEMUcAAAAANgQxRwAAAAA2BDFHAAAAADYEMUcAAAAANgQxRwAAAAA2BDFHAAAAADYEMUcAAAAANgQxRwAAAAA2BDFHAAg6D777DPdeuutSk5Olsvl0pIlS3xe93g8mjhxoho2bKjo6Gj16NFDu3bt8rnm+PHjGjBggOLj41W7dm3dfffdys/Pr8JPAQBAmeqSaxRzAICgKygo0GWXXaY5c+ZU+Pr06dM1e/ZszZs3T+vXr1dMTIzS09NVWFjovWbAgAH617/+pU8++UTLli3TZ599pnvuuaeqPgIAAF7VJddcHo/HE9AnAQDADy6XS++995569+4tqezuZXJysh566CGNGTNGkpSbm6vExERlZmaqX79+2r59u1q1aqUNGzaoQ4cOkqTly5frpptu0oEDB5ScnByqjwMAMFwocy08KJ8IAFDtFBYWqri42LL2PB6PXC6Xz7nIyEhFRkb61c7evXuVnZ2tHj16eM8lJCSoY8eOWrdunfr166d169apdu3a3sCTpB49eigsLEzr16/X7bffHtiHAQDYDrlGMQcARigsLFRa41hlHym1rM3Y2Nhyc/snTZqkyZMn+9VOdna2JCkxMdHnfGJiove17OxsNWjQwOf18PBw1a1b13sNAMAc5Np/3uNXzwAAtlRcXKzsI6Xau7Gx4uMCXy6dd8KttMu/1/79+xUfH+897+/dSwAAzge5VoZiDgAMEh8XZknoeduLj/cJvfORlJQkSTp8+LAaNmzoPX/48GG1bdvWe82RI0d83nfq1CkdP37c+34AgHlMzzV2swQAg5R63JYdVklLS1NSUpJWrFjhPZeXl6f169erU6dOkqROnTopJydHGzdu9F7z6aefyu12q2PHjpb1BQBgL6bnGiNzAGAQtzxyK/BNjP1tIz8/X99++63367179yorK0t169ZVamqqRo0apSeffFLNmzdXWlqaJkyYoOTkZO/OYJdccol69uypYcOGad68eSopKVFGRob69evHTpYAYDDTc41iDgAQdF9++aW6devm/frBBx+UJA0cOFCZmZkaO3asCgoKdM899ygnJ0fXXHONli9frqioKO97Fi1apIyMDP36179WWFiY+vbtq9mzZ1f5ZwEAoLrkGs+ZAwAD5OXlKSEhQQd3XGjZQvHkFgeUm5sb8NoCAAD8Ra6VYWQOAAxS6vGo1IJ7eFa0AQBAoEzPNTZAAQAAAAAbYmQOAAwSqoXiAAAEg+m5RjEHAAZxy6NSg0MPAOAspuca0ywBAAAAwIYYmQMAg5g+HQUA4Cym5xojcwAAAABgQ4zMAYBBTN/CGQDgLKbnGsUcABjE/Z/DinYAAAg103ONaZYAAAAAYEOMzAGAQUot2sLZijYAAAiU6blGMQcABin1lB1WtAMAQKiZnmtMswQAAAAAG2JkDgAMYvpCcQCAs5ieaxRzAGAQt1wqlcuSdgAACDXTc41plgAAAABgQ4zMAYBB3J6yw4p2AAAINdNzjZE5AAAAALAhRuYAwCClFq0tsKINAAACZXquUcwBgEFMDz0AgLOYnmtMswQAAAAAG2JkDgAM4va45PZYsIWzBW0AABAo03ONYg4ADGL6dBQAgLOYnmtMswQAAAAAG2JkDgAMUqowlVpwH6/Ugr4AABAo03ONYg4ADOKxaG2Bx6ZrCwAAzmJ6rjHNEgAAAABsiJE5ADCI6QvFAQDOYnquUcwBgEFKPWEq9ViwtsBjQWcAAAiQ6bnGNEsAAAAAsCFG5gDAIG655LbgPp5bNr2FCQBwFNNzjZE5AAAAALAhRuYAwCCmLxQHADiL6blGMQcABrFuobg9p6MAAJzF9FxjmiUAAAAA2BAjcwBgkLKF4oFPJbGiDQAAAmV6rlHMAYBB3ApTqcG7fgEAnMX0XGOaJQAAAADYECNzAGAQ0xeKAwCcxfRco5gDAIO4FWb0w1UBAM5ieq4xzRIAAAAAbIiROQAwSKnHpVKPBQ9XtaANAAACZXquMTIHAAAAADbEyBwAGKTUoi2cS226tgAA4Cym5xrFHAAYxO0Jk9uCXb/cNt31CwDgLKbnGtMsAQAAAMCGGJkDAIOYPh0FAOAspucaxRwAGMQta3bscgfeFQAAAmZ6rjHNEgAAAABsiJE5ADCIW2FyW3Afz4o2AAAIlOm5RjEHAAYp9YSp1IJdv6xoAwCAQJmea/bsNQAAAAAYjpE5ADCIWy65ZcVC8cDbAAAgUKbnGsUcABjE9OkoAABnMT3X7NlrAAAAADAcI3MAYBDrHq7KvUAAQOiZnmv27DUAAAAAGI6RuSrmdrt18OBBxcXFyeWy50JLAFXL4/HoxIkTSk5OVlhYYPfg3B6X3B4LFopb0AacgVwD4C9yzToUc1Xs4MGDSklJCXU3ANjQ/v37deGFFwbUhtui6Sh2fbgqrEeuAThf5FrgKOaqWFxcnCTp+01NFB9rzx8aBM/tF7cOdRdQDZ1SidboI+/vD6A6IddwNuQaKkKuWYdiroqdnoISHxum+DhCD77CXTVD3QVUR56yf1gxhc3tCZPbgu2XrWgDzkCu4WzINVSIXLMMxRwAGKRULpVa8GBUK9oAACBQpueaPUtQAAAAADAcI3MAYBDTp6MAAJzF9FyjmAMAg5TKmqkkpYF3BQCAgJmea/YsQQEAAADAcIzMAYBBTJ+OAgBwFtNzzZ69BgAAAADDMTIHAAYp9YSp1IK7j1a0AQBAoEzPNXv2GgBwXjxyyW3B4fFjsXlpaakmTJigtLQ0RUdH66KLLtKUKVPk8Xh+7pfHo4kTJ6phw4aKjo5Wjx49tGvXrmD8FQAAHMT0XKOYAwAE1TPPPKO5c+fq+eef1/bt2/XMM89o+vTpeu6557zXTJ8+XbNnz9a8efO0fv16xcTEKD09XYWFhSHsOQAA5VWnXGOaJQAYJBTTUdauXatevXrp5ptvliQ1adJEb7zxhv75z39KKrt7OWvWLD322GPq1auXJGnhwoVKTEzUkiVL1K9fv4D7CwBwJtNzjZE5ADCI2+Oy7JCkvLw8n6OoqKjc9+zcubNWrFihnTt3SpK2bNmiNWvW6MYbb5Qk7d27V9nZ2erRo4f3PQkJCerYsaPWrVtXBX8rAAC7Mj3XGJkDAJy3lJQUn68nTZqkyZMn+5x75JFHlJeXp5YtW6pGjRoqLS3VU089pQEDBkiSsrOzJUmJiYk+70tMTPS+BgBAVbBbrlHMAYBBShWmUgsmZZxuY//+/YqPj/eej4yMLHft22+/rUWLFmnx4sX61a9+paysLI0aNUrJyckaOHBgwH0BAJjL9FyjmAMAg/xyKkmg7UhSfHy8T+hV5OGHH9YjjzziXSPQunVrff/995o6daoGDhyopKQkSdLhw4fVsGFD7/sOHz6stm3bBtxXAIBzmZ5rrJkDAATVyZMnFRbmGzc1atSQ2+2WJKWlpSkpKUkrVqzwvp6Xl6f169erU6dOVdpXAADOpTrlGiNzAGAQt8LktuA+nj9t3HrrrXrqqaeUmpqqX/3qV9q8ebOeffZZDRkyRJLkcrk0atQoPfnkk2revLnS0tI0YcIEJScnq3fv3gH3FQDgXKbnGsUcABik1ONSqQXTUfxp47nnntOECRM0fPhwHTlyRMnJyfr973+viRMneq8ZO3asCgoKdM899ygnJ0fXXHONli9frqioqID7CgBwLtNzzeX55aPKEXR5eXlKSEjQTzubKj6OWa7wlZ7cNtRdQDV0ylOilXpfubm555zHfyanf/fct7qPImNrBtynovwSze3yl4D6BGcg13A25BoqQq5Zh5E5ADCI1QvFAQAIJdNzjVtoAAAAAGBDjMwBgEE8njC5PYHfx/NY0AYAAIEyPdco5gDAIKVyqVQWLBS3oA0AAAJleq7ZswQFAAAAAMMxMgcABnF7rFnk7WYfZABANWB6rlHMAYBB3BatLbCiDQAAAmV6rtmz1wAAAABgOEbmAMAgbrnktmCRtxVtAAAQKNNzjWIOAAxS6nGp1IK1BVa0AQBAoEzPNaZZAgAAAIANMTIHAAYxfaE4AMBZTM81e/YaAAAAAAzHyBwAGMQtlzXP47HpQnEAgLOYnmsUcwBgEI9Fu355bBp6AABnMT3XmGYJAAAAADbEyBwAGMTtsWg6ik23cAYAOIvpuUYxBwAGMX3XLwCAs5iea/bsNQAAAAAYjpE5ADCI6dNRAADOYnquUcwBgEHcFu36ZdctnAEAzmJ6rjHNEgAAAABsiJE5ADCI6dNRAADOYnquUcwBgEFMDz0AgLOYnmtMswQAAAAAG2JkDgAMYvodTACAs5iea4zMAQAAAIANMTIHAAYx/Q4mAMBZTM81ijkAMIhH1jxLxxN4VwAACJjpucY0SwAAAACwIUbmAMAgpk9HAQA4i+m5RjEHAAYxPfQAAM5ieq4xzRIAAAAAbIiROQAwiOl3MAEAzmJ6rlHMAYBBTA89AICzmJ5rTLMEAAAAABtiZA4ADOLxuOSx4O6jFW0AABAo03ONkTkAAAAAsCFG5gDAIG655JYFawssaAMAgECZnmsUcwBgENMXigMAnMX0XGOaJSz31Rcxmvi7NPVv9yulJ7fV2o8TfF73eKTXpiepf9tf6dambTTujov0w54In2vyfqqhaSNSdfvFrdWnZWs9+2CK/l3Aj6spbh30o15bv00f7NmqPy7bpRZtT4a6SwAMRq4hUOQagsWI3yIrV66Uy+VSTk7OWa9r0qSJZs2aVSV9crLCk2Fq+qt/K+PpAxW+/vacBnr/1fq6f9p+/XHZTkXVcuv/7rxIxYU/3xF5JqOxvt8Rralv7tYTr+3RV+tjNevhlKr6CAihrrf9pHsmHdSiZ5M0Iv1i7dkWpacW71FCvZJQd80RTi8Ut+JA6JBrVYtcQyDIteAyPddCWswNGjRIvXv3Lne+siF1vjIzM1W7du2gtA3piu4nNGhctq6+Mbfcax6PtGR+ffV/IFude+apaatCjZ39vY4drqm1y8vudO7bFakv/xGv0TP2qWX7k7q0Y4GGP3lAq96vrWPZzAx2uj73/Kjli+vqb2/V1b5dUZo97kIV/dul9P7HQ901Rzg9HcWKA+WRa85EriEQ5FpwmZ5rRozMofrI3heh40dqqn2XfO+5mHi3WrY7qe0bYyRJ27+MUWzCKV182b+917TvckKuMOmbzTFV3mdUnfCabjVvc1KbVsd5z3k8Lm1eHadWlzMlBUD1Q67hbMg1BJstirk1a9aoS5cuio6OVkpKikaOHKmCggLv63/605/UoUMHxcXFKSkpSXfeeaeOHDlSYVsrV67U4MGDlZubK5fLJZfLpcmTJ3tfP3nypIYMGaK4uDilpqbqpZde8r7WvXt3ZWRk+LR39OhRRUREaMWKFRV+v6KiIuXl5fkcJjt+pOwOZO36vlMLatcv8b52/Gi4atc75fN6jXAprvYp7zVwpvi6paoRLuUc9f33/NOP4apT/9QZ3gV/mD4dpbog15yDXMPZkGvBZ3quVftibvfu3erZs6f69u2rrVu36q233tKaNWt8wqekpERTpkzRli1btGTJEn333XcaNGhQhe117txZs2bNUnx8vA4dOqRDhw5pzJgx3tdnzJihDh06aPPmzRo+fLjuu+8+7dixQ5I0dOhQLV68WEVFRd7rX3/9dTVq1Ejdu3ev8PtNnTpVCQkJ3iMlhfnxAELHY9FUFLuGXnVArgGAdUzPtZAXc8uWLVNsbKzPceONN3pfnzp1qgYMGKBRo0apefPm6ty5s2bPnq2FCxeqsLBQkjRkyBDdeOONatq0qa666irNnj1bH3/8sfLz88t9v4iICCUkJMjlcikpKUlJSUmKjY31vn7TTTdp+PDhatasmcaNG6cLLrhA//jHPyRJffr0kSS9//773uszMzM1aNAguVwV/wCMHz9eubm53mP//v2B/6XZWN0GZXehco7W9Dmfc7Sm97W69U8p55jvHazSU9KJnHDvNXCmvOM1VHpKqv1fdyvrXHBKPx3l7jXsgVwzC7mGsyHXEGwhL+a6deumrKwsn2P+/Pne17ds2aLMzEyfUExPT5fb7dbevXslSRs3btStt96q1NRUxcXFqWvXrpKkffv2+d2fNm3aeP98OhhPT22JiorSb3/7W7366quSpE2bNunrr78+491SSYqMjFR8fLzPYbKk1GLVbVCizWt+/h+NghNh+mZzLV1yedkUo0s6FCg/N1y7tkZ7r8laEyePW2rZrqBcm3COUyVh2rW1ltpdc8J7zuXyqO01+dq2sVYIe+YcHpVt2BDwEeoPUo2Ra2Yh13A25FrwmZ5rIb8lEBMTo2bNmvmcO3Dg561/8/Pz9fvf/14jR44s997U1FQVFBQoPT1d6enpWrRokerXr699+/YpPT1dxcXFfvenZk3fO2sul0tut9v79dChQ9W2bVsdOHBACxYsUPfu3dW4cWO/v4+T/bsgTAf3Rnq/zt4fod1fRyuu9ik1uLBEvYce1Rt/TFSjtCIlpRbrtekNVS+xRJ17lu0Sltq8SB265WnWmBTd/8wBlZa4NOexRuraK0f1kriD6XR/eekCjZm1Xzu31NKOzbV0+7Cjiqrl1t/erBvqrjmCWy65ZMHDVS1ow6nINech1xAIci24TM+1kBdz59K+fXtt27atXDCe9tVXX+nYsWOaNm2ad97+l19+edY2IyIiVFpael79ad26tTp06KCXX35Zixcv1vPPP39e7TjZzi21NPZ/fv739eLkRpKk6+84rjGz9umOEUdUeDJMfxybovy8GvrVFQV6atEeRUT9fE9k3PPfa86jF+qROy6SK0y65qYcDX/yhyr/LKh6q5bWUUK9Uv3u4WzVqX9Ke/4VrUcHpCnnx5rnfjNgA+Sa/ZBrCAS5hmCq9sXcuHHjdNVVVykjI0NDhw5VTEyMtm3bpk8++UTPP/+8UlNTFRERoeeee0733nuvvv76a02ZMuWsbTZp0kT5+flasWKFLrvsMtWqVUu1alV+qHvo0KHKyMhQTEyMbr/99kA/ouNc1jlffz2YdcbXXS5p4NhsDRybfcZr4uuUavwL3wehd7CDpQsu0NIFF4S6G45k1Y5ddl0oXh2Qa/ZDriFQ5FrwmJ5rIV8zdy5t2rTRqlWrtHPnTnXp0kXt2rXTxIkTlZycLEmqX7++MjMz9c4776hVq1aaNm2a/vCHP5y1zc6dO+vee+/Vb37zG9WvX1/Tp0/3q0/9+/dXeHi4+vfvr6ioqPP+bAAA85BrAACruDwej13X+4XMd999p4suukgbNmxQ+/bt/XpvXl6eEhIS9NPOpoqPq/a1NKpYenLbUHcB1dApT4lW6n3l5uae92YTp3/3XPr2w6pRK/LcbziH0pNF+vqO/xdQn1B9kGsIFnINFSHXrFPtp1lWJyUlJTp27Jgee+wxXXXVVX4HHgCE2uldu6xoB/ZHrgGwO9NzjVtofvj888/VsGFDbdiwQfPmzQt1dwAACAi5BgD2xsicH6677joxKxWAnZm+UBy+yDUAdmd6rlHMAYBBTA89AICzmJ5rTLMEAAAAABtiZA4ADOL2uOSy4O6j26Z3MAEAzmJ6rlHMAYBBTN/1CwDgLKbnGtMsAQAAAMCGGJkDAIOU3cG0YqG4BZ0BACBApucaI3MAAAAAYEOMzAGAQUzfwhkA4Cym5xrFHAAYxPOfw4p2AAAINdNzjWmWAAAAAGBDjMwBgEFMn44CAHAW03ONYg4ATGL6fBQAgLMYnmtMswQAAAAAG6KYAwCT/Gc6SqCH/JyO8sMPP+iuu+5SvXr1FB0drdatW+vLL7/8uVsejyZOnKiGDRsqOjpaPXr00K5du6z+9AAApzE81yjmAMAgZQ9XteaorJ9++klXX321atasqY8//ljbtm3TjBkzVKdOHe8106dP1+zZszVv3jytX79eMTExSk9PV2FhYRD+FgAATmF6rrFmDgAQVM8884xSUlK0YMEC77m0tDTvnz0ej2bNmqXHHntMvXr1kiQtXLhQiYmJWrJkifr161flfQYA4EyqU64xMgcABrFiKsovdw7Ly8vzOYqKisp9z6VLl6pDhw763//9XzVo0EDt2rXTyy+/7H197969ys7OVo8ePbznEhIS1LFjR61bty74fykAANsyPdco5gDAJKfXBVhxSEpJSVFCQoL3mDp1arlvuWfPHs2dO1fNmzfXX//6V913330aOXKkXnvtNUlSdna2JCkxMdHnfYmJid7XAACokOG5xjRLAMB5279/v+Lj471fR0ZGlrvG7XarQ4cOevrppyVJ7dq109dff6158+Zp4MCBVdZXAADOxW65xsgcABjE6oXi8fHxPkdFodewYUO1atXK59wll1yiffv2SZKSkpIkSYcPH/a55vDhw97XAACoiOm5RjEHAAiqq6++Wjt27PA5t3PnTjVu3FhS2aLxpKQkrVixwvt6Xl6e1q9fr06dOlVpXwEAOJfqlGtMswQAk3j+c1jRTiWNHj1anTt31tNPP6077rhD//znP/XSSy/ppZdekiS5XC6NGjVKTz75pJo3b660tDRNmDBBycnJ6t27twWdBQA4luG5RjEHAAb55Y5dgbZTWVdccYXee+89jR8/Xk888YTS0tI0a9YsDRgwwHvN2LFjVVBQoHvuuUc5OTm65pprtHz5ckVFRQXcVwCAc5meaxRzAICgu+WWW3TLLbec8XWXy6UnnnhCTzzxRBX2CgCA81Ndco1iDgBMY8V0FAAAqguDc61SxdzSpUsr3eBtt9123p0BAARXKKajVEfkGgA4g+m5VqlirrIL9Vwul0pLSwPpDwAAQUeuAQCcoFLFnNvtDnY/AABVIQS7flVH5BoAOIThuRbQc+YKCwut6gcAoEq4LDych1wDALsxO9f8LuZKS0s1ZcoUNWrUSLGxsdqzZ48kacKECXrllVcs7yAAAMFErgEA7MrvYu6pp55SZmampk+froiICO/5Sy+9VPPnz7e0cwAAi3ksPByCXAMAGzM81/wu5hYuXKiXXnpJAwYMUI0aNbznL7vsMn3zzTeWdg4AgGAj1wAAduX3c+Z++OEHNWvWrNx5t9utkpISSzoFAAgSwxeKV4RcAwAbMzzX/B6Za9WqlVavXl3u/J///Ge1a9fOkk4BAILE47LucAhyDQBszPBc83tkbuLEiRo4cKB++OEHud1u/eUvf9GOHTu0cOFCLVu2LBh9BAAgaMg1AIBd+T0y16tXL33wwQf6+9//rpiYGE2cOFHbt2/XBx98oOuvvz4YfQQAWMTjse5wCnINAOzL9Fzze2ROkrp06aJPPvnE6r4AAILN8LUFZ0KuAYBNGZ5r51XMSdKXX36p7du3Sypbb3D55Zdb1ikAAKoauQYAsBu/i7kDBw6of//++vzzz1W7dm1JUk5Ojjp37qw333xTF154odV9BABYxapF3jZdKF4Rcg0AbMzwXPN7zdzQoUNVUlKi7du36/jx4zp+/Li2b98ut9utoUOHBqOPAACLuDzWHU5BrgGAfZmea36PzK1atUpr165VixYtvOdatGih5557Tl26dLG0cwAABBu5BgCwK7+LuZSUlAofolpaWqrk5GRLOgUACBLDF4pXhFwDABszPNf8nmb5//7f/9P999+vL7/80nvuyy+/1AMPPKA//OEPlnYOAGAxwx+uWhFyDQBszPBcq9TIXJ06deRy/fwBCwoK1LFjR4WHl7391KlTCg8P15AhQ9S7d++gdBQAAKuQawAAJ6hUMTdr1qwgdwMAUCUMn45yGrkGAA5heK5VqpgbOHBgsPsBAECVIdcAAE5w3g8Nl6TCwkIVFxf7nIuPjw+oQwCAIDL8Dua5kGsAYDOG55rfG6AUFBQoIyNDDRo0UExMjOrUqeNzAACqMY+Fh0OQawBgY4bnmt/F3NixY/Xpp59q7ty5ioyM1Pz58/X4448rOTlZCxcuDEYfAQAIGnINAGBXfk+z/OCDD7Rw4UJdd911Gjx4sLp06aJmzZqpcePGWrRokQYMGBCMfgIArGDV9ss23cK5IuQaANiY4bnm98jc8ePH1bRpU0ll6wiOHz8uSbrmmmv02WefWds7AIClXB7rDqcg1wDAvkzPNb+LuaZNm2rv3r2SpJYtW+rtt9+WVHZns3bt2pZ2DgCAYCPXAAB25XcxN3jwYG3ZskWS9Mgjj2jOnDmKiorS6NGj9fDDD1veQQCAhQxfKF4Rcg0AbMzwXPN7zdzo0aO9f+7Ro4e++eYbbdy4Uc2aNVObNm0s7RwAAMFGrgEA7Cqg58xJUuPGjdW4cWMr+gIAQMiRawAAu6hUMTd79uxKNzhy5Mjz7gwAILhcsmaRtz33/PoZuQYAzmB6rlWqmJs5c2alGnO5XIReJd1+cWuFu2qGuhuoZnbOuzLUXUA15P53oTTq/VB3w1HINeuRa6gIuYaKkGvWqVQxd3qXLwCAzRn+PJ7TyDUAcAjDcy3gNXMAABuxascum+76BQBwGMNzze9HEwAAAAAAQo+ROQAwieF3MAEADmN4rlHMAYBBXB6Ldv2yaegBAJzF9FxjmiUAAAAA2NB5FXOrV6/WXXfdpU6dOumHH36QJP3pT3/SmjVrLO0cAMBiHgsPByHXAMCmDM81v4u5d999V+np6YqOjtbmzZtVVFQkScrNzdXTTz9teQcBABYyPPQqQq4BgI0Znmt+F3NPPvmk5s2bp5dfflk1a/78cNCrr75amzZtsrRzAAAEG7kGALArvzdA2bFjh6699tpy5xMSEpSTk2NFnwAAQWL6QvGKkGsAYF+m55rfI3NJSUn69ttvy51fs2aNmjZtakmnAABB4nFZdzgEuQYANmZ4rvldzA0bNkwPPPCA1q9fL5fLpYMHD2rRokUaM2aM7rvvvmD0EQCAoCHXAAB25fc0y0ceeURut1u//vWvdfLkSV177bWKjIzUmDFjdP/99wejjwAAqxj+cNWKkGsAYGOG55rfxZzL5dKjjz6qhx9+WN9++63y8/PVqlUrxcbGBqN/AAAEFbkGALArv4u50yIiItSqVSsr+wIACDLTF4qfDbkGAPZjeq75Xcx169ZNLteZFwh++umnAXUIABBEhk9HqQi5BgA2Zniu+V3MtW3b1ufrkpISZWVl6euvv9bAgQOt6hcAAFWCXAMA2JXfxdzMmTMrPD958mTl5+cH3CEAQBBZNB3FrncwK0KuAYCNGZ5rfj+a4Ezuuusuvfrqq1Y1BwAIBo+Fh8ORawBgA4bnmmXF3Lp16xQVFWVVcwAAhBS5BgCo7vyeZtmnTx+frz0ejw4dOqQvv/xSEyZMsKxjAIAgMHyheEXINQCwMcNzze9iLiEhwefrsLAwtWjRQk888YRuuOEGyzoGALCe6Vs4V4RcAwD7Mj3X/CrmSktLNXjwYLVu3Vp16tQJVp8AAKgS5BoAwM78WjNXo0YN3XDDDcrJyQlSdwAAqDrkGgDAzvzeAOXSSy/Vnj17gtEXAACqHLkGALArv4u5J598UmPGjNGyZct06NAh5eXl+RwAgGrM8C2cK0KuAYCNGZ5rlV4z98QTT+ihhx7STTfdJEm67bbb5HK5vK97PB65XC6VlpZa30sAgCVMXyj+S+QaANif6blW6WLu8ccf17333qt//OMfwewPAABVglwDANhdpYs5j6esXO3atWvQOgMAqAI2vftoNXINABzC4Fzz69EEv5x+AgCwIcMfrvrfyDUAsDnDc82vYu7iiy8+Z/AdP348oA4BAFBVyDUAgJ35Vcw9/vjjSkhICFZfAABBZvpC8f9GrgGAvZmea34Vc/369VODBg2C1RcAQLAZPh3lv5FrAGBzhudapZ8zx7oCAICTkGsAALvzezdLAIB9mT4d5ZfINQCwP9NzrdLFnNvtDmY/AABVwfDpKL9ErgGAAxiea5WeZgkAgBWmTZsml8ulUaNGec8VFhZqxIgRqlevnmJjY9W3b18dPnw4dJ0EAKCSQplrFHMAYBKPhcd52LBhg1588UW1adPG5/zo0aP1wQcf6J133tGqVat08OBB9enT5/y+CQDAHIbnGsUcAKBK5Ofna8CAAXr55ZdVp04d7/nc3Fy98sorevbZZ9W9e3ddfvnlWrBggdauXasvvvgihD0GAODMqkOuUcwBgEFOLxS34pCkvLw8n6OoqOiM33vEiBG6+eab1aNHD5/zGzduVElJic/5li1bKjU1VevWrQvK3wMAwBlMzzWKOQAwicXTUVJSUpSQkOA9pk6dWuG3ffPNN7Vp06YKX8/OzlZERIRq167tcz4xMVHZ2dmBfV4AgLMZnmt+PTQcAIBf2r9/v+Lj471fR0ZGVnjNAw88oE8++URRUVFV2T0AAPxit1xjZA4ATGLxHcz4+Hifo6LQ27hxo44cOaL27dsrPDxc4eHhWrVqlWbPnq3w8HAlJiaquLhYOTk5Pu87fPiwkpKSLP8rAAA4iOG5xsgcABgkFA9X/fWvf62vvvrK59zgwYPVsmVLjRs3TikpKapZs6ZWrFihvn37SpJ27Nihffv2qVOnToF3FgDgWKbnGsUcACCo4uLidOmll/qci4mJUb169bzn7777bj344IOqW7eu4uPjdf/996tTp0666qqrQtFlAADOqDrlGsUcAJgkgGfplGvHQjNnzlRYWJj69u2roqIipaen64UXXrD2mwAAnMfwXKOYAwCDhGI6SkVWrlzp83VUVJTmzJmjOXPmBNYwAMAopucaG6AAAAAAgA0xMgcAJqmm01EAADgvhucaI3MAAAAAYEOMzAGASQy/gwkAcBjDc41iDgAM4vrPYUU7AACEmum5xjRLAAAAALAhRuYAwCSGT0cBADiM4blGMQcABqkuz+MBAMAKpuca0ywBAAAAwIYYmQMAkxg+HQUA4DCG5xrFHACYxqaBBQBAhQzONaZZAgAAAIANMTIHAAYxfaE4AMBZTM81ijkAMInhawsAAA5jeK4xzRIAAAAAbIiROUkul0vvvfeeevfufc5rJ0+erCVLligrKyvo/XK6Wwf9qP+574jq1j+lPdui9cJjjbQjq1aou4UqVO+DA6r34UGfc8WJUfru8Ta+F3o8avT8TsX8K1c/3NtcBW3rVGEvncX06SimINdCg1wDuVb1TM81I4q5o0ePauLEifrwww91+PBh1alTR5dddpkmTpyoq6++WocOHVKdOvxHVJW63vaT7pl0UM89cqG+2VRLtw87qqcW79HdXVoo91jNUHcPVagoOVoHHmjh/dpTw1XumtorDldll4Bqj1yrfsg1nEauoSoZMc2yb9++2rx5s1577TXt3LlTS5cu1XXXXadjx45JkpKSkhQZGRniXpqlzz0/avniuvrbW3W1b1eUZo+7UEX/dim9//FQdw1VzBPmUmlChPdwx/r+T0/k/gLV+fshZf8uLUQ9dBiPhQdChlyrfsg1nEauVTHDc83xxVxOTo5Wr16tZ555Rt26dVPjxo115ZVXavz48brtttsklU1HWbJkifc9Bw4cUP/+/VW3bl3FxMSoQ4cOWr9+fYXt7969W02bNlVGRoY8Hpv+FFSx8JpuNW9zUptWx3nPeTwubV4dp1aXnwxhzxAKEUcK1XTcZjV5bIuSXtmt8ONF3tdcxaVKemW3jvRrotKEiBD20jlOT0ex4kBokGvVD7mGXyLXqpbpueb4aZaxsbGKjY3VkiVLdNVVV53zTmV+fr66du2qRo0aaenSpUpKStKmTZvkdrvLXbt161alp6fr7rvv1pNPPllhe0VFRSoq+vk/4ry8vMA+kAPE1y1VjXAp56jvj99PP4YrpVnRGd4FJ/p3WqyKBjZVcWKUwnOLVe/Dg0r5w3Z9N7G1PFE1VP+dfSq8KI61BMAvkGvVD7mG08g1VDXHF3Ph4eHKzMzUsGHDNG/ePLVv315du3ZVv3791KZNm3LXL168WEePHtWGDRtUt25dSVKzZs3KXbd27VrdcsstevTRR/XQQw+d8ftPnTpVjz/+uHUfCHCQk5fW9v65+MJaKkyLVdr/bVHcxuMqjQ1XrW/y9P2jl4aug05k+BbOTkCuAdUXuRYChuea46dZSmVrCw4ePKilS5eqZ8+eWrlypdq3b6/MzMxy12ZlZaldu3bewKvIvn37dP3112vixIlnDTxJGj9+vHJzc73H/v37A/04tpd3vIZKT0m165/yOV/nglP66ajj7y/gLNy1wlWSGKWII4WqtSNPNX8sUrMHN6r58H+q+fB/SpKSX9ylC2dsD3FPbczwtQVOQa5VL+QazoRcqwKG55oRxZwkRUVF6frrr9eECRO0du1aDRo0SJMmTSp3XXR09Dnbql+/vq688kq98cYb55xeEhkZqfj4eJ/DdKdKwrRray21u+aE95zL5VHba/K1bSNbOJvMVViqmkcLdSqhpo6nN9T3j12q7x/9+ZCko/+bquyBTUPcUyD0yLXqg1zDmZBrCDZjirn/1qpVKxUUFJQ736ZNG2VlZen48TPvPhUdHa1ly5YpKipK6enpOnHixBmvRcX+8tIFuvHO4+rxv8eV0qxQ9087oKhabv3tzTPfOYbzXPDnfYremafwH4sUtfuEkuftkifMpRNX1FNpQoSKG9XyOSSppG6kTl3ALn3ny/SF4k5GroUWuQaJXAsF03PN8cXcsWPH1L17d73++uvaunWr9u7dq3feeUfTp09Xr169yl3fv39/JSUlqXfv3vr888+1Z88evfvuu1q3bp3PdTExMfrwww8VHh6uG2+8Ufn5+VX1kRxh1dI6enlKsn73cLZe+GSnLvpVoR4dkKacH3kWj0nCc4rV8JXdajJ5qxq+/K1KY8O1f1wrlcbxcxA0hk9HcQJyrXoi1yCRayFheK45fiJ3bGysOnbsqJkzZ2r37t0qKSlRSkqKhg0bpv/7v/8rd31ERIT+9re/6aGHHtJNN92kU6dOqVWrVpozZ06FbX/88cdKT0/XzTffrI8++kgxMTFV8bEcYemCC7R0wQWh7gZCKHto+U0YzmbnvCuD1BPAPsi16otcA7mGquby8BCZKpWXl6eEhARdp14Kd3GXBr74pY6KuP9dqAOjJio3N/e81yed/t3T9rdPqUZEVMB9Ki0uVNafHg2oT3AGcg1nQ66hIuSadRw/zRIAAAAAnMjx0ywBAL9g+PN4AAAOY3iuUcwBgEGs2rHLrrt+AQCcxfRcY5olAAAAANgQI3MAYBLDp6MAABzG8FyjmAMAg5g+HQUA4Cym5xrTLAEAAADAhhiZAwCTGD4dBQDgMIbnGsUcABjE9OkoAABnMT3XmGYJAAAAADbEyBwAmMTw6SgAAIcxPNco5gDAMHadSgIAQEVMzjWmWQIAAACADTEyBwAm8XjKDivaAQAg1AzPNUbmAAAAAMCGGJkDAIOYvoUzAMBZTM81ijkAMInhu34BABzG8FxjmiUAAAAA2BAjcwBgEJe77LCiHQAAQs30XKOYAwCTGD4dBQDgMIbnGtMsAQAAAMCGGJkDAIOYvusXAMBZTM81ijkAMInhD1cFADiM4bnGNEsAAAAAsCFG5gDAIKZPRwEAOIvpucbIHAAAAADYECNzAGASw7dwBgA4jOG5RjEHAAYxfToKAMBZTM81plkCAAAAgA0xMgcAJjF8C2cAgMMYnmsUcwBgENOnowAAnMX0XGOaJQAAAADYECNzAGASw3f9AgA4jOG5RjEHAAYxfToKAMBZTM81plkCAAAAgA0xMgcAJnF7yg4r2gEAINQMzzWKOQAwieFrCwAADmN4rjHNEgAAAABsiJE5ADCISxYtFA+8CQAAAmZ6rjEyBwAAAAA2xMgcAJjE4yk7rGgHAIBQMzzXKOYAwCCmP48HAOAspuca0ywBAEE1depUXXHFFYqLi1ODBg3Uu3dv7dixw+eawsJCjRgxQvXq1VNsbKz69u2rw4cPh6jHAACcWXXKNYo5ADCJx8KjklatWqURI0boiy++0CeffKKSkhLdcMMNKigo8F4zevRoffDBB3rnnXe0atUqHTx4UH369An44wIAHM7wXGOaJQAYxOXxyGXBugB/2li+fLnP15mZmWrQoIE2btyoa6+9Vrm5uXrllVe0ePFide/eXZK0YMECXXLJJfriiy901VVXBdxfAIAzmZ5rjMwBAM5bXl6ez1FUVHTO9+Tm5kqS6tatK0nauHGjSkpK1KNHD+81LVu2VGpqqtatWxecjgMAUAG75RrFHACYxG3hISklJUUJCQneY+rUqWf/9m63Ro0apauvvlqXXnqpJCk7O1sRERGqXbu2z7WJiYnKzs4O/DMDAJzL8FxjmiUAGMTq6Sj79+9XfHy893xkZORZ3zdixAh9/fXXWrNmTcB9AADA9FyjmAMAnLf4+Hif0DubjIwMLVu2TJ999pkuvPBC7/mkpCQVFxcrJyfH5y7m4cOHlZSUZHWXAQA4I7vlGtMsAcAkIdj1y+PxKCMjQ++9954+/fRTpaWl+bx++eWXq2bNmlqxYoX33I4dO7Rv3z516tTp/D4nAMAMhucaI3MAgKAaMWKEFi9erPfff19xcXHe9QIJCQmKjo5WQkKC7r77bj344IOqW7eu4uPjdf/996tTp07sZAkAqHaqU65RzAGASTyessOKdipp7ty5kqTrrrvO5/yCBQs0aNAgSdLMmTMVFhamvn37qqioSOnp6XrhhRcC7ycAwNkMzzWKOQAwiMtTdljRTmV5KhGQUVFRmjNnjubMmRNArwAApjE911gzBwAAAAA2xMgcAJgkBNNRAAAIGsNzjWIOAAzicpcdVrQDAEComZ5rTLMEAAAAABtiZA4ATGL4dBQAgMMYnmsUcwBgEj8fjHrWdgAACDXDc41iroqd3sr0lEps+0OD4HH/uzDUXUA15C4s+7mozFbIQFUj13A25BoqQq5Zh2Kuip04cUKStEYfhbgnqJZGvR/qHqAaO3HihBISEgJqw+XxyGVBeFrRBpyBXMNZkWs4C3ItcBRzVSw5OVn79+9XXFycXC5XqLsTcnl5eUpJSdH+/fsVHx8f6u6gmuDnwpfH49GJEyeUnJxsRWNGry2A9cg1X/z+QkX4ufBFrlmHYq6KhYWF6cILLwx1N6qd+Ph4frmhHH4ufhbonUsgWMi1ivH7CxXh5+Jn5Jo1KOYAwCQeSVY8S8eeNzABAE5jeK7xnDkAAAAAsCFG5hBSkZGRmjRpkiIjI0PdFVQj/FwEj+kLxYFg4/cXKsLPRfCYnmsuD3uCAoDj5eXlKSEhQd3bPqLwGoH/z8Sp0iJ9mjVNubm5rP8AAFQ5cq0M0ywBAAAAwIaYZgkAJjF8C2cAgMMYnmsUcwBgErckKx4FZsXOYQAABMrwXGOaJQAAAADYEMUcQm7lypVyuVzKyck563VNmjTRrFmzqqRPqHoul0tLliyp1LWTJ09W27Ztg9ofpzq965cVB4CKkWuQyLWqYnquUczhjAYNGqTevXuXO1/ZkDpfmZmZql27dlDaRugcPXpU9913n1JTUxUZGamkpCSlp6fr888/lyQdOnRIN954Y4h7aYDTawusOACbIddgJXKtmjA811gzB6BK9O3bV8XFxXrttdfUtGlTHT58WCtWrNCxY8ckSUlJSSHuIQAAlUeuoTpgZA4BW7Nmjbp06aLo6GilpKRo5MiRKigo8L7+pz/9SR06dFBcXJySkpJ055136siRIxW2tXLlSg0ePFi5ublyuVxyuVyaPHmy9/WTJ09qyJAhiouLU2pqql566SXva927d1dGRoZPe0ePHlVERIRWrFhh7YeGX3JycrR69Wo988wz6tatmxo3bqwrr7xS48eP12233Sap/HSUAwcOqH///qpbt65iYmLUoUMHrV+/vsL2d+/eraZNmyojI0M8OvMcDL+DCVQGuYZzIdeqEcNzjWIOAdm9e7d69uypvn37auvWrXrrrbe0Zs0an/ApKSnRlClTtGXLFi1ZskTfffedBg0aVGF7nTt31qxZsxQfH69Dhw7p0KFDGjNmjPf1GTNmqEOHDtq8ebOGDx+u++67Tzt27JAkDR06VIsXL1ZRUZH3+tdff12NGjVS9+7dg/MXgEqJjY1VbGyslixZ4vPv50zy8/PVtWtX/fDDD1q6dKm2bNmisWPHyu0uv9XU1q1bdc011+jOO+/U888/L5fLii2tAJiKXENlkGuoLphmibNatmyZYmNjfc6VlpZ6/zx16lQNGDBAo0aNkiQ1b95cs2fPVteuXTV37lxFRUVpyJAh3uubNm2q2bNn64orrlB+fn65tiMiIpSQkCCXy1Xh9ISbbrpJw4cPlySNGzdOM2fO1D/+8Q+1aNFCffr0UUZGht5//33dcccdksrWKQwaNIhfhCEWHh6uzMxMDRs2TPPmzVP79u3VtWtX9evXT23atCl3/eLFi3X06FFt2LBBdevWlSQ1a9as3HVr167VLbfcokcffVQPPfRQ0D+HIxj+PB6AXIMVyLVqxPBcY2QOZ9WtWzdlZWX5HPPnz/e+vmXLFmVmZnrvUMXGxio9PV1ut1t79+6VJG3cuFG33nqrUlNTFRcXp65du0qS9u3b53d/fvkL8nQwnp7aEhUVpd/+9rd69dVXJUmbNm3S119/fca7pahaffv21cGDB7V06VL17NlTK1euVPv27ZWZmVnu2qysLLVr184beBXZt2+frr/+ek2cOJHA84fbwgOwIXINViHXqgnDc42ROZxVTExMuTtHBw4c8P45Pz9fv//97zVy5Mhy701NTVVBQYHS09OVnp6uRYsWqX79+tq3b5/S09NVXFzsd39q1qzp87XL5fKZojB06FC1bdtWBw4c0IIFC9S9e3c1btzY7++D4IiKitL111+v66+/XhMmTNDQoUM1adKkcv9jEh0dfc626tevr+TkZL3xxhsaMmSI4uPjg9RrAE5CrsFK5BpCjZE5BKR9+/batm2bmjVrVu6IiIjQN998o2PHjmnatGnq0qWLWrZsecZF4qdFRET4THnxR+vWrdWhQwe9/PLLWrx4sc9UGFQ/rVq18tlU4LQ2bdooKytLx48fP+N7o6OjtWzZMkVFRSk9PV0nTpwIZlcdw/Tn8QDnQq4hEORa1TM91yjmEJBx48Zp7dq1ysjIUFZWlnbt2qX333/fu1A8NTVVEREReu6557Rnzx4tXbpUU6ZMOWubTZo0UX5+vlasWKEff/xRJ0+e9KtPQ4cO1bRp0+TxeHT77bef92eDdY4dO6bu3bvr9ddf19atW7V371698847mj59unr16lXu+v79+yspKUm9e/fW559/rj179ujdd9/VunXrfK6LiYnRhx9+qPDwcN14443Kz8+vqo9kX4bv+gWcC7mGyiDXqhHDc41iDgFp06aNVq1apZ07d6pLly5q166dJk6cqOTkZEllUwYyMzP1zjvvqFWrVpo2bZr+8Ic/nLXNzp07695779VvfvMb1a9fX9OnT/erT/3791d4eLj69++vqKio8/5ssE5sbKw6duyomTNn6tprr9Wll16qCRMmaNiwYXr++efLXR8REaG//e1vatCggW666Sa1bt1a06ZNU40aNSps++OPP5bH49HNN99c4R1RAKgscg2VQa6hunB5eHgFHOa7777TRRddpA0bNqh9+/ah7g5QLeTl5SkhIUE9Lhql8BqRAbd3qrRIf989S7m5uazrAIKMXAPKI9fKsAEKHKOkpETHjh3TY489pquuuorAAypi+BbOgJ2Qa0AlGJ5rTLOEY3z++edq2LChNmzYoHnz5oW6OwAABIRcA3AujMzBMa677joxaxg4F6sWefPfGhBs5BpQGWbnGsUcAJjE8OkoAACHMTzXmGYJAAAAADbEyBwAmMTtkSVTSdz2vIMJAHAYw3ONkTkAAAAAsCFG5gDAJB532WFFOwAAhJrhucbIHOCHQYMGqXfv3t6vr7vuOo0aNarK+7Fy5Uq5XC7l5OSc8RqXy6UlS5ZUus3Jkyerbdu2AfXru+++k8vlUlZWVkDtIIhOLxS34gBge+Ta2ZFrNmB4rlHMwfYGDRokl8sll8uliIgINWvWTE888YROnToV9O/9l7/8RVOmTKnUtZUJKgAAyDUAlcU0SzhCz549tWDBAhUVFemjjz7SiBEjVLNmTY0fP77ctcXFxYqIiLDk+9atW9eSdoAqY/hCccAuyDWgkgzPNUbm4AiRkZFKSkpS48aNdd9996lHjx5aunSppJ+nkDz11FNKTk5WixYtJEn79+/XHXfcodq1a6tu3brq1auXvvvuO2+bpaWlevDBB1W7dm3Vq1dPY8eOLffw1v+ejlJUVKRx48YpJSVFkZGRatasmV555RV999136tatmySpTp06crlcGjRokCTJ7XZr6tSpSktLU3R0tC677DL9+c9/9vk+H330kS6++GJFR0erW7duPv2srHHjxuniiy9WrVq11LRpU02YMEElJSXlrnvxxReVkpKiWrVq6Y477lBubq7P6/Pnz9cll1yiqKgotWzZUi+88ILffUEIGT4dBbALcu3cyDVIMj7XKObgSNHR0SouLvZ+vWLFCu3YsUOffPKJli1bppKSEqWnpysuLk6rV6/W559/rtjYWPXs2dP7vhkzZigzM1Ovvvqq1qxZo+PHj+u999476/f93e9+pzfeeEOzZ8/W9u3b9eKLLyo2NlYpKSl69913JUk7duzQoUOH9Mc//lGSNHXqVC1cuFDz5s3Tv/71L40ePVp33XWXVq1aJaksnPv06aNbb71VWVlZGjp0qB555BG//07i4uKUmZmpbdu26Y9//KNefvllzZw50+eab7/9Vm+//bY++OADLV++XJs3b9bw4cO9ry9atEgTJ07UU089pe3bt+vpp5/WhAkT9Nprr/ndHwBA5ZFr5ZFrANMs4TAej0crVqzQX//6V91///3e8zExMZo/f753Gsrrr78ut9ut+fPny+VySZIWLFig2rVra+XKlbrhhhs0a9YsjR8/Xn369JEkzZs3T3/961/P+L137typt99+W5988ol69OghSWratKn39dNTVxo0aKDatWtLKrvj+fTTT+vvf/+7OnXq5H3PmjVr9OKLL6pr166aO3euLrroIs2YMUOS1KJFC3311Vd65pln/Pq7eeyxx7x/btKkicaMGaM333xTY8eO9Z4vLCzUwoUL1ahRI0nSc889p5tvvlkzZsxQUlKSJk2apBkzZnj/TtLS0rRt2za9+OKLGjhwoF/9QYh4ZM3dR3vewARsh1w7M3INkozPNYo5OMKyZcsUGxurkpISud1u3XnnnZo8ebL39datW/usJ9iyZYu+/fZbxcXF+bRTWFio3bt3Kzc3V4cOHVLHjh29r4WHh6tDhw7lpqSclpWVpRo1aqhr166V7ve3336rkydP6vrrr/c5X1xcrHbt2kmStm/f7tMPSd6A9Mdbb72l2bNna/fu3crPz9epU6cUHx/vc01qaqo38E5/H7fbrR07diguLk67d+/W3XffrWHDhnmvOXXqlBISEvzuD0LEqqkkNp2OAtgFuXZu5BokGZ9rFHNwhG7dumnu3LmKiIhQcnKywsN9f7RjYmJ8vs7Pz9fll1+uRYsWlWurfv3659WH6Ohov9+Tn58vSfrwww99wkYqWy9hlXXr1mnAgAF6/PHHlZ6eroSEBL355pveu6L+9PXll18uF8I1atSwrK8AAHLtXMg1oAzFHBwhJiZGzZo1q/T17du311tvvaUGDRqUu4t3WsOGDbV+/Xpde+21ksru1G3cuFHt27ev8PrWrVvL7XZr1apV3ukov3T6Dmppaan3XKtWrRQZGal9+/ad8c7nJZdc4l30ftoXX3xx7g/5C2vXrlXjxo316KOPes99//335a7bt2+fDh48qOTkZO/3CQsLU4sWLZSYmKjk5GTt2bNHAwYM8Ov7oxpxuyVZ8GBUtz0frgrYBbl2duQavAzPNTZAgZEGDBigCy64QL169dLq1au1d+9erVy5UiNHjtSBAwckSQ888ICmTZumJUuW6JtvvtHw4cPP+iydJk2aaODAgRoyZIiWLFnibfPtt9+WJDVu3Fgul0vLli3T0aNHlZ+fr7i4OI0ZM0ajR4/Wa6+9pt27d2vTpk167rnnvIuv7733Xu3atUsPP/ywduzYocWLFyszM9Ovz9u8eXPt27dPb775pnbv3q3Zs2dXuOg9KipKAwcO1JYtW7R69WqNHDlSd9xxh5KSkiRJjz/+uKZOnarZs2dr586d+uqrr7RgwQI9++yzfvUHAGAtco1cg5ko5mCkWrVq6bPPPlNqaqr69OmjSy65RHfffbcKCwu9dzQfeugh/fa3v9XAgQPVqVMnxcXF6fbbbz9ru3PnztX//M//aPjw4WrZsqWGDRumgoICSVKjRo30+OOP65FHHlFiYqIyMjIkSVOmTNGECRM0depUXXLJJerZs6c+/PBDpaWlSSqb7//uu+9qyZIluuyyyzRv3jw9/fTTfn3e2267TaNHj1ZGRobatm2rtWvXasKECeWua9asmfr06aObbrpJN9xwg9q0aeOzRfPQoUM1f/58LViwQK1bt1bXrl2VmZnp7StswPAtnAGnItfINWMZnmsuz5lWvQIAHCMvL08JCQnqccEQhYcF/nDhU+5i/f3HV5Wbm3vGKV0AAAQLuVaGkTkAAAAAsCE2QAEAk7g9suRhOm4mdQAAqgHDc41iDgAM4vG45fEEvmOXFW0AABAo03ONaZYAAAAAYEOMzAGASTwea6aSsHcWAKA6MDzXKOYAwCQei9YW2DT0AAAOY3iuMc0SAAAAAGyIkTkAMInbLbksWORt04XiAACHMTzXKOYAwCSGT0cBADiM4bnGNEsAAAAAsCFG5gDAIB63Wx4LpqPY9Xk8AABnMT3XGJkDAAAAABtiZA4ATGL42gIAgMMYnmsUcwBgErdHcpkbegAAhzE815hmCQAAAAA2xMgcAJjE45FkxfN47HkHEwDgMIbnGsUcABjE4/bIY8F0FI9NQw8A4Cym5xrTLAEAAADAhhiZAwCTeNyyZjqKPZ/HAwBwGMNzjZE5ADCIx+2x7PDXnDlz1KRJE0VFRaljx4765z//GYRPCAAwiem5RjEHAAi6t956Sw8++KAmTZqkTZs26bLLLlN6erqOHDkS6q4BAOC36pJrFHMAYBKP27rDD88++6yGDRumwYMHq1WrVpo3b55q1aqlV199NUgfFABgBMNzjTVzAGCQUyqRLNiw65RKJEl5eXk+5yMjIxUZGelzrri4WBs3btT48eO958LCwtSjRw+tW7cu8M4AAIxleq5RzAGAASIiIpSUlKQ12R9Z1mZsbKxSUlJ8zk2aNEmTJ0/2Offjjz+qtLRUiYmJPucTExP1zTffWNYfAIA5yLUyFHMAYICoqCjt3btXxcXFlrXp8Xjkcrl8zv333UsAAIKBXCtDMQcAhoiKilJUVFSVf98LLrhANWrU0OHDh33OHz58WElJSVXeHwCAM5BrbIACAAiyiIgIXX755VqxYoX3nNvt1ooVK9SpU6cQ9gwAAP9Vp1xjZA4AEHQPPvigBg4cqA4dOujKK6/UrFmzVFBQoMGDB4e6awAA+K265BrFHAAg6H7zm9/o6NGjmjhxorKzs9W2bVstX7683OJxAADsoLrkmsvj8ViwmScAAAAAoCqxZg4AAAAAbIhiDgAAAABsiGIOAAAAAGyIYg4AAAAAbIhiDgAAAABsiGIOAAAAAGyIYg4AAAAAbIhiDgAAAABsiGIOAAAAAGyIYg4AAAAAbIhiDgAAAABs6P8DftFkOSRPZEQAAAAASUVORK5CYII=",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False\n",
")\n",
"\n",
"for index in range(0, len(optimized_metrics)):\n",
" c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n",
" disp = ConfusionMatrixDisplay(\n",
" confusion_matrix=c_matrix, display_labels=[\"Healthy\", \"Sick\"]\n",
" ).plot(ax=ax.flat[index])\n",
"\n",
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Регрессионная модель"
]
},
{
"cell_type": "code",
"execution_count": 82,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['Pregnancies', 'Glucose', 'BloodPressure', 'SkinThickness', 'Insulin',\n",
" 'BMI', 'DiabetesPedigreeFunction', 'Age', 'Outcome'],\n",
" dtype='object')\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 6 | \n",
" 148 | \n",
" 72 | \n",
" 35 | \n",
" 0 | \n",
" 33.6 | \n",
" 0.627 | \n",
" 50 | \n",
" 1 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 85 | \n",
" 66 | \n",
" 29 | \n",
" 0 | \n",
" 26.6 | \n",
" 0.351 | \n",
" 31 | \n",
" 0 | \n",
"
\n",
" \n",
" 2 | \n",
" 8 | \n",
" 183 | \n",
" 64 | \n",
" 0 | \n",
" 0 | \n",
" 23.3 | \n",
" 0.672 | \n",
" 32 | \n",
" 1 | \n",
"
\n",
" \n",
" 3 | \n",
" 1 | \n",
" 89 | \n",
" 66 | \n",
" 23 | \n",
" 94 | \n",
" 28.1 | \n",
" 0.167 | \n",
" 21 | \n",
" 0 | \n",
"
\n",
" \n",
" 4 | \n",
" 0 | \n",
" 137 | \n",
" 40 | \n",
" 35 | \n",
" 168 | \n",
" 43.1 | \n",
" 2.288 | \n",
" 33 | \n",
" 1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 763 | \n",
" 10 | \n",
" 101 | \n",
" 76 | \n",
" 48 | \n",
" 180 | \n",
" 32.9 | \n",
" 0.171 | \n",
" 63 | \n",
" 0 | \n",
"
\n",
" \n",
" 764 | \n",
" 2 | \n",
" 122 | \n",
" 70 | \n",
" 27 | \n",
" 0 | \n",
" 36.8 | \n",
" 0.340 | \n",
" 27 | \n",
" 0 | \n",
"
\n",
" \n",
" 765 | \n",
" 5 | \n",
" 121 | \n",
" 72 | \n",
" 23 | \n",
" 112 | \n",
" 26.2 | \n",
" 0.245 | \n",
" 30 | \n",
" 0 | \n",
"
\n",
" \n",
" 766 | \n",
" 1 | \n",
" 126 | \n",
" 60 | \n",
" 0 | \n",
" 0 | \n",
" 30.1 | \n",
" 0.349 | \n",
" 47 | \n",
" 1 | \n",
"
\n",
" \n",
" 767 | \n",
" 1 | \n",
" 93 | \n",
" 70 | \n",
" 31 | \n",
" 0 | \n",
" 30.4 | \n",
" 0.315 | \n",
" 23 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
768 rows × 9 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"0 6 148 72 35 0 33.6 \n",
"1 1 85 66 29 0 26.6 \n",
"2 8 183 64 0 0 23.3 \n",
"3 1 89 66 23 94 28.1 \n",
"4 0 137 40 35 168 43.1 \n",
".. ... ... ... ... ... ... \n",
"763 10 101 76 48 180 32.9 \n",
"764 2 122 70 27 0 36.8 \n",
"765 5 121 72 23 112 26.2 \n",
"766 1 126 60 0 0 30.1 \n",
"767 1 93 70 31 0 30.4 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome \n",
"0 0.627 50 1 \n",
"1 0.351 31 0 \n",
"2 0.672 32 1 \n",
"3 0.167 21 0 \n",
"4 2.288 33 1 \n",
".. ... ... ... \n",
"763 0.171 63 0 \n",
"764 0.340 27 0 \n",
"765 0.245 30 0 \n",
"766 0.349 47 1 \n",
"767 0.315 23 0 \n",
"\n",
"[768 rows x 9 columns]"
]
},
"execution_count": 82,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import numpy as np\n",
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"from sklearn import set_config\n",
"\n",
"random_state=9\n",
"set_config(transform_output=\"pandas\")\n",
"df = pd.read_csv(\".//scv//diabetes.csv\")\n",
"print(df.columns)\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Разделение набора данных на обучающую и тестовые выборки"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'X_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
"
\n",
" \n",
" \n",
" \n",
" 60 | \n",
" 2 | \n",
" 84 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.304 | \n",
" 21 | \n",
"
\n",
" \n",
" 618 | \n",
" 9 | \n",
" 112 | \n",
" 82 | \n",
" 24 | \n",
" 0 | \n",
" 28.2 | \n",
" 1.282 | \n",
" 50 | \n",
"
\n",
" \n",
" 346 | \n",
" 1 | \n",
" 139 | \n",
" 46 | \n",
" 19 | \n",
" 83 | \n",
" 28.7 | \n",
" 0.654 | \n",
" 22 | \n",
"
\n",
" \n",
" 294 | \n",
" 0 | \n",
" 161 | \n",
" 50 | \n",
" 0 | \n",
" 0 | \n",
" 21.9 | \n",
" 0.254 | \n",
" 65 | \n",
"
\n",
" \n",
" 231 | \n",
" 6 | \n",
" 134 | \n",
" 80 | \n",
" 37 | \n",
" 370 | \n",
" 46.2 | \n",
" 0.238 | \n",
" 46 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 71 | \n",
" 5 | \n",
" 139 | \n",
" 64 | \n",
" 35 | \n",
" 140 | \n",
" 28.6 | \n",
" 0.411 | \n",
" 26 | \n",
"
\n",
" \n",
" 106 | \n",
" 1 | \n",
" 96 | \n",
" 122 | \n",
" 0 | \n",
" 0 | \n",
" 22.4 | \n",
" 0.207 | \n",
" 27 | \n",
"
\n",
" \n",
" 270 | \n",
" 10 | \n",
" 101 | \n",
" 86 | \n",
" 37 | \n",
" 0 | \n",
" 45.6 | \n",
" 1.136 | \n",
" 38 | \n",
"
\n",
" \n",
" 435 | \n",
" 0 | \n",
" 141 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 42.4 | \n",
" 0.205 | \n",
" 29 | \n",
"
\n",
" \n",
" 102 | \n",
" 0 | \n",
" 125 | \n",
" 96 | \n",
" 0 | \n",
" 0 | \n",
" 22.5 | \n",
" 0.262 | \n",
" 21 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 8 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"60 2 84 0 0 0 0.0 \n",
"618 9 112 82 24 0 28.2 \n",
"346 1 139 46 19 83 28.7 \n",
"294 0 161 50 0 0 21.9 \n",
"231 6 134 80 37 370 46.2 \n",
".. ... ... ... ... ... ... \n",
"71 5 139 64 35 140 28.6 \n",
"106 1 96 122 0 0 22.4 \n",
"270 10 101 86 37 0 45.6 \n",
"435 0 141 0 0 0 42.4 \n",
"102 0 125 96 0 0 22.5 \n",
"\n",
" DiabetesPedigreeFunction Age \n",
"60 0.304 21 \n",
"618 1.282 50 \n",
"346 0.654 22 \n",
"294 0.254 65 \n",
"231 0.238 46 \n",
".. ... ... \n",
"71 0.411 26 \n",
"106 0.207 27 \n",
"270 1.136 38 \n",
"435 0.205 29 \n",
"102 0.262 21 \n",
"\n",
"[614 rows x 8 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_train'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 60 | \n",
" 0 | \n",
"
\n",
" \n",
" 618 | \n",
" 1 | \n",
"
\n",
" \n",
" 346 | \n",
" 0 | \n",
"
\n",
" \n",
" 294 | \n",
" 0 | \n",
"
\n",
" \n",
" 231 | \n",
" 1 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 71 | \n",
" 0 | \n",
"
\n",
" \n",
" 106 | \n",
" 0 | \n",
"
\n",
" \n",
" 270 | \n",
" 1 | \n",
"
\n",
" \n",
" 435 | \n",
" 1 | \n",
"
\n",
" \n",
" 102 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
614 rows × 1 columns
\n",
"
"
],
"text/plain": [
" Outcome\n",
"60 0\n",
"618 1\n",
"346 0\n",
"294 0\n",
"231 1\n",
".. ...\n",
"71 0\n",
"106 0\n",
"270 1\n",
"435 1\n",
"102 0\n",
"\n",
"[614 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'X_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
"
\n",
" \n",
" \n",
" \n",
" 668 | \n",
" 6 | \n",
" 98 | \n",
" 58 | \n",
" 33 | \n",
" 190 | \n",
" 34.0 | \n",
" 0.430 | \n",
" 43 | \n",
"
\n",
" \n",
" 324 | \n",
" 2 | \n",
" 112 | \n",
" 75 | \n",
" 32 | \n",
" 0 | \n",
" 35.7 | \n",
" 0.148 | \n",
" 21 | \n",
"
\n",
" \n",
" 624 | \n",
" 2 | \n",
" 108 | \n",
" 64 | \n",
" 0 | \n",
" 0 | \n",
" 30.8 | \n",
" 0.158 | \n",
" 21 | \n",
"
\n",
" \n",
" 690 | \n",
" 8 | \n",
" 107 | \n",
" 80 | \n",
" 0 | \n",
" 0 | \n",
" 24.6 | \n",
" 0.856 | \n",
" 34 | \n",
"
\n",
" \n",
" 473 | \n",
" 7 | \n",
" 136 | \n",
" 90 | \n",
" 0 | \n",
" 0 | \n",
" 29.9 | \n",
" 0.210 | \n",
" 50 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 355 | \n",
" 9 | \n",
" 165 | \n",
" 88 | \n",
" 0 | \n",
" 0 | \n",
" 30.4 | \n",
" 0.302 | \n",
" 49 | \n",
"
\n",
" \n",
" 534 | \n",
" 1 | \n",
" 77 | \n",
" 56 | \n",
" 30 | \n",
" 56 | \n",
" 33.3 | \n",
" 1.251 | \n",
" 24 | \n",
"
\n",
" \n",
" 344 | \n",
" 8 | \n",
" 95 | \n",
" 72 | \n",
" 0 | \n",
" 0 | \n",
" 36.8 | \n",
" 0.485 | \n",
" 57 | \n",
"
\n",
" \n",
" 296 | \n",
" 2 | \n",
" 146 | \n",
" 70 | \n",
" 38 | \n",
" 360 | \n",
" 28.0 | \n",
" 0.337 | \n",
" 29 | \n",
"
\n",
" \n",
" 462 | \n",
" 8 | \n",
" 74 | \n",
" 70 | \n",
" 40 | \n",
" 49 | \n",
" 35.3 | \n",
" 0.705 | \n",
" 39 | \n",
"
\n",
" \n",
"
\n",
"
154 rows × 8 columns
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"668 6 98 58 33 190 34.0 \n",
"324 2 112 75 32 0 35.7 \n",
"624 2 108 64 0 0 30.8 \n",
"690 8 107 80 0 0 24.6 \n",
"473 7 136 90 0 0 29.9 \n",
".. ... ... ... ... ... ... \n",
"355 9 165 88 0 0 30.4 \n",
"534 1 77 56 30 56 33.3 \n",
"344 8 95 72 0 0 36.8 \n",
"296 2 146 70 38 360 28.0 \n",
"462 8 74 70 40 49 35.3 \n",
"\n",
" DiabetesPedigreeFunction Age \n",
"668 0.430 43 \n",
"324 0.148 21 \n",
"624 0.158 21 \n",
"690 0.856 34 \n",
"473 0.210 50 \n",
".. ... ... \n",
"355 0.302 49 \n",
"534 1.251 24 \n",
"344 0.485 57 \n",
"296 0.337 29 \n",
"462 0.705 39 \n",
"\n",
"[154 rows x 8 columns]"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/plain": [
"'y_test'"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Outcome | \n",
"
\n",
" \n",
" \n",
" \n",
" 668 | \n",
" 0 | \n",
"
\n",
" \n",
" 324 | \n",
" 0 | \n",
"
\n",
" \n",
" 624 | \n",
" 0 | \n",
"
\n",
" \n",
" 690 | \n",
" 0 | \n",
"
\n",
" \n",
" 473 | \n",
" 0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 355 | \n",
" 1 | \n",
"
\n",
" \n",
" 534 | \n",
" 0 | \n",
"
\n",
" \n",
" 344 | \n",
" 0 | \n",
"
\n",
" \n",
" 296 | \n",
" 1 | \n",
"
\n",
" \n",
" 462 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
154 rows × 1 columns
\n",
"
"
],
"text/plain": [
" Outcome\n",
"668 0\n",
"324 0\n",
"624 0\n",
"690 0\n",
"473 0\n",
".. ...\n",
"355 1\n",
"534 0\n",
"344 0\n",
"296 1\n",
"462 0\n",
"\n",
"[154 rows x 1 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from typing import Tuple\n",
"import pandas as pd\n",
"from pandas import DataFrame\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"def split_into_train_test(\n",
" df_input: DataFrame,\n",
" target_colname: str = \"above_average_close\",\n",
" frac_train: float = 0.8,\n",
" random_state: int = None,\n",
") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame]:\n",
" \n",
" if not (0 < frac_train < 1):\n",
" raise ValueError(\"Fraction must be between 0 and 1.\")\n",
" \n",
" # Проверка наличия целевого признака\n",
" if target_colname not in df_input.columns:\n",
" raise ValueError(f\"{target_colname} is not a column in the DataFrame.\")\n",
" \n",
" # Разделяем данные на признаки и целевую переменную\n",
" X = df_input.drop(columns=[target_colname]) # Признаки\n",
" y = df_input[[target_colname]] # Целевая переменная\n",
"\n",
" # Разделяем данные на обучающую и тестовую выборки\n",
" X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y,\n",
" test_size=(1.0 - frac_train),\n",
" random_state=random_state\n",
" )\n",
" \n",
" return X_train, X_test, y_train, y_test\n",
"\n",
"# Применение функции для разделения данных\n",
"X_train, X_test, y_train, y_test = split_into_train_test(\n",
" df, \n",
" target_colname=\"Outcome\", \n",
" frac_train=0.8, \n",
" random_state=42 # Убедитесь, что вы задали нужное значение random_state\n",
")\n",
"\n",
"# Для отображения результатов\n",
"display(\"X_train\", X_train)\n",
"display(\"y_train\", y_train)\n",
"\n",
"display(\"X_test\", X_test)\n",
"display(\"y_test\", y_test)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import PolynomialFeatures\n",
"from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
"\n",
"random_state = 9\n",
"\n",
"models = {\n",
" \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
" \"linear_poly\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(degree=2),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"linear_interact\": {\n",
" \"model\": make_pipeline(\n",
" PolynomialFeatures(interaction_only=True),\n",
" linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
" )\n",
" },\n",
" \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
" \"decision_tree\": {\n",
" \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
" },\n",
" \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
" \"random_forest\": {\n",
" \"model\": ensemble.RandomForestRegressor(\n",
" max_depth=7, random_state=random_state, n_jobs=-1\n",
" )\n",
" },\n",
" \"mlp\": {\n",
" \"model\": neural_network.MLPRegressor(\n",
" activation=\"tanh\",\n",
" hidden_layer_sizes=(3,),\n",
" max_iter=500,\n",
" early_stopping=True,\n",
" random_state=random_state,\n",
" )\n",
" },\n",
"}\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: linear_poly\n",
"Model: linear_interact\n",
"Model: ridge\n",
"Model: decision_tree\n",
"Model: knn\n",
"Model: random_forest\n",
"Model: mlp\n"
]
}
],
"source": [
"import math\n",
"from pandas import DataFrame\n",
"from sklearn import metrics\n",
"\n",
"for model_name in models.keys():\n",
" print(f\"Model: {model_name}\")\n",
"\n",
" fitted_model = models[model_name][\"model\"].fit(\n",
" X_train.values, y_train.values.ravel()\n",
" )\n",
" y_train_pred = fitted_model.predict(X_train.values)\n",
" y_test_pred = fitted_model.predict(X_test.values)\n",
" models[model_name][\"fitted\"] = fitted_model\n",
" models[model_name][\"train_preds\"] = y_train_pred\n",
" models[model_name][\"preds\"] = y_test_pred\n",
" models[model_name][\"RMSE_train\"] = math.sqrt(\n",
" metrics.mean_squared_error(y_train, y_train_pred)\n",
" )\n",
" models[model_name][\"RMSE_test\"] = math.sqrt(\n",
" metrics.mean_squared_error(y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"RMAE_test\"] = math.sqrt(\n",
" metrics.mean_absolute_error(y_test, y_test_pred)\n",
" )\n",
" models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_pred)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод результатов оценки"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
" \n",
" \n",
" | \n",
" RMSE_train | \n",
" RMSE_test | \n",
" RMAE_test | \n",
" R2_test | \n",
"
\n",
" \n",
" \n",
" \n",
" random_forest | \n",
" 0.240052 | \n",
" 0.405871 | \n",
" 0.559210 | \n",
" 0.282505 | \n",
"
\n",
" \n",
" linear | \n",
" 0.396793 | \n",
" 0.413576 | \n",
" 0.590024 | \n",
" 0.255003 | \n",
"
\n",
" \n",
" ridge | \n",
" 0.396822 | \n",
" 0.414236 | \n",
" 0.590431 | \n",
" 0.252623 | \n",
"
\n",
" \n",
" linear_poly | \n",
" 0.370076 | \n",
" 0.422852 | \n",
" 0.584147 | \n",
" 0.221209 | \n",
"
\n",
" \n",
" linear_interact | \n",
" 0.380128 | \n",
" 0.426815 | \n",
" 0.593532 | \n",
" 0.206543 | \n",
"
\n",
" \n",
" decision_tree | \n",
" 0.249880 | \n",
" 0.445708 | \n",
" 0.520376 | \n",
" 0.134743 | \n",
"
\n",
" \n",
" knn | \n",
" 0.373319 | \n",
" 0.450285 | \n",
" 0.592157 | \n",
" 0.116883 | \n",
"
\n",
" \n",
" mlp | \n",
" 0.623529 | \n",
" 0.544323 | \n",
" 0.658689 | \n",
" -0.290498 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"reg_metrics = pd.DataFrame.from_dict(models, \"index\")[\n",
" [\"RMSE_train\", \"RMSE_test\", \"RMAE_test\", \"R2_test\"]\n",
"]\n",
"reg_metrics.sort_values(by=\"RMSE_test\").style.background_gradient(\n",
" cmap=\"viridis\", low=1, high=0.3, subset=[\"RMSE_train\", \"RMSE_test\"]\n",
").background_gradient(cmap=\"plasma\", low=0.3, high=1, subset=[\"RMAE_test\", \"R2_test\"])"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"Вывод реального и \"спрогнозированного\" результата для обучающей и тестовой выборок\n",
"\n",
"Получение лучшей модели\n"
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'random_forest'"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"best_model = str(reg_metrics.sort_values(by=\"RMSE_test\").iloc[0].name)\n",
"\n",
"display(best_model)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для обучающей выборки"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
" DiabetPred | \n",
"
\n",
" \n",
" \n",
" \n",
" 60 | \n",
" 2 | \n",
" 84 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 0.0 | \n",
" 0.304 | \n",
" 21 | \n",
" 0 | \n",
" 0.001849 | \n",
"
\n",
" \n",
" 618 | \n",
" 9 | \n",
" 112 | \n",
" 82 | \n",
" 24 | \n",
" 0 | \n",
" 28.2 | \n",
" 1.282 | \n",
" 50 | \n",
" 1 | \n",
" 0.758997 | \n",
"
\n",
" \n",
" 346 | \n",
" 1 | \n",
" 139 | \n",
" 46 | \n",
" 19 | \n",
" 83 | \n",
" 28.7 | \n",
" 0.654 | \n",
" 22 | \n",
" 0 | \n",
" 0.149231 | \n",
"
\n",
" \n",
" 294 | \n",
" 0 | \n",
" 161 | \n",
" 50 | \n",
" 0 | \n",
" 0 | \n",
" 21.9 | \n",
" 0.254 | \n",
" 65 | \n",
" 0 | \n",
" 0.239564 | \n",
"
\n",
" \n",
" 231 | \n",
" 6 | \n",
" 134 | \n",
" 80 | \n",
" 37 | \n",
" 370 | \n",
" 46.2 | \n",
" 0.238 | \n",
" 46 | \n",
" 1 | \n",
" 0.773890 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"60 2 84 0 0 0 0.0 \n",
"618 9 112 82 24 0 28.2 \n",
"346 1 139 46 19 83 28.7 \n",
"294 0 161 50 0 0 21.9 \n",
"231 6 134 80 37 370 46.2 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome DiabetPred \n",
"60 0.304 21 0 0.001849 \n",
"618 1.282 50 1 0.758997 \n",
"346 0.654 22 0 0.149231 \n",
"294 0.254 65 0 0.239564 \n",
"231 0.238 46 1 0.773890 "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [\n",
" X_train,\n",
" y_train,\n",
" pd.Series(\n",
" models[best_model][\"train_preds\"],\n",
" index=y_train.index,\n",
" name=\"DiabetPred\",\n",
" ),\n",
" ],\n",
" axis=1,\n",
").head(5)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Вывод для тестовой выборки"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Pregnancies | \n",
" Glucose | \n",
" BloodPressure | \n",
" SkinThickness | \n",
" Insulin | \n",
" BMI | \n",
" DiabetesPedigreeFunction | \n",
" Age | \n",
" Outcome | \n",
" DiabetPred | \n",
"
\n",
" \n",
" \n",
" \n",
" 668 | \n",
" 6 | \n",
" 98 | \n",
" 58 | \n",
" 33 | \n",
" 190 | \n",
" 34.0 | \n",
" 0.430 | \n",
" 43 | \n",
" 0 | \n",
" 0.516537 | \n",
"
\n",
" \n",
" 324 | \n",
" 2 | \n",
" 112 | \n",
" 75 | \n",
" 32 | \n",
" 0 | \n",
" 35.7 | \n",
" 0.148 | \n",
" 21 | \n",
" 0 | \n",
" 0.205507 | \n",
"
\n",
" \n",
" 624 | \n",
" 2 | \n",
" 108 | \n",
" 64 | \n",
" 0 | \n",
" 0 | \n",
" 30.8 | \n",
" 0.158 | \n",
" 21 | \n",
" 0 | \n",
" 0.047710 | \n",
"
\n",
" \n",
" 690 | \n",
" 8 | \n",
" 107 | \n",
" 80 | \n",
" 0 | \n",
" 0 | \n",
" 24.6 | \n",
" 0.856 | \n",
" 34 | \n",
" 0 | \n",
" 0.128867 | \n",
"
\n",
" \n",
" 473 | \n",
" 7 | \n",
" 136 | \n",
" 90 | \n",
" 0 | \n",
" 0 | \n",
" 29.9 | \n",
" 0.210 | \n",
" 50 | \n",
" 0 | \n",
" 0.438512 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Pregnancies Glucose BloodPressure SkinThickness Insulin BMI \\\n",
"668 6 98 58 33 190 34.0 \n",
"324 2 112 75 32 0 35.7 \n",
"624 2 108 64 0 0 30.8 \n",
"690 8 107 80 0 0 24.6 \n",
"473 7 136 90 0 0 29.9 \n",
"\n",
" DiabetesPedigreeFunction Age Outcome DiabetPred \n",
"668 0.430 43 0 0.516537 \n",
"324 0.148 21 0 0.205507 \n",
"624 0.158 21 0 0.047710 \n",
"690 0.856 34 0 0.128867 \n",
"473 0.210 50 0 0.438512 "
]
},
"execution_count": 89,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.concat(\n",
" [\n",
" X_test,\n",
" y_test,\n",
" pd.Series(\n",
" models[best_model][\"preds\"],\n",
" index=y_test.index,\n",
" name=\"DiabetPred\",\n",
" ),\n",
" ],\n",
" axis=1,\n",
").head(5)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}