3463 lines
270 KiB
Plaintext
3463 lines
270 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Загрузка набора данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 102,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Parch</th>\n",
|
|||
|
" <th>Ticket</th>\n",
|
|||
|
" <th>Fare</th>\n",
|
|||
|
" <th>Cabin</th>\n",
|
|||
|
" <th>Embarked</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Braund, Mr. Owen Harris</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>22.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>A/5 21171</td>\n",
|
|||
|
" <td>7.2500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>PC 17599</td>\n",
|
|||
|
" <td>71.2833</td>\n",
|
|||
|
" <td>C85</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Heikkinen, Miss. Laina</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>STON/O2. 3101282</td>\n",
|
|||
|
" <td>7.9250</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>35.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>113803</td>\n",
|
|||
|
" <td>53.1000</td>\n",
|
|||
|
" <td>C123</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>5</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Allen, Mr. William Henry</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>35.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>373450</td>\n",
|
|||
|
" <td>8.0500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>887</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Montvila, Rev. Juozas</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>27.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>211536</td>\n",
|
|||
|
" <td>13.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>888</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Graham, Miss. Margaret Edith</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>19.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>112053</td>\n",
|
|||
|
" <td>30.0000</td>\n",
|
|||
|
" <td>B42</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>889</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Johnston, Miss. Catherine Helen \"Carrie\"</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>W./C. 6607</td>\n",
|
|||
|
" <td>23.4500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>890</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Behr, Mr. Karl Howell</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>111369</td>\n",
|
|||
|
" <td>30.0000</td>\n",
|
|||
|
" <td>C148</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>891</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Dooley, Mr. Patrick</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>32.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>370376</td>\n",
|
|||
|
" <td>7.7500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>Q</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>891 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived Pclass \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"1 0 3 \n",
|
|||
|
"2 1 1 \n",
|
|||
|
"3 1 3 \n",
|
|||
|
"4 1 1 \n",
|
|||
|
"5 0 3 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"887 0 2 \n",
|
|||
|
"888 1 1 \n",
|
|||
|
"889 0 3 \n",
|
|||
|
"890 1 1 \n",
|
|||
|
"891 0 3 \n",
|
|||
|
"\n",
|
|||
|
" Name Sex Age \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"1 Braund, Mr. Owen Harris male 22.0 \n",
|
|||
|
"2 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 \n",
|
|||
|
"3 Heikkinen, Miss. Laina female 26.0 \n",
|
|||
|
"4 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 \n",
|
|||
|
"5 Allen, Mr. William Henry male 35.0 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"887 Montvila, Rev. Juozas male 27.0 \n",
|
|||
|
"888 Graham, Miss. Margaret Edith female 19.0 \n",
|
|||
|
"889 Johnston, Miss. Catherine Helen \"Carrie\" female NaN \n",
|
|||
|
"890 Behr, Mr. Karl Howell male 26.0 \n",
|
|||
|
"891 Dooley, Mr. Patrick male 32.0 \n",
|
|||
|
"\n",
|
|||
|
" SibSp Parch Ticket Fare Cabin Embarked \n",
|
|||
|
"PassengerId \n",
|
|||
|
"1 1 0 A/5 21171 7.2500 NaN S \n",
|
|||
|
"2 1 0 PC 17599 71.2833 C85 C \n",
|
|||
|
"3 0 0 STON/O2. 3101282 7.9250 NaN S \n",
|
|||
|
"4 1 0 113803 53.1000 C123 S \n",
|
|||
|
"5 0 0 373450 8.0500 NaN S \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"887 0 0 211536 13.0000 NaN S \n",
|
|||
|
"888 0 0 112053 30.0000 B42 S \n",
|
|||
|
"889 1 2 W./C. 6607 23.4500 NaN S \n",
|
|||
|
"890 0 0 111369 30.0000 C148 C \n",
|
|||
|
"891 0 0 370376 7.7500 NaN Q \n",
|
|||
|
"\n",
|
|||
|
"[891 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 102,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"from sklearn import set_config\n",
|
|||
|
"\n",
|
|||
|
"set_config(transform_output=\"pandas\")\n",
|
|||
|
"\n",
|
|||
|
"random_state=9\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\"data/titanic.csv\", index_col=\"PassengerId\")\n",
|
|||
|
"\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Разделение набора данных на обучающую и тестовую выборки (80/20) для задачи классификации\n",
|
|||
|
"\n",
|
|||
|
"Целевой признак -- Survived"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 37,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'X_train'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Parch</th>\n",
|
|||
|
" <th>Ticket</th>\n",
|
|||
|
" <th>Fare</th>\n",
|
|||
|
" <th>Cabin</th>\n",
|
|||
|
" <th>Embarked</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>145</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Andrew, Mr. Edgardo Samuel</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>18.00</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>231945</td>\n",
|
|||
|
" <td>11.5000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>206</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Strom, Miss. Telma Matilda</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>2.00</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>347054</td>\n",
|
|||
|
" <td>10.4625</td>\n",
|
|||
|
" <td>G6</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>349</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Coutts, Master. William Loch \"William\"</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>3.00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>C.A. 37671</td>\n",
|
|||
|
" <td>15.9000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>329</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Goldsmith, Mrs. Frank John (Emily Alice Brown)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>31.00</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>363291</td>\n",
|
|||
|
" <td>20.5250</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>289</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Hosono, Mr. Masabumi</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>42.00</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>237798</td>\n",
|
|||
|
" <td>13.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>756</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Hamalainen, Master. Viljo</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>0.67</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>250649</td>\n",
|
|||
|
" <td>14.5000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>816</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Fry, Mr. Richard</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>112058</td>\n",
|
|||
|
" <td>0.0000</td>\n",
|
|||
|
" <td>B102</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>890</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Behr, Mr. Karl Howell</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>26.00</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>111369</td>\n",
|
|||
|
" <td>30.0000</td>\n",
|
|||
|
" <td>C148</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>738</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Lesurer, Mr. Gustave J</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>35.00</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>PC 17755</td>\n",
|
|||
|
" <td>512.3292</td>\n",
|
|||
|
" <td>B101</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>61</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Sirayanian, Mr. Orsen</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>22.00</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2669</td>\n",
|
|||
|
" <td>7.2292</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>712 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived Pclass Name \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"145 0 2 Andrew, Mr. Edgardo Samuel \n",
|
|||
|
"206 0 3 Strom, Miss. Telma Matilda \n",
|
|||
|
"349 1 3 Coutts, Master. William Loch \"William\" \n",
|
|||
|
"329 1 3 Goldsmith, Mrs. Frank John (Emily Alice Brown) \n",
|
|||
|
"289 1 2 Hosono, Mr. Masabumi \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"756 1 2 Hamalainen, Master. Viljo \n",
|
|||
|
"816 0 1 Fry, Mr. Richard \n",
|
|||
|
"890 1 1 Behr, Mr. Karl Howell \n",
|
|||
|
"738 1 1 Lesurer, Mr. Gustave J \n",
|
|||
|
"61 0 3 Sirayanian, Mr. Orsen \n",
|
|||
|
"\n",
|
|||
|
" Sex Age SibSp Parch Ticket Fare Cabin Embarked \n",
|
|||
|
"PassengerId \n",
|
|||
|
"145 male 18.00 0 0 231945 11.5000 NaN S \n",
|
|||
|
"206 female 2.00 0 1 347054 10.4625 G6 S \n",
|
|||
|
"349 male 3.00 1 1 C.A. 37671 15.9000 NaN S \n",
|
|||
|
"329 female 31.00 1 1 363291 20.5250 NaN S \n",
|
|||
|
"289 male 42.00 0 0 237798 13.0000 NaN S \n",
|
|||
|
"... ... ... ... ... ... ... ... ... \n",
|
|||
|
"756 male 0.67 1 1 250649 14.5000 NaN S \n",
|
|||
|
"816 male NaN 0 0 112058 0.0000 B102 S \n",
|
|||
|
"890 male 26.00 0 0 111369 30.0000 C148 C \n",
|
|||
|
"738 male 35.00 0 0 PC 17755 512.3292 B101 C \n",
|
|||
|
"61 male 22.00 0 0 2669 7.2292 NaN C \n",
|
|||
|
"\n",
|
|||
|
"[712 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'y_train'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>145</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>206</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>349</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>329</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>289</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>756</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>816</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>890</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>738</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>61</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>712 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived\n",
|
|||
|
"PassengerId \n",
|
|||
|
"145 0\n",
|
|||
|
"206 0\n",
|
|||
|
"349 1\n",
|
|||
|
"329 1\n",
|
|||
|
"289 1\n",
|
|||
|
"... ...\n",
|
|||
|
"756 1\n",
|
|||
|
"816 0\n",
|
|||
|
"890 1\n",
|
|||
|
"738 1\n",
|
|||
|
"61 0\n",
|
|||
|
"\n",
|
|||
|
"[712 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'X_test'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Parch</th>\n",
|
|||
|
" <th>Ticket</th>\n",
|
|||
|
" <th>Fare</th>\n",
|
|||
|
" <th>Cabin</th>\n",
|
|||
|
" <th>Embarked</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>843</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Serepeca, Miss. Augusta</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>30.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>113798</td>\n",
|
|||
|
" <td>31.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>791</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Keane, Mr. Andrew \"Andy\"</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>12460</td>\n",
|
|||
|
" <td>7.7500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>Q</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>509</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Olsen, Mr. Henry Margido</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>28.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>C 4001</td>\n",
|
|||
|
" <td>22.5250</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>828</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Mallet, Master. Andre</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>S.C./PARIS 2079</td>\n",
|
|||
|
" <td>37.0042</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>414</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Cunningham, Mr. Alfred Fleming</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>239853</td>\n",
|
|||
|
" <td>0.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>824</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Moor, Mrs. (Beila)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>27.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>392096</td>\n",
|
|||
|
" <td>12.4750</td>\n",
|
|||
|
" <td>E121</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>353</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Elias, Mr. Tannous</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>15.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2695</td>\n",
|
|||
|
" <td>7.2292</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>674</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Wilhelms, Mr. Charles</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>31.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>244270</td>\n",
|
|||
|
" <td>13.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>100</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Kantor, Mr. Sinai</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>34.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>244367</td>\n",
|
|||
|
" <td>26.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>542</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Andersson, Miss. Ingeborg Constanzia</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>9.0</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>347082</td>\n",
|
|||
|
" <td>31.2750</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>179 rows × 11 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived Pclass Name Sex \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"843 1 1 Serepeca, Miss. Augusta female \n",
|
|||
|
"791 0 3 Keane, Mr. Andrew \"Andy\" male \n",
|
|||
|
"509 0 3 Olsen, Mr. Henry Margido male \n",
|
|||
|
"828 1 2 Mallet, Master. Andre male \n",
|
|||
|
"414 0 2 Cunningham, Mr. Alfred Fleming male \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"824 1 3 Moor, Mrs. (Beila) female \n",
|
|||
|
"353 0 3 Elias, Mr. Tannous male \n",
|
|||
|
"674 1 2 Wilhelms, Mr. Charles male \n",
|
|||
|
"100 0 2 Kantor, Mr. Sinai male \n",
|
|||
|
"542 0 3 Andersson, Miss. Ingeborg Constanzia female \n",
|
|||
|
"\n",
|
|||
|
" Age SibSp Parch Ticket Fare Cabin Embarked \n",
|
|||
|
"PassengerId \n",
|
|||
|
"843 30.0 0 0 113798 31.0000 NaN C \n",
|
|||
|
"791 NaN 0 0 12460 7.7500 NaN Q \n",
|
|||
|
"509 28.0 0 0 C 4001 22.5250 NaN S \n",
|
|||
|
"828 1.0 0 2 S.C./PARIS 2079 37.0042 NaN C \n",
|
|||
|
"414 NaN 0 0 239853 0.0000 NaN S \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"824 27.0 0 1 392096 12.4750 E121 S \n",
|
|||
|
"353 15.0 1 1 2695 7.2292 NaN C \n",
|
|||
|
"674 31.0 0 0 244270 13.0000 NaN S \n",
|
|||
|
"100 34.0 1 0 244367 26.0000 NaN S \n",
|
|||
|
"542 9.0 4 2 347082 31.2750 NaN S \n",
|
|||
|
"\n",
|
|||
|
"[179 rows x 11 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'y_test'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>843</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>791</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>509</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>828</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>414</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>824</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>353</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>674</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>100</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>542</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>179 rows × 1 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived\n",
|
|||
|
"PassengerId \n",
|
|||
|
"843 1\n",
|
|||
|
"791 0\n",
|
|||
|
"509 0\n",
|
|||
|
"828 1\n",
|
|||
|
"414 0\n",
|
|||
|
"... ...\n",
|
|||
|
"824 1\n",
|
|||
|
"353 0\n",
|
|||
|
"674 1\n",
|
|||
|
"100 0\n",
|
|||
|
"542 0\n",
|
|||
|
"\n",
|
|||
|
"[179 rows x 1 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from utils import split_stratified_into_train_val_test\n",
|
|||
|
"\n",
|
|||
|
"X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
|
|||
|
" df, stratify_colname=\"Survived\", frac_train=0.80, frac_val=0, frac_test=0.20, random_state=random_state\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"display(\"X_train\", X_train)\n",
|
|||
|
"display(\"y_train\", y_train)\n",
|
|||
|
"\n",
|
|||
|
"display(\"X_test\", X_test)\n",
|
|||
|
"display(\"y_test\", y_test)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Формирование конвейера для классификации данных\n",
|
|||
|
"\n",
|
|||
|
"preprocessing_num -- конвейер для обработки числовых данных: заполнение пропущенных значений и стандартизация\n",
|
|||
|
"\n",
|
|||
|
"preprocessing_cat -- конвейер для обработки категориальных данных: заполнение пропущенных значений и унитарное кодирование\n",
|
|||
|
"\n",
|
|||
|
"features_preprocessing -- трансформер для предобработки признаков\n",
|
|||
|
"\n",
|
|||
|
"features_engineering -- трансформер для конструирования признаков\n",
|
|||
|
"\n",
|
|||
|
"drop_columns -- трансформер для удаления колонок\n",
|
|||
|
"\n",
|
|||
|
"features_postprocessing -- трансформер для унитарного кодирования новых признаков\n",
|
|||
|
"\n",
|
|||
|
"pipeline_end -- основной конвейер предобработки данных и конструирования признаков\n",
|
|||
|
"\n",
|
|||
|
"Конвейер выполняется последовательно.\n",
|
|||
|
"\n",
|
|||
|
"Трансформер выполняется параллельно для указанного набора колонок.\n",
|
|||
|
"\n",
|
|||
|
"Документация: \n",
|
|||
|
"\n",
|
|||
|
"https://scikit-learn.org/1.5/api/sklearn.pipeline.html\n",
|
|||
|
"\n",
|
|||
|
"https://scikit-learn.org/1.5/modules/generated/sklearn.compose.ColumnTransformer.html#sklearn.compose.ColumnTransformer"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 38,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.discriminant_analysis import StandardScaler\n",
|
|||
|
"from sklearn.impute import SimpleImputer\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"from transformers import TitanicFeatures\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"columns_to_drop = [\"Survived\", \"Name\", \"Cabin\", \"Ticket\", \"Embarked\", \"Parch\", \"Fare\"]\n",
|
|||
|
"num_columns = [\n",
|
|||
|
" column\n",
|
|||
|
" for column in df.columns\n",
|
|||
|
" if column not in columns_to_drop and df[column].dtype != \"object\"\n",
|
|||
|
"]\n",
|
|||
|
"cat_columns = [\n",
|
|||
|
" column\n",
|
|||
|
" for column in df.columns\n",
|
|||
|
" if column not in columns_to_drop and df[column].dtype == \"object\"\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"num_imputer = SimpleImputer(strategy=\"median\")\n",
|
|||
|
"num_scaler = StandardScaler()\n",
|
|||
|
"preprocessing_num = Pipeline(\n",
|
|||
|
" [\n",
|
|||
|
" (\"imputer\", num_imputer),\n",
|
|||
|
" (\"scaler\", num_scaler),\n",
|
|||
|
" ]\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
|
|||
|
"cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n",
|
|||
|
"preprocessing_cat = Pipeline(\n",
|
|||
|
" [\n",
|
|||
|
" (\"imputer\", cat_imputer),\n",
|
|||
|
" (\"encoder\", cat_encoder),\n",
|
|||
|
" ]\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"features_preprocessing = ColumnTransformer(\n",
|
|||
|
" verbose_feature_names_out=False,\n",
|
|||
|
" transformers=[\n",
|
|||
|
" (\"prepocessing_num\", preprocessing_num, num_columns),\n",
|
|||
|
" (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
|
|||
|
" (\"prepocessing_features\", cat_imputer, [\"Name\", \"Cabin\"]),\n",
|
|||
|
" ],\n",
|
|||
|
" remainder=\"passthrough\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"features_engineering = ColumnTransformer(\n",
|
|||
|
" verbose_feature_names_out=False,\n",
|
|||
|
" transformers=[\n",
|
|||
|
" (\"add_features\", TitanicFeatures(), [\"Name\", \"Cabin\"]),\n",
|
|||
|
" ],\n",
|
|||
|
" remainder=\"passthrough\",\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"drop_columns = ColumnTransformer(\n",
|
|||
|
" verbose_feature_names_out=False,\n",
|
|||
|
" transformers=[\n",
|
|||
|
" (\"drop_columns\", \"drop\", columns_to_drop),\n",
|
|||
|
" ],\n",
|
|||
|
" remainder=\"passthrough\",\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"features_postprocessing = ColumnTransformer(\n",
|
|||
|
" verbose_feature_names_out=False,\n",
|
|||
|
" transformers=[\n",
|
|||
|
" (\"prepocessing_cat\", preprocessing_cat, [\"Cabin_type\"]),\n",
|
|||
|
" ],\n",
|
|||
|
" remainder=\"passthrough\",\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"pipeline_end = Pipeline(\n",
|
|||
|
" [\n",
|
|||
|
" (\"features_preprocessing\", features_preprocessing),\n",
|
|||
|
" (\"features_engineering\", features_engineering),\n",
|
|||
|
" (\"drop_columns\", drop_columns),\n",
|
|||
|
" (\"features_postprocessing\", features_postprocessing),\n",
|
|||
|
" ]\n",
|
|||
|
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Демонстрация работы конвейера для предобработки данных при классификации"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Cabin_type_B</th>\n",
|
|||
|
" <th>Cabin_type_C</th>\n",
|
|||
|
" <th>Cabin_type_D</th>\n",
|
|||
|
" <th>Cabin_type_E</th>\n",
|
|||
|
" <th>Cabin_type_F</th>\n",
|
|||
|
" <th>Cabin_type_G</th>\n",
|
|||
|
" <th>Cabin_type_T</th>\n",
|
|||
|
" <th>Cabin_type_u</th>\n",
|
|||
|
" <th>Is_married</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Sex_male</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>145</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>-0.379423</td>\n",
|
|||
|
" <td>-0.869506</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>206</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.821241</td>\n",
|
|||
|
" <td>-2.102186</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>349</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.821241</td>\n",
|
|||
|
" <td>-2.025143</td>\n",
|
|||
|
" <td>0.437635</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>329</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0.821241</td>\n",
|
|||
|
" <td>0.132047</td>\n",
|
|||
|
" <td>0.437635</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>289</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>-0.379423</td>\n",
|
|||
|
" <td>0.979514</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>756</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>-0.379423</td>\n",
|
|||
|
" <td>-2.204652</td>\n",
|
|||
|
" <td>0.437635</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>816</th>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>-1.580088</td>\n",
|
|||
|
" <td>-0.099081</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>890</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>-1.580088</td>\n",
|
|||
|
" <td>-0.253166</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>738</th>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>-1.580088</td>\n",
|
|||
|
" <td>0.440217</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>61</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0.821241</td>\n",
|
|||
|
" <td>-0.561336</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>712 rows × 13 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Cabin_type_B Cabin_type_C Cabin_type_D Cabin_type_E \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"145 0.0 0.0 0.0 0.0 \n",
|
|||
|
"206 0.0 0.0 0.0 0.0 \n",
|
|||
|
"349 0.0 0.0 0.0 0.0 \n",
|
|||
|
"329 0.0 0.0 0.0 0.0 \n",
|
|||
|
"289 0.0 0.0 0.0 0.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"756 0.0 0.0 0.0 0.0 \n",
|
|||
|
"816 1.0 0.0 0.0 0.0 \n",
|
|||
|
"890 0.0 1.0 0.0 0.0 \n",
|
|||
|
"738 1.0 0.0 0.0 0.0 \n",
|
|||
|
"61 0.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Cabin_type_F Cabin_type_G Cabin_type_T Cabin_type_u \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"145 0.0 0.0 0.0 1.0 \n",
|
|||
|
"206 0.0 1.0 0.0 0.0 \n",
|
|||
|
"349 0.0 0.0 0.0 1.0 \n",
|
|||
|
"329 0.0 0.0 0.0 1.0 \n",
|
|||
|
"289 0.0 0.0 0.0 1.0 \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"756 0.0 0.0 0.0 1.0 \n",
|
|||
|
"816 0.0 0.0 0.0 0.0 \n",
|
|||
|
"890 0.0 0.0 0.0 0.0 \n",
|
|||
|
"738 0.0 0.0 0.0 0.0 \n",
|
|||
|
"61 0.0 0.0 0.0 1.0 \n",
|
|||
|
"\n",
|
|||
|
" Is_married Pclass Age SibSp Sex_male \n",
|
|||
|
"PassengerId \n",
|
|||
|
"145 0 -0.379423 -0.869506 -0.473465 1.0 \n",
|
|||
|
"206 0 0.821241 -2.102186 -0.473465 0.0 \n",
|
|||
|
"349 0 0.821241 -2.025143 0.437635 1.0 \n",
|
|||
|
"329 1 0.821241 0.132047 0.437635 0.0 \n",
|
|||
|
"289 0 -0.379423 0.979514 -0.473465 1.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"756 0 -0.379423 -2.204652 0.437635 1.0 \n",
|
|||
|
"816 0 -1.580088 -0.099081 -0.473465 1.0 \n",
|
|||
|
"890 0 -1.580088 -0.253166 -0.473465 1.0 \n",
|
|||
|
"738 0 -1.580088 0.440217 -0.473465 1.0 \n",
|
|||
|
"61 0 0.821241 -0.561336 -0.473465 1.0 \n",
|
|||
|
"\n",
|
|||
|
"[712 rows x 13 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 39,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"preprocessing_result = pipeline_end.fit_transform(X_train)\n",
|
|||
|
"preprocessed_df = pd.DataFrame(\n",
|
|||
|
" preprocessing_result,\n",
|
|||
|
" columns=pipeline_end.get_feature_names_out(),\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"preprocessed_df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Формирование набора моделей для классификации\n",
|
|||
|
"\n",
|
|||
|
"logistic -- логистическая регрессия\n",
|
|||
|
"\n",
|
|||
|
"ridge -- гребневая регрессия (в коде заменена логистической регрессией с L2-регуляризацией и сбалансированными весами классов)\n",
|
|||
|
"\n",
|
|||
|
"decision_tree -- дерево решений\n",
|
|||
|
"\n",
|
|||
|
"knn -- k-ближайших соседей\n",
|
|||
|
"\n",
|
|||
|
"naive_bayes -- наивный Байесовский классификатор\n",
|
|||
|
"\n",
|
|||
|
"gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n",
|
|||
|
"\n",
|
|||
|
"random_forest -- метод случайного леса (набор деревьев решений)\n",
|
|||
|
"\n",
|
|||
|
"mlp -- многослойный персептрон (нейронная сеть)\n",
|
|||
|
"\n",
|
|||
|
"Документация: https://scikit-learn.org/1.5/supervised_learning.html"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 40,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree\n",
|
|||
|
"\n",
|
|||
|
"class_models = {\n",
|
|||
|
" \"logistic\": {\"model\": linear_model.LogisticRegression()},\n",
|
|||
|
" # \"ridge\": {\"model\": linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n",
|
|||
|
" \"ridge\": {\"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")},\n",
|
|||
|
" \"decision_tree\": {\n",
|
|||
|
" \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=random_state)\n",
|
|||
|
" },\n",
|
|||
|
" \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n",
|
|||
|
" \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n",
|
|||
|
" \"gradient_boosting\": {\n",
|
|||
|
" \"model\": ensemble.GradientBoostingClassifier(n_estimators=210, random_state=random_state)\n",
|
|||
|
" },\n",
|
|||
|
" \"random_forest\": {\n",
|
|||
|
" \"model\": ensemble.RandomForestClassifier(\n",
|
|||
|
" max_depth=11, class_weight=\"balanced\", random_state=random_state\n",
|
|||
|
" )\n",
|
|||
|
" },\n",
|
|||
|
" \"mlp\": {\n",
|
|||
|
" \"model\": neural_network.MLPClassifier(\n",
|
|||
|
" hidden_layer_sizes=(7,),\n",
|
|||
|
" max_iter=500,\n",
|
|||
|
" early_stopping=True,\n",
|
|||
|
" random_state=random_state,\n",
|
|||
|
" )\n",
|
|||
|
" },\n",
|
|||
|
"}"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Обучение моделей на обучающем наборе данных и оценка на тестовом"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 41,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: logistic\n",
|
|||
|
"Model: ridge\n",
|
|||
|
"Model: decision_tree\n",
|
|||
|
"Model: knn\n",
|
|||
|
"Model: naive_bayes\n",
|
|||
|
"Model: gradient_boosting\n",
|
|||
|
"Model: random_forest\n",
|
|||
|
"Model: mlp\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import numpy as np\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"\n",
|
|||
|
"for model_name in class_models.keys():\n",
|
|||
|
" print(f\"Model: {model_name}\")\n",
|
|||
|
" model = class_models[model_name][\"model\"]\n",
|
|||
|
"\n",
|
|||
|
" model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
|
|||
|
" model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n",
|
|||
|
"\n",
|
|||
|
" y_train_predict = model_pipeline.predict(X_train)\n",
|
|||
|
" y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n",
|
|||
|
" y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n",
|
|||
|
"\n",
|
|||
|
" class_models[model_name][\"pipeline\"] = model_pipeline\n",
|
|||
|
" class_models[model_name][\"probs\"] = y_test_probs\n",
|
|||
|
" class_models[model_name][\"preds\"] = y_test_predict\n",
|
|||
|
"\n",
|
|||
|
" class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n",
|
|||
|
" y_train, y_train_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n",
|
|||
|
" y_test, y_test_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n",
|
|||
|
" y_train, y_train_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n",
|
|||
|
" y_test, y_test_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n",
|
|||
|
" y_train, y_train_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n",
|
|||
|
" y_test, y_test_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n",
|
|||
|
" y_test, y_test_probs\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n",
|
|||
|
" class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n",
|
|||
|
" class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n",
|
|||
|
" y_test, y_test_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n",
|
|||
|
" y_test, y_test_predict\n",
|
|||
|
" )\n",
|
|||
|
" class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n",
|
|||
|
" y_test, y_test_predict\n",
|
|||
|
" )"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Сводная таблица оценок качества для использованных моделей классификации\n",
|
|||
|
"\n",
|
|||
|
"Документация: https://scikit-learn.org/1.5/modules/model_evaluation.html"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Матрица неточностей"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 103,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA1QAAAQ9CAYAAABePQxBAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeVhUZfsH8O9hR3YQWQQRFfd9yXBNRVFTIUnT6OeSS6+a62supQi4UylpqW1ub5qZJZmmZu5blmsuuIOisqgICMo2c35/0IxNMAMDM8yc4fu5rnMVz3PmzH1GnZv7nOc8jyCKoggiIiIiIiLSmpmhAyAiIiIiIpIqFlRERERERETlxIKKiIiIiIionFhQERERERERlRMLKiIiIiIionJiQUVERERERFROLKiIiIiIiIjKiQUVERERERFRObGgIiIiIiIiKicWVGRU1q9fD0EQkJiYqJfjJyYmQhAErF+/XifHO3ToEARBwKFDh3RyPCIiIlMRGRkJQRDKtK8gCIiMjNRvQER6woKKqAxWrVqlsyKMiIiIiEyHhaEDIKpMfn5+eP78OSwtLbV63apVq1C9enWMGDFCpb1Lly54/vw5rKysdBglERGR9M2ZMwezZs0ydBhEeseCiqoUQRBgY2Ojs+OZmZnp9HhERESmICcnB3Z2drCw4K+aZPo45I+M3qpVq9CkSRNYW1vD29sbEyZMQEZGRrH9PvvsM9SpUwe2trZ46aWXcPToUbzyyit45ZVXlPuU9AxVSkoKRo4cCR8fH1hbW8PLywshISHK57hq166Ny5cv4/DhwxAEAYIgKI+p7hmqU6dOoW/fvnBxcYGdnR2aN2+OTz75RLcfDBERkRFQPCt15coVvPnmm3BxcUGnTp1KfIYqLy8PU6dOhbu7OxwcHDBgwADcu3evxOMeOnQIbdu2hY2NDerWrYvPP/9c7XNZ33zzDdq0aQNbW1u4urpiyJAhSEpK0sv5Ev0bLxuQUYuMjERUVBSCgoIwbtw4XLt2DatXr8aff/6J48ePK4furV69Gu+++y46d+6MqVOnIjExEaGhoXBxcYGPj4/G9wgLC8Ply5cxceJE1K5dG2lpadi3bx/u3r2L2rVrIzY2FhMnToS9vT0++OADAICHh4fa4+3btw/9+vWDl5cXJk+eDE9PT8THx2Pnzp2YPHmy7j4cIiIiIzJo0CAEBARg0aJFEEURaWlpxfYZPXo0vvnmG7z55pvo0KEDDhw4gFdffbXYfufOnUPv3r3h5eWFqKgoyGQyREdHw93dvdi+CxcuxNy5czF48GCMHj0aDx8+xMqVK9GlSxecO3cOzs7O+jhdohdEIiOybt06EYCYkJAgpqWliVZWVmKvXr1EmUym3OfTTz8VAYhr164VRVEU8/LyRDc3N7Fdu3ZiQUGBcr/169eLAMSuXbsq2xISEkQA4rp160RRFMUnT56IAMQPP/xQY1xNmjRROY7CwYMHRQDiwYMHRVEUxcLCQtHf31/08/MTnzx5orKvXC4v+wdBREQkEfPmzRMBiEOHDi2xXeH8+fMiAHH8+PEq+7355psiAHHevHnKtv79+4vVqlUT79+/r2y7ceOGaGFhoXLMxMRE0dzcXFy4cKHKMS9evChaWFgUayfSBw75I6P122+/IT8/H1OmTIGZ2Yu/qmPGjIGjoyN27doFADh9+jQeP36MMWPGqIzVDg8Ph4uLi8b3sLW1hZWVFQ4dOoQnT55UOOZz584hISEBU6ZMKXZFrKxTxxIREUnRf/7zH439v/zyCwBg0qRJKu1TpkxR+Vkmk+G3335DaGgovL29le316tVDnz59VPb98ccfIZfLMXjwYDx69Ei5eXp6IiAgAAcPHqzAGRGVDYf8kdG6c+cOAKBBgwYq7VZWVqhTp46yX/HfevXqqexnYWGB2rVra3wPa2trLF26FP/973/h4eGBl19+Gf369cOwYcPg6empdcy3bt0CADRt2lTr1x
IREUmZv7+/xv47d+7AzMwMdevWVWn/d55PS0vD8+fPi+V1oHiuv3HjBkRRREBAQInvqe2svkTlwYKKqrwpU6agf//+iIuLw969ezF37lwsXrwYBw4cQKtWrQwdHhERkSTY2tpW+nvK5XIIgoDdu3fD3Ny8WL+9vX2lx0RVD4f8kdHy8/MDAFy7dk2lPT8/HwkJCcp+xX9v3rypsl9hYaFypr7S1K1bF//973/x66+/4tKlS8jPz8fHH3+s7C/rcD3FVbdLly6VaX8iIqKqws/PD3K5XDmaQ+Hfeb5GjRqwsbEplteB4rm+bt26EEUR/v7+CAoKKra9/PLLuj8Ron9hQUVGKygoCFZWVlixYgVEUVS2f/3118jMzFTOCtS2bVu4ubnhyy+/RGFhoXK/TZs2lfpc1LNnz5Cbm6vSVrduXTg4OCAvL0/ZZmdnV+JU7f/WunVr+Pv7IzY2ttj+/zwHIiKiqkbx/NOKFStU2mNjY1V+Njc3R1BQEOLi4vDgwQNl+82bN7F7926VfQcOHAhzc3NERUUVy7OiKOLx48c6PAOiknHIHxktd3d3zJ49G1FRUejduzcGDBiAa9euYdWqVWjXrh3eeustAEXPVEVGRmLixIno3r07Bg8ejMTERKxfvx5169bVeHfp+vXr6NGjBwYPHozGjRvDwsIC27dvR2pqKoYMGaLcr02bNli9ejUWLFiAevXqoUaNGujevXux45mZmWH16tXo378/WrZsiZEjR8LLywtXr17F5cuXsXfvXt1/UERERBLQsmVLDB06FKtWrUJmZiY6dOiA/fv3l3gnKjIyEr/++is6duyIcePGQSaT4dNPP0XTpk1x/vx55X5169bFggULMHv2bOWSKQ4ODkhISMD27dsxduxYTJ8+vRLPkqoiFlRk1CIjI+Hu7o5PP/0UU6dOhaurK8aOHYtFixapPGj67rvvQhRFfPzxx5g+fTpatGiBHTt2YNKkSbCxsVF7fF9fXwwdOhT79+/H//73P1hYWKBhw4bYunUrwsLClPtFRETgzp07iImJwdOnT9G1a9cSCyoACA4OxsGDBxEVFYWPP/4YcrkcdevWxZgxY3T3wRAREUnQ2rVr4e7ujk2bNiEuLg7du3fHrl274Ovrq7JfmzZtsHv3bkyfPh1z586Fr68voqOjER8fj6tXr6rsO2vWLNSvXx/Lly9HVFQUgKL83qtXLwwYMKDSzo2qLkHkOCQyUXK5HO7u7hg4cCC+/PJLQ4dDREREFRQaGorLly/jxo0bhg6FSInPUJFJyM3NLTZ2euPGjUhPT8crr7ximKCIiIio3J4/f67y840bN/DLL78wr5PR4R0qMgmHDh3C1KlTMWjQILi5ueHs2bP4+uuv0ahRI5w5cwZWVlaGDpGIiIi04OXlhREjRijXnly9ejXy8vJw7tw5tetOERkCn6Eik1C7dm34+vpixYoVSE9Ph6urK4YNG4YlS5awmCIiIpKg3r1749tvv0VKSgqsra0RGBiIRYsWsZgio8M7VEREREREROXEZ6iIiIiIiIjKiQUVERERERFROfEZKgORy+V48OABHBwcNC48S2SqRFHE06dP4e3tDTMz3V7byc3NRX5+vsZ9rKysNK5RRkRVE/MzVWWGzs2ANPMzCyoDefDgQbFF7IiqoqSkJPj4+OjseLm5ufD3s0dKmkzjfp6enkhISJDclzYR6RfzM5HhcjMgzfzMgspAHBwcAAB3ztaGoz1HXhrCa/WbGTqEKq0QBTiGX5T/FnQlPz8fKWky3DztC0eHkv9tZT2Vo17bJOTn50vqC5uI9I/52fBefzXE0CFUWYWyPBy+vcoguRmQbn5mQWUgimEEjvZmGv9ikf5YCJaGDqFq+3t+UX0NqbF3EGDvUPKx5eAwHiIqGfOz4VmYWxs6hCrPELkZkG5+ZkFFRCapQJShQM2qEAWivJKjISIiIk25uahfmvmZBRURmSQ5RMhR8pe2unYiIiLSH025WdEvRS
yoiMgkySFCxoKKiIjIaGjKzYp+KWJBRUQmqUCUo0DN97JUhxQQERFJmabcrOiXIhZURGSS5H9v6vqIiIiocmnKzSi
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x1000 with 16 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.metrics import ConfusionMatrixDisplay\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"\n",
|
|||
|
"_, ax = plt.subplots((len(class_models) + 1) // 2, 2, figsize=(12, 10), sharex=False, sharey=False)\n",
|
|||
|
"for index, key in enumerate(class_models.keys()):\n",
|
|||
|
" c_matrix = class_models[key][\"Confusion_matrix\"]\n",
|
|||
|
" disp = ConfusionMatrixDisplay(\n",
|
|||
|
" confusion_matrix=c_matrix, display_labels=[\"Died\", \"Survived\"]\n",
|
|||
|
" ).plot(ax=ax.flat[index])\n",
|
|||
|
" disp.ax_.set_title(key)\n",
|
|||
|
"\n",
|
|||
|
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Точность, полнота, верность (аккуратность), F-мера"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<style type=\"text/css\">\n",
|
|||
|
"#T_8c750_row0_col0, #T_8c750_row7_col1 {\n",
|
|||
|
" background-color: #a2da37;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row0_col1, #T_8c750_row3_col2, #T_8c750_row5_col3 {\n",
|
|||
|
" background-color: #77d153;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row0_col2 {\n",
|
|||
|
" background-color: #a5db36;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row0_col3 {\n",
|
|||
|
" background-color: #7cd250;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row0_col4, #T_8c750_row0_col5, #T_8c750_row0_col6, #T_8c750_row0_col7 {\n",
|
|||
|
" background-color: #da5a6a;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col0 {\n",
|
|||
|
" background-color: #a0da39;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col1 {\n",
|
|||
|
" background-color: #7fd34e;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col2 {\n",
|
|||
|
" background-color: #93d741;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col3 {\n",
|
|||
|
" background-color: #70cf57;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col4 {\n",
|
|||
|
" background-color: #d45270;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col5, #T_8c750_row1_col6 {\n",
|
|||
|
" background-color: #d6556d;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row1_col7 {\n",
|
|||
|
" background-color: #d7566c;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col0 {\n",
|
|||
|
" background-color: #3bbb75;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col1 {\n",
|
|||
|
" background-color: #84d44b;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col2, #T_8c750_row4_col0 {\n",
|
|||
|
" background-color: #6ece58;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col3 {\n",
|
|||
|
" background-color: #65cb5e;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col4, #T_8c750_row5_col4 {\n",
|
|||
|
" background-color: #a62098;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col5, #T_8c750_row3_col5, #T_8c750_row4_col5, #T_8c750_row4_col7 {\n",
|
|||
|
" background-color: #d35171;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col6 {\n",
|
|||
|
" background-color: #c03a83;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row2_col7 {\n",
|
|||
|
" background-color: #d5536f;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row3_col0 {\n",
|
|||
|
" background-color: #81d34d;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row3_col1, #T_8c750_row6_col2, #T_8c750_row6_col3, #T_8c750_row7_col0 {\n",
|
|||
|
" background-color: #a8db34;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row3_col3 {\n",
|
|||
|
" background-color: #56c667;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row3_col4 {\n",
|
|||
|
" background-color: #c33d80;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row3_col6, #T_8c750_row5_col7 {\n",
|
|||
|
" background-color: #cc4977;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row3_col7 {\n",
|
|||
|
" background-color: #d24f71;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row4_col1 {\n",
|
|||
|
" background-color: #9bd93c;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row4_col2 {\n",
|
|||
|
" background-color: #6ccd5a;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row4_col3 {\n",
|
|||
|
" background-color: #5cc863;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row4_col4 {\n",
|
|||
|
" background-color: #b83289;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row4_col6 {\n",
|
|||
|
" background-color: #c7427c;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row5_col0 {\n",
|
|||
|
" background-color: #2db27d;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row5_col1 {\n",
|
|||
|
" background-color: #26ad81;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row5_col2 {\n",
|
|||
|
" background-color: #89d548;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row5_col5 {\n",
|
|||
|
" background-color: #ae2892;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row5_col6 {\n",
|
|||
|
" background-color: #c43e7f;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row6_col0, #T_8c750_row6_col1, #T_8c750_row7_col2, #T_8c750_row7_col3 {\n",
|
|||
|
" background-color: #26818e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row6_col4, #T_8c750_row7_col5, #T_8c750_row7_col6, #T_8c750_row7_col7 {\n",
|
|||
|
" background-color: #4e02a2;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row6_col5 {\n",
|
|||
|
" background-color: #6700a8;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row6_col6 {\n",
|
|||
|
" background-color: #b32c8e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row6_col7 {\n",
|
|||
|
" background-color: #c5407e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_8c750_row7_col4 {\n",
|
|||
|
" background-color: #5002a2;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"</style>\n",
|
|||
|
"<table id=\"T_8c750\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"blank level0\" > </th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col0\" class=\"col_heading level0 col0\" >Precision_train</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col1\" class=\"col_heading level0 col1\" >Precision_test</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col2\" class=\"col_heading level0 col2\" >Recall_train</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col3\" class=\"col_heading level0 col3\" >Recall_test</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col4\" class=\"col_heading level0 col4\" >Accuracy_train</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col5\" class=\"col_heading level0 col5\" >Accuracy_test</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col6\" class=\"col_heading level0 col6\" >F1_train</th>\n",
|
|||
|
" <th id=\"T_8c750_level0_col7\" class=\"col_heading level0 col7\" >F1_test</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row0\" class=\"row_heading level0 row0\" >random_forest</th>\n",
|
|||
|
" <td id=\"T_8c750_row0_col0\" class=\"data row0 col0\" >0.894340</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col1\" class=\"data row0 col1\" >0.794118</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col2\" class=\"data row0 col2\" >0.868132</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col3\" class=\"data row0 col3\" >0.782609</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col4\" class=\"data row0 col4\" >0.910112</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col5\" class=\"data row0 col5\" >0.837989</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col6\" class=\"data row0 col6\" >0.881041</td>\n",
|
|||
|
" <td id=\"T_8c750_row0_col7\" class=\"data row0 col7\" >0.788321</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row1\" class=\"row_heading level0 row1\" >gradient_boosting</th>\n",
|
|||
|
" <td id=\"T_8c750_row1_col0\" class=\"data row1 col0\" >0.889764</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col1\" class=\"data row1 col1\" >0.800000</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col2\" class=\"data row1 col2\" >0.827839</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col3\" class=\"data row1 col3\" >0.753623</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col4\" class=\"data row1 col4\" >0.894663</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col5\" class=\"data row1 col5\" >0.832402</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col6\" class=\"data row1 col6\" >0.857685</td>\n",
|
|||
|
" <td id=\"T_8c750_row1_col7\" class=\"data row1 col7\" >0.776119</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row2\" class=\"row_heading level0 row2\" >logistic</th>\n",
|
|||
|
" <td id=\"T_8c750_row2_col0\" class=\"data row2 col0\" >0.751880</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col1\" class=\"data row2 col1\" >0.806452</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col2\" class=\"data row2 col2\" >0.732601</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col3\" class=\"data row2 col3\" >0.724638</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col4\" class=\"data row2 col4\" >0.804775</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col5\" class=\"data row2 col5\" >0.826816</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col6\" class=\"data row2 col6\" >0.742115</td>\n",
|
|||
|
" <td id=\"T_8c750_row2_col7\" class=\"data row2 col7\" >0.763359</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row3\" class=\"row_heading level0 row3\" >decision_tree</th>\n",
|
|||
|
" <td id=\"T_8c750_row3_col0\" class=\"data row3 col0\" >0.852459</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col1\" class=\"data row3 col1\" >0.839286</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col2\" class=\"data row3 col2\" >0.761905</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col3\" class=\"data row3 col3\" >0.681159</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col4\" class=\"data row3 col4\" >0.858146</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col5\" class=\"data row3 col5\" >0.826816</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col6\" class=\"data row3 col6\" >0.804642</td>\n",
|
|||
|
" <td id=\"T_8c750_row3_col7\" class=\"data row3 col7\" >0.752000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row4\" class=\"row_heading level0 row4\" >knn</th>\n",
|
|||
|
" <td id=\"T_8c750_row4_col0\" class=\"data row4 col0\" >0.829167</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col1\" class=\"data row4 col1\" >0.827586</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col2\" class=\"data row4 col2\" >0.728938</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col3\" class=\"data row4 col3\" >0.695652</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col4\" class=\"data row4 col4\" >0.838483</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col5\" class=\"data row4 col5\" >0.826816</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col6\" class=\"data row4 col6\" >0.775828</td>\n",
|
|||
|
" <td id=\"T_8c750_row4_col7\" class=\"data row4 col7\" >0.755906</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row5\" class=\"row_heading level0 row5\" >ridge</th>\n",
|
|||
|
" <td id=\"T_8c750_row5_col0\" class=\"data row5 col0\" >0.720395</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col1\" class=\"data row5 col1\" >0.688312</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col2\" class=\"data row5 col2\" >0.802198</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col3\" class=\"data row5 col3\" >0.768116</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col4\" class=\"data row5 col4\" >0.804775</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col5\" class=\"data row5 col5\" >0.776536</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col6\" class=\"data row5 col6\" >0.759099</td>\n",
|
|||
|
" <td id=\"T_8c750_row5_col7\" class=\"data row5 col7\" >0.726027</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row6\" class=\"row_heading level0 row6\" >naive_bayes</th>\n",
|
|||
|
" <td id=\"T_8c750_row6_col0\" class=\"data row6 col0\" >0.554524</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col1\" class=\"data row6 col1\" >0.575472</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col2\" class=\"data row6 col2\" >0.875458</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col3\" class=\"data row6 col3\" >0.884058</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col4\" class=\"data row6 col4\" >0.682584</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col5\" class=\"data row6 col5\" >0.703911</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col6\" class=\"data row6 col6\" >0.678977</td>\n",
|
|||
|
" <td id=\"T_8c750_row6_col7\" class=\"data row6 col7\" >0.697143</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_8c750_level0_row7\" class=\"row_heading level0 row7\" >mlp</th>\n",
|
|||
|
" <td id=\"T_8c750_row7_col0\" class=\"data row7 col0\" >0.900000</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col1\" class=\"data row7 col1\" >0.833333</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col2\" class=\"data row7 col2\" >0.197802</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col3\" class=\"data row7 col3\" >0.217391</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col4\" class=\"data row7 col4\" >0.683989</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col5\" class=\"data row7 col5\" >0.681564</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col6\" class=\"data row7 col6\" >0.324324</td>\n",
|
|||
|
" <td id=\"T_8c750_row7_col7\" class=\"data row7 col7\" >0.344828</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"<pandas.io.formats.style.Styler at 0x1f1ec7ca000>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 43,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Column groups reused for selection and for the two colour scales.\n",
"precision_recall_cols = [\n",
"    \"Precision_train\",\n",
"    \"Precision_test\",\n",
"    \"Recall_train\",\n",
"    \"Recall_test\",\n",
"]\n",
"accuracy_f1_cols = [\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"]\n",
"\n",
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
"    precision_recall_cols + accuracy_f1_cols\n",
"]\n",
"\n",
"# Best test accuracy first; plasma on accuracy/F1, viridis on precision/recall.\n",
"metrics_styler = class_metrics.sort_values(by=\"Accuracy_test\", ascending=False).style\n",
"metrics_styler = metrics_styler.background_gradient(\n",
"    cmap=\"plasma\", low=0.3, high=1, subset=accuracy_f1_cols\n",
")\n",
"metrics_styler.background_gradient(\n",
"    cmap=\"viridis\", low=1, high=0.3, subset=precision_recall_cols\n",
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<style type=\"text/css\">\n",
|
|||
|
"#T_2053c_row0_col0, #T_2053c_row0_col1 {\n",
|
|||
|
" background-color: #a8db34;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row0_col2, #T_2053c_row0_col3, #T_2053c_row0_col4 {\n",
|
|||
|
" background-color: #da5a6a;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row1_col0, #T_2053c_row4_col0, #T_2053c_row4_col1, #T_2053c_row5_col0 {\n",
|
|||
|
" background-color: #93d741;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row1_col1 {\n",
|
|||
|
" background-color: #98d83e;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row1_col2 {\n",
|
|||
|
" background-color: #d7566c;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row1_col3 {\n",
|
|||
|
" background-color: #d45270;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row1_col4, #T_2053c_row4_col4, #T_2053c_row5_col3, #T_2053c_row5_col4 {\n",
|
|||
|
" background-color: #d24f71;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row2_col0 {\n",
|
|||
|
" background-color: #42be71;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row2_col1 {\n",
|
|||
|
" background-color: #7fd34e;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row2_col2, #T_2053c_row3_col2 {\n",
|
|||
|
" background-color: #d5536f;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row2_col3 {\n",
|
|||
|
" background-color: #be3885;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row2_col4 {\n",
|
|||
|
" background-color: #b6308b;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row3_col0 {\n",
|
|||
|
" background-color: #9dd93b;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row3_col1 {\n",
|
|||
|
" background-color: #a0da39;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row3_col3, #T_2053c_row3_col4 {\n",
|
|||
|
" background-color: #d6556d;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row4_col2 {\n",
|
|||
|
" background-color: #cc4977;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row4_col3 {\n",
|
|||
|
" background-color: #d35171;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row5_col1 {\n",
|
|||
|
" background-color: #90d743;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row5_col2 {\n",
|
|||
|
" background-color: #a82296;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row6_col0 {\n",
|
|||
|
" background-color: #21908d;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row6_col1 {\n",
|
|||
|
" background-color: #6ece58;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row6_col2 {\n",
|
|||
|
" background-color: #a11b9b;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row6_col3 {\n",
|
|||
|
" background-color: #9e199d;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row6_col4 {\n",
|
|||
|
" background-color: #9c179e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row7_col0, #T_2053c_row7_col1 {\n",
|
|||
|
" background-color: #26818e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_2053c_row7_col2, #T_2053c_row7_col3, #T_2053c_row7_col4 {\n",
|
|||
|
" background-color: #4e02a2;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"</style>\n",
|
|||
|
"<table id=\"T_2053c\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"blank level0\" > </th>\n",
|
|||
|
" <th id=\"T_2053c_level0_col0\" class=\"col_heading level0 col0\" >Accuracy_test</th>\n",
|
|||
|
" <th id=\"T_2053c_level0_col1\" class=\"col_heading level0 col1\" >F1_test</th>\n",
|
|||
|
" <th id=\"T_2053c_level0_col2\" class=\"col_heading level0 col2\" >ROC_AUC_test</th>\n",
|
|||
|
" <th id=\"T_2053c_level0_col3\" class=\"col_heading level0 col3\" >Cohen_kappa_test</th>\n",
|
|||
|
" <th id=\"T_2053c_level0_col4\" class=\"col_heading level0 col4\" >MCC_test</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row0\" class=\"row_heading level0 row0\" >random_forest</th>\n",
|
|||
|
" <td id=\"T_2053c_row0_col0\" class=\"data row0 col0\" >0.837989</td>\n",
|
|||
|
" <td id=\"T_2053c_row0_col1\" class=\"data row0 col1\" >0.788321</td>\n",
|
|||
|
" <td id=\"T_2053c_row0_col2\" class=\"data row0 col2\" >0.858893</td>\n",
|
|||
|
" <td id=\"T_2053c_row0_col3\" class=\"data row0 col3\" >0.657111</td>\n",
|
|||
|
" <td id=\"T_2053c_row0_col4\" class=\"data row0 col4\" >0.657157</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row1\" class=\"row_heading level0 row1\" >logistic</th>\n",
|
|||
|
" <td id=\"T_2053c_row1_col0\" class=\"data row1 col0\" >0.826816</td>\n",
|
|||
|
" <td id=\"T_2053c_row1_col1\" class=\"data row1 col1\" >0.763359</td>\n",
|
|||
|
" <td id=\"T_2053c_row1_col2\" class=\"data row1 col2\" >0.854084</td>\n",
|
|||
|
" <td id=\"T_2053c_row1_col3\" class=\"data row1 col3\" >0.627409</td>\n",
|
|||
|
" <td id=\"T_2053c_row1_col4\" class=\"data row1 col4\" >0.629641</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row2\" class=\"row_heading level0 row2\" >ridge</th>\n",
|
|||
|
" <td id=\"T_2053c_row2_col0\" class=\"data row2 col0\" >0.776536</td>\n",
|
|||
|
" <td id=\"T_2053c_row2_col1\" class=\"data row2 col1\" >0.726027</td>\n",
|
|||
|
" <td id=\"T_2053c_row2_col2\" class=\"data row2 col2\" >0.851054</td>\n",
|
|||
|
" <td id=\"T_2053c_row2_col3\" class=\"data row2 col3\" >0.538303</td>\n",
|
|||
|
" <td id=\"T_2053c_row2_col4\" class=\"data row2 col4\" >0.540613</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row3\" class=\"row_heading level0 row3\" >gradient_boosting</th>\n",
|
|||
|
" <td id=\"T_2053c_row3_col0\" class=\"data row3 col0\" >0.832402</td>\n",
|
|||
|
" <td id=\"T_2053c_row3_col1\" class=\"data row3 col1\" >0.776119</td>\n",
|
|||
|
" <td id=\"T_2053c_row3_col2\" class=\"data row3 col2\" >0.850922</td>\n",
|
|||
|
" <td id=\"T_2053c_row3_col3\" class=\"data row3 col3\" >0.642381</td>\n",
|
|||
|
" <td id=\"T_2053c_row3_col4\" class=\"data row3 col4\" >0.643113</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row4\" class=\"row_heading level0 row4\" >knn</th>\n",
|
|||
|
" <td id=\"T_2053c_row4_col0\" class=\"data row4 col0\" >0.826816</td>\n",
|
|||
|
" <td id=\"T_2053c_row4_col1\" class=\"data row4 col1\" >0.755906</td>\n",
|
|||
|
" <td id=\"T_2053c_row4_col2\" class=\"data row4 col2\" >0.838735</td>\n",
|
|||
|
" <td id=\"T_2053c_row4_col3\" class=\"data row4 col3\" >0.623260</td>\n",
|
|||
|
" <td id=\"T_2053c_row4_col4\" class=\"data row4 col4\" >0.628905</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row5\" class=\"row_heading level0 row5\" >decision_tree</th>\n",
|
|||
|
" <td id=\"T_2053c_row5_col0\" class=\"data row5 col0\" >0.826816</td>\n",
|
|||
|
" <td id=\"T_2053c_row5_col1\" class=\"data row5 col1\" >0.752000</td>\n",
|
|||
|
" <td id=\"T_2053c_row5_col2\" class=\"data row5 col2\" >0.794137</td>\n",
|
|||
|
" <td id=\"T_2053c_row5_col3\" class=\"data row5 col3\" >0.621151</td>\n",
|
|||
|
" <td id=\"T_2053c_row5_col4\" class=\"data row5 col4\" >0.629142</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row6\" class=\"row_heading level0 row6\" >naive_bayes</th>\n",
|
|||
|
" <td id=\"T_2053c_row6_col0\" class=\"data row6 col0\" >0.703911</td>\n",
|
|||
|
" <td id=\"T_2053c_row6_col1\" class=\"data row6 col1\" >0.697143</td>\n",
|
|||
|
" <td id=\"T_2053c_row6_col2\" class=\"data row6 col2\" >0.785903</td>\n",
|
|||
|
" <td id=\"T_2053c_row6_col3\" class=\"data row6 col3\" >0.431814</td>\n",
|
|||
|
" <td id=\"T_2053c_row6_col4\" class=\"data row6 col4\" >0.470403</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_2053c_level0_row7\" class=\"row_heading level0 row7\" >mlp</th>\n",
|
|||
|
" <td id=\"T_2053c_row7_col0\" class=\"data row7 col0\" >0.681564</td>\n",
|
|||
|
" <td id=\"T_2053c_row7_col1\" class=\"data row7 col1\" >0.344828</td>\n",
|
|||
|
" <td id=\"T_2053c_row7_col2\" class=\"data row7 col2\" >0.712714</td>\n",
|
|||
|
" <td id=\"T_2053c_row7_col3\" class=\"data row7 col3\" >0.220490</td>\n",
|
|||
|
" <td id=\"T_2053c_row7_col4\" class=\"data row7 col4\" >0.307678</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"<pandas.io.formats.style.Styler at 0x1f1e8995160>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 44,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Test-split metrics shown in this table, in display order.\n",
"class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
"    [\"Accuracy_test\", \"F1_test\", \"ROC_AUC_test\", \"Cohen_kappa_test\", \"MCC_test\"]\n",
"]\n",
"\n",
"plasma_cols = [\"ROC_AUC_test\", \"MCC_test\", \"Cohen_kappa_test\"]\n",
"viridis_cols = [\"Accuracy_test\", \"F1_test\"]\n",
"\n",
"# Highest ROC AUC first, then colour each column group with its own colormap.\n",
"ranked_by_auc = class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False)\n",
"auc_styler = ranked_by_auc.style.background_gradient(\n",
"    cmap=\"plasma\", low=0.3, high=1, subset=plasma_cols\n",
")\n",
"auc_styler.background_gradient(cmap=\"viridis\", low=1, high=0.3, subset=viridis_cols)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 45,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'random_forest'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Rank straight from class_models: the `class_metrics` name is reassigned by\n",
"# earlier cells with different column subsets, and only one of them contains\n",
"# MCC_test, so reading it here would make this cell depend on execution order.\n",
"mcc_ranking = pd.DataFrame.from_dict(class_models, \"index\")[[\"MCC_test\"]].sort_values(\n",
"    by=\"MCC_test\", ascending=False\n",
")\n",
"# The index label of the top row is the model key used in class_models.\n",
"best_model = str(mcc_ranking.index[0])\n",
|
|||
|
"\n",
|
|||
|
"display(best_model)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Вывод данных с ошибкой предсказания для оценки"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'Error items count: 29'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" <th>Predicted</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Parch</th>\n",
|
|||
|
" <th>Ticket</th>\n",
|
|||
|
" <th>Fare</th>\n",
|
|||
|
" <th>Cabin</th>\n",
|
|||
|
" <th>Embarked</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>PassengerId</th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th></th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>26</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Asplund, Mrs. Carl Oscar (Selma Augusta Emilia...</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>347077</td>\n",
|
|||
|
" <td>31.3875</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>72</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Goodwin, Miss. Lillian Amy</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>16.0</td>\n",
|
|||
|
" <td>5</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>CA 2144</td>\n",
|
|||
|
" <td>46.9000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>103</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>White, Mr. Richard Frasar</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>21.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>35281</td>\n",
|
|||
|
" <td>77.2875</td>\n",
|
|||
|
" <td>D26</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>108</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Moss, Mr. Albert Johan</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>312991</td>\n",
|
|||
|
" <td>7.7750</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>128</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Madsen, Mr. Fridtjof Arne</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>24.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>C 17369</td>\n",
|
|||
|
" <td>7.1417</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>193</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Andersen-Jensen, Miss. Carla Christine Nielsine</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>19.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>350046</td>\n",
|
|||
|
" <td>7.8542</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>241</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Zabour, Miss. Thamine</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2665</td>\n",
|
|||
|
" <td>14.4542</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>272</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Tornquist, Mr. William Henry</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>25.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>LINE</td>\n",
|
|||
|
" <td>0.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>293</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Levy, Mr. Rene Jacques</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>36.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>SC/Paris 2163</td>\n",
|
|||
|
" <td>12.8750</td>\n",
|
|||
|
" <td>D</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>352</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Williams-Lambert, Mr. Fletcher Fellows</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>113510</td>\n",
|
|||
|
" <td>35.0000</td>\n",
|
|||
|
" <td>C128</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>358</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Funk, Miss. Annie Clemmer</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>38.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>237671</td>\n",
|
|||
|
" <td>13.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>378</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Widener, Mr. Harry Elkins</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>27.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>113503</td>\n",
|
|||
|
" <td>211.5000</td>\n",
|
|||
|
" <td>C82</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>445</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Johannesen-Bratthammer, Mr. Bernt</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>65306</td>\n",
|
|||
|
" <td>8.1125</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>450</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Peuchen, Major. Arthur Godfrey</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>113786</td>\n",
|
|||
|
" <td>30.5000</td>\n",
|
|||
|
" <td>C104</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>508</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Bradley, Mr. George (\"George Arthur Brayton\")</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>111427</td>\n",
|
|||
|
" <td>26.5500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>511</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Daly, Mr. Eugene Patrick</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>29.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>382651</td>\n",
|
|||
|
" <td>7.7500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>Q</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>570</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Jonsson, Mr. Carl</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>32.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>350417</td>\n",
|
|||
|
" <td>7.8542</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>579</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Caram, Mrs. Joseph (Maria Elias)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2689</td>\n",
|
|||
|
" <td>14.4583</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>584</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Ross, Mr. John Hugo</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>36.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>13049</td>\n",
|
|||
|
" <td>40.1250</td>\n",
|
|||
|
" <td>A10</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>588</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Frolicher-Stehli, Mr. Maxmillian</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>60.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>13567</td>\n",
|
|||
|
" <td>79.2000</td>\n",
|
|||
|
" <td>B41</td>\n",
|
|||
|
" <td>C</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>618</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Lobb, Mrs. William Arthur (Cordelia K Stanlick)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>26.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>A/5. 3336</td>\n",
|
|||
|
" <td>16.1000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>658</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Bourke, Mrs. John (Catherine)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>32.0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>364849</td>\n",
|
|||
|
" <td>15.5000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>Q</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>661</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Frauenthal, Dr. Henry William</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>PC 17611</td>\n",
|
|||
|
" <td>133.6500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>674</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Wilhelms, Mr. Charles</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>31.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>244270</td>\n",
|
|||
|
" <td>13.0000</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>745</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Stranden, Mr. Juho</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>31.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>STON/O 2. 3101288</td>\n",
|
|||
|
" <td>7.9250</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>773</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>Mack, Mrs. (Mary)</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>57.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>S.O./P.P. 3</td>\n",
|
|||
|
" <td>10.5000</td>\n",
|
|||
|
" <td>E77</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>807</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Andrews, Mr. Thomas Jr</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>39.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>112050</td>\n",
|
|||
|
" <td>0.0000</td>\n",
|
|||
|
" <td>A36</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>814</th>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>Andersson, Miss. Ebba Iris Alfrida</td>\n",
|
|||
|
" <td>female</td>\n",
|
|||
|
" <td>6.0</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>347082</td>\n",
|
|||
|
" <td>31.2750</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>829</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>3</td>\n",
|
|||
|
" <td>McCormack, Mr. Thomas Joseph</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>367228</td>\n",
|
|||
|
" <td>7.7500</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>Q</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived Predicted Pclass \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"26 1 0 3 \n",
|
|||
|
"72 0 1 3 \n",
|
|||
|
"103 0 1 1 \n",
|
|||
|
"108 1 0 3 \n",
|
|||
|
"128 1 0 3 \n",
|
|||
|
"193 1 0 3 \n",
|
|||
|
"241 0 1 3 \n",
|
|||
|
"272 1 0 3 \n",
|
|||
|
"293 0 1 2 \n",
|
|||
|
"352 0 1 1 \n",
|
|||
|
"358 0 1 2 \n",
|
|||
|
"378 0 1 1 \n",
|
|||
|
"445 1 0 3 \n",
|
|||
|
"450 1 0 1 \n",
|
|||
|
"508 1 0 1 \n",
|
|||
|
"511 1 0 3 \n",
|
|||
|
"570 1 0 3 \n",
|
|||
|
"579 0 1 3 \n",
|
|||
|
"584 0 1 1 \n",
|
|||
|
"588 1 0 1 \n",
|
|||
|
"618 0 1 3 \n",
|
|||
|
"658 0 1 3 \n",
|
|||
|
"661 1 0 1 \n",
|
|||
|
"674 1 0 2 \n",
|
|||
|
"745 1 0 3 \n",
|
|||
|
"773 0 1 2 \n",
|
|||
|
"807 0 1 1 \n",
|
|||
|
"814 0 1 3 \n",
|
|||
|
"829 1 0 3 \n",
|
|||
|
"\n",
|
|||
|
" Name Sex Age \\\n",
|
|||
|
"PassengerId \n",
|
|||
|
"26 Asplund, Mrs. Carl Oscar (Selma Augusta Emilia... female 38.0 \n",
|
|||
|
"72 Goodwin, Miss. Lillian Amy female 16.0 \n",
|
|||
|
"103 White, Mr. Richard Frasar male 21.0 \n",
|
|||
|
"108 Moss, Mr. Albert Johan male NaN \n",
|
|||
|
"128 Madsen, Mr. Fridtjof Arne male 24.0 \n",
|
|||
|
"193 Andersen-Jensen, Miss. Carla Christine Nielsine female 19.0 \n",
|
|||
|
"241 Zabour, Miss. Thamine female NaN \n",
|
|||
|
"272 Tornquist, Mr. William Henry male 25.0 \n",
|
|||
|
"293 Levy, Mr. Rene Jacques male 36.0 \n",
|
|||
|
"352 Williams-Lambert, Mr. Fletcher Fellows male NaN \n",
|
|||
|
"358 Funk, Miss. Annie Clemmer female 38.0 \n",
|
|||
|
"378 Widener, Mr. Harry Elkins male 27.0 \n",
|
|||
|
"445 Johannesen-Bratthammer, Mr. Bernt male NaN \n",
|
|||
|
"450 Peuchen, Major. Arthur Godfrey male 52.0 \n",
|
|||
|
"508 Bradley, Mr. George (\"George Arthur Brayton\") male NaN \n",
|
|||
|
"511 Daly, Mr. Eugene Patrick male 29.0 \n",
|
|||
|
"570 Jonsson, Mr. Carl male 32.0 \n",
|
|||
|
"579 Caram, Mrs. Joseph (Maria Elias) female NaN \n",
|
|||
|
"584 Ross, Mr. John Hugo male 36.0 \n",
|
|||
|
"588 Frolicher-Stehli, Mr. Maxmillian male 60.0 \n",
|
|||
|
"618 Lobb, Mrs. William Arthur (Cordelia K Stanlick) female 26.0 \n",
|
|||
|
"658 Bourke, Mrs. John (Catherine) female 32.0 \n",
|
|||
|
"661 Frauenthal, Dr. Henry William male 50.0 \n",
|
|||
|
"674 Wilhelms, Mr. Charles male 31.0 \n",
|
|||
|
"745 Stranden, Mr. Juho male 31.0 \n",
|
|||
|
"773 Mack, Mrs. (Mary) female 57.0 \n",
|
|||
|
"807 Andrews, Mr. Thomas Jr male 39.0 \n",
|
|||
|
"814 Andersson, Miss. Ebba Iris Alfrida female 6.0 \n",
|
|||
|
"829 McCormack, Mr. Thomas Joseph male NaN \n",
|
|||
|
"\n",
|
|||
|
" SibSp Parch Ticket Fare Cabin Embarked \n",
|
|||
|
"PassengerId \n",
|
|||
|
"26 1 5 347077 31.3875 NaN S \n",
|
|||
|
"72 5 2 CA 2144 46.9000 NaN S \n",
|
|||
|
"103 0 1 35281 77.2875 D26 S \n",
|
|||
|
"108 0 0 312991 7.7750 NaN S \n",
|
|||
|
"128 0 0 C 17369 7.1417 NaN S \n",
|
|||
|
"193 1 0 350046 7.8542 NaN S \n",
|
|||
|
"241 1 0 2665 14.4542 NaN C \n",
|
|||
|
"272 0 0 LINE 0.0000 NaN S \n",
|
|||
|
"293 0 0 SC/Paris 2163 12.8750 D C \n",
|
|||
|
"352 0 0 113510 35.0000 C128 S \n",
|
|||
|
"358 0 0 237671 13.0000 NaN S \n",
|
|||
|
"378 0 2 113503 211.5000 C82 C \n",
|
|||
|
"445 0 0 65306 8.1125 NaN S \n",
|
|||
|
"450 0 0 113786 30.5000 C104 S \n",
|
|||
|
"508 0 0 111427 26.5500 NaN S \n",
|
|||
|
"511 0 0 382651 7.7500 NaN Q \n",
|
|||
|
"570 0 0 350417 7.8542 NaN S \n",
|
|||
|
"579 1 0 2689 14.4583 NaN C \n",
|
|||
|
"584 0 0 13049 40.1250 A10 C \n",
|
|||
|
"588 1 1 13567 79.2000 B41 C \n",
|
|||
|
"618 1 0 A/5. 3336 16.1000 NaN S \n",
|
|||
|
"658 1 1 364849 15.5000 NaN Q \n",
|
|||
|
"661 2 0 PC 17611 133.6500 NaN S \n",
|
|||
|
"674 0 0 244270 13.0000 NaN S \n",
|
|||
|
"745 0 0 STON/O 2. 3101288 7.9250 NaN S \n",
|
|||
|
"773 0 0 S.O./P.P. 3 10.5000 E77 S \n",
|
|||
|
"807 0 0 112050 0.0000 A36 S \n",
|
|||
|
"814 4 2 347082 31.2750 NaN S \n",
|
|||
|
"829 0 0 367228 7.7500 NaN Q "
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 94,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Run X_test through the fitted preprocessing pipeline and label the columns.\n",
"preprocessing_result = pipeline_end.transform(X_test)\n",
"feature_names = pipeline_end.get_feature_names_out()\n",
"preprocessed_df = pd.DataFrame(preprocessing_result, columns=feature_names)\n",
"\n",
"y_pred = class_models[best_model][\"preds\"]\n",
"\n",
"# Rows of y_test whose true label differs from the stored prediction.\n",
"is_misclassified = y_test[\"Survived\"] != y_pred\n",
"error_index = y_test[is_misclassified].index.tolist()\n",
"display(f\"Error items count: {len(error_index)}\")\n",
"\n",
"# Put the predicted label at column position 1 of the misclassified X_test rows.\n",
"error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n",
"error_df = X_test.loc[error_index].copy()\n",
"error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n",
"error_df.sort_index()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Пример использования обученной модели (конвейера) для предсказания"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 49,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Survived</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Sex</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Parch</th>\n",
|
|||
|
" <th>Ticket</th>\n",
|
|||
|
" <th>Fare</th>\n",
|
|||
|
" <th>Cabin</th>\n",
|
|||
|
" <th>Embarked</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>450</th>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>Peuchen, Major. Arthur Godfrey</td>\n",
|
|||
|
" <td>male</td>\n",
|
|||
|
" <td>52.0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>113786</td>\n",
|
|||
|
" <td>30.5</td>\n",
|
|||
|
" <td>C104</td>\n",
|
|||
|
" <td>S</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Survived Pclass Name Sex Age SibSp Parch \\\n",
|
|||
|
"450 1 1 Peuchen, Major. Arthur Godfrey male 52.0 0 0 \n",
|
|||
|
"\n",
|
|||
|
" Ticket Fare Cabin Embarked \n",
|
|||
|
"450 113786 30.5 C104 S "
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Cabin_type_B</th>\n",
|
|||
|
" <th>Cabin_type_C</th>\n",
|
|||
|
" <th>Cabin_type_D</th>\n",
|
|||
|
" <th>Cabin_type_E</th>\n",
|
|||
|
" <th>Cabin_type_F</th>\n",
|
|||
|
" <th>Cabin_type_G</th>\n",
|
|||
|
" <th>Cabin_type_T</th>\n",
|
|||
|
" <th>Cabin_type_u</th>\n",
|
|||
|
" <th>Is_married</th>\n",
|
|||
|
" <th>Pclass</th>\n",
|
|||
|
" <th>Age</th>\n",
|
|||
|
" <th>SibSp</th>\n",
|
|||
|
" <th>Sex_male</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>450</th>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>0.0</td>\n",
|
|||
|
" <td>-1.580088</td>\n",
|
|||
|
" <td>1.749939</td>\n",
|
|||
|
" <td>-0.473465</td>\n",
|
|||
|
" <td>1.0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Cabin_type_B Cabin_type_C Cabin_type_D Cabin_type_E Cabin_type_F \\\n",
|
|||
|
"450 0.0 1.0 0.0 0.0 0.0 \n",
|
|||
|
"\n",
|
|||
|
" Cabin_type_G Cabin_type_T Cabin_type_u Is_married Pclass Age \\\n",
|
|||
|
"450 0.0 0.0 0.0 0.0 -1.580088 1.749939 \n",
|
|||
|
"\n",
|
|||
|
" SibSp Sex_male \n",
|
|||
|
"450 -0.473465 1.0 "
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'predicted: 0 (proba: [0.91145747 0.08854253])'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"'real: 1'"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"model = class_models[best_model][\"pipeline\"]\n",
|
|||
|
"\n",
|
|||
|
"example_id = 450\n",
|
|||
|
"test = pd.DataFrame(X_test.loc[example_id, :]).T\n",
|
|||
|
"test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T\n",
|
|||
|
"display(test)\n",
|
|||
|
"display(test_preprocessed)\n",
|
|||
|
"result_proba = model.predict_proba(test)[0]\n",
|
|||
|
"result = model.predict(test)[0]\n",
|
|||
|
"real = int(y_test.loc[example_id].values[0])\n",
|
|||
|
"display(f\"predicted: {result} (proba: {result_proba})\")\n",
|
|||
|
"display(f\"real: {real}\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"#### Подбор гиперпараметров методом поиска по сетке\n",
|
|||
|
"\n",
|
|||
|
"https://www.kaggle.com/code/sociopath00/random-forest-using-gridsearchcv\n",
|
|||
|
"\n",
|
|||
|
"https://scikit-learn.org/stable/modules/generated/sklearn.model_selection.GridSearchCV.html"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 89,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"{'model__criterion': 'gini',\n",
|
|||
|
" 'model__max_depth': 7,\n",
|
|||
|
" 'model__max_features': 'sqrt',\n",
|
|||
|
" 'model__n_estimators': 30}"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 89,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.model_selection import GridSearchCV\n",
|
|||
|
"\n",
|
|||
|
"optimized_model_type = \"random_forest\"\n",
|
|||
|
"\n",
|
|||
|
"random_forest_model = class_models[optimized_model_type][\"pipeline\"]\n",
|
|||
|
"\n",
|
|||
|
"param_grid = {\n",
|
|||
|
" \"model__n_estimators\": [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
|
|||
|
" \"model__max_features\": [\"sqrt\", \"log2\", 2],\n",
|
|||
|
" \"model__max_depth\": [2, 3, 4, 5, 6, 7, 8, 9 ,10],\n",
|
|||
|
" \"model__criterion\": [\"gini\", \"entropy\", \"log_loss\"],\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"gs_optomizer = GridSearchCV(\n",
|
|||
|
" estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n",
|
|||
|
")\n",
|
|||
|
"gs_optomizer.fit(X_train, y_train.values.ravel())\n",
|
|||
|
"gs_optomizer.best_params_"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Обучение модели с новыми гиперпараметрами"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 90,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"optimized_model = ensemble.RandomForestClassifier(\n",
|
|||
|
" random_state=random_state,\n",
|
|||
|
" criterion=\"gini\",\n",
|
|||
|
" max_depth=7,\n",
|
|||
|
" max_features=\"sqrt\",\n",
|
|||
|
" n_estimators=30,\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"result = {}\n",
|
|||
|
"\n",
|
|||
|
"result[\"pipeline\"] = Pipeline([(\"pipeline\", pipeline_end), (\"model\", optimized_model)]).fit(X_train, y_train.values.ravel())\n",
|
|||
|
"result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n",
|
|||
|
"result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n",
|
|||
|
"result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n",
|
|||
|
"\n",
|
|||
|
"result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n",
|
|||
|
"result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n",
|
|||
|
"result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n",
|
|||
|
"result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n",
|
|||
|
"result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n",
|
|||
|
"result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n",
|
|||
|
"result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n",
|
|||
|
"result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n",
|
|||
|
"result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n",
|
|||
|
"result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n",
|
|||
|
"result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n",
|
|||
|
"result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Формирование данных для оценки старой и новой версии модели"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 98,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n",
|
|||
|
"optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
|
|||
|
" data=class_models[optimized_model_type]\n",
|
|||
|
")\n",
|
|||
|
"optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
|
|||
|
" data=result\n",
|
|||
|
")\n",
|
|||
|
"optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n",
|
|||
|
"optimized_metrics = optimized_metrics.set_index(\"Name\")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Оценка метрик качества старой и новой модели"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 99,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<style type=\"text/css\">\n",
|
|||
|
"#T_c81c1_row0_col0, #T_c81c1_row0_col2, #T_c81c1_row0_col3, #T_c81c1_row1_col1 {\n",
|
|||
|
" background-color: #a8db34;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_c81c1_row0_col1, #T_c81c1_row1_col0, #T_c81c1_row1_col2, #T_c81c1_row1_col3 {\n",
|
|||
|
" background-color: #26818e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_c81c1_row0_col4, #T_c81c1_row0_col6, #T_c81c1_row0_col7 {\n",
|
|||
|
" background-color: #da5a6a;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_c81c1_row0_col5, #T_c81c1_row1_col5 {\n",
|
|||
|
" background-color: #0d0887;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_c81c1_row1_col4, #T_c81c1_row1_col6, #T_c81c1_row1_col7 {\n",
|
|||
|
" background-color: #4e02a2;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"</style>\n",
|
|||
|
"<table id=\"T_c81c1\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"blank level0\" > </th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col0\" class=\"col_heading level0 col0\" >Precision_train</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col1\" class=\"col_heading level0 col1\" >Precision_test</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col2\" class=\"col_heading level0 col2\" >Recall_train</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col3\" class=\"col_heading level0 col3\" >Recall_test</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col4\" class=\"col_heading level0 col4\" >Accuracy_train</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col5\" class=\"col_heading level0 col5\" >Accuracy_test</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col6\" class=\"col_heading level0 col6\" >F1_train</th>\n",
|
|||
|
" <th id=\"T_c81c1_level0_col7\" class=\"col_heading level0 col7\" >F1_test</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"index_name level0\" >Name</th>\n",
|
|||
|
" <th class=\"blank col0\" > </th>\n",
|
|||
|
" <th class=\"blank col1\" > </th>\n",
|
|||
|
" <th class=\"blank col2\" > </th>\n",
|
|||
|
" <th class=\"blank col3\" > </th>\n",
|
|||
|
" <th class=\"blank col4\" > </th>\n",
|
|||
|
" <th class=\"blank col5\" > </th>\n",
|
|||
|
" <th class=\"blank col6\" > </th>\n",
|
|||
|
" <th class=\"blank col7\" > </th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_c81c1_level0_row0\" class=\"row_heading level0 row0\" >Old</th>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col0\" class=\"data row0 col0\" >0.894340</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col1\" class=\"data row0 col1\" >0.794118</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col2\" class=\"data row0 col2\" >0.868132</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col3\" class=\"data row0 col3\" >0.782609</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col4\" class=\"data row0 col4\" >0.910112</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col5\" class=\"data row0 col5\" >0.837989</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col6\" class=\"data row0 col6\" >0.881041</td>\n",
|
|||
|
" <td id=\"T_c81c1_row0_col7\" class=\"data row0 col7\" >0.788321</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_c81c1_level0_row1\" class=\"row_heading level0 row1\" >New</th>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col0\" class=\"data row1 col0\" >0.867220</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col1\" class=\"data row1 col1\" >0.822581</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col2\" class=\"data row1 col2\" >0.765568</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col3\" class=\"data row1 col3\" >0.739130</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col4\" class=\"data row1 col4\" >0.865169</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col5\" class=\"data row1 col5\" >0.837989</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col6\" class=\"data row1 col6\" >0.813230</td>\n",
|
|||
|
" <td id=\"T_c81c1_row1_col7\" class=\"data row1 col7\" >0.778626</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"<pandas.io.formats.style.Styler at 0x1f1f1135d00>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 99,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"optimized_metrics[\n",
|
|||
|
" [\n",
|
|||
|
" \"Precision_train\",\n",
|
|||
|
" \"Precision_test\",\n",
|
|||
|
" \"Recall_train\",\n",
|
|||
|
" \"Recall_test\",\n",
|
|||
|
" \"Accuracy_train\",\n",
|
|||
|
" \"Accuracy_test\",\n",
|
|||
|
" \"F1_train\",\n",
|
|||
|
" \"F1_test\",\n",
|
|||
|
" ]\n",
|
|||
|
"].style.background_gradient(\n",
|
|||
|
" cmap=\"plasma\",\n",
|
|||
|
" low=0.3,\n",
|
|||
|
" high=1,\n",
|
|||
|
" subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
|
|||
|
").background_gradient(\n",
|
|||
|
" cmap=\"viridis\",\n",
|
|||
|
" low=1,\n",
|
|||
|
" high=0.3,\n",
|
|||
|
" subset=[\n",
|
|||
|
" \"Precision_train\",\n",
|
|||
|
" \"Precision_test\",\n",
|
|||
|
" \"Recall_train\",\n",
|
|||
|
" \"Recall_test\",\n",
|
|||
|
" ],\n",
|
|||
|
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 100,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<style type=\"text/css\">\n",
|
|||
|
"#T_fbb13_row0_col0, #T_fbb13_row1_col0 {\n",
|
|||
|
" background-color: #440154;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_fbb13_row0_col1 {\n",
|
|||
|
" background-color: #a8db34;\n",
|
|||
|
" color: #000000;\n",
|
|||
|
"}\n",
|
|||
|
"#T_fbb13_row0_col2, #T_fbb13_row1_col3, #T_fbb13_row1_col4 {\n",
|
|||
|
" background-color: #4e02a2;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_fbb13_row0_col3, #T_fbb13_row0_col4, #T_fbb13_row1_col2 {\n",
|
|||
|
" background-color: #da5a6a;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"#T_fbb13_row1_col1 {\n",
|
|||
|
" background-color: #26818e;\n",
|
|||
|
" color: #f1f1f1;\n",
|
|||
|
"}\n",
|
|||
|
"</style>\n",
|
|||
|
"<table id=\"T_fbb13\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"blank level0\" > </th>\n",
|
|||
|
" <th id=\"T_fbb13_level0_col0\" class=\"col_heading level0 col0\" >Accuracy_test</th>\n",
|
|||
|
" <th id=\"T_fbb13_level0_col1\" class=\"col_heading level0 col1\" >F1_test</th>\n",
|
|||
|
" <th id=\"T_fbb13_level0_col2\" class=\"col_heading level0 col2\" >ROC_AUC_test</th>\n",
|
|||
|
" <th id=\"T_fbb13_level0_col3\" class=\"col_heading level0 col3\" >Cohen_kappa_test</th>\n",
|
|||
|
" <th id=\"T_fbb13_level0_col4\" class=\"col_heading level0 col4\" >MCC_test</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th class=\"index_name level0\" >Name</th>\n",
|
|||
|
" <th class=\"blank col0\" > </th>\n",
|
|||
|
" <th class=\"blank col1\" > </th>\n",
|
|||
|
" <th class=\"blank col2\" > </th>\n",
|
|||
|
" <th class=\"blank col3\" > </th>\n",
|
|||
|
" <th class=\"blank col4\" > </th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_fbb13_level0_row0\" class=\"row_heading level0 row0\" >Old</th>\n",
|
|||
|
" <td id=\"T_fbb13_row0_col0\" class=\"data row0 col0\" >0.837989</td>\n",
|
|||
|
" <td id=\"T_fbb13_row0_col1\" class=\"data row0 col1\" >0.788321</td>\n",
|
|||
|
" <td id=\"T_fbb13_row0_col2\" class=\"data row0 col2\" >0.858893</td>\n",
|
|||
|
" <td id=\"T_fbb13_row0_col3\" class=\"data row0 col3\" >0.657111</td>\n",
|
|||
|
" <td id=\"T_fbb13_row0_col4\" class=\"data row0 col4\" >0.657157</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th id=\"T_fbb13_level0_row1\" class=\"row_heading level0 row1\" >New</th>\n",
|
|||
|
" <td id=\"T_fbb13_row1_col0\" class=\"data row1 col0\" >0.837989</td>\n",
|
|||
|
" <td id=\"T_fbb13_row1_col1\" class=\"data row1 col1\" >0.778626</td>\n",
|
|||
|
" <td id=\"T_fbb13_row1_col2\" class=\"data row1 col2\" >0.859750</td>\n",
|
|||
|
" <td id=\"T_fbb13_row1_col3\" class=\"data row1 col3\" >0.651447</td>\n",
|
|||
|
" <td id=\"T_fbb13_row1_col4\" class=\"data row1 col4\" >0.653765</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
"<pandas.io.formats.style.Styler at 0x1f1f11345c0>"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 100,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"optimized_metrics[\n",
|
|||
|
" [\n",
|
|||
|
" \"Accuracy_test\",\n",
|
|||
|
" \"F1_test\",\n",
|
|||
|
" \"ROC_AUC_test\",\n",
|
|||
|
" \"Cohen_kappa_test\",\n",
|
|||
|
" \"MCC_test\",\n",
|
|||
|
" ]\n",
|
|||
|
"].style.background_gradient(\n",
|
|||
|
" cmap=\"plasma\",\n",
|
|||
|
" low=0.3,\n",
|
|||
|
" high=1,\n",
|
|||
|
" subset=[\n",
|
|||
|
" \"ROC_AUC_test\",\n",
|
|||
|
" \"MCC_test\",\n",
|
|||
|
" \"Cohen_kappa_test\",\n",
|
|||
|
" ],\n",
|
|||
|
").background_gradient(\n",
|
|||
|
" cmap=\"viridis\",\n",
|
|||
|
" low=1,\n",
|
|||
|
" high=0.3,\n",
|
|||
|
" subset=[\n",
|
|||
|
" \"Accuracy_test\",\n",
|
|||
|
" \"F1_test\",\n",
|
|||
|
" ],\n",
|
|||
|
")"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 104,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA20AAAGjCAYAAAC/j/0nAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABXeklEQVR4nO3deXQUZfr28atDyAJJdwAhCwQIsiTIJqAYcVAwLC4jSAaVyYyAoK8KyKIy4sgqCjIq/FAEZ0QWBRFEEEVgEAUBASUKoiICsgSyoGISFrOQ7vcPhpY2LGlSSXeqvp9z6syku7r66RBz5a7nrqdsLpfLJQAAAACAXwrw9QAAAAAAABdG0QYAAAAAfoyiDQAAAAD8GEUbAAAAAPgxijYAAAAA8GMUbQAAAADgxyjaAAAAAMCPBfp6AACA8pWXl6eCggLDjhcUFKSQkBDDjgcAgDeskGsUbQBgIXl5eYqrF6bMo0WGHTMqKkr79+/3u4ADAJifVXKNog0ALKSgoECZR4t0MLW+7OGl75DPPe5UvTYHVFBQ4FfhBgCwBqvkGkUbAFhQWLhNYeG2Uh/HqdIfAwCA0jJ7rlG0AYAFFbmcKnIZcxwAAHzN7LnG6pEAAAAA4MeYaQMAC3LKJadKf0rSiGMAAFBaZs81ijYAsCCnnDKiAcSYowAAUDpmzzXaIwEAAADAjzHTBgAWVORyqchV+hYQI44BAEBpmT3XKNoAwILM3vsPALAWs+ca7ZEAAAAA4MeYaQMAC3LKpSITn5EEAFiL2XONog0AAABAhWb29kiKNgCwILOHGwAAZkLRBgAWZPZVtgAA1mL2XGMhEgAAAADwY8y0AYAFOf+3GXEcAAB8zey5RtEGABZUZNAqW0YcAwCA0jJ7rtEeCQAAAAB+jJk2ALCgIteZzYjjAADga2bPNYo2ALAgs/f+AwCsxey5RnskAAAAAPgxZtoAwIKcsqlINkOOAwCAr5k91yjaAMCCnK4zmxHHAQDA18yea7RHAgAAAIAfY6YNACyoyKA2EiOOAQBAaZk915hpAwCUm+PHj2vo0KGqV6+eQkNDdf311+uLL75wP+9yuTR69GhFR0crNDRUSUlJ2rNnjw9HDACA71G0AYAFnT0jacTmjQEDBmjNmjV64403tHPnTnXp0kVJSUk6cuSIJGny5MmaNm2aZs6cqa1bt6pq1arq2rWr8vLyyuLbAAAwCV/lWnmhaAMAC3K6bIZtkpSbm+ux5efnF3vP3377TUuWLNHkyZPVoUMHNWzYUGPHjlXDhg01Y8YMuVwuTZ06VU899ZS6d++uFi1aaN68eUpPT9eyZcvK+TsEAKhIjM41f0PRBgAotdjYWDkcDvc2ceLEYvucPn1aRUVFCgkJ8Xg8NDRUGzdu1P79+5WZmamkpCT3cw6HQ+3atdPmzZvL/DMAAOCvWIgEACzI6Au209LSZLfb3Y8HBwcX2zc8PFyJiYl6+umnlZCQoMjISL311lvavHmzGjZsqMzMTElSZGSkx+siIyPdzwEAcD5mX4iEog0ALKhIASoyoNmi6H//a7fbPYq2C3njjTd03333qXbt2qpUqZJat26t3r17KzU1tdRjAQBYl9G55m9ojwQAlJsrr7xS69ev14kTJ5SWlqbPP/9chYWFatCggaKioiRJWVlZHq/JyspyPwcAgBVRtAGABbkMuljbdZkXbFetWlXR0dH69ddftXr1anXv3l1xcXGKiorS2rVr3fvl5uZq69atSkxMNOqjAwBMyNe5VtZojwQAC/JV7//q1avlcrnUpEkT7d27V48//rji4+PVr18/2Ww2DR06VBMmTFCjRo0UFxenUaNGKSYmRj169Cj1WAEA5sU1bQAAGCQnJ0cjR47U4cOHVb16dSUnJ+uZZ55R5cqVJUkjRozQyZMn9cADDyg7O1s33HCDVq1aVWzFSQAArMTmcrlcvh4EAKB85ObmyuFwaOXXcaoaXvoO+ZPHnbqlxX
7l5OSUaCESAACMZJVc45o2AAAAABWaUzY5FWDA5l175PHjxzV06FDVq1dPoaGhuv766/XFF1+4n3e5XBo9erSio6MVGhqqpKQk7dmzx+vPR9EGABbkq3ADAMBMBgwYoDVr1uiNN97Qzp071aVLFyUlJenIkSOSpMmTJ2vatGmaOXOmtm7dqqpVq6pr167Ky8vz6n0o2gDAgs5esG3EBgCAr/ki13777TctWbJEkydPVocOHdSwYUONHTtWDRs21IwZM+RyuTR16lQ99dRT6t69u1q0aKF58+YpPT1dy5Yt8+rzUbQBAAAAwDlyc3M9tvz8/GL7nD59WkVFRcUWywoNDdXGjRu1f/9+ZWZmKikpyf2cw+FQu3bttHnzZq/GQ9EGABZU5AowbAMAwNeMzrXY2Fg5HA73NnHixGLvGR4ersTERD399NNKT09XUVGR3nzzTW3evFkZGRnKzMyUJEVGRnq8LjIy0v1cSbHkPwBY0Jlr2krf2sg1bQAAf2B0rqWlpXmsHhkcHHze/d944w3dd999ql27tipVqqTWrVurd+/eSk1NLfVYzsUpUgAAAAA4h91u99guVLRdeeWVWr9+vU6cOKG0tDR9/vnnKiwsVIMGDRQVFSVJysrK8nhNVlaW+7mSomgDAAtyKkBFBmxOYgQA4Ad8nWtVq1ZVdHS0fv31V61evVrdu3dXXFycoqKitHbtWvd+ubm52rp1qxITE706Pu2RAGBBRl2PVuRyGTAaAABKx1e5tnr1arlcLjVp0kR79+7V448/rvj4ePXr1082m01Dhw7VhAkT1KhRI8XFxWnUqFGKiYlRjx49vHofijYAAAAAuAw5OTkaOXKkDh8+rOrVqys5OVnPPPOMKleuLEkaMWKETp48qQceeEDZ2dm64YYbtGrVqmIrTl6KzeXiNCkAWEVubq4cDocWbG+mKuGVSn28U8eL9NdW3ygnJ8fjgm0AAMqDVXKNmTYAsKAil01FrtKvsmXEMQAAKC2z5xpXkAMAAACAH2OmDQAs6OwqWaU/Dh32AADfM3uuMdMGAAAAAH6MmTYAsCCnK0BOA5ZGdrKWFQDAD5g91yjaAMCCzN5GAgCwFrPnGu2RAAAAAODHmGkDAAtyyphljZ2lHwoAAKVm9lyjaAMAC3IqQE4Dmi2MOAYAAKVl9lzzz1EBAAAAACQx0wYAllTkClCRAatsGXEMAABKy+y5RtEGABbklE1OGdH7X/pjAABQWmbPNf8sJQEAAAAAkphpAwBLMnsbCQDAWsyeaxRtAAAAACo0426uTdEGAPATZg83AADMhKLNR5xOp9LT0xUeHi6bzT8veATgX1wul44fP66YmBgFBJSuWHK6bHIacRNSA44BcyDXAHiLXCs5ijYfSU9PV2xsrK+HAaACSktLU506dXw9DMADuQbgcpFrl0bR5iPh4eGSpINf1pc9jPYieLqzcXNfDwF+6LQKtVEfun9/lIbToPZIJ+2R+B9yDRdDruF8yLWSo2jzkbOtI/awANnD/fOHA74TaKvs6yHAH7nO/I8RrWdOV4CcBqyQZcQxYA7kGi6GXMN5kWsl5p+jAgAAAABIYqYNACypSDYVqfRnNo04BgAApWX2XKNoAwALMnsbCQDAWsyea/45KgAAAACAJGbaAMCSimRMC0hR6YcCAECpmT3XKNoAwILM3kYCALAWs+eaf44KAAAAACCJog0ALKnIFWDY5tX7FhVp1KhRiouLU2hoqK688ko9/fTTcrlc7n1cLpdGjx6t6OhohYaGKikpSXv27DH6WwAAMBFf5Vp58c9RAQBM6bnnntOMGTP08ssva9euXXruuec0efJkvfTSS+59Jk+erGnTpmnmzJnaunWrqlatqq5duyovL8+HIwcAwHe4pg0ALMglm5wGXLDt+t8xcnNzPR4PDg5WcHBwsf0/++wzde/eXbfddpskqX79+nrrrbf0+eefnzmey6WpU6fqqaeeUvfu3SVJ8+bNU2RkpJYtW6
Z77rmn1GMGAJiP0bnmb5hpAwALMrqNJDY2Vg6Hw71NnDjxvO97/fXXa+3atfrhhx8kSTt27NDGjRt1yy23SJL279+
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x400 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"for index in range(0, len(optimized_metrics)):\n",
|
|||
|
" c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n",
|
|||
|
" disp = ConfusionMatrixDisplay(\n",
|
|||
|
"        confusion_matrix=c_matrix, display_labels=[\"Died\", \"Survived\"]\n",
|
|||
|
" ).plot(ax=ax.flat[index])\n",
|
|||
|
"\n",
|
|||
|
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.7"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|