ещё коммит

2024-11-15 23:33:34 +04:00 · 2024-11-15 23:33:34 +04:00 · 16d54136f0
commit 16d54136f0
parent 8fc280c7e7
1 changed files with 665 additions and 51 deletions
--- a/lab_4/lab4.ipynb
+++ b/lab_4/lab4.ipynb
@ -739,30 +739,537 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 30,
+   "execution_count": 37,
   "metadata": {},
-   "outputs": [],
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>prepocessing_num__Networth</th>\n",
+       "      <th>prepocessing_cat__Country_Argentina</th>\n",
+       "      <th>prepocessing_cat__Country_Australia</th>\n",
+       "      <th>prepocessing_cat__Country_Austria</th>\n",
+       "      <th>prepocessing_cat__Country_Barbados</th>\n",
+       "      <th>prepocessing_cat__Country_Belgium</th>\n",
+       "      <th>prepocessing_cat__Country_Belize</th>\n",
+       "      <th>prepocessing_cat__Country_Brazil</th>\n",
+       "      <th>prepocessing_cat__Country_Bulgaria</th>\n",
+       "      <th>prepocessing_cat__Country_Canada</th>\n",
+       "      <th>...</th>\n",
+       "      <th>prepocessing_cat__Industry_Logistics</th>\n",
+       "      <th>prepocessing_cat__Industry_Manufacturing</th>\n",
+       "      <th>prepocessing_cat__Industry_Media &amp; Entertainment</th>\n",
+       "      <th>prepocessing_cat__Industry_Metals &amp; Mining</th>\n",
+       "      <th>prepocessing_cat__Industry_Real Estate</th>\n",
+       "      <th>prepocessing_cat__Industry_Service</th>\n",
+       "      <th>prepocessing_cat__Industry_Sports</th>\n",
+       "      <th>prepocessing_cat__Industry_Technology</th>\n",
+       "      <th>prepocessing_cat__Industry_Telecom</th>\n",
+       "      <th>prepocessing_cat__Industry_diversified</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>1909</th>\n",
+       "      <td>-0.309917</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2099</th>\n",
+       "      <td>-0.329245</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1392</th>\n",
+       "      <td>-0.242268</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>627</th>\n",
+       "      <td>-0.019995</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>527</th>\n",
+       "      <td>0.037990</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>84</th>\n",
+       "      <td>1.342637</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>633</th>\n",
+       "      <td>-0.019995</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>922</th>\n",
+       "      <td>-0.145628</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2178</th>\n",
+       "      <td>-0.329245</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>415</th>\n",
+       "      <td>0.134630</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "      <td>0.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2080 rows × 860 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      prepocessing_num__Networth  prepocessing_cat__Country_Argentina  \\\n",
+       "1909                   -0.309917                                  0.0   \n",
+       "2099                   -0.329245                                  0.0   \n",
+       "1392                   -0.242268                                  0.0   \n",
+       "627                    -0.019995                                  0.0   \n",
+       "527                     0.037990                                  0.0   \n",
+       "...                          ...                                  ...   \n",
+       "84                      1.342637                                  0.0   \n",
+       "633                    -0.019995                                  0.0   \n",
+       "922                    -0.145628                                  0.0   \n",
+       "2178                   -0.329245                                  0.0   \n",
+       "415                     0.134630                                  0.0   \n",
+       "\n",
+       "      prepocessing_cat__Country_Australia  prepocessing_cat__Country_Austria  \\\n",
+       "1909                                  0.0                                0.0   \n",
+       "2099                                  0.0                                0.0   \n",
+       "1392                                  0.0                                0.0   \n",
+       "627                                   0.0                                0.0   \n",
+       "527                                   0.0                                0.0   \n",
+       "...                                   ...                                ...   \n",
+       "84                                    0.0                                0.0   \n",
+       "633                                   0.0                                0.0   \n",
+       "922                                   0.0                                0.0   \n",
+       "2178                                  0.0                                0.0   \n",
+       "415                                   0.0                                0.0   \n",
+       "\n",
+       "      prepocessing_cat__Country_Barbados  prepocessing_cat__Country_Belgium  \\\n",
+       "1909                                 0.0                                0.0   \n",
+       "2099                                 0.0                                0.0   \n",
+       "1392                                 0.0                                0.0   \n",
+       "627                                  0.0                                0.0   \n",
+       "527                                  0.0                                0.0   \n",
+       "...                                  ...                                ...   \n",
+       "84                                   0.0                                0.0   \n",
+       "633                                  0.0                                0.0   \n",
+       "922                                  0.0                                0.0   \n",
+       "2178                                 0.0                                0.0   \n",
+       "415                                  0.0                                0.0   \n",
+       "\n",
+       "      prepocessing_cat__Country_Belize  prepocessing_cat__Country_Brazil  \\\n",
+       "1909                               0.0                               0.0   \n",
+       "2099                               0.0                               0.0   \n",
+       "1392                               0.0                               0.0   \n",
+       "627                                0.0                               0.0   \n",
+       "527                                0.0                               0.0   \n",
+       "...                                ...                               ...   \n",
+       "84                                 0.0                               0.0   \n",
+       "633                                0.0                               0.0   \n",
+       "922                                0.0                               0.0   \n",
+       "2178                               0.0                               0.0   \n",
+       "415                                0.0                               0.0   \n",
+       "\n",
+       "      prepocessing_cat__Country_Bulgaria  prepocessing_cat__Country_Canada  \\\n",
+       "1909                                 0.0                               0.0   \n",
+       "2099                                 0.0                               0.0   \n",
+       "1392                                 0.0                               0.0   \n",
+       "627                                  0.0                               1.0   \n",
+       "527                                  0.0                               0.0   \n",
+       "...                                  ...                               ...   \n",
+       "84                                   0.0                               0.0   \n",
+       "633                                  0.0                               0.0   \n",
+       "922                                  0.0                               1.0   \n",
+       "2178                                 0.0                               0.0   \n",
+       "415                                  0.0                               0.0   \n",
+       "\n",
+       "      ...  prepocessing_cat__Industry_Logistics   \\\n",
+       "1909  ...                                    0.0   \n",
+       "2099  ...                                    0.0   \n",
+       "1392  ...                                    0.0   \n",
+       "627   ...                                    0.0   \n",
+       "527   ...                                    0.0   \n",
+       "...   ...                                    ...   \n",
+       "84    ...                                    0.0   \n",
+       "633   ...                                    0.0   \n",
+       "922   ...                                    0.0   \n",
+       "2178  ...                                    0.0   \n",
+       "415   ...                                    0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Manufacturing   \\\n",
+       "1909                                        0.0   \n",
+       "2099                                        0.0   \n",
+       "1392                                        0.0   \n",
+       "627                                         0.0   \n",
+       "527                                         1.0   \n",
+       "...                                         ...   \n",
+       "84                                          0.0   \n",
+       "633                                         0.0   \n",
+       "922                                         0.0   \n",
+       "2178                                        0.0   \n",
+       "415                                         0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Media & Entertainment   \\\n",
+       "1909                                                0.0   \n",
+       "2099                                                0.0   \n",
+       "1392                                                0.0   \n",
+       "627                                                 0.0   \n",
+       "527                                                 0.0   \n",
+       "...                                                 ...   \n",
+       "84                                                  0.0   \n",
+       "633                                                 0.0   \n",
+       "922                                                 0.0   \n",
+       "2178                                                0.0   \n",
+       "415                                                 0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Metals & Mining   \\\n",
+       "1909                                          0.0   \n",
+       "2099                                          0.0   \n",
+       "1392                                          0.0   \n",
+       "627                                           0.0   \n",
+       "527                                           0.0   \n",
+       "...                                           ...   \n",
+       "84                                            0.0   \n",
+       "633                                           0.0   \n",
+       "922                                           0.0   \n",
+       "2178                                          0.0   \n",
+       "415                                           0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Real Estate   \\\n",
+       "1909                                      0.0   \n",
+       "2099                                      1.0   \n",
+       "1392                                      0.0   \n",
+       "627                                       0.0   \n",
+       "527                                       0.0   \n",
+       "...                                       ...   \n",
+       "84                                        0.0   \n",
+       "633                                       0.0   \n",
+       "922                                       1.0   \n",
+       "2178                                      0.0   \n",
+       "415                                       1.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Service   prepocessing_cat__Industry_Sports   \\\n",
+       "1909                                  0.0                                 0.0   \n",
+       "2099                                  0.0                                 0.0   \n",
+       "1392                                  0.0                                 0.0   \n",
+       "627                                   0.0                                 0.0   \n",
+       "527                                   0.0                                 0.0   \n",
+       "...                                   ...                                 ...   \n",
+       "84                                    0.0                                 0.0   \n",
+       "633                                   0.0                                 0.0   \n",
+       "922                                   0.0                                 0.0   \n",
+       "2178                                  0.0                                 0.0   \n",
+       "415                                   0.0                                 0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Technology   \\\n",
+       "1909                                     0.0   \n",
+       "2099                                     0.0   \n",
+       "1392                                     0.0   \n",
+       "627                                      0.0   \n",
+       "527                                      0.0   \n",
+       "...                                      ...   \n",
+       "84                                       0.0   \n",
+       "633                                      0.0   \n",
+       "922                                      0.0   \n",
+       "2178                                     0.0   \n",
+       "415                                      0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_Telecom   \\\n",
+       "1909                                  0.0   \n",
+       "2099                                  0.0   \n",
+       "1392                                  0.0   \n",
+       "627                                   0.0   \n",
+       "527                                   0.0   \n",
+       "...                                   ...   \n",
+       "84                                    0.0   \n",
+       "633                                   0.0   \n",
+       "922                                   0.0   \n",
+       "2178                                  0.0   \n",
+       "415                                   0.0   \n",
+       "\n",
+       "      prepocessing_cat__Industry_diversified     \n",
+       "1909                                        0.0  \n",
+       "2099                                        0.0  \n",
+       "1392                                        0.0  \n",
+       "627                                         0.0  \n",
+       "527                                         0.0  \n",
+       "...                                         ...  \n",
+       "84                                          0.0  \n",
+       "633                                         0.0  \n",
+       "922                                         0.0  \n",
+       "2178                                        0.0  \n",
+       "415                                         0.0  \n",
+       "\n",
+       "[2080 rows x 860 columns]"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
   "source": [
    "\n",
    "from sklearn.compose import ColumnTransformer\n",
-    "from sklearn.discriminant_analysis import StandardScaler\n",
+    "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
    "from sklearn.impute import SimpleImputer\n",
    "from sklearn.pipeline import Pipeline\n",
-    "from sklearn.preprocessing import OneHotEncoder\n",
+    "import pandas as pd\n",
    "\n",
+    "# Исправляем ColumnTransformer с сохранением имен колонок\n",
+    "columns_to_drop = [\"Age_category\", \"Rank \", \"Name\"]\n",
    "\n",
-    "columns_to_drop = [\"Rank \", \"Name\"]\n",
    "num_columns = [\n",
    "    column\n",
-    "    for column in df.columns\n",
-    "    if column not in columns_to_drop and df[column].dtype != \"object\"\n",
+    "    for column in X_train.columns\n",
+    "    if column not in columns_to_drop and X_train[column].dtype != \"object\"\n",
    "]\n",
    "cat_columns = [\n",
    "    column\n",
-    "    for column in df.columns\n",
-    "    if column not in columns_to_drop and df[column].dtype == \"object\"\n",
+    "    for column in X_train.columns\n",
+    "    if column not in columns_to_drop and X_train[column].dtype == \"object\"\n",
    "]\n",
    "\n",
+    "# Предобработка числовых данных\n",
    "num_imputer = SimpleImputer(strategy=\"median\")\n",
    "num_scaler = StandardScaler()\n",
    "preprocessing_num = Pipeline(\n",
@ -772,6 +1279,7 @@
    "    ]\n",
    ")\n",
    "\n",
+    "# Предобработка категориальных данных\n",
    "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
    "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n",
    "preprocessing_cat = Pipeline(\n",
@ -781,76 +1289,182 @@
    "    ]\n",
    ")\n",
    "\n",
+    "# Общая предобработка признаков\n",
    "features_preprocessing = ColumnTransformer(\n",
-    "    verbose_feature_names_out=False,\n",
+    "    verbose_feature_names_out=True,  # Сохраняем имена колонок\n",
    "    transformers=[\n",
    "        (\"prepocessing_num\", preprocessing_num, num_columns),\n",
    "        (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
-    "        (\"prepocessing_features\", cat_imputer),\n",
    "    ],\n",
-    "    remainder=\"passthrough\"\n",
-    ")\n",
-    "\n",
-    "\n",
-    "drop_columns = ColumnTransformer(\n",
-    "    verbose_feature_names_out=False,\n",
-    "    transformers=[\n",
-    "        (\"drop_columns\", \"drop\", columns_to_drop),\n",
-    "    ],\n",
-    "    remainder=\"passthrough\",\n",
-    ")\n",
-    "\n",
-    "features_postprocessing = ColumnTransformer(\n",
-    "    verbose_feature_names_out=False,\n",
-    "    transformers=[\n",
-    "        (\"prepocessing_cat\", preprocessing_cat, [\"Cabin_type\"]),\n",
-    "    ],\n",
-    "    remainder=\"passthrough\",\n",
+    "    remainder=\"drop\"  # Убираем неиспользуемые столбцы\n",
    ")\n",
    "\n",
+    "# Итоговый конвейер\n",
    "pipeline_end = Pipeline(\n",
    "    [\n",
    "        (\"features_preprocessing\", features_preprocessing),\n",
-    "        (\"drop_columns\", drop_columns),\n",
-    "        (\"features_postprocessing\", features_postprocessing),\n",
    "    ]\n",
-    ")"
+    ")\n",
+    "\n",
+    "# Преобразуем данные\n",
+    "preprocessing_result = pipeline_end.fit_transform(X_train)\n",
+    "\n",
+    "# Создаем DataFrame с правильными именами колонок\n",
+    "preprocessed_df = pd.DataFrame(\n",
+    "    preprocessing_result,\n",
+    "    columns=pipeline_end.get_feature_names_out(),\n",
+    "    index=X_train.index,  # Сохраняем индексы\n",
+    ")\n",
+    "\n",
+    "preprocessed_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Формирование набора моделей для классификации\n",
+    "## logistic -- логистическая регрессия\n",
+    "## ridge -- гребневая регрессия\n",
+    "## decision_tree -- дерево решений\n",
+    "## knn -- k-ближайших соседей\n",
+    "## naive_bayes -- наивный Байесовский классификатор\n",
+    "## gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n",
+    "## random_forest -- метод случайного леса (набор деревьев решений)\n",
+    "## mlp -- многослойный персептрон (нейронная сеть)"
   ]
  },
  {
   "cell_type": "code",
-   "execution_count": 31,
+   "execution_count": 38,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree\n",
+    "\n",
+    "class_models = {\n",
+    "    \"logistic\": {\"model\": linear_model.LogisticRegression()},\n",
+    "    # \"ridge\": {\"model\": linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n",
+    "    \"ridge\": {\"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")},\n",
+    "    \"decision_tree\": {\n",
+    "        \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=9)\n",
+    "    },\n",
+    "    \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n",
+    "    \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n",
+    "    \"gradient_boosting\": {\n",
+    "        \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n",
+    "    },\n",
+    "    \"random_forest\": {\n",
+    "        \"model\": ensemble.RandomForestClassifier(\n",
+    "            max_depth=11, class_weight=\"balanced\", random_state=9\n",
+    "        )\n",
+    "    },\n",
+    "    \"mlp\": {\n",
+    "        \"model\": neural_network.MLPClassifier(\n",
+    "            hidden_layer_sizes=(7,),\n",
+    "            max_iter=500,\n",
+    "            early_stopping=True,\n",
+    "            random_state=9,\n",
+    "        )\n",
+    "    },\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Обучение моделей на обучающем наборе данных и оценка на тестовом"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 40,
   "metadata": {},
   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Model: logistic\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:242: UserWarning: Found unknown categories in columns [0, 1] during transform. These unknown categories will be encoded as all zeros\n",
+      "  warnings.warn(\n"
+     ]
+    },
    {
     "ename": "ValueError",
-     "evalue": "not enough values to unpack (expected 3, got 2)",
+     "evalue": "Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted'].",
     "output_type": "error",
     "traceback": [
      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
-      "Cell \u001b[1;32mIn[31], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m \u001b[43mpipeline_end\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m      2\u001b[0m preprocessed_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(\n\u001b[0;32m      3\u001b[0m     preprocessing_result,\n\u001b[0;32m      4\u001b[0m     columns\u001b[38;5;241m=\u001b[39mpipeline_end\u001b[38;5;241m.\u001b[39mget_feature_names_out(),\n\u001b[0;32m      5\u001b[0m )\n\u001b[0;32m      7\u001b[0m preprocessed_df\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1466\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1469\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1470\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1471\u001b[0m     )\n\u001b[0;32m   1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py:533\u001b[0m, in \u001b[0;36mPipeline.fit_transform\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m    490\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Fit the model and transform with the final estimator.\u001b[39;00m\n\u001b[0;32m    491\u001b[0m \n\u001b[0;32m    492\u001b[0m \u001b[38;5;124;03mFit all the transformers one after the other and sequentially transform\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    530\u001b[0m \u001b[38;5;124;03m    Transformed samples.\u001b[39;00m\n\u001b[0;32m    531\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m    532\u001b[0m routed_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_method_params(method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit_transform\u001b[39m\u001b[38;5;124m\"\u001b[39m, props\u001b[38;5;241m=\u001b[39mparams)\n\u001b[1;32m--> 533\u001b[0m Xt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrouted_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    535\u001b[0m last_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_final_estimator\n\u001b[0;32m    536\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _print_elapsed_time(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPipeline\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_message(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps) \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m)):\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py:406\u001b[0m, in \u001b[0;36mPipeline._fit\u001b[1;34m(self, X, y, routed_params)\u001b[0m\n\u001b[0;32m    404\u001b[0m     cloned_transformer \u001b[38;5;241m=\u001b[39m clone(transformer)\n\u001b[0;32m    405\u001b[0m \u001b[38;5;66;03m# Fit or load from cache the current transformer\u001b[39;00m\n\u001b[1;32m--> 406\u001b[0m X, fitted_transformer \u001b[38;5;241m=\u001b[39m \u001b[43mfit_transform_one_cached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m    407\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcloned_transformer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    408\u001b[0m \u001b[43m    \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    409\u001b[0m \u001b[43m    \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    410\u001b[0m \u001b[43m    \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m    411\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmessage_clsname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPipeline\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m    412\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_log_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep_idx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    413\u001b[0m \u001b[43m    \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouted_params\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    414\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    415\u001b[0m \u001b[38;5;66;03m# Replace the transformer of the step with the fitted\u001b[39;00m\n\u001b[0;32m    416\u001b[0m \u001b[38;5;66;03m# transformer. This is necessary when loading the transformer\u001b[39;00m\n\u001b[0;32m    417\u001b[0m \u001b[38;5;66;03m# from the cache.\u001b[39;00m\n\u001b[0;32m    418\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps[step_idx] \u001b[38;5;241m=\u001b[39m (name, fitted_transformer)\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\joblib\\memory.py:312\u001b[0m, in \u001b[0;36mNotMemorizedFunc.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m    311\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 312\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py:1310\u001b[0m, in \u001b[0;36m_fit_transform_one\u001b[1;34m(transformer, X, y, weight, message_clsname, message, params)\u001b[0m\n\u001b[0;32m   1308\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _print_elapsed_time(message_clsname, message):\n\u001b[0;32m   1309\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(transformer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit_transform\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 1310\u001b[0m         res \u001b[38;5;241m=\u001b[39m \u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfit_transform\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1311\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   1312\u001b[0m         res \u001b[38;5;241m=\u001b[39m transformer\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit\u001b[39m\u001b[38;5;124m\"\u001b[39m, {}))\u001b[38;5;241m.\u001b[39mtransform(\n\u001b[0;32m   1313\u001b[0m             X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtransform\u001b[39m\u001b[38;5;124m\"\u001b[39m, {})\n\u001b[0;32m   1314\u001b[0m         )\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_set_output.py:316\u001b[0m, in \u001b[0;36m_wrap_method_output.<locals>.wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m    314\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(f)\n\u001b[0;32m    315\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 316\u001b[0m     data_to_wrap \u001b[38;5;241m=\u001b[39m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    317\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_to_wrap, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m    318\u001b[0m         \u001b[38;5;66;03m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m    319\u001b[0m         return_tuple \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m    320\u001b[0m             _wrap_data_with_container(method, data_to_wrap[\u001b[38;5;241m0\u001b[39m], X, \u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m    321\u001b[0m             \u001b[38;5;241m*\u001b[39mdata_to_wrap[\u001b[38;5;241m1\u001b[39m:],\n\u001b[0;32m    322\u001b[0m         )\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1466\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1469\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1470\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1471\u001b[0m     )\n\u001b[0;32m   1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:965\u001b[0m, in \u001b[0;36mColumnTransformer.fit_transform\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m    963\u001b[0m \u001b[38;5;66;03m# set n_features_in_ attribute\u001b[39;00m\n\u001b[0;32m    964\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_n_features(X, reset\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m--> 965\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_transformers\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    966\u001b[0m n_samples \u001b[38;5;241m=\u001b[39m _num_samples(X)\n\u001b[0;32m    968\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_column_callables(X)\n",
-      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:501\u001b[0m, in \u001b[0;36mColumnTransformer._validate_transformers\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m    498\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransformers:\n\u001b[0;32m    499\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[1;32m--> 501\u001b[0m names, transformers, _ \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mzip\u001b[39m(\u001b[38;5;241m*\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtransformers)\n\u001b[0;32m    503\u001b[0m \u001b[38;5;66;03m# validate names\u001b[39;00m\n\u001b[0;32m    504\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_names(names)\n",
-      "\u001b[1;31mValueError\u001b[0m: not enough values to unpack (expected 3, got 2)"
+      "Cell \u001b[1;32mIn[40], line 19\u001b[0m\n\u001b[0;32m     16\u001b[0m class_models[model_name][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mprobs\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m y_test_probs\n\u001b[0;32m     17\u001b[0m class_models[model_name][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpreds\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m y_test_predict\n\u001b[1;32m---> 19\u001b[0m class_models[model_name][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision_train\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m \u001b[43mmetrics\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprecision_score\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m     20\u001b[0m \u001b[43m    \u001b[49m\u001b[43my_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train_predict\u001b[49m\n\u001b[0;32m     21\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     22\u001b[0m class_models[model_name][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPrecision_test\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m metrics\u001b[38;5;241m.\u001b[39mprecision_score(\n\u001b[0;32m     23\u001b[0m     y_test, y_test_predict\n\u001b[0;32m     24\u001b[0m )\n\u001b[0;32m     25\u001b[0m class_models[model_name][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRecall_train\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m metrics\u001b[38;5;241m.\u001b[39mrecall_score(\n\u001b[0;32m     26\u001b[0m     y_train, y_train_predict\n\u001b[0;32m     27\u001b[0m )\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:213\u001b[0m, in \u001b[0;36mvalidate_params.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m    208\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m    209\u001b[0m         skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m    210\u001b[0m             prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m    211\u001b[0m         )\n\u001b[0;32m    212\u001b[0m     ):\n\u001b[1;32m--> 213\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidParameterError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m    215\u001b[0m     \u001b[38;5;66;03m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[0;32m    216\u001b[0m     \u001b[38;5;66;03m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[0;32m    217\u001b[0m     \u001b[38;5;66;03m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[0;32m    218\u001b[0m     \u001b[38;5;66;03m# message to avoid confusion.\u001b[39;00m\n\u001b[0;32m    219\u001b[0m     msg \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msub(\n\u001b[0;32m    220\u001b[0m         \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mw+ must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    221\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m    222\u001b[0m         \u001b[38;5;28mstr\u001b[39m(e),\n\u001b[0;32m    223\u001b[0m     )\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:2204\u001b[0m, in \u001b[0;36mprecision_score\u001b[1;34m(y_true, y_pred, labels, pos_label, average, sample_weight, zero_division)\u001b[0m\n\u001b[0;32m   2037\u001b[0m \u001b[38;5;129m@validate_params\u001b[39m(\n\u001b[0;32m   2038\u001b[0m     {\n\u001b[0;32m   2039\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124my_true\u001b[39m\u001b[38;5;124m\"\u001b[39m: [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marray-like\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msparse matrix\u001b[39m\u001b[38;5;124m\"\u001b[39m],\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2064\u001b[0m     zero_division\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwarn\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m   2065\u001b[0m ):\n\u001b[0;32m   2066\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Compute the precision.\u001b[39;00m\n\u001b[0;32m   2067\u001b[0m \n\u001b[0;32m   2068\u001b[0m \u001b[38;5;124;03m    The precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   2202\u001b[0m \u001b[38;5;124;03m    array([0.5, 1. , 1. ])\u001b[39;00m\n\u001b[0;32m   2203\u001b[0m \u001b[38;5;124;03m    \"\"\"\u001b[39;00m\n\u001b[1;32m-> 2204\u001b[0m     p, _, _, _ \u001b[38;5;241m=\u001b[39m \u001b[43mprecision_recall_fscore_support\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   2205\u001b[0m \u001b[43m        \u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2206\u001b[0m \u001b[43m        \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2207\u001b[0m \u001b[43m        \u001b[49m\u001b[43mlabels\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2208\u001b[0m \u001b[43m        \u001b[49m\u001b[43mpos_label\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpos_label\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2209\u001b[0m \u001b[43m        \u001b[49m\u001b[43maverage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43maverage\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2210\u001b[0m \u001b[43m        \u001b[49m\u001b[43mwarn_for\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mprecision\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2211\u001b[0m \u001b[43m        \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43msample_weight\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2212\u001b[0m \u001b[43m        \u001b[49m\u001b[43mzero_division\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mzero_division\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   2213\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   2214\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m p\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:186\u001b[0m, in \u001b[0;36mvalidate_params.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m    184\u001b[0m global_skip_validation \u001b[38;5;241m=\u001b[39m get_config()[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mskip_parameter_validation\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m    185\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m global_skip_validation:\n\u001b[1;32m--> 186\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    188\u001b[0m func_sig \u001b[38;5;241m=\u001b[39m signature(func)\n\u001b[0;32m    190\u001b[0m \u001b[38;5;66;03m# Map *args/**kwargs to the function signature\u001b[39;00m\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1789\u001b[0m, in \u001b[0;36mprecision_recall_fscore_support\u001b[1;34m(y_true, y_pred, beta, labels, pos_label, average, warn_for, sample_weight, zero_division)\u001b[0m\n\u001b[0;32m   1626\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Compute precision, recall, F-measure and support for each class.\u001b[39;00m\n\u001b[0;32m   1627\u001b[0m \n\u001b[0;32m   1628\u001b[0m \u001b[38;5;124;03mThe precision is the ratio ``tp / (tp + fp)`` where ``tp`` is the number of\u001b[39;00m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   1786\u001b[0m \u001b[38;5;124;03m array([2, 2, 2]))\u001b[39;00m\n\u001b[0;32m   1787\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m   1788\u001b[0m _check_zero_division(zero_division)\n\u001b[1;32m-> 1789\u001b[0m labels \u001b[38;5;241m=\u001b[39m \u001b[43m_check_set_wise_labels\u001b[49m\u001b[43m(\u001b[49m\u001b[43my_true\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_pred\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43maverage\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mlabels\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mpos_label\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1791\u001b[0m \u001b[38;5;66;03m# Calculate tp_sum, pred_sum, true_sum ###\u001b[39;00m\n\u001b[0;32m   1792\u001b[0m samplewise \u001b[38;5;241m=\u001b[39m average \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1578\u001b[0m, in \u001b[0;36m_check_set_wise_labels\u001b[1;34m(y_true, y_pred, average, labels, pos_label)\u001b[0m\n\u001b[0;32m   1576\u001b[0m         \u001b[38;5;28;01mif\u001b[39;00m y_type \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmulticlass\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m   1577\u001b[0m             average_options\u001b[38;5;241m.\u001b[39mremove(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msamples\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m-> 1578\u001b[0m         \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m   1579\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTarget is \u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m but average=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbinary\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m. Please \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1580\u001b[0m             \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mchoose another average setting, one of \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;241m%\u001b[39m (y_type, average_options)\n\u001b[0;32m   1581\u001b[0m         )\n\u001b[0;32m   1582\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m pos_label \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m (\u001b[38;5;28;01mNone\u001b[39;00m, \u001b[38;5;241m1\u001b[39m):\n\u001b[0;32m   1583\u001b[0m     warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[0;32m   1584\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNote that pos_label (set to \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m) is ignored when \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m   1585\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124maverage != \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mbinary\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m (got \u001b[39m\u001b[38;5;132;01m%r\u001b[39;00m\u001b[38;5;124m). You may use \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   1588\u001b[0m         \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[0;32m   1589\u001b[0m     )\n",
+      "\u001b[1;31mValueError\u001b[0m: Target is multiclass but average='binary'. Please choose another average setting, one of [None, 'micro', 'macro', 'weighted']."
     ]
    }
   ],
   "source": [
-    "preprocessing_result = pipeline_end.fit_transform(X_train)\n",
-    "preprocessed_df = pd.DataFrame(\n",
-    "    preprocessing_result,\n",
-    "    columns=pipeline_end.get_feature_names_out(),\n",
-    ")\n",
+    "import numpy as np\n",
+    "from sklearn import metrics\n",
    "\n",
-    "preprocessed_df"
+    "for model_name in class_models.keys():\n",
+    "    print(f\"Model: {model_name}\")\n",
+    "    model = class_models[model_name][\"model\"]\n",
+    "\n",
+    "    model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
+    "    model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n",
+    "\n",
+    "    y_train_predict = model_pipeline.predict(X_train)\n",
+    "    y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n",
+    "    y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n",
+    "\n",
+    "    class_models[model_name][\"pipeline\"] = model_pipeline\n",
+    "    class_models[model_name][\"probs\"] = y_test_probs\n",
+    "    class_models[model_name][\"preds\"] = y_test_predict\n",
+    "\n",
+    "    class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n",
+    "        y_train, y_train_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n",
+    "        y_test, y_test_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n",
+    "        y_train, y_train_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n",
+    "        y_test, y_test_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n",
+    "        y_train, y_train_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n",
+    "        y_test, y_test_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n",
+    "        y_test, y_test_probs\n",
+    "    )\n",
+    "    class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n",
+    "    class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n",
+    "    class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n",
+    "        y_test, y_test_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n",
+    "        y_test, y_test_predict\n",
+    "    )\n",
+    "    class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n",
+    "        y_test, y_test_predict\n",
+    "    )"
   ]
  }
 ],