ещё коммит...

2024-11-29 01:37:09 +04:00 · 2024-11-29 01:37:09 +04:00 · a0218641ee
commit a0218641ee
parent 4135ed6c05
1 changed files with 397 additions and 41 deletions
--- a/lab_4/lab4.ipynb
+++ b/lab_4/lab4.ipynb
@ -2779,7 +2779,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 60,
+   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
@ -3182,7 +3182,197 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Rank</th>\n",
+       "      <th>Name</th>\n",
+       "      <th>Networth</th>\n",
+       "      <th>Age</th>\n",
+       "      <th>Country</th>\n",
+       "      <th>Source</th>\n",
+       "      <th>Industry</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>1</td>\n",
+       "      <td>Elon Musk</td>\n",
+       "      <td>219.0</td>\n",
+       "      <td>50</td>\n",
+       "      <td>United States</td>\n",
+       "      <td>Tesla, SpaceX</td>\n",
+       "      <td>Automotive</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2</td>\n",
+       "      <td>Jeff Bezos</td>\n",
+       "      <td>171.0</td>\n",
+       "      <td>58</td>\n",
+       "      <td>United States</td>\n",
+       "      <td>Amazon</td>\n",
+       "      <td>Technology</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>3</td>\n",
+       "      <td>Bernard Arnault &amp; family</td>\n",
+       "      <td>158.0</td>\n",
+       "      <td>73</td>\n",
+       "      <td>France</td>\n",
+       "      <td>LVMH</td>\n",
+       "      <td>Fashion &amp; Retail</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>4</td>\n",
+       "      <td>Bill Gates</td>\n",
+       "      <td>129.0</td>\n",
+       "      <td>66</td>\n",
+       "      <td>United States</td>\n",
+       "      <td>Microsoft</td>\n",
+       "      <td>Technology</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>5</td>\n",
+       "      <td>Warren Buffett</td>\n",
+       "      <td>118.0</td>\n",
+       "      <td>91</td>\n",
+       "      <td>United States</td>\n",
+       "      <td>Berkshire Hathaway</td>\n",
+       "      <td>Finance &amp; Investments</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2595</th>\n",
+       "      <td>2578</td>\n",
+       "      <td>Jorge Gallardo Ballart</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>80</td>\n",
+       "      <td>Spain</td>\n",
+       "      <td>pharmaceuticals</td>\n",
+       "      <td>Healthcare</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2596</th>\n",
+       "      <td>2578</td>\n",
+       "      <td>Nari Genomal</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>82</td>\n",
+       "      <td>Philippines</td>\n",
+       "      <td>apparel</td>\n",
+       "      <td>Fashion &amp; Retail</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2597</th>\n",
+       "      <td>2578</td>\n",
+       "      <td>Ramesh Genomal</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>71</td>\n",
+       "      <td>Philippines</td>\n",
+       "      <td>apparel</td>\n",
+       "      <td>Fashion &amp; Retail</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2598</th>\n",
+       "      <td>2578</td>\n",
+       "      <td>Sunder Genomal</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>68</td>\n",
+       "      <td>Philippines</td>\n",
+       "      <td>garments</td>\n",
+       "      <td>Fashion &amp; Retail</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2599</th>\n",
+       "      <td>2578</td>\n",
+       "      <td>Horst-Otto Gerberding</td>\n",
+       "      <td>1.0</td>\n",
+       "      <td>69</td>\n",
+       "      <td>Germany</td>\n",
+       "      <td>flavors and fragrances</td>\n",
+       "      <td>Food &amp; Beverage</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2600 rows × 7 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      Rank                        Name  Networth  Age        Country  \\\n",
+       "0         1                 Elon Musk      219.0   50  United States   \n",
+       "1         2                Jeff Bezos      171.0   58  United States   \n",
+       "2         3  Bernard Arnault & family      158.0   73         France   \n",
+       "3         4                Bill Gates      129.0   66  United States   \n",
+       "4         5            Warren Buffett      118.0   91  United States   \n",
+       "...     ...                        ...       ...  ...            ...   \n",
+       "2595   2578    Jorge Gallardo Ballart        1.0   80          Spain   \n",
+       "2596   2578              Nari Genomal        1.0   82    Philippines   \n",
+       "2597   2578            Ramesh Genomal        1.0   71    Philippines   \n",
+       "2598   2578            Sunder Genomal        1.0   68    Philippines   \n",
+       "2599   2578     Horst-Otto Gerberding        1.0   69        Germany   \n",
+       "\n",
+       "                      Source                Industry  \n",
+       "0              Tesla, SpaceX             Automotive   \n",
+       "1                     Amazon             Technology   \n",
+       "2                       LVMH       Fashion & Retail   \n",
+       "3                  Microsoft             Technology   \n",
+       "4         Berkshire Hathaway  Finance & Investments   \n",
+       "...                      ...                     ...  \n",
+       "2595         pharmaceuticals             Healthcare   \n",
+       "2596                 apparel       Fashion & Retail   \n",
+       "2597                 apparel       Fashion & Retail   \n",
+       "2598                garments       Fashion & Retail   \n",
+       "2599  flavors and fragrances        Food & Beverage   \n",
+       "\n",
+       "[2600 rows x 7 columns]"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
@ -3763,7 +3953,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
@ -3780,8 +3970,6 @@
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:320: UserWarning: The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.\n",
      "  warnings.warn(\n",
      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:242: UserWarning: Found unknown categories in columns [0, 1] during transform. These unknown categories will be encoded as all zeros\n",
-      "  warnings.warn(\n",
-      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:320: UserWarning: The total space of parameters 9 is smaller than n_iter=10. Running 9 iterations. For exhaustive searches, use GridSearchCV.\n",
      "  warnings.warn(\n"
     ]
    },
@ -3813,16 +4001,16 @@
      "R2: -7135788186375614.0\n",
      "\n",
      "Model: RandomForestRegressor\n",
-      "Best Params: {'model__n_estimators': 100, 'model__max_depth': None}\n",
-      "MAE: 3.372747412240537\n",
-      "RMSE: 8.304219801175332\n",
-      "R2: -1.9013866015383956\n",
+      "Best Params: {'model__n_estimators': 40, 'model__max_depth': 10}\n",
+      "MAE: 3.454630023161808\n",
+      "RMSE: 7.755775760541111\n",
+      "R2: -1.530803448377045\n",
      "\n",
      "Model: GradientBoostingRegressor\n",
-      "Best Params: {'model__n_estimators': 200, 'model__max_depth': 5, 'model__learning_rate': 0.2}\n",
-      "MAE: 3.572597806187309\n",
-      "RMSE: 10.306842221909957\n",
-      "R2: -3.4695025074945356\n"
+      "Best Params: {'model__n_estimators': 100, 'model__max_depth': 4, 'model__learning_rate': 0.4}\n",
+      "MAE: 3.585784679817764\n",
+      "RMSE: 10.312249036012052\n",
+      "R2: -3.474193004771121\n"
     ]
    },
    {
@ -3855,13 +4043,13 @@
    "param_grids_regression = {\n",
    "    \"LinearRegression\": {},\n",
    "    \"RandomForestRegressor\": {\n",
-    "        'model__n_estimators': [50, 100, 200],\n",
-    "        'model__max_depth': [None, 10, 20],\n",
+    "        'model__n_estimators': [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
+    "        'model__max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9 ,10],\n",
    "    },\n",
    "    \"GradientBoostingRegressor\": {\n",
-    "        'model__n_estimators': [50, 100, 200],\n",
-    "        'model__learning_rate': [0.01, 0.1, 0.2],\n",
-    "        'model__max_depth': [3, 5, 10]\n",
+    "        'model__n_estimators': [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
+    "        'model__learning_rate': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5],\n",
+    "        'model__max_depth': [2, 3, 4, 5, 6, 7, 8, 9 ,10]\n",
    "    }\n",
    "}\n",
    "\n",
@ -3905,66 +4093,66 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 9,
+   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<style type=\"text/css\">\n",
-       "#T_c5529_row0_col0, #T_c5529_row0_col1, #T_c5529_row1_col0, #T_c5529_row1_col1 {\n",
+       "#T_5e893_row0_col0, #T_5e893_row0_col1, #T_5e893_row1_col0, #T_5e893_row1_col1 {\n",
       "  background-color: #26818e;\n",
       "  color: #f1f1f1;\n",
       "}\n",
-       "#T_c5529_row0_col2, #T_c5529_row1_col2 {\n",
+       "#T_5e893_row0_col2, #T_5e893_row1_col2 {\n",
       "  background-color: #da5a6a;\n",
       "  color: #f1f1f1;\n",
       "}\n",
-       "#T_c5529_row2_col0, #T_c5529_row2_col1 {\n",
+       "#T_5e893_row2_col0, #T_5e893_row2_col1 {\n",
       "  background-color: #a8db34;\n",
       "  color: #000000;\n",
       "}\n",
-       "#T_c5529_row2_col2 {\n",
+       "#T_5e893_row2_col2 {\n",
       "  background-color: #4e02a2;\n",
       "  color: #f1f1f1;\n",
       "}\n",
       "</style>\n",
-       "<table id=\"T_c5529\">\n",
+       "<table id=\"T_5e893\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th class=\"blank level0\" >&nbsp;</th>\n",
-       "      <th id=\"T_c5529_level0_col0\" class=\"col_heading level0 col0\" >MAE</th>\n",
-       "      <th id=\"T_c5529_level0_col1\" class=\"col_heading level0 col1\" >RMSE</th>\n",
-       "      <th id=\"T_c5529_level0_col2\" class=\"col_heading level0 col2\" >R2</th>\n",
+       "      <th id=\"T_5e893_level0_col0\" class=\"col_heading level0 col0\" >MAE</th>\n",
+       "      <th id=\"T_5e893_level0_col1\" class=\"col_heading level0 col1\" >RMSE</th>\n",
+       "      <th id=\"T_5e893_level0_col2\" class=\"col_heading level0 col2\" >R2</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
-       "      <th id=\"T_c5529_level0_row0\" class=\"row_heading level0 row0\" >RandomForestRegressor</th>\n",
-       "      <td id=\"T_c5529_row0_col0\" class=\"data row0 col0\" >3.372747</td>\n",
-       "      <td id=\"T_c5529_row0_col1\" class=\"data row0 col1\" >8.304220</td>\n",
-       "      <td id=\"T_c5529_row0_col2\" class=\"data row0 col2\" >-1.901387</td>\n",
+       "      <th id=\"T_5e893_level0_row0\" class=\"row_heading level0 row0\" >RandomForestRegressor</th>\n",
+       "      <td id=\"T_5e893_row0_col0\" class=\"data row0 col0\" >3.454630</td>\n",
+       "      <td id=\"T_5e893_row0_col1\" class=\"data row0 col1\" >7.755776</td>\n",
+       "      <td id=\"T_5e893_row0_col2\" class=\"data row0 col2\" >-1.530803</td>\n",
       "    </tr>\n",
       "    <tr>\n",
-       "      <th id=\"T_c5529_level0_row1\" class=\"row_heading level0 row1\" >GradientBoostingRegressor</th>\n",
-       "      <td id=\"T_c5529_row1_col0\" class=\"data row1 col0\" >3.572598</td>\n",
-       "      <td id=\"T_c5529_row1_col1\" class=\"data row1 col1\" >10.306842</td>\n",
-       "      <td id=\"T_c5529_row1_col2\" class=\"data row1 col2\" >-3.469503</td>\n",
+       "      <th id=\"T_5e893_level0_row1\" class=\"row_heading level0 row1\" >GradientBoostingRegressor</th>\n",
+       "      <td id=\"T_5e893_row1_col0\" class=\"data row1 col0\" >3.585785</td>\n",
+       "      <td id=\"T_5e893_row1_col1\" class=\"data row1 col1\" >10.312249</td>\n",
+       "      <td id=\"T_5e893_row1_col2\" class=\"data row1 col2\" >-3.474193</td>\n",
       "    </tr>\n",
       "    <tr>\n",
-       "      <th id=\"T_c5529_level0_row2\" class=\"row_heading level0 row2\" >LinearRegression</th>\n",
-       "      <td id=\"T_c5529_row2_col0\" class=\"data row2 col0\" >18059903.801767</td>\n",
-       "      <td id=\"T_c5529_row2_col1\" class=\"data row2 col1\" >411829080.658451</td>\n",
-       "      <td id=\"T_c5529_row2_col2\" class=\"data row2 col2\" >-7135788186375614.000000</td>\n",
+       "      <th id=\"T_5e893_level0_row2\" class=\"row_heading level0 row2\" >LinearRegression</th>\n",
+       "      <td id=\"T_5e893_row2_col0\" class=\"data row2 col0\" >18059903.801767</td>\n",
+       "      <td id=\"T_5e893_row2_col1\" class=\"data row2 col1\" >411829080.658451</td>\n",
+       "      <td id=\"T_5e893_row2_col2\" class=\"data row2 col2\" >-7135788186375614.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n"
      ],
      "text/plain": [
-       "<pandas.io.formats.style.Styler at 0x182a6929490>"
+       "<pandas.io.formats.style.Styler at 0x182a6043ef0>"
      ]
     },
-     "execution_count": 9,
+     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
@ -3987,6 +4175,174 @@
    "\n",
    "styled_metrics"
   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Шикарный вывод: по стране, возрасту, сфере деятельности и источнику доходов невозможно предсказать состояние человека (R2 всех трёх моделей отрицателен). Значит ли это, что кто угодно, где угодно и в чём угодно может добиться успеха?"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Классификация"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   Rank                        Name  Networth        Country  \\\n",
+      "0      1                 Elon Musk      219.0  United States   \n",
+      "1      2                Jeff Bezos      171.0  United States   \n",
+      "2      3  Bernard Arnault & family      158.0         France   \n",
+      "3      4                Bill Gates      129.0  United States   \n",
+      "4      5            Warren Buffett      118.0  United States   \n",
+      "\n",
+      "               Source                Industry Age_category  \n",
+      "0       Tesla, SpaceX             Automotive         50-60  \n",
+      "1              Amazon             Technology         50-60  \n",
+      "2                LVMH       Fashion & Retail         70-80  \n",
+      "3           Microsoft             Technology         60-70  \n",
+      "4  Berkshire Hathaway  Finance & Investments           80+  \n"
+     ]
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "import matplotlib.pyplot as plt\n",
+    "\n",
+    "# Reload the raw Forbes table so this cell is idempotent: 'Age' is dropped\n",
+    "# further down, so re-running without the reload would fail on df['Age'].\n",
+    "df = pd.read_csv(\"C://Users//annal//aim//static//csv//Forbes_Billionaires.csv\")\n",
+    "\n",
+    "# Left-closed age bins (right=False): [0, 30), [30, 40), ..., [80, inf).\n",
+    "# The last edge is open-ended so that the '80+' label covers every age from 80\n",
+    "# upwards instead of silently producing NaN for ages >= 101.\n",
+    "bins = [0, 30, 40, 50, 60, 70, 80, float('inf')]\n",
+    "labels = ['Under 30', '30-40', '40-50', '50-60', '60-70', '70-80', '80+']  # one label per bin\n",
+    "\n",
+    "# Bucket the numeric age into a categorical column.\n",
+    "df[\"Age_category\"] = pd.cut(df['Age'], bins=bins, labels=labels, right=False)\n",
+    "\n",
+    "# Drop the raw 'Age' column; it is now represented by 'Age_category'.\n",
+    "# NOTE(review): the traceback recorded for the following cell ends in KeyError: 'Age',\n",
+    "# so a later preprocessing step presumably still lists 'Age' as a feature -- confirm\n",
+    "# and switch it to 'Age_category' (or stop dropping 'Age' here).\n",
+    "df = df.drop(columns=['Age'])\n",
+    "\n",
+    "# Preview the result\n",
+    "print(df.head())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 18,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Training LogisticRegression...\n"
+     ]
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:320: UserWarning: The total space of parameters 3 is smaller than n_iter=10. Running 3 iterations. For exhaustive searches, use GridSearchCV.\n",
+      "  warnings.warn(\n"
+     ]
+    },
+    {
+     "ename": "ValueError",
+     "evalue": "\nAll the 15 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n15 fits failed with the following error:\nTraceback (most recent call last):\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3805, in get_loc\n    return self._engine.get_loc(casted_key)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"index.pyx\", line 167, in pandas._libs.index.IndexEngine.get_loc\n  File \"index.pyx\", line 196, in pandas._libs.index.IndexEngine.get_loc\n  File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item\n  File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 361, in _get_column_indices\n    col_idx = all_columns.get_loc(col)\n              ^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3812, in get_loc\n    raise KeyError(key) from err\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n    estimator.fit(X_train, y_train, **fit_params)\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n    return fit_method(estimator, *args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File 
\"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 469, in fit\n    Xt = self._fit(X, y, routed_params)\n         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 406, in _fit\n    X, fitted_transformer = fit_transform_one_cached(\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\joblib\\memory.py\", line 312, in __call__\n    return self.func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 1310, in _fit_transform_one\n    res = transformer.fit_transform(X, y, **params.get(\"fit_transform\", {}))\n          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_set_output.py\", line 316, in wrapped\n    data_to_wrap = f(self, X, *args, **kwargs)\n                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n    return fit_method(estimator, *args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 968, in fit_transform\n    self._validate_column_callables(X)\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 536, in _validate_column_callables\n    transformer_to_input_indices[name] = _get_column_indices(X, columns)\n                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 369, in _get_column_indices\n    raise ValueError(\"A given column is not a column of the dataframe\") from e\nValueError: A given column is not a column of the dataframe\n",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mValueError\u001b[0m                                Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[18], line 48\u001b[0m\n\u001b[0;32m     46\u001b[0m param_grid \u001b[38;5;241m=\u001b[39m param_grids_classification[name]\n\u001b[0;32m     47\u001b[0m grid_search \u001b[38;5;241m=\u001b[39m RandomizedSearchCV(pipeline, param_grid, cv\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, scoring\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mf1\u001b[39m\u001b[38;5;124m'\u001b[39m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m---> 48\u001b[0m \u001b[43mgrid_search\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train_clf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train_clf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m     50\u001b[0m \u001b[38;5;66;03m# Лучшая модель\u001b[39;00m\n\u001b[0;32m     51\u001b[0m best_model \u001b[38;5;241m=\u001b[39m grid_search\u001b[38;5;241m.\u001b[39mbest_estimator_\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m   1466\u001b[0m     estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m   1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m   1469\u001b[0m     skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m   1470\u001b[0m         prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m   1471\u001b[0m     )\n\u001b[0;32m   1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1019\u001b[0m, in \u001b[0;36mBaseSearchCV.fit\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m   1013\u001b[0m     results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format_results(\n\u001b[0;32m   1014\u001b[0m         all_candidate_params, n_splits, all_out, all_more_results\n\u001b[0;32m   1015\u001b[0m     )\n\u001b[0;32m   1017\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m results\n\u001b[1;32m-> 1019\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevaluate_candidates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1021\u001b[0m \u001b[38;5;66;03m# multimetric is determined here because in the case of a callable\u001b[39;00m\n\u001b[0;32m   1022\u001b[0m \u001b[38;5;66;03m# self.scoring the return type is only known after calling\u001b[39;00m\n\u001b[0;32m   1023\u001b[0m first_test_score \u001b[38;5;241m=\u001b[39m all_out[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_scores\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1960\u001b[0m, in \u001b[0;36mRandomizedSearchCV._run_search\u001b[1;34m(self, evaluate_candidates)\u001b[0m\n\u001b[0;32m   1958\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_search\u001b[39m(\u001b[38;5;28mself\u001b[39m, evaluate_candidates):\n\u001b[0;32m   1959\u001b[0m \u001b[38;5;250m    \u001b[39m\u001b[38;5;124;03m\"\"\"Search n_iter candidates from param_distributions\"\"\"\u001b[39;00m\n\u001b[1;32m-> 1960\u001b[0m     \u001b[43mevaluate_candidates\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1961\u001b[0m \u001b[43m        \u001b[49m\u001b[43mParameterSampler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1962\u001b[0m \u001b[43m            \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparam_distributions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_iter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\n\u001b[0;32m   1963\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   1964\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:996\u001b[0m, in \u001b[0;36mBaseSearchCV.fit.<locals>.evaluate_candidates\u001b[1;34m(candidate_params, cv, more_results)\u001b[0m\n\u001b[0;32m    989\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(out) \u001b[38;5;241m!=\u001b[39m n_candidates \u001b[38;5;241m*\u001b[39m n_splits:\n\u001b[0;32m    990\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m    991\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcv.split and cv.get_n_splits returned \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    992\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minconsistent results. Expected \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    993\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msplits, got \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(n_splits, \u001b[38;5;28mlen\u001b[39m(out) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m n_candidates)\n\u001b[0;32m    994\u001b[0m     )\n\u001b[1;32m--> 996\u001b[0m \u001b[43m_warn_or_raise_about_fit_failures\u001b[49m\u001b[43m(\u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merror_score\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    998\u001b[0m \u001b[38;5;66;03m# For callable self.scoring, the return type is only know after\u001b[39;00m\n\u001b[0;32m    999\u001b[0m \u001b[38;5;66;03m# calling. If the return type is a dictionary, the error scores\u001b[39;00m\n\u001b[0;32m   1000\u001b[0m \u001b[38;5;66;03m# can now be inserted with the correct key. 
The type checking\u001b[39;00m\n\u001b[0;32m   1001\u001b[0m \u001b[38;5;66;03m# of out will be done in `_insert_error_scores`.\u001b[39;00m\n\u001b[0;32m   1002\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscoring):\n",
+      "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:529\u001b[0m, in \u001b[0;36m_warn_or_raise_about_fit_failures\u001b[1;34m(results, error_score)\u001b[0m\n\u001b[0;32m    522\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_failed_fits \u001b[38;5;241m==\u001b[39m num_fits:\n\u001b[0;32m    523\u001b[0m     all_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m    524\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mAll the \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    525\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIt is very likely that your model is misconfigured.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    526\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can try to debug the error by setting error_score=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    527\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    528\u001b[0m     )\n\u001b[1;32m--> 529\u001b[0m     \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(all_fits_failed_message)\n\u001b[0;32m    531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    532\u001b[0m     some_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m    
533\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mnum_failed_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed out of a total of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    534\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe score on these train-test partitions for these parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m    538\u001b[0m         \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m    539\u001b[0m     )\n",
+      "\u001b[1;31mValueError\u001b[0m: \nAll the 15 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n15 fits failed with the following error:\nTraceback (most recent call last):\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3805, in get_loc\n    return self._engine.get_loc(casted_key)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"index.pyx\", line 167, in pandas._libs.index.IndexEngine.get_loc\n  File \"index.pyx\", line 196, in pandas._libs.index.IndexEngine.get_loc\n  File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item\n  File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 361, in _get_column_indices\n    col_idx = all_columns.get_loc(col)\n              ^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3812, in get_loc\n    raise KeyError(key) from err\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n    estimator.fit(X_train, y_train, **fit_params)\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n    return fit_method(estimator, *args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File 
\"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 469, in fit\n    Xt = self._fit(X, y, routed_params)\n         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 406, in _fit\n    X, fitted_transformer = fit_transform_one_cached(\n                            ^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\joblib\\memory.py\", line 312, in __call__\n    return self.func(*args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 1310, in _fit_transform_one\n    res = transformer.fit_transform(X, y, **params.get(\"fit_transform\", {}))\n          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_set_output.py\", line 316, in wrapped\n    data_to_wrap = f(self, X, *args, **kwargs)\n                   ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n    return fit_method(estimator, *args, **kwargs)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 968, in fit_transform\n    self._validate_column_callables(X)\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 536, in _validate_column_callables\n    transformer_to_input_indices[name] = _get_column_indices(X, columns)\n                                         ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 369, in _get_column_indices\n    raise ValueError(\"A given column is not a column of the dataframe\") from e\nValueError: A given column is not a column of the dataframe\n"
+     ]
+    }
+   ],
+   "source": [
+    "from math import prod\n",
+    "\n",
+    "from sklearn.compose import ColumnTransformer, make_column_selector\n",
+    "from sklearn.ensemble import RandomForestClassifier\n",
+    "from sklearn.impute import SimpleImputer\n",
+    "from sklearn.linear_model import LogisticRegression\n",
+    "from sklearn.metrics import accuracy_score, confusion_matrix, f1_score\n",
+    "from sklearn.neighbors import KNeighborsClassifier\n",
+    "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
+    "\n",
+    "# Classification task: predict Age_category from the remaining columns.\n",
+    "# The target and the identifier columns are removed from the features. 'Age' is removed as well\n",
+    "# when it is present: Age_category is presumably derived from it, so keeping it would leak the\n",
+    "# target into the features (TODO confirm against the cell that creates Age_category).\n",
+    "X = df.drop(columns=['Age_category','Rank ', 'Name']).drop(columns=['Age'], errors='ignore')  # features\n",
+    "# Target variable for classification\n",
+    "y_class = df['Age_category']\n",
+    "\n",
+    "# Train/test split\n",
+    "X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X, y_class, test_size=0.2, random_state=42)\n",
+    "\n",
+    "# The shared `features_preprocessing` transformer selects an 'Age' column, which X does not\n",
+    "# contain: the recorded run of this cell failed on every CV fit with\n",
+    "# 'A given column is not a column of the dataframe'. Columns are therefore selected by dtype,\n",
+    "# so the preprocessor always matches the columns X actually has.\n",
+    "features_preprocessing_clf = ColumnTransformer(transformers=[\n",
+    "    ('numeric', Pipeline(steps=[\n",
+    "        ('imputer', SimpleImputer(strategy='median')),\n",
+    "        ('scaler', StandardScaler()),\n",
+    "    ]), make_column_selector(dtype_include='number')),\n",
+    "    ('categorical', Pipeline(steps=[\n",
+    "        ('imputer', SimpleImputer(strategy='most_frequent')),\n",
+    "        # handle_unknown='ignore': categories unseen during fit are encoded as all zeros\n",
+    "        ('encoder', OneHotEncoder(handle_unknown='ignore')),\n",
+    "    ]), make_column_selector(dtype_exclude='number')),\n",
+    "])\n",
+    "\n",
+    "# Binary F1 averaging raises on a multiclass target, so the averaging mode is picked from the\n",
+    "# number of classes; a two-class target keeps the original 'f1' / average='binary' behaviour.\n",
+    "is_binary_target = y_class.nunique() == 2\n",
+    "f1_scoring = 'f1' if is_binary_target else 'f1_weighted'\n",
+    "f1_average = 'binary' if is_binary_target else 'weighted'\n",
+    "\n",
+    "# Models and their hyper-parameter search spaces (keys are shared between the two dicts)\n",
+    "models_classification = {\n",
+    "    \"LogisticRegression\": LogisticRegression(max_iter=1000),\n",
+    "    \"RandomForestClassifier\": RandomForestClassifier(random_state=42),\n",
+    "    \"KNN\": KNeighborsClassifier()\n",
+    "}\n",
+    "\n",
+    "param_grids_classification = {\n",
+    "    \"LogisticRegression\": {\n",
+    "        'model__C': [0.1, 1, 10]\n",
+    "    },\n",
+    "    \"RandomForestClassifier\": {\n",
+    "        \"model__n_estimators\": [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
+    "        \"model__max_features\": [\"sqrt\", \"log2\", 2],\n",
+    "        \"model__max_depth\": [2, 3, 4, 5, 6, 7, 8, 9 ,10, 20],\n",
+    "        \"model__criterion\": [\"gini\", \"entropy\", \"log_loss\"],\n",
+    "    },\n",
+    "    \"KNN\": {\n",
+    "        'model__n_neighbors': [3, 5, 7, 9, 11],\n",
+    "        'model__weights': ['uniform', 'distance']\n",
+    "    }\n",
+    "}\n",
+    "\n",
+    "# Results: model name -> best params and test-set metrics\n",
+    "results_classification = {}\n",
+    "\n",
+    "# Tune and evaluate every model\n",
+    "for name, model in models_classification.items():\n",
+    "    print(f\"Training {name}...\")\n",
+    "    pipeline = Pipeline(steps=[\n",
+    "        ('features_preprocessing', features_preprocessing_clf),\n",
+    "        ('model', model)\n",
+    "    ])\n",
+    "    param_grid = param_grids_classification[name]\n",
+    "    # Never request more candidates than the space holds (10 is the RandomizedSearchCV default);\n",
+    "    # this avoids the 'total space of parameters is smaller than n_iter' warning.\n",
+    "    n_candidates = min(10, prod(len(values) for values in param_grid.values()))\n",
+    "    # random_state makes the sampled candidates reproducible, like the split and the forest above\n",
+    "    grid_search = RandomizedSearchCV(\n",
+    "        pipeline, param_grid, n_iter=n_candidates, cv=5,\n",
+    "        scoring=f1_scoring, n_jobs=-1, random_state=42\n",
+    "    )\n",
+    "    grid_search.fit(X_train_clf, y_train_clf)\n",
+    "\n",
+    "    # Best pipeline found by the search, evaluated on the held-out test set\n",
+    "    best_model = grid_search.best_estimator_\n",
+    "    y_pred = best_model.predict(X_test_clf)\n",
+    "\n",
+    "    # Metrics\n",
+    "    acc = accuracy_score(y_test_clf, y_pred)\n",
+    "    f1 = f1_score(y_test_clf, y_pred, average=f1_average)\n",
+    "\n",
+    "    # Confusion matrix\n",
+    "    c_matrix = confusion_matrix(y_test_clf, y_pred)\n",
+    "\n",
+    "    # Store the results\n",
+    "    results_classification[name] = {\n",
+    "        \"Best Params\": grid_search.best_params_,\n",
+    "        \"Accuracy\": acc,\n",
+    "        \"F1 Score\": f1,\n",
+    "        \"Confusion_matrix\": c_matrix\n",
+    "    }\n",
+    "\n",
+    "# Print the results\n",
+    "for name, metrics in results_classification.items():\n",
+    "    print(f\"\\nModel: {name}\")\n",
+    "    for metric, value in metrics.items():\n",
+    "        print(f\"{metric}: {value}\")"
+   ]
  }
 ],
 "metadata": {