diff --git a/lab_4/lab4.ipynb b/lab_4/lab4.ipynb
index de77553..4104768 100644
--- a/lab_4/lab4.ipynb
+++ b/lab_4/lab4.ipynb
@@ -2779,7 +2779,7 @@
},
{
"cell_type": "code",
- "execution_count": 60,
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3182,7 +3182,197 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 12,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Rank | \n",
+ " Name | \n",
+ " Networth | \n",
+ " Age | \n",
+ " Country | \n",
+ " Source | \n",
+ " Industry | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 1 | \n",
+ " Elon Musk | \n",
+ " 219.0 | \n",
+ " 50 | \n",
+ " United States | \n",
+ " Tesla, SpaceX | \n",
+ " Automotive | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 2 | \n",
+ " Jeff Bezos | \n",
+ " 171.0 | \n",
+ " 58 | \n",
+ " United States | \n",
+ " Amazon | \n",
+ " Technology | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 3 | \n",
+ " Bernard Arnault & family | \n",
+ " 158.0 | \n",
+ " 73 | \n",
+ " France | \n",
+ " LVMH | \n",
+ " Fashion & Retail | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 4 | \n",
+ " Bill Gates | \n",
+ " 129.0 | \n",
+ " 66 | \n",
+ " United States | \n",
+ " Microsoft | \n",
+ " Technology | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 5 | \n",
+ " Warren Buffett | \n",
+ " 118.0 | \n",
+ " 91 | \n",
+ " United States | \n",
+ " Berkshire Hathaway | \n",
+ " Finance & Investments | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 2595 | \n",
+ " 2578 | \n",
+ " Jorge Gallardo Ballart | \n",
+ " 1.0 | \n",
+ " 80 | \n",
+ " Spain | \n",
+ " pharmaceuticals | \n",
+ " Healthcare | \n",
+ "
\n",
+ " \n",
+ " 2596 | \n",
+ " 2578 | \n",
+ " Nari Genomal | \n",
+ " 1.0 | \n",
+ " 82 | \n",
+ " Philippines | \n",
+ " apparel | \n",
+ " Fashion & Retail | \n",
+ "
\n",
+ " \n",
+ " 2597 | \n",
+ " 2578 | \n",
+ " Ramesh Genomal | \n",
+ " 1.0 | \n",
+ " 71 | \n",
+ " Philippines | \n",
+ " apparel | \n",
+ " Fashion & Retail | \n",
+ "
\n",
+ " \n",
+ " 2598 | \n",
+ " 2578 | \n",
+ " Sunder Genomal | \n",
+ " 1.0 | \n",
+ " 68 | \n",
+ " Philippines | \n",
+ " garments | \n",
+ " Fashion & Retail | \n",
+ "
\n",
+ " \n",
+ " 2599 | \n",
+ " 2578 | \n",
+ " Horst-Otto Gerberding | \n",
+ " 1.0 | \n",
+ " 69 | \n",
+ " Germany | \n",
+ " flavors and fragrances | \n",
+ " Food & Beverage | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
2600 rows × 7 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Rank Name Networth Age Country \\\n",
+ "0 1 Elon Musk 219.0 50 United States \n",
+ "1 2 Jeff Bezos 171.0 58 United States \n",
+ "2 3 Bernard Arnault & family 158.0 73 France \n",
+ "3 4 Bill Gates 129.0 66 United States \n",
+ "4 5 Warren Buffett 118.0 91 United States \n",
+ "... ... ... ... ... ... \n",
+ "2595 2578 Jorge Gallardo Ballart 1.0 80 Spain \n",
+ "2596 2578 Nari Genomal 1.0 82 Philippines \n",
+ "2597 2578 Ramesh Genomal 1.0 71 Philippines \n",
+ "2598 2578 Sunder Genomal 1.0 68 Philippines \n",
+ "2599 2578 Horst-Otto Gerberding 1.0 69 Germany \n",
+ "\n",
+ " Source Industry \n",
+ "0 Tesla, SpaceX Automotive \n",
+ "1 Amazon Technology \n",
+ "2 LVMH Fashion & Retail \n",
+ "3 Microsoft Technology \n",
+ "4 Berkshire Hathaway Finance & Investments \n",
+ "... ... ... \n",
+ "2595 pharmaceuticals Healthcare \n",
+ "2596 apparel Fashion & Retail \n",
+ "2597 apparel Fashion & Retail \n",
+ "2598 garments Fashion & Retail \n",
+ "2599 flavors and fragrances Food & Beverage \n",
+ "\n",
+ "[2600 rows x 7 columns]"
+ ]
+ },
+ "execution_count": 12,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3763,7 +3953,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -3780,8 +3970,6 @@
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:320: UserWarning: The total space of parameters 1 is smaller than n_iter=10. Running 1 iterations. For exhaustive searches, use GridSearchCV.\n",
" warnings.warn(\n",
"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\preprocessing\\_encoders.py:242: UserWarning: Found unknown categories in columns [0, 1] during transform. These unknown categories will be encoded as all zeros\n",
- " warnings.warn(\n",
- "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:320: UserWarning: The total space of parameters 9 is smaller than n_iter=10. Running 9 iterations. For exhaustive searches, use GridSearchCV.\n",
" warnings.warn(\n"
]
},
@@ -3813,16 +4001,16 @@
"R2: -7135788186375614.0\n",
"\n",
"Model: RandomForestRegressor\n",
- "Best Params: {'model__n_estimators': 100, 'model__max_depth': None}\n",
- "MAE: 3.372747412240537\n",
- "RMSE: 8.304219801175332\n",
- "R2: -1.9013866015383956\n",
+ "Best Params: {'model__n_estimators': 40, 'model__max_depth': 10}\n",
+ "MAE: 3.454630023161808\n",
+ "RMSE: 7.755775760541111\n",
+ "R2: -1.530803448377045\n",
"\n",
"Model: GradientBoostingRegressor\n",
- "Best Params: {'model__n_estimators': 200, 'model__max_depth': 5, 'model__learning_rate': 0.2}\n",
- "MAE: 3.572597806187309\n",
- "RMSE: 10.306842221909957\n",
- "R2: -3.4695025074945356\n"
+ "Best Params: {'model__n_estimators': 100, 'model__max_depth': 4, 'model__learning_rate': 0.4}\n",
+ "MAE: 3.585784679817764\n",
+ "RMSE: 10.312249036012052\n",
+ "R2: -3.474193004771121\n"
]
},
{
@@ -3855,13 +4043,13 @@
"param_grids_regression = {\n",
" \"LinearRegression\": {},\n",
" \"RandomForestRegressor\": {\n",
- " 'model__n_estimators': [50, 100, 200],\n",
- " 'model__max_depth': [None, 10, 20],\n",
+ " 'model__n_estimators': [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
+ " 'model__max_depth': [None, 2, 3, 4, 5, 6, 7, 8, 9 ,10],\n",
" },\n",
" \"GradientBoostingRegressor\": {\n",
- " 'model__n_estimators': [50, 100, 200],\n",
- " 'model__learning_rate': [0.01, 0.1, 0.2],\n",
- " 'model__max_depth': [3, 5, 10]\n",
+ " 'model__n_estimators': [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
+ " 'model__learning_rate': [0.01, 0.1, 0.2, 0.3, 0.4, 0.5],\n",
+ " 'model__max_depth': [2, 3, 4, 5, 6, 7, 8, 9 ,10]\n",
" }\n",
"}\n",
"\n",
@@ -3905,66 +4093,66 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
- "\n",
+ "\n",
" \n",
" \n",
" | \n",
- " MAE | \n",
- " RMSE | \n",
- " R2 | \n",
+ " MAE | \n",
+ " RMSE | \n",
+ " R2 | \n",
"
\n",
" \n",
" \n",
" \n",
- " RandomForestRegressor | \n",
- " 3.372747 | \n",
- " 8.304220 | \n",
- " -1.901387 | \n",
+ " RandomForestRegressor | \n",
+ " 3.454630 | \n",
+ " 7.755776 | \n",
+ " -1.530803 | \n",
"
\n",
" \n",
- " GradientBoostingRegressor | \n",
- " 3.572598 | \n",
- " 10.306842 | \n",
- " -3.469503 | \n",
+ " GradientBoostingRegressor | \n",
+ " 3.585785 | \n",
+ " 10.312249 | \n",
+ " -3.474193 | \n",
"
\n",
" \n",
- " LinearRegression | \n",
- " 18059903.801767 | \n",
- " 411829080.658451 | \n",
- " -7135788186375614.000000 | \n",
+ " LinearRegression | \n",
+ " 18059903.801767 | \n",
+ " 411829080.658451 | \n",
+ " -7135788186375614.000000 | \n",
"
\n",
" \n",
"
\n"
],
"text/plain": [
- ""
+ ""
]
},
- "execution_count": 9,
+ "execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
@@ -3987,6 +4175,174 @@
"\n",
"styled_metrics"
]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Шикарный вывод: по стране, возрасту, сфере деятельности и источнику доходов невозможно предсказать состояние человека. Значит ли это, что кто угодно, где угодно и в чём угодно может добиться успеха?"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Классификация"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Rank Name Networth Country \\\n",
+ "0 1 Elon Musk 219.0 United States \n",
+ "1 2 Jeff Bezos 171.0 United States \n",
+ "2 3 Bernard Arnault & family 158.0 France \n",
+ "3 4 Bill Gates 129.0 United States \n",
+ "4 5 Warren Buffett 118.0 United States \n",
+ "\n",
+ " Source Industry Age_category \n",
+ "0 Tesla, SpaceX Automotive 50-60 \n",
+ "1 Amazon Technology 50-60 \n",
+ "2 LVMH Fashion & Retail 70-80 \n",
+ "3 Microsoft Technology 60-70 \n",
+ "4 Berkshire Hathaway Finance & Investments 80+ \n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "# Reload the raw data so that re-running this cell always starts from the same frame.\n",
+ "df = pd.read_csv(\"C://Users//annal//aim//static//csv//Forbes_Billionaires.csv\")\n",
+ "\n",
+ "# Left-closed age intervals (right=False): [0, 30), [30, 40), ..., [80, 101).\n",
+ "bins = [0, 30, 40, 50, 60, 70, 80, 101]\n",
+ "# One label per interval above.\n",
+ "labels = ['Under 30', '30-40', '40-50', '50-60', '60-70', '70-80', '80+']\n",
+ "\n",
+ "# Bin the numeric age into a categorical column; ages outside the bins (or missing) become NaN.\n",
+ "df[\"Age_category\"] = pd.cut(df['Age'], bins=bins, labels=labels, right=False)\n",
+ "\n",
+ "# Drop the raw 'Age' column: it is now represented by 'Age_category'.\n",
+ "# Rebinding (instead of inplace=True) keeps the cell free of hidden in-place mutation.\n",
+ "df = df.drop(columns=['Age'])\n",
+ "\n",
+ "# Preview the result\n",
+ "print(df.head())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Training LogisticRegression...\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:320: UserWarning: The total space of parameters 3 is smaller than n_iter=10. Running 3 iterations. For exhaustive searches, use GridSearchCV.\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "ename": "ValueError",
+ "evalue": "\nAll the 15 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n15 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3805, in get_loc\n return self._engine.get_loc(casted_key)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"index.pyx\", line 167, in pandas._libs.index.IndexEngine.get_loc\n File \"index.pyx\", line 196, in pandas._libs.index.IndexEngine.get_loc\n File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item\n File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 361, in _get_column_indices\n col_idx = all_columns.get_loc(col)\n ^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3812, in get_loc\n raise KeyError(key) from err\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 469, in 
fit\n Xt = self._fit(X, y, routed_params)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 406, in _fit\n X, fitted_transformer = fit_transform_one_cached(\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\joblib\\memory.py\", line 312, in __call__\n return self.func(*args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 1310, in _fit_transform_one\n res = transformer.fit_transform(X, y, **params.get(\"fit_transform\", {}))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_set_output.py\", line 316, in wrapped\n data_to_wrap = f(self, X, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 968, in fit_transform\n self._validate_column_callables(X)\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 536, in _validate_column_callables\n transformer_to_input_indices[name] = _get_column_indices(X, columns)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 369, in _get_column_indices\n raise ValueError(\"A given column is not a column of the dataframe\") from e\nValueError: A given column is not a column of the dataframe\n",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[1;32mIn[18], line 48\u001b[0m\n\u001b[0;32m 46\u001b[0m param_grid \u001b[38;5;241m=\u001b[39m param_grids_classification[name]\n\u001b[0;32m 47\u001b[0m grid_search \u001b[38;5;241m=\u001b[39m RandomizedSearchCV(pipeline, param_grid, cv\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m5\u001b[39m, scoring\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mf1\u001b[39m\u001b[38;5;124m'\u001b[39m, n_jobs\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m)\n\u001b[1;32m---> 48\u001b[0m \u001b[43mgrid_search\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train_clf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train_clf\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 50\u001b[0m \u001b[38;5;66;03m# Лучшая модель\u001b[39;00m\n\u001b[0;32m 51\u001b[0m best_model \u001b[38;5;241m=\u001b[39m grid_search\u001b[38;5;241m.\u001b[39mbest_estimator_\n",
+ "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1019\u001b[0m, in \u001b[0;36mBaseSearchCV.fit\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m 1013\u001b[0m results \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_format_results(\n\u001b[0;32m 1014\u001b[0m all_candidate_params, n_splits, all_out, all_more_results\n\u001b[0;32m 1015\u001b[0m )\n\u001b[0;32m 1017\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m results\n\u001b[1;32m-> 1019\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_run_search\u001b[49m\u001b[43m(\u001b[49m\u001b[43mevaluate_candidates\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1021\u001b[0m \u001b[38;5;66;03m# multimetric is determined here because in the case of a callable\u001b[39;00m\n\u001b[0;32m 1022\u001b[0m \u001b[38;5;66;03m# self.scoring the return type is only known after calling\u001b[39;00m\n\u001b[0;32m 1023\u001b[0m first_test_score \u001b[38;5;241m=\u001b[39m all_out[\u001b[38;5;241m0\u001b[39m][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_scores\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n",
+ "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:1960\u001b[0m, in \u001b[0;36mRandomizedSearchCV._run_search\u001b[1;34m(self, evaluate_candidates)\u001b[0m\n\u001b[0;32m 1958\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_run_search\u001b[39m(\u001b[38;5;28mself\u001b[39m, evaluate_candidates):\n\u001b[0;32m 1959\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Search n_iter candidates from param_distributions\"\"\"\u001b[39;00m\n\u001b[1;32m-> 1960\u001b[0m \u001b[43mevaluate_candidates\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1961\u001b[0m \u001b[43m \u001b[49m\u001b[43mParameterSampler\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 1962\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparam_distributions\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mn_iter\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrandom_state\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrandom_state\u001b[49m\n\u001b[0;32m 1963\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1964\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n",
+ "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_search.py:996\u001b[0m, in \u001b[0;36mBaseSearchCV.fit..evaluate_candidates\u001b[1;34m(candidate_params, cv, more_results)\u001b[0m\n\u001b[0;32m 989\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(out) \u001b[38;5;241m!=\u001b[39m n_candidates \u001b[38;5;241m*\u001b[39m n_splits:\n\u001b[0;32m 990\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 991\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcv.split and cv.get_n_splits returned \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 992\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124minconsistent results. Expected \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 993\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msplits, got \u001b[39m\u001b[38;5;132;01m{}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;241m.\u001b[39mformat(n_splits, \u001b[38;5;28mlen\u001b[39m(out) \u001b[38;5;241m/\u001b[39m\u001b[38;5;241m/\u001b[39m n_candidates)\n\u001b[0;32m 994\u001b[0m )\n\u001b[1;32m--> 996\u001b[0m \u001b[43m_warn_or_raise_about_fit_failures\u001b[49m\u001b[43m(\u001b[49m\u001b[43mout\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43merror_score\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 998\u001b[0m \u001b[38;5;66;03m# For callable self.scoring, the return type is only know after\u001b[39;00m\n\u001b[0;32m 999\u001b[0m \u001b[38;5;66;03m# calling. If the return type is a dictionary, the error scores\u001b[39;00m\n\u001b[0;32m 1000\u001b[0m \u001b[38;5;66;03m# can now be inserted with the correct key. 
The type checking\u001b[39;00m\n\u001b[0;32m 1001\u001b[0m \u001b[38;5;66;03m# of out will be done in `_insert_error_scores`.\u001b[39;00m\n\u001b[0;32m 1002\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mscoring):\n",
+ "File \u001b[1;32mc:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:529\u001b[0m, in \u001b[0;36m_warn_or_raise_about_fit_failures\u001b[1;34m(results, error_score)\u001b[0m\n\u001b[0;32m 522\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_failed_fits \u001b[38;5;241m==\u001b[39m num_fits:\n\u001b[0;32m 523\u001b[0m all_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 524\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mAll the \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIt is very likely that your model is misconfigured.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 526\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can try to debug the error by setting error_score=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 527\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 528\u001b[0m )\n\u001b[1;32m--> 529\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(all_fits_failed_message)\n\u001b[0;32m 531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 532\u001b[0m some_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 533\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mnum_failed_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed out of a total of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 534\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe score on these train-test partitions for these parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 538\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 539\u001b[0m )\n",
+ "\u001b[1;31mValueError\u001b[0m: \nAll the 15 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n15 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3805, in get_loc\n return self._engine.get_loc(casted_key)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"index.pyx\", line 167, in pandas._libs.index.IndexEngine.get_loc\n File \"index.pyx\", line 196, in pandas._libs.index.IndexEngine.get_loc\n File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7081, in pandas._libs.hashtable.PyObjectHashTable.get_item\n File \"pandas\\\\_libs\\\\hashtable_class_helper.pxi\", line 7089, in pandas._libs.hashtable.PyObjectHashTable.get_item\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 361, in _get_column_indices\n col_idx = all_columns.get_loc(col)\n ^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\pandas\\core\\indexes\\base.py\", line 3812, in get_loc\n raise KeyError(key) from err\nKeyError: 'Age'\n\nThe above exception was the direct cause of the following exception:\n\nTraceback (most recent call last):\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File 
\"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 469, in fit\n Xt = self._fit(X, y, routed_params)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 406, in _fit\n X, fitted_transformer = fit_transform_one_cached(\n ^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\joblib\\memory.py\", line 312, in __call__\n return self.func(*args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\pipeline.py\", line 1310, in _fit_transform_one\n res = transformer.fit_transform(X, y, **params.get(\"fit_transform\", {}))\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_set_output.py\", line 316, in wrapped\n data_to_wrap = f(self, X, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 968, in fit_transform\n self._validate_column_callables(X)\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py\", line 536, in _validate_column_callables\n transformer_to_input_indices[name] = _get_column_indices(X, columns)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\annal\\aim\\.venv\\Lib\\site-packages\\sklearn\\utils\\_indexing.py\", line 369, in _get_column_indices\n raise ValueError(\"A given column is not a column of the dataframe\") from e\nValueError: A given column is not a column of the dataframe\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.compose import ColumnTransformer\n",
+ "from sklearn.ensemble import RandomForestClassifier\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.linear_model import LogisticRegression\n",
+ "from sklearn.metrics import accuracy_score, confusion_matrix, f1_score\n",
+ "from sklearn.model_selection import ParameterGrid, RandomizedSearchCV, train_test_split\n",
+ "from sklearn.neighbors import KNeighborsClassifier\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n",
+ "\n",
+ "# Keep only rows whose age fell into one of the bins: classifiers cannot fit a NaN target.\n",
+ "df_clf = df.dropna(subset=['Age_category'])\n",
+ "\n",
+ "X = df_clf.drop(columns=['Age_category','Rank ', 'Name'])  # Features\n",
+ "# Target variable for classification (plain strings, accepted by every estimator and metric)\n",
+ "y_class = df_clf['Age_category'].astype(str)\n",
+ "\n",
+ "# Train/test split\n",
+ "X_train_clf, X_test_clf, y_train_clf, y_test_clf = train_test_split(X, y_class, test_size=0.2, random_state=42)\n",
+ "\n",
+ "# Build the preprocessing from the columns actually present in X.\n",
+ "# The shared `features_preprocessing` selects an 'Age' column, which this frame no longer has,\n",
+ "# so reusing it makes every fit fail with KeyError: 'Age'.\n",
+ "num_columns_clf = X.select_dtypes(include='number').columns.tolist()\n",
+ "cat_columns_clf = X.select_dtypes(exclude='number').columns.tolist()\n",
+ "\n",
+ "features_preprocessing_clf = ColumnTransformer(\n",
+ "    transformers=[\n",
+ "        ('num', Pipeline(steps=[\n",
+ "            ('imputer', SimpleImputer(strategy='median')),\n",
+ "            ('scaler', StandardScaler()),\n",
+ "        ]), num_columns_clf),\n",
+ "        ('cat', Pipeline(steps=[\n",
+ "            ('imputer', SimpleImputer(strategy='most_frequent')),\n",
+ "            # Categories unseen during fit are encoded as all zeros instead of raising.\n",
+ "            ('encoder', OneHotEncoder(handle_unknown='ignore')),\n",
+ "        ]), cat_columns_clf),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "# Models and hyper-parameter grids\n",
+ "models_classification = {\n",
+ "    \"LogisticRegression\": LogisticRegression(max_iter=1000),\n",
+ "    \"RandomForestClassifier\": RandomForestClassifier(random_state=42),\n",
+ "    \"KNN\": KNeighborsClassifier()\n",
+ "}\n",
+ "\n",
+ "param_grids_classification = {\n",
+ "    \"LogisticRegression\": {\n",
+ "        'model__C': [0.1, 1, 10]\n",
+ "    },\n",
+ "    \"RandomForestClassifier\": {\n",
+ "        \"model__n_estimators\": [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n",
+ "        \"model__max_features\": [\"sqrt\", \"log2\", 2],\n",
+ "        \"model__max_depth\": [2, 3, 4, 5, 6, 7, 8, 9 ,10, 20],\n",
+ "        \"model__criterion\": [\"gini\", \"entropy\", \"log_loss\"],\n",
+ "    },\n",
+ "    \"KNN\": {\n",
+ "        'model__n_neighbors': [3, 5, 7, 9, 11],\n",
+ "        'model__weights': ['uniform', 'distance']\n",
+ "    }\n",
+ "}\n",
+ "\n",
+ "# Results, keyed by model name\n",
+ "results_classification = {}\n",
+ "\n",
+ "# Tune and evaluate every model\n",
+ "for name, model in models_classification.items():\n",
+ "    print(f\"Training {name}...\")\n",
+ "    pipeline = Pipeline(steps=[\n",
+ "        ('features_preprocessing', features_preprocessing_clf),\n",
+ "        ('model', model)\n",
+ "    ])\n",
+ "    param_grid = param_grids_classification[name]\n",
+ "    # Never request more candidates than the grid contains (avoids the n_iter UserWarning).\n",
+ "    n_iter = min(10, len(ParameterGrid(param_grid)))\n",
+ "    # The target has several classes, so the binary-only 'f1' scorer cannot be used;\n",
+ "    # 'f1_weighted' averages per-class F1 weighted by class support.\n",
+ "    grid_search = RandomizedSearchCV(\n",
+ "        pipeline,\n",
+ "        param_grid,\n",
+ "        n_iter=n_iter,\n",
+ "        cv=5,\n",
+ "        scoring='f1_weighted',\n",
+ "        n_jobs=-1,\n",
+ "        random_state=42,  # reproducible candidate sampling\n",
+ "    )\n",
+ "    grid_search.fit(X_train_clf, y_train_clf)\n",
+ "\n",
+ "    # Best model found by the search, evaluated on the held-out test set\n",
+ "    best_model = grid_search.best_estimator_\n",
+ "    y_pred = best_model.predict(X_test_clf)\n",
+ "\n",
+ "    # Metrics (same averaging as the search scorer; the default average='binary' raises on multiclass)\n",
+ "    acc = accuracy_score(y_test_clf, y_pred)\n",
+ "    f1 = f1_score(y_test_clf, y_pred, average='weighted', zero_division=0)\n",
+ "\n",
+ "    # Confusion matrix\n",
+ "    c_matrix = confusion_matrix(y_test_clf, y_pred)\n",
+ "\n",
+ "    # Store the results\n",
+ "    results_classification[name] = {\n",
+ "        \"Best Params\": grid_search.best_params_,\n",
+ "        \"Accuracy\": acc,\n",
+ "        \"F1 Score\": f1,\n",
+ "        \"Confusion_matrix\": c_matrix\n",
+ "    }\n",
+ "\n",
+ "# Print the results\n",
+ "for name, metrics in results_classification.items():\n",
+ "    print(f\"\\nModel: {name}\")\n",
+ "    for metric, value in metrics.items():\n",
+ "        print(f\"{metric}: {value}\")"
+ ]
}
],
"metadata": {