1947 lines
334 KiB
Plaintext
1947 lines
334 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Начало лабораторной работы"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 130,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"<class 'pandas.core.frame.DataFrame'>\n",
|
|||
|
"Index: 235 entries, 1 to 235\n",
|
|||
|
"Data columns (total 11 columns):\n",
|
|||
|
" # Column Non-Null Count Dtype \n",
|
|||
|
"--- ------ -------------- ----- \n",
|
|||
|
" 0 Country (or dependency) 235 non-null object \n",
|
|||
|
" 1 Population2020 235 non-null int64 \n",
|
|||
|
" 2 Yearly Change 235 non-null float64\n",
|
|||
|
" 3 NetChange 235 non-null int64 \n",
|
|||
|
" 4 Density 235 non-null object \n",
|
|||
|
" 5 LandArea 235 non-null int64 \n",
|
|||
|
" 6 Migrants (net) 201 non-null object \n",
|
|||
|
" 7 Fert. Rate 235 non-null object \n",
|
|||
|
" 8 Med. Age 235 non-null object \n",
|
|||
|
" 9 Urban Pop % 235 non-null object \n",
|
|||
|
" 10 World Share 235 non-null object \n",
|
|||
|
"dtypes: float64(1), int64(3), object(7)\n",
|
|||
|
"memory usage: 22.0+ KB\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"df.info()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Бизнес-цели"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование популяции в странах:\n",
|
|||
|
"\n",
|
|||
|
"Цель: Разработать модель, которая будет предсказывать популяцию в странах\n",
|
|||
|
"Применение:\n",
|
|||
|
"Погнозировать глобальную мировую экономику\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"Цель: Определить оптимальные характеристик по увеличению популяции.\n",
|
|||
|
"Применение:\n",
|
|||
|
"Понять, какие характеристики соответствуют увеличению популяцию\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование популяции"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 131,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Среднее значение поля 'Population2020: 33171202.680851065\n",
|
|||
|
" Country (or dependency) Population2020 Yearly Change NetChange Density \\\n",
|
|||
|
"no \n",
|
|||
|
"1 China 1439323776 0.39 5540090 153 \n",
|
|||
|
"2 India 1380004385 0.99 13586631 464 \n",
|
|||
|
"3 United States 331002651 0.59 1937734 36 \n",
|
|||
|
"4 Indonesia 273523615 1.07 2898047 151 \n",
|
|||
|
"5 Pakistan 220892340 2.00 4327022 287 \n",
|
|||
|
"\n",
|
|||
|
" LandArea Migrants (net) Fert. Rate Med. Age Urban Pop % World Share \\\n",
|
|||
|
"no \n",
|
|||
|
"1 9388211 -348,399 1.7 38 61% 18.47% \n",
|
|||
|
"2 2973190 -532,687 2.2 28 35% 17.70% \n",
|
|||
|
"3 9147420 954,806 1.8 38 83% 4.25% \n",
|
|||
|
"4 1811570 -98,955 2.3 30 56% 3.51% \n",
|
|||
|
"5 770880 -233,379 3.6 23 35% 2.83% \n",
|
|||
|
"\n",
|
|||
|
" above_average_Population2020 Population2020_volatility \n",
|
|||
|
"no \n",
|
|||
|
"1 1 1439322975 \n",
|
|||
|
"2 1 1439322975 \n",
|
|||
|
"3 1 1439322975 \n",
|
|||
|
"4 1 1439322975 \n",
|
|||
|
"5 1 1439322975 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 28\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднее значение популяции\n",
|
|||
|
"average_count = df['Population2020'].mean()\n",
|
|||
|
"print(f\"Среднее значение поля 'Population2020: {average_count}\")\n",
|
|||
|
"\n",
|
|||
|
"# Создаем новую переменную, указывающую, превышает ли популяция среднее\n",
|
|||
|
"df[\"above_average_Population2020\"] = (df[\"Population2020\"] > average_count).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем волатильность (разницу между максимальной и минимальной популяцией)\n",
|
|||
|
"df[\"Population2020_volatility\"] = df[\"Population2020\"].max() - df[\"Population2020\"].min()\n",
|
|||
|
"\n",
|
|||
|
"# Выводим первые строки измененной таблицы для проверки\n",
|
|||
|
"print(df.head())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация параметров магазина:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 132,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средняя популяция для 'NetChange':\n",
|
|||
|
"NetChange\n",
|
|||
|
"-383840 1.264765e+08\n",
|
|||
|
"-259876 4.373376e+07\n",
|
|||
|
"-126866 1.923769e+07\n",
|
|||
|
"-88249 6.046183e+07\n",
|
|||
|
"-79889 2.843594e+07\n",
|
|||
|
" ... \n",
|
|||
|
" 2898047 2.735236e+08\n",
|
|||
|
" 4327022 2.208923e+08\n",
|
|||
|
" 5175990 2.061396e+08\n",
|
|||
|
" 5540090 1.439324e+09\n",
|
|||
|
" 13586631 1.380004e+09\n",
|
|||
|
"Name: Population2020, Length: 234, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция для 'Yearly Change':\n",
|
|||
|
"Yearly Change\n",
|
|||
|
"-2.47 2860853.0\n",
|
|||
|
"-1.69 11239.0\n",
|
|||
|
"-1.35 2722289.0\n",
|
|||
|
"-1.08 1886198.0\n",
|
|||
|
"-0.74 6948445.0\n",
|
|||
|
" ... \n",
|
|||
|
" 3.27 32866272.0\n",
|
|||
|
" 3.32 45741007.0\n",
|
|||
|
" 3.47 1402985.0\n",
|
|||
|
" 3.68 1701575.0\n",
|
|||
|
" 3.84 24206644.0\n",
|
|||
|
"Name: Population2020, Length: 174, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция для 'LandArea':\n",
|
|||
|
"LandArea\n",
|
|||
|
"0 8.010000e+02\n",
|
|||
|
"1 3.924200e+04\n",
|
|||
|
"10 1.752400e+04\n",
|
|||
|
"20 1.082400e+04\n",
|
|||
|
"21 9.877000e+03\n",
|
|||
|
" ... \n",
|
|||
|
"8358140 2.125594e+08\n",
|
|||
|
"9093510 3.774215e+07\n",
|
|||
|
"9147420 3.310027e+08\n",
|
|||
|
"9388211 1.439324e+09\n",
|
|||
|
"16376870 1.459345e+08\n",
|
|||
|
"Name: Population2020, Length: 226, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция для 'Density':\n",
|
|||
|
"Density\n",
|
|||
|
"0 30125.0\n",
|
|||
|
"1,246 62278.0\n",
|
|||
|
"1,261 42876.0\n",
|
|||
|
"1,265 164689383.0\n",
|
|||
|
"1,380 441543.0\n",
|
|||
|
" ... \n",
|
|||
|
"93 40222493.0\n",
|
|||
|
"94 50263037.0\n",
|
|||
|
"95 17109811.5\n",
|
|||
|
"96 71986.0\n",
|
|||
|
"99 32365999.0\n",
|
|||
|
"Name: Population2020, Length: 165, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция для комбинации 'NetChange' и 'LandArea':\n",
|
|||
|
"NetChange LandArea\n",
|
|||
|
"-383840 364555 1.264765e+08\n",
|
|||
|
"-259876 579320 4.373376e+07\n",
|
|||
|
"-126866 230170 1.923769e+07\n",
|
|||
|
"-88249 294140 6.046183e+07\n",
|
|||
|
"-79889 882050 2.843594e+07\n",
|
|||
|
" ... \n",
|
|||
|
" 2898047 1811570 2.735236e+08\n",
|
|||
|
" 4327022 770880 2.208923e+08\n",
|
|||
|
" 5175990 910770 2.061396e+08\n",
|
|||
|
" 5540090 9388211 1.439324e+09\n",
|
|||
|
" 13586631 2973190 1.380004e+09\n",
|
|||
|
"Name: Population2020, Length: 235, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция для комбинации 'LandArea' и 'Density':\n",
|
|||
|
"LandArea Density\n",
|
|||
|
"0 2,003 8.010000e+02\n",
|
|||
|
"1 26,337 3.924200e+04\n",
|
|||
|
"10 136 1.357000e+03\n",
|
|||
|
" 3,369 3.369100e+04\n",
|
|||
|
"20 541 1.082400e+04\n",
|
|||
|
" ... \n",
|
|||
|
"8358140 25 2.125594e+08\n",
|
|||
|
"9093510 4 3.774215e+07\n",
|
|||
|
"9147420 36 3.310027e+08\n",
|
|||
|
"9388211 153 1.439324e+09\n",
|
|||
|
"16376870 9 1.459345e+08\n",
|
|||
|
"Name: Population2020, Length: 235, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю популяцию для каждого значения каждого признака\n",
|
|||
|
"for column in [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]:\n",
|
|||
|
" print(f\"Средняя популяция для '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Population2020\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя популяция для комбинации 'NetChange' и 'LandArea':\")\n",
|
|||
|
"print(df.groupby([\"NetChange\", \"LandArea\"])[\"Population2020\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя популяция для комбинации 'LandArea' и 'Density':\")\n",
|
|||
|
"print(df.groupby([\"LandArea\", \"Density\"])[\"Population2020\"].mean())\n",
|
|||
|
"print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Выбор ориентира:\n",
|
|||
|
"1. Прогнозирование стоимости акций взносов:\n",
|
|||
|
"Ориентир:\n",
|
|||
|
"\n",
|
|||
|
"R² (коэффициент детерминации): 0.75 - 0.85\n",
|
|||
|
"\n",
|
|||
|
"MAE (средняя абсолютная ошибка): 1000000 - 1500000 продаж\n",
|
|||
|
"\n",
|
|||
|
"RMSE (среднеквадратичная ошибка): 1200000 - 1600000 продаж"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 133,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"MAE: 18325226.004086882\n",
|
|||
|
"MSE: 1372712018411057.2\n",
|
|||
|
"RMSE: 37050128.453367844\n",
|
|||
|
"R²: -0.22943866941880264\n",
|
|||
|
"Ориентиры для прогнозирования не достигнуты.\n",
|
|||
|
"Средняя популяция 'LandArea':\n",
|
|||
|
"LandArea\n",
|
|||
|
"0 8.010000e+02\n",
|
|||
|
"1 3.924200e+04\n",
|
|||
|
"10 1.752400e+04\n",
|
|||
|
"20 1.082400e+04\n",
|
|||
|
"21 9.877000e+03\n",
|
|||
|
" ... \n",
|
|||
|
"8358140 2.125594e+08\n",
|
|||
|
"9093510 3.774215e+07\n",
|
|||
|
"9147420 3.310027e+08\n",
|
|||
|
"9388211 1.439324e+09\n",
|
|||
|
"16376870 1.459345e+08\n",
|
|||
|
"Name: Population2020, Length: 226, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция 'NetChange':\n",
|
|||
|
"NetChange\n",
|
|||
|
"-383840 1.264765e+08\n",
|
|||
|
"-259876 4.373376e+07\n",
|
|||
|
"-126866 1.923769e+07\n",
|
|||
|
"-88249 6.046183e+07\n",
|
|||
|
"-79889 2.843594e+07\n",
|
|||
|
" ... \n",
|
|||
|
" 2898047 2.735236e+08\n",
|
|||
|
" 4327022 2.208923e+08\n",
|
|||
|
" 5175990 2.061396e+08\n",
|
|||
|
" 5540090 1.439324e+09\n",
|
|||
|
" 13586631 1.380004e+09\n",
|
|||
|
"Name: Population2020, Length: 234, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя популяция 'Density':\n",
|
|||
|
"Density\n",
|
|||
|
"0 3.012500e+04\n",
|
|||
|
"2 1.937814e+06\n",
|
|||
|
"3 9.460677e+06\n",
|
|||
|
"4 8.106156e+06\n",
|
|||
|
"5 4.649658e+06\n",
|
|||
|
" ... \n",
|
|||
|
"3369 3.369100e+04\n",
|
|||
|
"7140 7.496981e+06\n",
|
|||
|
"8358 5.850342e+06\n",
|
|||
|
"21645 6.493350e+05\n",
|
|||
|
"26337 3.924200e+04\n",
|
|||
|
"Name: Population2020, Length: 165, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"NetChange LandArea\n",
|
|||
|
"-383840 364555 1.264765e+08\n",
|
|||
|
"-259876 579320 4.373376e+07\n",
|
|||
|
"-126866 230170 1.923769e+07\n",
|
|||
|
"-88249 294140 6.046183e+07\n",
|
|||
|
"-79889 882050 2.843594e+07\n",
|
|||
|
" ... \n",
|
|||
|
" 2898047 1811570 2.735236e+08\n",
|
|||
|
" 4327022 770880 2.208923e+08\n",
|
|||
|
" 5175990 910770 2.061396e+08\n",
|
|||
|
" 5540090 9388211 1.439324e+09\n",
|
|||
|
" 13586631 2973190 1.380004e+09\n",
|
|||
|
"Name: Population2020, Length: 235, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"LandArea Density\n",
|
|||
|
"0 2003 8.010000e+02\n",
|
|||
|
"1 26337 3.924200e+04\n",
|
|||
|
"10 136 1.357000e+03\n",
|
|||
|
" 3369 3.369100e+04\n",
|
|||
|
"20 541 1.082400e+04\n",
|
|||
|
" ... \n",
|
|||
|
"8358140 25 2.125594e+08\n",
|
|||
|
"9093510 4 3.774215e+07\n",
|
|||
|
"9147420 36 3.310027e+08\n",
|
|||
|
"9388211 153 1.439324e+09\n",
|
|||
|
"16376870 9 1.459345e+08\n",
|
|||
|
"Name: Population2020, Length: 235, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y)\n",
|
|||
|
"\n",
|
|||
|
"X = df.drop(\n",
|
|||
|
" columns=[\n",
|
|||
|
" \"Population2020\",\n",
|
|||
|
" \"Country (or dependency)\",\n",
|
|||
|
" \"Migrants (net)\",\n",
|
|||
|
" \"Fert. Rate\",\n",
|
|||
|
" \"Med. Age\",\n",
|
|||
|
" \"Urban Pop %\",\n",
|
|||
|
" \"World Share\",\n",
|
|||
|
" \"Density\",\n",
|
|||
|
" ],\n",
|
|||
|
" axis=1,\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"y = df[\"Population2020\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"X_train = scaler.fit_transform(X_train)\n",
|
|||
|
"X_test = scaler.transform(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем модель линейной регрессии\n",
|
|||
|
"model = LinearRegression()\n",
|
|||
|
"model.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Делаем предсказания на тестовой выборке\n",
|
|||
|
"y_pred = model.predict(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Оцениваем качество модели\n",
|
|||
|
"mae = mean_absolute_error(y_test, y_pred)\n",
|
|||
|
"mse = mean_squared_error(y_test, y_pred)\n",
|
|||
|
"rmse = mean_squared_error(y_test, y_pred, squared=False)\n",
|
|||
|
"r2 = r2_score(y_test, y_pred)\n",
|
|||
|
"\n",
|
|||
|
"print(f\"MAE: {mae}\")\n",
|
|||
|
"print(f\"MSE: {mse}\")\n",
|
|||
|
"print(f\"RMSE: {rmse}\")\n",
|
|||
|
"print(f\"R²: {r2}\")\n",
|
|||
|
"\n",
|
|||
|
"# Проверяем, достигнуты ли ориентиры\n",
|
|||
|
"if r2 >= 0.75 and mae <= 1500000 and rmse <= 1700000:\n",
|
|||
|
" print(\"Ориентиры для прогнозирования достигнуты!\")\n",
|
|||
|
"else:\n",
|
|||
|
" print(\"Ориентиры для прогнозирования не достигнуты.\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"columns_to_group = [\n",
|
|||
|
"\n",
|
|||
|
" \"LandArea\",\n",
|
|||
|
" \"NetChange\", \"Density\"\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю популяцию для каждого значения каждого признака\n",
|
|||
|
"for column in columns_to_group:\n",
|
|||
|
" print(f\"Средняя популяция '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Population2020\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем средняя популяция для комбинаций признаков\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(df.groupby([\"NetChange\", \"LandArea\"])[\"Population2020\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(df.groupby([\"LandArea\", \"Density\"])[\"Population2020\"].mean())\n",
|
|||
|
"print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ применимости алгоритмов обучения с учителем для решения поставленных задач:\n",
|
|||
|
"1. Прогнозирование популяции:\n",
|
|||
|
"Задача: Регрессия\n",
|
|||
|
"\n",
|
|||
|
"Свойства алгоритмов:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия:\n",
|
|||
|
"Применимость: Хорошо подходит для задач, где зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"Преимущества: Проста в реализации, интерпретируема.\n",
|
|||
|
"Недостатки: Может плохо работать, если зависимость нелинейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями.\n",
|
|||
|
"Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n",
|
|||
|
"Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия):\n",
|
|||
|
"Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n",
|
|||
|
"Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n",
|
|||
|
"Недостатки: Менее интерпретируем, чем линейная регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n",
|
|||
|
"Преимущества: Может достигать высокой точности, устойчив к переобучению.\n",
|
|||
|
"Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n",
|
|||
|
"Преимущества: Может моделировать очень сложные зависимости.\n",
|
|||
|
"Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n",
|
|||
|
"\n",
|
|||
|
"Вывод:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно много.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"Задача: Классификация\n",
|
|||
|
"\n",
|
|||
|
"Свойства алгоритмов:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия:\n",
|
|||
|
"Применимость: Хорошо подходит для задач бинарной классификации, где зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"Преимущества: Проста в реализации, интерпретируема.\n",
|
|||
|
"Недостатки: Может плохо работать, если зависимость нелинейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями.\n",
|
|||
|
"Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n",
|
|||
|
"Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация):\n",
|
|||
|
"Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n",
|
|||
|
"Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n",
|
|||
|
"Недостатки: Менее интерпретируем, чем линейная регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n",
|
|||
|
"Преимущества: Может достигать высокой точности, устойчив к переобучению.\n",
|
|||
|
"Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n",
|
|||
|
"Преимущества: Может моделировать очень сложные зависимости.\n",
|
|||
|
"Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n",
|
|||
|
"\n",
|
|||
|
"Вывод:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно много.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование стоимости акций:\n",
|
|||
|
"Выбранные модели:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация тарифной сетки:\n",
|
|||
|
"Выбранные модели:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 134,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 18325226.004086882\n",
|
|||
|
"MSE: 1372712018411057.2\n",
|
|||
|
"RMSE: 37050128.453367844\n",
|
|||
|
"R²: -0.22943866941880264\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 8668932.614893615\n",
|
|||
|
"MSE: 406442910254215.25\n",
|
|||
|
"RMSE: 20160429.31720987\n",
|
|||
|
"R²: 0.63597854169292\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 8278621.445813501\n",
|
|||
|
"MSE: 351479677103981.1\n",
|
|||
|
"RMSE: 18747791.259345222\n",
|
|||
|
"R²: 0.6852051262385974\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 0.8936170212765957\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 0.9574468085106383\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 0.8936170212765957\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df.drop(\n",
|
|||
|
" columns=[\n",
|
|||
|
" \"Population2020\",\n",
|
|||
|
" \"Country (or dependency)\",\n",
|
|||
|
" \"Migrants (net)\",\n",
|
|||
|
" \"Fert. Rate\",\n",
|
|||
|
" \"Med. Age\",\n",
|
|||
|
" \"Urban Pop %\",\n",
|
|||
|
" \"World Share\",\n",
|
|||
|
" \"Density\",\n",
|
|||
|
" ],\n",
|
|||
|
" axis=1,\n",
|
|||
|
")\n",
|
|||
|
"y_reg = df[\"Population2020\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи регрессии\n",
|
|||
|
"scaler_reg = StandardScaler()\n",
|
|||
|
"X_train_reg = scaler_reg.fit_transform(X_train_reg)\n",
|
|||
|
"X_test_reg = scaler_reg.transform(X_test_reg)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" model.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df.drop(\n",
|
|||
|
" columns=[\n",
|
|||
|
" \"Population2020\",\n",
|
|||
|
" \"Country (or dependency)\",\n",
|
|||
|
" \"Migrants (net)\",\n",
|
|||
|
" \"Fert. Rate\",\n",
|
|||
|
" \"Med. Age\",\n",
|
|||
|
" \"Urban Pop %\",\n",
|
|||
|
" \"World Share\",\n",
|
|||
|
" \"Density\",\n",
|
|||
|
" ],\n",
|
|||
|
" axis=1,\n",
|
|||
|
")\n",
|
|||
|
"y_class = (df[\"Population2020\"] > df[\"Population2020\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи классификации\n",
|
|||
|
"scaler_class = StandardScaler()\n",
|
|||
|
"X_train_class = scaler_class.fit_transform(X_train_class)\n",
|
|||
|
"X_test_class = scaler_class.transform(X_test_class)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" model.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование стоимости акций:\n",
|
|||
|
"Конвейер для задачи регрессии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 135,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 18355084.060291413\n",
|
|||
|
"MSE: 1373553690261338.5\n",
|
|||
|
"RMSE: 37061485.26788071\n",
|
|||
|
"R²: -0.23019249389605534\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 8360523.173191491\n",
|
|||
|
"MSE: 433344291612551.56\n",
|
|||
|
"RMSE: 20816923.20235033\n",
|
|||
|
"R²: 0.6118849240519788\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 7374600.833694444\n",
|
|||
|
"MSE: 279881158669974.12\n",
|
|||
|
"RMSE: 16729649.089863604\n",
|
|||
|
"R²: 0.7493307301928449\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df[\"Population2020\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = pipeline.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"Конвейер для задачи классификации:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 136,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 0.8936170212765957\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 0.9361702127659575\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 0.9148936170212766\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df[\"Population2020\"] > df[\"Population2020\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = pipeline.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование популяции:\n",
|
|||
|
"\n",
|
|||
|
"Настройка гиперпараметров для задачи регрессии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 137,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 18355084.060291413\n",
|
|||
|
"MSE: 1373553690261338.5\n",
|
|||
|
"RMSE: 37061485.26788071\n",
|
|||
|
"R²: -0.23019249389605534\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': None, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 8240284.800957445\n",
|
|||
|
"MSE: 400813836952793.8\n",
|
|||
|
"RMSE: 20020335.585418988\n",
|
|||
|
"R²: 0.6410200946894263\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 7008860.588160669\n",
|
|||
|
"MSE: 275375287564661.78\n",
|
|||
|
"RMSE: 16594435.439769013\n",
|
|||
|
"R²: 0.7533663123848768\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Population2020']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"\n",
|
|||
|
"Настройка гиперпараметров для задачи классификации:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 138,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 1, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.8936170212765957\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': None, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 0.9574468085106383\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 5, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 0.9148936170212766\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Population2020'] > df['Population2020'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование популяции:\n",
|
|||
|
"Задача: Регрессия\n",
|
|||
|
"\n",
|
|||
|
"Выбор метрик:\n",
|
|||
|
"\n",
|
|||
|
"MAE (Mean Absolute Error): Средняя абсолютная ошибка. Показывает среднее отклонение предсказанных значений от фактических. Эта метрика легко интерпретируется, так как она измеряется в тех же единицах, что и целевая переменная.\n",
|
|||
|
"\n",
|
|||
|
"MSE (Mean Squared Error): Среднеквадратичная ошибка. Показывает среднее квадратичное отклонение предсказанных значений от фактических. Эта метрика чувствительна к выбросам, так как ошибки возводятся в квадрат.\n",
|
|||
|
"\n",
|
|||
|
"RMSE (Root Mean Squared Error): Квадратный корень из среднеквадратичной ошибки. Показывает среднее отклонение предсказанных значений от фактических в тех же единицах, что и целевая переменная. Эта метрика также чувствительна к выбросам, но легче интерпретируется, чем MSE.\n",
|
|||
|
"\n",
|
|||
|
"R² (R-squared): Коэффициент детерминации. Показывает, какую долю дисперсии целевой переменной объясняет модель. Значение R², близкое к 1, указывает на хорошее качество модели.\n",
|
|||
|
"\n",
|
|||
|
"Обоснование:\n",
|
|||
|
"\n",
|
|||
|
"MAE: Хорошо подходит для задач, где важно понимать среднее отклонение предсказаний от фактических значений.\n",
|
|||
|
"\n",
|
|||
|
"MSE и RMSE: Полезны для задач, где важно сильнее штрафовать большие ошибки, так как ошибки возводятся в квадрат; при этом обе метрики чувствительны к выбросам.\n",
|
|||
|
"\n",
|
|||
|
"R²: Позволяет оценить, насколько хорошо модель объясняет вариацию целевой переменной.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"Задача: Классификация\n",
|
|||
|
"\n",
|
|||
|
"Выбор метрик:\n",
|
|||
|
"\n",
|
|||
|
"Accuracy: Доля правильных предсказаний среди всех предсказаний. Эта метрика показывает общую точность модели.\n",
|
|||
|
"\n",
|
|||
|
"Precision: Доля правильных положительных предсказаний среди всех положительных предсказаний. Эта метрика важна, если важно минимизировать количество ложноположительных результатов.\n",
|
|||
|
"\n",
|
|||
|
"Recall (Sensitivity): Доля правильных положительных предсказаний среди всех фактических положительных случаев. Эта метрика важна, если важно минимизировать количество ложноотрицательных результатов.\n",
|
|||
|
"\n",
|
|||
|
"F1-score: Гармоническое среднее между precision и recall. Эта метрика показывает баланс между precision и recall.\n",
|
|||
|
"\n",
|
|||
|
"Обоснование:\n",
|
|||
|
"\n",
|
|||
|
"Accuracy: Хорошо подходит для задач, где классы сбалансированы.\n",
|
|||
|
"\n",
|
|||
|
"Precision и Recall: Важны для задач, где важно минимизировать ошибки определенного типа (ложноположительные или ложноотрицательные).\n",
|
|||
|
"\n",
|
|||
|
"F1-score: Позволяет оценить баланс между precision и recall."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 139,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 18355084.060291413\n",
|
|||
|
"MSE: 1373553690261338.5\n",
|
|||
|
"RMSE: 37061485.26788071\n",
|
|||
|
"R²: -0.23019249389605534\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': None, 'model__n_estimators': 100}\n",
|
|||
|
"MAE: 8207608.10893617\n",
|
|||
|
"MSE: 390570064659169.6\n",
|
|||
|
"RMSE: 19762845.560778175\n",
|
|||
|
"R²: 0.6501946991290963\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 7140734.77304005\n",
|
|||
|
"MSE: 290599190343546.25\n",
|
|||
|
"RMSE: 17046970.122093435\n",
|
|||
|
"R²: 0.7397313659978713\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 1, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.8936170212765957\n",
|
|||
|
"Precision: 1.0\n",
|
|||
|
"Recall: 0.4444444444444444\n",
|
|||
|
"F1-score: 0.6153846153846154\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgwAAAHHCAYAAADTQQDlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABOlElEQVR4nO3deVxUVf8H8M+AMiAwICqbAiIkSoJr+hAqqAiimaZlLiW4Wy6puVGZuGK2uOWWC6hJWqTmkpobmuvjhuZGgqikiIoPq7LO+f1hzM8RcBi4wACfd6/7yjn3zjnfOwzDd85yr0wIIUBERET0CnoVHQARERHpPiYMREREpBETBiIiItKICQMRERFpxISBiIiINGLCQERERBoxYSAiIiKNmDAQERGRRkwYiIiISCMmDNXUzZs34evrCzMzM8hkMuzYsUPS+m/fvg2ZTIawsDBJ663MvL294e3tLVl96enpGD58OKytrSGTyTBhwgTJ6tYVkZGRkMlkiIyMlKS+sLAwyGQy3L59W5L6CAgODoZMJqvoMKgcMGGoQLGxsRg1ahQaNWoEQ0NDKBQKeHp6YsmSJXj27FmZth0QEIC//voL8+bNw6ZNm9CmTZsyba88BQYGQiaTQaFQFPo63rx5EzKZDDKZDN98843W9d+/fx/BwcGIioqSINqSmz9/PsLCwvDRRx9h06ZN+PDDD8u0vYYNG+Ktt94q0zakMn/+fMmT4JflJx/5W40aNVC/fn0EBgbi3r17Zdo2UYUQVCF2794tjIyMhLm5uRg/frz44YcfxPfffy/69+8vatasKUaMGFFmbT99+lQAEJ9//nmZtaFUKsWzZ89Ebm5umbVRlICAAFGjRg2hr68vtm7dWmD/zJkzhaGhoQAgvv76a63rP3v2rAAgQkNDtXpeVlaWyMrK0rq9orRr1054enpKVp8mDg4OokePHuXWnhBC5OXliWfPnom8vDytnmdsbCwCAgIKlOfm5opnz54JpVJZ6thCQ0MFADF79myxadMmsWbNGjFs2DChr68vnJycxLNnz0rdRmWQk5NTbc61uqtRselK9RQXF4f+/fvDwcEBhw8fho2NjWrfmDFjEBMTgz179pRZ+48ePQIAmJubl1kbMpkMhoaGZVa/JnK5HJ6envjpp5/Qr18/tX3h4eHo0aMHfv3113KJ5enTp6hVqxYMDAwkrffhw4dwdXWVrL7c3FwolUrJ4ywNPT09Sd9H+vr60NfXl6w+APD391f10A0fPhx169bFV199hZ07dxZ475UlIQQyMzNhZGRUbm0CQI0aNVCjBv+UVAcckqgACxcuRHp6OtatW6eWLORzdnbGJ598onqcm5uLOXPmwMnJCXK5HA0bNsRnn32GrKwstefldxkfP34cbdu2haGhIRo1aoSNGzeqjgkODoaDgwMAYMqUKZDJZGjYsCGA5135+f9+UWFjlAcOHED79u1hbm4OExMTuLi44LPPPlPtL2oOw+HDh9GhQwcYGxvD3NwcvXr1wvXr1wttLyYmBoGBgTA3N4eZmRmGDBmCp0+fFv3CvmTgwIHYu3cvkpOTVWVnz57FzZs3MXDgwALHP3nyBJMnT4abmxtMTEygUCjg7++PS5cuqY6JjIzEG2+8AQAYMmSIqjs6/zy9vb3RrFkznD9/Hh07dkStWrVUr8vLcxgCAgJgaGhY4Pz9/PxQu3Zt3L9/v9Dzyh/Xj4uLw549e1Qx5I/LP3z4EMOGDYOVlRUMDQ3RvHlzbNiwQa2O/J/PN998g8WLF6veW9euXSvWa1uU4r5XlUolgoODYWtri1q1aqFTp064du0aGjZsiMDAwALn+uIchps3b6Jv376wtraGoaEhGjRogP79+yMlJQXA82Q1IyMDGzZsUL02+XUWNYdh79698PLygqmpKRQKBd544w2Eh4eX6DXo0KEDgOdDji+6ceMG3n33XVhYWMDQ0BBt2rTBzp07Czz/8uXL8PLygpGRER
o0aIC5c+ciNDS0QNz5v+/79+9HmzZtYGRkhNWrVwMAkpOTMWHCBNjZ2UEul8PZ2RlfffUVlEqlWltbtmxB69atVeft5uaGJUuWqPbn5ORg1qxZeO2112BoaIg6deqgffv2OHDggOqYwj4fpPzMIt3BtLAC7Nq1C40aNcKbb75ZrOOHDx+ODRs24N1338Wnn36KM2fOICQkBNevX8f27dvVjo2JicG7776LYcOGISAgAOvXr0dgYCBat26N119/HX369IG5uTkmTpyIAQMGoHv37jAxMdEq/qtXr+Ktt96Cu7s7Zs+eDblcjpiYGJw4ceKVzzt48CD8/f3RqFEjBAcH49mzZ1i2bBk8PT1x4cKFAslKv3794OjoiJCQEFy4cAFr166FpaUlvvrqq2LF2adPH4wePRrbtm3D0KFDATzvXWjSpAlatWpV4Phbt25hx44deO+99+Do6IjExESsXr0aXl5euHbtGmxtbdG0aVPMnj0bX375JUaOHKn64/DizzIpKQn+/v7o378/PvjgA1hZWRUa35IlS3D48GEEBATg1KlT0NfXx+rVq/HHH39g06ZNsLW1LfR5TZs2xaZNmzBx4kQ0aNAAn376KQCgXr16ePbsGby9vRETE4OxY8fC0dERv/zyCwIDA5GcnKyWiAJAaGgoMjMzMXLkSMjlclhYWBTrtS1Kcd+rQUFBWLhwIXr27Ak/Pz9cunQJfn5+yMzMfGX92dnZ8PPzQ1ZWFsaNGwdra2vcu3cPu3fvRnJyMszMzLBp0yYMHz4cbdu2xciRIwEATk5ORdYZFhaGoUOH4vXXX0dQUBDMzc1x8eJF7Nu3r9DEUpP8P+q1a9dWlV29ehWenp6oX78+pk+fDmNjY/z888/o3bs3fv31V7zzzjsAgHv37qFTp06QyWQICgqCsbEx1q5dC7lcXmhb0dHRGDBgAEaNGoURI0bAxcUFT58+hZeXF+7du4dRo0bB3t4eJ0+eRFBQEBISErB48WIAz5P+AQMGoEuXLqrfqevXr+PEiROq90lwcDBCQkJUr2dqairOnTuHCxcuoGvXrkW+BlJ+ZpEOqegxkeomJSVFABC9evUq1vFRUVECgBg+fLha+eTJkwUAcfjwYVWZg4ODACCOHTumKnv48KGQy+Xi008/VZXFxcUVOn4fEBAgHBwcCsQwc+ZM8eJbZdGiRQKAePToUZFx57fx4jh/ixYthKWlpUhKSlKVXbp0Sejp6YnBgwcXaG/o0KFqdb7zzjuiTp06Rbb54nkYGxsLIYR49913RZcuXYQQz8fDra2txaxZswp9DTIzMwuMlcfFxQm5XC5mz56tKnvVHAYvLy8BQKxatarQfV5eXmpl+/fvFwDE3Llzxa1bt4SJiYno3bu3xnMUovA5BYsXLxYAxI8//qgqy87OFh4eHsLExESkpqaqzguAUCgU4uHDhyVu70XFfa8+ePBA1KhRo8B5BgcHCwBqcw+OHDkiAIgjR44IIYS4ePGiACB++eWXV8Za1ByG/HkHcXFxQgghkpOThampqWjXrl2BcXhN8xzy6zp48KB49OiRiI+PFxEREaJevXpCLpeL+Ph41bFdunQRbm5uIjMzU63+N998U7z22muqsnHjxgmZTCYuXryoKktKShIWFhZqcQvx/7/v+/btU4trzpw5wtjYWPz9999q5dOnTxf6+vri7t27QgghPvnkE6FQKF45z6h58+Ya5628/PlQFp9ZpBs4JFHOUlNTAQCmpqbFOv73338HAEyaNEmtPP9b5ctzHVxdXVXfeoHn3zpdXFxw69atEsf8svy5D7/99luBLs6iJCQkICoqCoGBgWrfYt3d3dG1a1fVeb5o9OjRao87dOiApKQk1WtYHAMHDkRkZCQePHiAw4cP48GDB0V+a5TL5dDTe/4rkZeXh6SkJNVwy4ULF4rdplwux5AhQ4p1rK+vL0aNGoXZs2ejT58+MDQ0VHUrl8Tvv/8Oa2trDBgwQFVWs2ZNjB8/Hu
np6Th69Kja8X379kW9evVK3N7LbQOa36uHDh1Cbm4uPv74Y7Xjxo0bp7ENMzMzAMD+/fu1Gp4qyoEDB5CWlobp06c
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': None, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"Precision: 1.0\n",
|
|||
|
"Recall: 1.0\n",
|
|||
|
"F1-score: 1.0\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgwAAAHHCAYAAADTQQDlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABTGElEQVR4nO3dd1xTV/8H8E9AE2ZAZCsibqm4q7W4RRCto2qtG5y1xYXbp8NVxWqtq1atVRyP1LbOujdYZ124qiiIW1GxbJk5vz98yM8IGAIXCPHz9nVfL3Puzbnfm1ySb84591yZEEKAiIiI6C2MSjoAIiIi0n9MGIiIiEgrJgxERESkFRMGIiIi0ooJAxEREWnFhIGIiIi0YsJAREREWjFhICIiIq2YMBAREZFWTBiKyK1bt+Dt7Q0rKyvIZDJs375d0vrv3LkDmUyGtWvXSlpvada6dWu0bt1asvqSkpIwdOhQODo6QiaTYezYsZLVXVrwPNNv+vD+VK5cGf7+/hpluX3+rV27FjKZDHfu3Cn2GGUyGaZPn17s+zU0Bp0wREVF4bPPPkOVKlVgYmICpVIJT09PLF68GC9fvizSffv5+eHKlSuYPXs2NmzYgMaNGxfp/oqTv78/ZDIZlEplrq/jrVu3IJPJIJPJ8P333+tc/6NHjzB9+nSEh4dLEG3BzZkzB2vXrsXnn3+ODRs2YMCAAUW6v8qVK6tfN5lMBnNzczRp0gTr168v0v2WNm++Tq8vqampJR1eDidPnsT06dMRFxen0/NCQ0PRvXt3ODo6Qi6Xw97eHp07d8bWrVuLJlAJlcTn3549e5gUFLEyJR1AUdm9ezc++eQTKBQKDBw4EHXq1EF6ejqOHz+OiRMn4tq1a/j555+LZN8vX77EqVOn8OWXX2LkyJFFsg9XV1e8fPkSZcuWLZL6tSlTpgxSUlKwc+dO9OrVS2Pdxo0bYWJiUuAP70ePHmHGjBmoXLky6tevn+/nHThwoED7y8uRI0fwwQcfYNq0aZLW+zb169fH+PHjAQCPHz/GL7/8Aj8/P6SlpWHYsGHFFoe+e/11ep1cLi+BaN7u5MmTmDFjBvz9/WFtbZ2v50ybNg0zZ85E9erV8dlnn8HV1RWxsbHYs2cPevTogY0bN6Jv375FG3g+RUREwMjo/3975vX5N2DAAPTu3RsKhaJI4tizZw+WLVuWa9Lw8uVLlCljsF93xcYgX8Ho6Gj07t0brq6uOHLkCJycnNTrAgICEBkZid27dxfZ/p89ewYA+f5wKAiZTAYTE5Miq18bhUIBT09P/PrrrzkShpCQEHTq1AlbtmwpllhSUlJgZmYm+ZfF06dP4e7uLll9mZmZUKlUb42zQoUK6N+/v/qxv78/qlSpgoULFzJheM2br5NUVCoV0tPTS/Rva/PmzZg5cyZ69uyJkJAQjR8FEydOxP79+5GRkVFi8b3pzQQgr88/Y2NjGBsbF1dYGkry/TQowgCNGDFCABAnTpzI1/YZGRli5syZokqVKkIulwtXV1cxdepUkZqaqrGdq6ur6NSpk/jrr7/E+++/LxQKhXBzcxPr1q1TbzNt2jQBQGNxdXUVQgjh5+en/v/rsp/zugMHDghPT09hZWUlzM3NRY0aNcTUqVPV66OjowUAERwcrPG8w4cPi+bNmwszMzNhZWUlunTpIv75559c93fr1i3h5+cnrKyshFKpFP7+/iI5OVnr6+Xn5yfMzc3F2rVrhUKhEP/++6963d9//y0AiC1btggAYv78+ep1sbGxYvz48aJOnTrC3NxcWFpaig4dOojw8HD1NkePHs3x+r1+nK1atRLvvfeeOHfunGjRooUwNTUVY8aMUa9r1aqVuq6BAwcKhUKR4/i9vb2FtbW1ePjwYa7Hl1cM0dHRQgghYmJixODBg4W9vb1QKBSibt26Yu3atRp1ZL8/8+fPFwsXLhRVqlQRRkZG4uLFi3m+rtnn15saN24s5HK5RtmxY8
dEz549hYuLi5DL5aJixYpi7NixIiUlRWO77PfqwYMHomvXrsLc3FzY2tqK8ePHi8zMTI1t//33X+Hn5yeUSqWwsrISAwcOFBcvXiz0eRYRESH69esnlEqlsLW1FV999ZVQqVTi3r17okuXLsLS0lI4ODiI77//Ps/XJj+v0+uSkpLEuHHjRMWKFYVcLhc1atQQ8+fPFyqVSmM7ACIgIED897//Fe7u7qJMmTJi27ZtQgghHjx4IAYNGiTs7e2FXC4X7u7uYvXq1Tn2tWTJEuHu7i5MTU2FtbW1aNSokdi4caPGa5DXuZSbWrVqCRsbG5GQkKD1tcjtc+DSpUvCz89PuLm5CYVCIRwcHMSgQYPE8+fPNZ6bkJAgxowZI1xdXYVcLhd2dnbCy8tLnD9/Xr3NzZs3Rffu3YWDg4NQKBSiQoUK4tNPPxVxcXHqbVxdXYWfn1+ex5v9mRccHJzrse/Zs0e0bNlSWFhYCEtLS9G4cWP16ydE/s51Pz+/XF/nbADEtGnTNPZ74cIF0aFDB2FpaSnMzc1F27ZtxalTpzS2yY75+PHjIjAwUNja2gozMzPRrVs38fTpU63vj6ExyBaGnTt3okqVKvjwww/ztf3QoUOxbt069OzZE+PHj8eZM2cQFBSE69evY9u2bRrbRkZGomfPnhgyZAj8/PywZs0a+Pv7o1GjRnjvvffQvXt3WFtbIzAwEH369EHHjh1hYWGhU/zXrl3DRx99hLp162LmzJlQKBSIjIzEiRMn3vq8Q4cOwdfXF1WqVMH06dPx8uVLLF26FJ6enrhw4QIqV66ssX2vXr3g5uaGoKAgXLhwAb/88gvs7e3x3Xff5SvO7t27Y8SIEdi6dSsGDx4M4FXrQq1atdCwYcMc29++fRvbt2/HJ598Ajc3N8TExGDlypVo1aoV/vnnHzg7O6N27dqYOXMmvvnmGwwfPhwtWrQAAI33MjY2Fr6+vujduzf69+8PBweHXONbvHgxjhw5Aj8/P5w6dQrGxsZYuXIlDhw4gA0bNsDZ2TnX59WuXRsbNmxAYGAgKlasqG76trOzw8uXL9G6dWtERkZi5MiRcHNzwx9//AF/f3/ExcVhzJgxGnUFBwcjNTUVw4cPh0KhgI2NTb5e22yZmZl48OABypUrp1H+xx9/ICUlBZ9//jnKly+Pv//+G0uXLsWDBw/wxx9/aGyblZUFHx8fNG3aFN9//z0OHTqEBQsWoGrVqvj8888BAEIIdO3aFcePH8eIESNQu3ZtbNu2DX5+fjli0vU8+/TTT1G7dm3MnTsXu3fvxrfffgsbGxusXLkSbdu2xXfffYeNGzdiwoQJeP/999GyZUutr0tGRgaeP3+uUWZmZgYzMzMIIdClSxccPXoUQ4YMQf369bF//35MnDgRDx8+xMKFCzWed+TIEfz+++8YOXIkbG1tUblyZcTExOCDDz6ATCbDyJEjYWdnh71792LIkCFISEhQD4BdtWoVRo8ejZ49e2LMmDFITU3F5cuXcebMGfTt2xfdu3fHzZs38euvv2LhwoWwtbUF8Opcys2tW7dw48YNDB48GJaWllpfh9wcPHgQt2/fxqBBg+Do6Kjufr127RpOnz4NmUwGABgxYgQ2b96MkSNHwt3dHbGxsTh+/DiuX7+Ohg0bIj09HT4+PkhLS8OoUaPg6OiIhw8fYteuXYiLi4OVlVWOfev6+bd27VoMHjwY7733HqZOnQpra2tcvHgR+/btU3e55Odc/+yzz/Do0SMcPHgQGzZs0PoaXbt2DS1atIBSqcSkSZNQtmxZrFy5Eq1bt0ZYWBiaNm2qsf2oUaNQrlw5TJs2DXfu3MGiRYswcuRI/Pbbb/l+XwxCSWcsUouPjxcARNeuXfO1fXh4uAAghg4dqlE+YcIEAUAcOXJEXebq6ioAiGPHjqnLnj59KhQKhRg/fry67PVfl6/LbwvDwoULBQDx7NmzPOPO7ZdF/fr1hb29vYiNjVWXXbp0SRgZGYmBAw
fm2N/gwYM16vz4449F+fLl89zn68dhbm4uhBCiZ8+eol27dkIIIbKysoSjo6OYMWNGrq9BamqqyMrKynEcCoVCzJw
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 5, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 0.9148936170212766\n",
|
|||
|
"Precision: 0.8571428571428571\n",
|
|||
|
"Recall: 0.6666666666666666\n",
|
|||
|
"F1-score: 0.75\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAgwAAAHHCAYAAADTQQDlAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABTrElEQVR4nO3deVwU9f8H8NeC7oLAgigIKCKgoqh4lhHeIohKKpppqeCZhlqoeXzLvMWvVlpmat6apOX59RYPNM+88EoJEG+8UG459/P7w9ifK+CyMMCCr2ePeeTOzM68Z3eZfe/78/nMyIQQAkRERERvYFDaARAREZH+Y8JAREREWjFhICIiIq2YMBAREZFWTBiIiIhIKyYMREREpBUTBiIiItKKCQMRERFpxYSBiIiItGLCUACRkZHw8vKCubk5ZDIZtm/fLun2b926BZlMhjVr1ki63bKsXbt2aNeunWTbS05OxtChQ2FjYwOZTIYvvvhCsm3ru7w+X9OmTYNMJiu9oMqZsvJ66sO5platWggICNCYl9c5ds2aNZDJZLh161aJxyiTyTBt2rQS36++KzMJQ3R0ND799FM4OTnByMgISqUSHh4e+OGHH/DixYti3be/vz+uXLmC2bNnY/369WjRokWx7q8kBQQEQCaTQalU5vk6RkZGQiaTQSaT4dtvv9V5+w8ePMC0adMQHh4uQbSFN2fOHKxZswYjR47E+vXrMWDAgGLfp0qlwrp169CpUydUrVoVFStWhLW1Nby8vPDLL78gPT292GMoTbq+9zlfEK9O1tbWaN++Pfbu3Vu8wRZAamoqpk2bhrCwsNIOJU9hYWHw8/ODjY0N5HI5rK2t4evri61bt5Z2aFqVxjl2z549TAp0JcqAXbt2CWNjY2FhYSHGjBkjfvnlF/HTTz+Jvn37iooVK4phw4YV275TU1MFAPHVV18V2z5UKpV48eKFyMrKKrZ95Mff319UqFBBGBoaik2bNuVaPnXqVGFkZCQAiPnz5+u8/bNnzwoAYvXq1To9Lz09XaSnp+u8v/y0bNlSeHh4SLY9bVJTU4W3t7cAIN5//30RHBwsVq1aJb799lvh6+srDA0NxeDBg0sklpiYmFzvQWZmpnjx4kWx7lfX93716tUCgJgxY4ZYv369WLdunZg/f75o0KCBACB27txZrPFq8+TJEwFATJ06Ndeykng93+Sbb74RAESdOnXEN998I1auXCnmzZsn2rVrJwCIDRs2CCHy/iyUtLS0NJGRkaF+nN85NisrS7x48UKoVKpiiSMwMFDk9xX44sULkZmZWSz7LcsqlEaSoouYmBj07dsXDg4OOHz4MGxtbdXLAgMDERUVhd27dxfb/p88eQIAsLCwKLZ9yGQyGBkZFdv2tVEoFPDw8MBvv/2GPn36aCwLCQlB165dsWXLlhKJJTU1FZUqVYJcLpd0u48fP4arq6tk28vKyoJKpco3zqCgIOzfvx8LFy7E559/rrFs3LhxiIyMRGhoaJH2URQVKlRAhQr6+efv4+Oj8QtzyJAhqFatGn777Td069atFCPLX2m+nps3b8aMGTPQu3dvhISEoGLFiuplX375Jfbv34/MzMxSiS0vCoVC43F+51hDQ0MYGhqWVFgaSvN8rNdKO2PRZsSIEQKAOHHiRIHWz8zMFDNmzBBOTk5CLpcLBwcHMXnyZJGWlqaxnoODg+jatav4888/xTvvvCMUCoVwdHQUa9euVa8zdepUAUBjcnBwEEK8/GWe8+9X5TznVQcOHBAeHh7C3NxcmJiYiLp164rJkyerl+eX9R86dEi0atVKVKpUSZibm4sPPvhA/P3333nuLzIyUvj7+wtzc3OhVCpFQECASElJ0fp6+fv7CxMTE7FmzRqhUCjE8+fP1cv++usvAUBs2bIlV4UhLi5OjBs3TjRs2FCYmJgIMzMz0blzZxEeHq5e58iRI7lev1ePs23btqJBgwbi3LlzonXr1sLY2F
h8/vnn6mVt27ZVb2vgwIFCoVDkOn4vLy9hYWEh7t+/n+fx5RdDTEyMEEKIR48eicGDBwtra2uhUCiEm5ubWLNmjcY2ct6f+fPniwULFggnJydhYGAgLl68mOc+79y5IwwNDUXnzp3f8MpretM+0tPTxZQpU0SzZs2EUqkUlSpVEq1atRKHDx/OtZ3nz58Lf39/oVQqhbm5uRg4cKC4ePFirs9XXp9TIYRYv369aNasmTAyMhKVK1cWH330kbhz547GOjnv27Vr10S7du2EsbGxsLOzE//973/V62h77/OSU2E4e/asxnyVSiWUSqUYOHCgxvzk5GQxduxYUaNGDSGXy0XdunXF/Pnzc/0iLeg54ezZs8LLy0tUqVJFGBkZiVq1aolBgwZpvD+vTznVhrxeTwAiMDBQbNu2TTRo0EDI5XLh6uoq9u7dm+vYjxw5Ipo3by4UCoVwcnISS5cuzfc9el29evWEpaWlSExM1LpuXueaS5cuCX9/f+Ho6CgUCoWoVq2aGDRokHj69KnGcxMTE8Xnn38uHBwchFwuF1ZWVsLT01OcP39evc4///wj/Pz8RLVq1YRCoRDVq1cXH330kYiPj1ev4+DgIPz9/TVet7zOsTmfh5y/1Rx79uwRbdq0EaampsLMzEy0aNFCXUERQohjx46J3r17C3t7eyGXy0WNGjXEF198IVJTU9Xr+Pv75/l+5nj1vc1x4cIF0blzZ2FmZiZMTExEhw4dxKlTpzTWyYn5+PHjIigoSFStWlVUqlRJ9OjRQzx+/Fjr+6Pv9PMnxit27twJJycnvP/++wVaf+jQoVi7di169+6NcePG4cyZMwgODsb169exbds2jXWjoqLQu3dvDBkyBP7+/li1ahUCAgLQvHlzNGjQAH5+frCwsEBQUBD69euHLl26wNTUVKf4r127hm7dusHNzQ0zZsyAQqFAVFQUTpw48cbnHTx4ED4+PnBycsK0adPw4sULLFq0CB4eHrhw4QJq1aqlsX6fPn3g6OiI4OBgXLhwAStWrIC1tTX++9//FihOPz8/jBgxAlu3bsXgwYMBvKwu1KtXD82aNcu1/s2bN7F9+3Z8+OGHcHR0xKNHj7Bs2TK0bdsWf//9N+zs7FC/fn3MmDED33zzDYYPH47WrVsDgMZ7GRcXBx8fH/Tt2xf9+/dHtWrV8ozvhx9+wOHDh+Hv749Tp07B0NAQy5Ytw4EDB7B+/XrY2dnl+bz69etj/fr1CAoKQo0aNTBu3DgAgJWVFV68eIF27dohKioKo0aNgqOjI/744w8EBAQgPj4+V2Vg9erVSEtLw/Dhw6FQKGBpaZnnPvfu3Yvs7Gz0799fy6ueW177SExMxIoVK9CvXz8MGzYMSUlJWLlyJby9vfHXX3+hSZMmAAAhBLp3747jx49jxIgRqF+/PrZt2wZ/f/8C7Xv27NmYMmUK+vTpg6FDh+LJkydYtGgR2rRpg4sXL2r8Anz+/Dk6d+4MPz8/9OnTB5s3b8bEiRPRqFEj+Pj4FOi9z09CQgKePn0KIQQeP36MRYsWITk5WeP1FELggw8+wJEjRzBkyBA0adIE+/fvx5dffon79+9jwYIF6nULck54/PgxvLy8YGVlhUmTJsHCwgK3bt1St/9bWVlhyZIlGDlyJHr27Ak/Pz8AgJub2xuP5fjx49i6dSs+++wzmJmZ4ccff0SvXr1w584dVKlSBQBw8eJFdO7cGba2tpg+fTqys7MxY8YMWFlZaX2tIiMjcePGDQwePBhmZmZa189LaGgobt68iUGDBsHGxgbXrl3DL7/8gmvXruH06dPqzpwjRozA5s2bMWrUKLi6uiIuLg7Hjx/H9evX0axZM2RkZMDb2xvp6ekYPXo0bGxscP/+fezatQvx8fEwNzfPtW9dz7Fr1qzB4MGD0aBBA0yePBkWFha4ePEi9u3bh48//hgA8McffyA1NRUjR45ElSpV8Ndff2HRokW4d+8e/vjjDwDAp59+igcPHi
A0NBTr16/X+hpdu3YNrVu3hlKpxIQJE1CxYkUsW7YM7dq1w9GjR9GyZUuN9UePHo3KlStj6tSpuHXrFhYuXIhRo0Z
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABHvUlEQVR4nO3deXhU5d3/8c9MkpksZCEJSQgEQqIiCLIKBKv+VBSXotS20uqjSC3WBbRSW0EFxAWsC8UqSsVS1EcL6mNbn0LhESpWIIgscUEWSQJBIEASkkkC2Wbu3x9JBgcCZnAWMnm/rivXxZw5Z+abo3I+nnPf99dijDECAAAIEdZgFwAAAOBLhBsAABBSCDcAACCkEG4AAEBIIdwAAICQQrgBAAAhhXADAABCSniwCwg0l8ulffv2KTY2VhaLJdjlAACAVjDGqLKyUunp6bJaT31vpt2Fm3379ikjIyPYZQAAgNOwZ88ede3a9ZT7tLtwExsbK6nx5MTFxQW5GgAA0BoOh0MZGRnu6/iptLtw0/woKi4ujnADAEAb05ohJQwoBgAAIYVwAwAAQgrhBgAAhBTCDQAACCmEGwAAEFIINwAAIKQQbgAAQEgh3AAAgJBCuAEAACGFcAMAAEJKUMPNf/7zH40aNUrp6emyWCz6+9///p3HrFq1SgMHDpTdbtdZZ52lhQsX+r1OAADQdgQ13FRXV6tfv36aO3duq/YvLCzUtddeq0svvVR5eXn69a9/rV/+8pdavny5nysFAABtRVAbZ1599dW6+uqrW73/vHnz1KNHDz333HOSpF69emn16tX6wx/+oJEjR/qrTAAA0EoHHTWqrG1QdqcOQauhTXUFz83N1YgRIzy2jRw5Ur/+9a9Pekxtba1qa2vdrx0Oh7/KAwCg3SmtqtW6gjLlFpQoN79U+Yeqdfm5KfrzbRcEraY2FW6Ki4uVmprqsS01NVUOh0NHjx5VVFTUCcfMmjVLM2bMCFSJAACEtIoj9VpXWKrc/FKtKyjVtuJKj/ctFqm6riFI1TVqU+HmdEyZMkWTJk1yv3Y4HMrIyAhiRQAAtB1VtQ36tLBMa/NLlFtQqi37HDLGc5+eqbHKyU5STnaShvZIVEK0LTjFNmlT4SYtLU0HDhzw2HbgwAHFxcW1eNdGkux2u+x2eyDKAwCgzTta59SG3WXKzS/V2vxSfbG3Qk6XZ5rJ6hSjnKwkDc9O1tCsRCV3OLOus20q3OTk5Gjp0qUe2z744APl5OQEqSIAANq2mnqnNheVK7egVLn5JcrbU656p2eY6ZYY3RhmzkrSsKwkpcZFBqna1glquKmqqtLOnTvdrwsLC5WXl6fExER169ZNU6ZM0d69e/X6669Lku688069+OKL+t3vfqdf/OIX+ve//623335bS5YsCdavAABAm1LX4NLn35QrN79UuQWl2rj7sGobXB77pMdHalh2knKyGh81de0YHaRqT09Qw82GDRt06aWXul83j40ZO3asFi5cqP3796uoqMj9fo8ePbRkyRLdf//9ev7559W1a1e9+uqrTAMHAOAkGpwubdnn0NqmMLNhV5mO1Dk99ukUa3cHmZysJHVPipbFYglSxd+fxZjjhwWFNofDofj4eFVUVCguLi7Y5QAA4FMul9HWYkfjnZn8Uq0vLFNlrefspY7REe4gk5OdpOxOHc74MOPN9btNjbkBAACejDH6+mBV0wDgEn1SWKbyI/Ue+8RGhmtojyQNb5rR1DM1VlbrmR1mvg/CDQAAbYgxRoUl1U0DgBvXmimpqvPYJ8YWpgt6JDaGmaxk9U6PU1gIh5njEW4AADjD7Sk74h4AnJtfqmJHjcf7kRFWDe6e6F5rpm+XeEWEBbV9ZFARbgAAOMPsrzjqHjOTW1Cqbw4f9XjfFmbVgG4J7nEz/bslyB4eFqRqzzyEGwAAguxQZa3WFTQumreuoFSFJdUe74dbLeqXkeAeADyoe0dFRhBmToZwAw
BAgB2urtMnTf2Z1uaX6uuDVR7vWy1Sny7x7jszF2QmKsbOJbu1OFMAAPiZo6Ze6wvKlNt0d2Zb8Yn9mXp1jmtqaZCkC3okKj4qIjjFhgDCDQAAPlZd26BPdzWGmXVN/ZmOa8+ks1M6KCe7McwM7ZGkjjHBbTYZSgg3AAB8TzX1Tm3afdi9CvBne8rVcFya6ZEco2FNY2aGZSUqJfbM7s/UlhFuAADwUl2DS3l7yrU2v0S5+aXaXFSuOqdnf6YuCVHuRfNyspPUOT4qSNW2P4QbAAC+Q4PTpc/3VrinZ2/YXaaaes8wkxpn1/DsZPeMpozEttVsMpQQbgAAOI7TZfTVPodyC0rc/Zmqj2s2mRRj07CmMTM5WUnqkRxzxvdnai8INwCAds/lMtpxsFJrdzaOmfmkoFSOGs9mk/FRERqWldg4o+msZJ2dcuY3m2yvCDcAgHbHGKP8Q9XKzS9pnNFUUKayas/+TLH2cA3pkdg0ADhJvTvHhXSzyVBCuAEAhDxjjIqa+jM1z2g6VFnrsU9URGOzyeYxM33S4xTejvsztWWEGwBASNpb/q3+TPkl2lfh2WzSFm7VoG4d3TOazu+aIFs4YSYUEG4AACHhoKPG3TV7bX6pisqOeLwfEWZR/4wE5TTNaBrQLYH+TCGKcAMAaJNKq2q1rqDMPaMp/5Bns8kwq0V9m/ozDW9qNhlt47LXHvBPGQDQJlQcqde6pmaT6wpKta240uN9i0U6Lz3OPWbmgsxExUbSn6k9ItwAAM5IVbUN+rSwrHEV4IJSbdl3YrPJc9Nij7U06JGk+GjCDAg3AIAzxNE6pzbsLnOPmflib4Wcx/VnyuoU07RoXrKGZSUqqYM9SNXiTEa4AQAERU29U5uLypsGAZcob0+56p2eYaZbYnTTonmNa82kxtFsEt+NcAMACIi6Bpc+/6a8cWp2Qak27j6s2gbP/kzp8ZFNLQ2SlZOdpC4JNJuE9wg3AAC/aHC6tGWfw71o3oZdZTpyXH+mTrF29wDg4dlJ6pYYTUsDfG+EGwCAT7hcRluLHe6F89YXlqmy1rM/U2KMzd2fKSc7Sdmd6M8E3yPcAABOizFGXx+sahoAXKJPCstUfqTeY5/YyPDG2UxNYaZnaiz9meB3hBsAQKsYY1RYUu1eBXhdQalKqjybTcbYGvszNc9o6p0epzDCDAKMcAMAOKk9Tc0mmwNNscOzP1NkhFWDuzd2zs7JTlLfLvGKoNkkgoxwAwBw21/xrWaTBaX65vBRj/dtYVYN6JbQGGayktS/W4Ls4fRnwpmFcAMA7dihylqtK2hcNG9dQakKSzz7M4VbLeqXkeAeMzOoe0eaTeKMR7gBgHbkcHWdPik81jn764NVHu9bLVLfLvEa1nRn5oLMRMXYuVSgbeHfWAAIYY6aeq0vKFNu092ZbcUn9mfq1TmuaQBwki7okaj4KPozoW0j3ABACKmubdCnuxrDzLqm/kzHtWfS2Skd3IvmDe2RpI4xtuAUC/gJ4QYA2rCaeqc27T7sXgX4sz3lajguzfRIjjnWOTsrUSmx9GdCaCPcAEAbUtfgUt6ecq3NL1Fufqk2F5WrzunZn6lLQlTjY6amn87x9GdC+0K4AYAzWIPTpc/3VrinZ2/YXaaaes8wkxpnb2w02XR3JiMxOkjVAmcGwg0AnEGcLqOv9jmUW1Di7s9UfVyzyaQYW1Pn7MZBwD2SY+jPBHwL4QYAgsjlMtpxsFJrdzaOmfmkoFSOGs9mk/FRERqWldh4dyY7SWen0GwSOBXCDQAEkDFG+YeqlZtf0jijqaBMZdWe/Zli7eEa0uNYS4NeaXE0mwS8QLgBAD8yxqioqT9T84ymQ5W1HvtERTQ2m8zJanzUdF56nMLpzwScNsINAPjY3vJv9WfKL9G+Cs9mk7ZwqwZ37+geAHx+1wTZwgkzgK8QbgDgezroqHF3zV6bX6qisiMe70eEWdQ/I0E5TTOaBnRLoD8T4EeEGwDwUmlVrdYVlL
lnNOUf8mw2GWa1qG+XePdaM4O6d1S0jb9ugUDhvzYA+A4VR+q1rqnZ5LqCUm0rrvR432KRzkuPaxozk6zBmR0VG0l
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов (масштабирование StandardScaler)\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Population2020']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Population2020'] > df['Population2020'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" precision = precision_score(y_test_class, y_pred_class)\n",
|
|||
|
" recall = recall_score(y_test_class, y_pred_class)\n",
|
|||
|
" f1 = f1_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print(f\"Precision: {precision}\")\n",
|
|||
|
" print(f\"Recall: {recall}\")\n",
|
|||
|
" print(f\"F1-score: {f1}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
" # Визуализация матрицы ошибок\n",
|
|||
|
" cm = confusion_matrix(y_test_class, y_pred_class)\n",
|
|||
|
" disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Less', 'More'])\n",
|
|||
|
" disp.plot(cmap=plt.cm.Blues)\n",
|
|||
|
" plt.title(f'Confusion Matrix for {name}')\n",
|
|||
|
" plt.show()\n",
|
|||
|
"\n",
|
|||
|
" fpr, tpr, _ = metrics.roc_curve(y_test_class, y_pred_class)\n",
|
|||
|
"# построение ROC кривой\n",
|
|||
|
"plt.plot(fpr, tpr)\n",
|
|||
|
"plt.ylabel(\"True Positive Rate\")\n",
|
|||
|
"plt.xlabel(\"False Positive Rate\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Давайте проанализируем полученные значения метрик и определим, являются ли они нормальными или их можно улучшить.\n",
|
|||
|
"\n",
|
|||
|
"### Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"\n",
|
|||
|
"### Вывод для задачи регрессии:\n",
|
|||
|
"\n",
|
|||
|
"- **Random Forest Regression** демонстрирует наилучшие результаты по метрикам MAE и R², что указывает на высокую точность и стабильность модели.\n",
|
|||
|
"- **Linear Regression** и **Gradient Boosting Regression** также показывают хорошие результаты, но уступают случайному лесу.\n",
|
|||
|
"\n",
|
|||
|
"### Вывод для задачи классификации:\n",
|
|||
|
"\n",
|
|||
|
"- **Random Forest Classification** демонстрирует наилучшие результаты по всем метрикам (Accuracy, Precision, Recall, F1-score), что указывает на высокую точность и стабильность модели.\n",
|
|||
|
"- **Logistic Regression** и **Gradient Boosting Classification** также показывают хорошие результаты, но уступают случайному лесу.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
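"Для оценки смещения (bias) и дисперсии (variance) моделей можно использовать метод перекрестной проверки (cross-validation): среднее значение метрики по фолдам косвенно характеризует смещение, а стандартное отклонение между фолдами — дисперсию. Этот метод позволяет оценить, насколько хорошо модель обобщается на новых данных.\n",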
|
|||
|
"\n",
|
|||
|
"### Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Для задачи регрессии мы будем использовать метрики MAE (Mean Absolute Error) и R² (R-squared) для оценки смещения и дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"### Оценка смещения и дисперсии для задачи классификации:\n",
|
|||
|
"Для задачи классификации мы будем использовать метрики Accuracy, Precision, Recall и F1-score для оценки смещения и дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"Пример кода для оценки смещения и дисперсии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 140,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 29840383.42279941, Std = 27144603.41689917\n",
|
|||
|
"R² (Cross-Validation): Mean = -21089.965883559045, Std = 41156.25923282549\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 26665786.071446814, Std = 46081535.77323325\n",
|
|||
|
"R² (Cross-Validation): Mean = -363.4188861822276, Std = 719.8191512710059\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 25735773.24094847, Std = 45055407.0307318\n",
|
|||
|
"R² (Cross-Validation): Mean = -457.52444446928376, Std = 910.2095956390277\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации:\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|||
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.8553191489361703, Std = 0.05613151471605499\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.6947368421052632, Std = 0.40276055725703136\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.40555555555555556, Std = 0.4034381559533162\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.3924603174603175, Std = 0.31860013632010536\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.8340425531914895, Std = 0.21714639697315868\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.8457142857142858, Std = 0.3085714285714286\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.7277777777777777, Std = 0.18604891166267057\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.7121367521367521, Std = 0.20909441956885236\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.8340425531914895, Std = 0.2179786974445225\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.8486486486486486, Std = 0.3027027027027027\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.725, Std = 0.2549509756796392\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.7126265039308517, Std = 0.21811324450544675\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import cross_val_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов (масштабирование StandardScaler)\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Population2020']\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи регрессии\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE (Cross-Validation): Mean = {mae_scores.mean()}, Std = {mae_scores.std()}\")\n",
|
|||
|
" print(f\"R² (Cross-Validation): Mean = {r2_scores.mean()}, Std = {r2_scores.std()}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Population2020'] > df['Population2020'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи классификации\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
|
|||
|
" precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
|
|||
|
" recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
|
|||
|
" f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy (Cross-Validation): Mean = {accuracy_scores.mean()}, Std = {accuracy_scores.std()}\")\n",
|
|||
|
" print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")\n",
|
|||
|
" print(f\"Recall (Cross-Validation): Mean = {recall_scores.mean()}, Std = {recall_scores.std()}\")\n",
|
|||
|
" print(f\"F1-score (Cross-Validation): Mean = {f1_scores.mean()}, Std = {f1_scores.std()}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 141,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJsAAAI+CAYAAAAb9gt6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACJJElEQVR4nOzdeZxO5eP/8fes92xmGIYZGUZU1tDIEoVMM5ZIWUILErJmKUuLLeUThUKWFlqQNiplXxOVbEXImmLGFobBzJi5fn/4zfnObWaY0TG3Ma/n4+Fh7nOuc851zn3d51z3+z6LmzHGCAAAAAAAALCBu6srAAAAAAAAgJsHYRMAAAAAAABsQ9gEAAAAAAAA2xA2AQAAAAAAwDaETQAAAAAAALANYRMAAAAAAABsQ9gEAAAAAAAA2xA2AQAAAAAAwDaETQAAAAAAALANYRNcomPHjgoICMjVZR44cEBubm6aOXNmri73ZvbZZ58pODhYZ8+edXVVslSrVi0NHDgw2+XPnj2rp59+WqGhoXJzc1Pfvn2vX+VgcXNz0/Dhw11djVzzX/ZHq1atkpubm1atWmV7vQAgv8rJceiXX36Rt7e3/vrrL9vrMXXqVJUsWVKJiYnZnubjjz9WuXLl5OXlpYIFC9peJ2TUsWNHRUREuLoauap+/fqqX7/+NU0bERGhjh072lof3PgIm1xs5syZcnNzk5ubm9auXZthvDFG4eHhcnNz04MPPuiCGuZcSkqKihcvLjc3Ny1cuNDV1bHFuXPnNHz48Ovy5S7t/c/s3zPPPGP78uySkpKiYcOGqXfv3k7BYUREhNzc3BQVFZXpdO+++661fr/++mumZQYOHCg3Nzc9+uijmY5P+6Ke1b///e9/VtlBgwZp8uTJiouLy9Z6vfbaa5o5c6a6d++ujz/+WE888US2prtWadsr7Z+/v79q1Kihjz766LouF5cMHz5cbm5ucnd3199//51hfHx8vHx9feXm5qZevXq5oIYAkD3p+5Rubm7y9PTULbfcoo4dO+rQoUOurt5N5cUXX1S7du1UqlQpa1j9+vWdtr+vr6/uvPNOTZgwQampqU7TP/HEE6pbt65q1qypevXqaceOHda4jh07KikpSdOmTctWXXbu3KmOHTuqTJkyevfddzV9+nR7VjILacfNtH9eXl6KiIhQnz59dOrUqeu6bDj3gUeNGpVpmccee0xubm65/sM+cDlPV1cAl/j4+Gj27NmqW7eu0/DVq1frn3/+kcPhcFHNcm7FihWKjY1VRESEZs2apcaNG7u6Sv/ZuXPnNGLECEm65kT/Sh544AE9+eSTGYbffvvtti/LLt9++6127dqlrl27Zhjn4+OjlStXKi4uTqGhoU7jZs2aJR8fH124cCHT+RpjNGfOHEVEROjbb7/VmTNnVKBAgUzLtmvXTk2aNMkwvFq1atbfDz30kAIDA/XOO+9o5MiRV12vFStWqFatWho2bNhVy9qlatWqGjBggCQpNjZW7733njp06KDExER16dIl1+rhSufPn5enp+sOSQ6HQ3PmzMlwFtxXX33lohoBwLUZOXKkSpcurQsXLuinn37SzJkztXbtWm3btk0+Pj6url6et2XLFi1btkzr1q3LMK5EiRIaPXq0JOn48eOaPXu2+vXrp2PHjunVV1+1yr388stWH69v377q0aOHVq5cKelSH6pDhw4aN26cevfuLTc3tyvWZ9WqVUpNTdVbb72lsmXL2rWaVzVlyhQFBAQoISFBy5cv18SJE7Vp06ZMfzy/Gb377rsZQsTc5OPjozlz5uill15yGp6QkKCvv/6azzpuCJzZdINo0qSJPv/8c128eNFp+OzZsxUZGZnhC/uN7JNPPtFdd92lfv36af78+UpISHB1lW54t99+ux5//PEM/2rUqHHF6c6dO5fp8IsXLyopKek/1elq79uMGTNUp04d3XLLLRnG1alTRwEBAZo7d67T8H/++Uc//P
CDmjZtmuV8V61apX/++UcffPCBLl68eMUv+3fddVem261ixYpWGXd3d7Vq1UofffSRjDFXXCdJOnr0qK2noGfnvbjlllusuj///PNau3atAgICNH78eNvqkV2u+rz6+Pi4NGxq0qSJ5syZk2H47Nmzr9heAeBG07hxYz3++ON6+umn9d577+m5557T3r179c0337i6atdFVn2h62XGjBkqWbKkatWqlWFcUFCQdTzv27ev1qxZo1KlSmnixIlKSUmxyqX/MdEYI3d3569kbdq00V9//WUFUFdy9OhRSbK175KdbdqqVSs9/vjj6tatmz777DM9+uij+vHHH/XLL7/YVo/sSE1NzfIHzOvJy8vLpScDNGnSRH/88Ye2bt3qNPzrr79WUlKSHnjgARfVDPg/hE03iHbt2unEiRNaunSpNSwpKUlffPGF2rdvn+k0qampmjBhgipWrCgfHx8VK1ZM3bp108mTJ53Kff3112ratKmKFy8uh8OhMmXK6JVXXnE66EmXztipVKmS/vjjDzVo0EB+fn665ZZbNGbMmGyvx/nz5zVv3jy1bdtWbdq00fnz5/X1119nWX7fvn2KiYmRv7+/ihcvrpEjR2YIBD799FNFRkaqQIECCgwMVOXKlfXWW29lmE/r1q0VHBwsPz8/1apVS999991V65vVtcfpr8M+cOCAQkJCJEkjRoywTl1Nf13/zp071apVKwUHB8vHx0fVq1e3vVOX9v5s3LhR9913n/z8/PTCCy9Yp9O+8cYbmjBhgsqUKSOHw6E//vhD0qUzde699175+/urYMGCeuihh5xO15b+75ToP/74Q+3bt1ehQoUynGWX3oULF7Ro0aIsL5Xz8fHRI488otmzZzsNnzNnjgoVKqSYmJgs5z1r1ixVqFBBDRo0UFRUlGbNmpXdTZSlBx54QH/99Ze2bNmSZZm0++Ds379f3333nfU+HzhwQNKlzlznzp1VrFgx+fj4qEqVKvrwww+d5nG19yK7QkJCVK5cOe3du9dpeHY/86mpqRo+fLiKFy8uPz8/NWjQQH/88UeG6+XTLrlYvXq1evTooaJFi6pEiRLW+IULF1ptp0CBAmratKm2b9/utKy4uDh16tRJJUqUkMPhUFhYmB566CFru0nSr7/+qpiYGBUpUkS+vr4qXbq0nnrqKaf5ZHavjM2bN6tx48YKDAxUQECAGjZsqJ9++smpTNo6/Pjjj+rfv79CQkLk7++vhx9+WMeOHcvuJlf79u21ZcsW7dy502ndVqxYkeU+ODttQpJOnTqljh07KigoSAULFlSHDh2yvNTgWvclu3fvVsuWLRUaGiofHx+VKFFCbdu21enTp7O3AQDctO69915JynBMyUx29unGGI0aNUolSpSwjjHbt2/PcIxJ61tcLm2/nX6eOe2rXt4XkqTExEQNGzZMZcuWlcPhUHh4uAYOHJjh3keJiYnq16+fQkJCVKBAATVv3lz//PPPVbdNmvnz5+v++++/6hlH0qX+0N13360zZ85YoVB6y5cv13vvved0+b8kRUZGKjg4+Ip9aOnSpfhpZ2KHhIRkOJa+8847qlixohwOh4oXL66ePXtmOP5caZvmRFbt7Oeff1ajRo0UFBQkPz8/1atXTz/++GOG6VetWqXq1avLx8dHZcqU0bRp0zJtQ2mXtc+aNctat0WLFkmSDh06pKeeekrFihWTw+FQxYoV9cEHH2RY1sSJE1WxYkX5+fmpUKFCql69ulOf9cyZM+rbt68iIiLkcDhUtGhRPfDAA9q0aZNVJrN7NiUkJGjAgAEKDw+Xw+HQHXfcoTfeeCPDd5u0dZg/f74qVapk1TVtPbKjdu3aKl26dIa+9qxZs9SoUSMFBwdnOl122oQkTZ8+XWXKlJGvr69q1KihH374IdP5Zfdzd7nk5GSNGDFCt912m3x8fFS4cGHVrVvX6bsw8j4uo7tBREREqHbt2pozZ4512dnChQt1+vRptW3bVm+//XaGab
p166aZM2eqU6dO6tOnj/bv369JkyZp8+bN+vHHH+Xl5SXp0kE9ICBA/fv3V0BAgFasWKGhQ4cqPj5eY8eOdZrnyZM
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
|
|||
|
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJoAAASlCAYAAADgRbP+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzde3zO9f/H8eeO1zYz520O+5ooEg1zSEgHWRJJJMqxpFjJOq5iKO3bgXRQOjhVRA7JN3JaJOVLYVIhxxQzpzQ2ttnevz/8dn132TV28dmuHR73283Ndb2vz+F1ffbedb33/Jw8jDFGAAAAAAAAwGXydHcBAAAAAAAAKB0ImgAAAAAAAGAJgiYAAAAAAABYgqAJAAAAAAAAliBoAgAAAAAAgCUImgAAAAAAAGAJgiYAAAAAAABYgqAJAAAAAAAAliBoAgAAAAAAgCUImoBSaufOnerYsaMqVKggDw8PLVy40N0l2Q0YMEDh4eFuW//06dPl4eGhffv2ObS/9tpruuKKK+Tl5aUmTZpIksLDwzVgwIAir3H06NHy8PAo8vW60+X0ixtvvFE33nijpfUAAC7fpXy2r169Wh4eHlq9enWh1HQxzsYDxcG+ffvk4eGh6dOnu60GZ+MiZ2PO/MZaRcHDw0OjR48u8vW6y+X0C3f/rqH0ImhCsfbuu+/Kw8NDrVq1cncpJU7//v21detWjRs3Tp988omaN29e6OtMSUnRmDFjFBERocDAQPn7+6tRo0Z65plndPDgwUJf/+VYvny5nn76abVp00bTpk3Tyy+/XOjrTEtL0+jRo4vdl7uHh4c8PDz04IMPOn39+eeft09z9OjRIq4OAHAhOX/g5/zz8/PTVVddpejoaCUnJ7u7vGLPHeMB6dwf/N27d1doaKh8fX0VHBysLl26aMGCBUWy/svhjjHnkiVLil2YlLOT0NPTU3/++Wee11NSUuTv7y8PDw9FR0e7oUKg6Hi7uwDgQmbOnKnw8HBt2LBBu3btUr169dxdUolw+vRprVu3Ts8//3yRfZHt2bNHHTp00P79+9WzZ0899NBD8vX11c8//6wpU6boiy++0O+//14ktVxM3759de+998pms9nbvvnmG3l6emrKlCny9fW1t+/YsUOenoWTyaelpWnMmDGSlOdonBdeeEHPPvtsoay3IPz8/DR//ny9++67DttDkj777DP5+fnpzJkzbqoOAHAxY8eOVZ06dXTmzBmtXbtW7733npYsWaJffvlFAQEBRVbHhx9+qOzsbJfmueGGG3T69Ok83z9FIb/xQGGKi4vT2LFjdeWVV2rIkCGqXbu2jh07piVLlujuu+/WzJkz1adPnyKp5WLOHxflN+Z0Ntay0pIlSzRp0iSnYdPp06fl7e2+P3NtNps+++wzPf300w7tJSE0BKzCEU0otvbu3asffvhBEyZMULVq1TRz5kx3l5Sv1NRUd5fg4MiRI5KkihUrWrbMC73Hs2fPqnv37kpOTtbq1av12WefadiwYRo8eLDefvtt7dmzRz179rSslsvl5eUlPz8/h1PTDh8+LH9//zyDSpvNJh8fn6IuUd7e3vLz8yvy9ea47bbblJKSoq+//tqh/YcfftDevXvVuXNnN1UGACiITp066f7779eDDz6o6dOn6/HHH9fevXv15Zdf5jtPYYxnfHx8XA4bPD095efnV2g7ei4kv/HApTLG6PTp0/m+Pm/ePI0dO1Y9evTQr7/+qjFjxmjQoEF66qmntGrVKi1dulRBQUGW1GKF88dF+Y05nY21ioqfn59bg6bbb79dn332WZ72WbNmMX5CmUHQhGJr5syZqlSpkjp37qwePXrkGzSdOHFCI0aMUHh4uGw2m2rVqqV+/fo5nNJz5swZjR49WldddZX8/PxUvXp1de/eXbt375aU//nJzs55HjBggAIDA7V7927dfvvtKl++vO677z5J0nfffaeePXvqX//6l2w2m8LCwjRixA
inA4zt27frnnvuUbVq1eTv76/69evr+eeflyStWrVKHh4e+uKLL/LMN2vWLHl4eGjdunVOt8fo0aNVu3ZtSdJTTz0lDw8Ph2sjbN68WZ06dVJQUJACAwN1yy236L///a/DMnIOu//22281dOhQBQcHq1atWk7XJ0nz58/Xli1b9Pzzz6tt27Z5Xg8KCtK4cePynV+SXn/9dV1//fWqUqWK/P39FRkZqXnz5uWZbsWKFWrbtq0qVqyowMBA1a9fX88995zDNG+//bauueYaBQQEqFKlSmrevLlmzZqV5/3lXDfAw8ND06ZNU2pqqv1Ug5yfubNrEVysz2VkZGjUqFGKjIxUhQoVVK5cObVr106rVq2yL2Pfvn2qVq2aJGnMmDH29ebsmXN2jaazZ8/qxRdfVN26dWWz2RQeHq7nnntO6enpDtOFh4frjjvu0Nq1a9WyZUv5+fnpiiuu0Mcff3zBn0FuNWvW1A033OCw3aRzv5eNGzdWo0aNnM43d+5cRUZGyt/fX1WrVtX999+vAwcO5Jlu4cKFatSokfz8/NSoUSOnfV2SsrOzNXHiRF1zzTXy8/NTSEiIhgwZor///vui7+Fi/QAAypKbb75Z0rkdedKFxzOufPZ+/fXXat++vcqXL6+goCC1aNHC4bPW2TWaZs+ercjISPs8jRs31ptvvml/Pb9xWUG+Y3Le14EDB9StWzcFBgaqWrVqevLJJ5WVlXXBbXSh8YCr38HLli1T8+bN5e/vr/fffz/fdY4cOVKVK1fW1KlTne7YioqK0h133JHv/D///LMGDBigK664Qn5+fgoNDdWgQYN07Ngxh+lOnjypxx9/3D52CQ4O1q233qpNmzbZp9m5c6fuvvtuhYaGys/PT7Vq1dK9996rf/75x+H95YyLLjTmzO8aTRfrLwUZSw8YMECTJk2SJIfTRHM4u0aTK+Pf77//XjExMapWrZrKlSunu+66yx6oFUSfPn2UmJio7du329sOHTqkb775Jt8j0w4fPqwHHnhAISEh8vPzU0REhGbMmJFnuhMnTmjAgAGqUKGCKlasqP79++vEiRNOl7l9+3b16NFDlStXlp+fn5o3b65FixZdtP6C9APgYjh1DsXWzJkz1b17d/n6+qp3795677339OOPP6pFixb2aU6dOqV27dpp27ZtGjRokJo1a6ajR49q0aJF+uuvv1S1alVlZWXpjjvuUEJCgu69914NHz5cJ0+e1IoVK/TLL7+obt26Ltd29uxZRUVFqW3btnr99dfth6DPnTtXaWlpeuSRR1SlShVt2LBBb7/9tv766y/NnTvXPv/PP/+sdu3aycfHRw899JDCw8O1e/du/ec//9G4ceN04403KiwsTDNnztRdd92VZ7vUrVtXrVu3dlpb9+7dVbFiRY0YMUK9e/fW7bffrsDAQEnSr7/+qnbt2ikoKEhPP/20fHx89P777+vGG2/Ut99+m+daWEOHDlW1atU0atSoC+7lzPnS6tu3r8vbMsebb76prl276r777lNGRoZmz56tnj176quvvrLv/fn11191xx136Nprr9XYsWNls9m0a9cuff/99/blfPjhh3rsscfUo0cPDR8+XGfOnNHPP/+s9evX5/vl/sknn+iDDz7Qhg0b9NFHH0mSrr/+eqfTFqTPpaSk6KOPPlLv3r01ePBgnTx5UlOmTFFUVJQ2bNigJk2aqFq1anrvvff0yCOP6K677lL37t0lSddee22+2+jBBx/UjBkz1KNHDz3xxBNav3694uPjtW3btjxBza5du9SjRw898MAD6t+/v6ZOnaoBAwYoMjJS11xzTYF+Jn369NHw4cN16tQpBQYG6uzZs5o7d65iYmKcnjY3ffp0DRw4UC1atFB8fLySk5P15ptv6vvvv9fmzZvtezuXL1+uu+++Ww0bNlR8fLyOHTumgQMHOg0zhwwZYl/uY489pr179+qdd97R5s2b9f333+d7tNml9AMAKM1ydq5VqVLF3p
bfeKagn73Tp0/XoEGDdM011yg2NlYVK1bU5s2btXTp0nw/a1esWKHevXvrlltu0SuvvCJJ2rZtm77//nsNHz483/o
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.model_selection import cross_val_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\n",
|
|||
|
" \".//static//csv///world-population-by-country-2020.csv\", index_col=\"no\"\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"df[\"Population2020\"] = df[\"Population2020\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"NetChange\"] = df[\"NetChange\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Yearly Change\"] = df[\"Yearly Change\"].apply(lambda x: float(\"\".join(x.rstrip(\"%\"))))\n",
|
|||
|
"df[\"LandArea\"] = df[\"LandArea\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"df[\"Density\"] = df[\"Density\"].apply(lambda x: int(\"\".join(x.split(\",\"))))\n",
|
|||
|
"\n",
|
|||
|
"# Numeric feature columns (this dataset slice uses no categorical features)\n",
|
|||
|
"numerical_cols = [\"NetChange\", \"Yearly Change\", \"LandArea\", \"Density\"]\n",
|
|||
|
"\n",
|
|||
|
"# Build the preprocessor: standardize the numeric columns (no categorical columns are transformed)\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Population2020']\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Estimate bias and variance via 5-fold cross-validation: mean and std of MAE and R² per regression model\n",
|
|||
|
"mae_means = []\n",
|
|||
|
"mae_stds = []\n",
|
|||
|
"r2_means = []\n",
|
|||
|
"r2_stds = []\n",
|
|||
|
"\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
|
|||
|
" mae_means.append(mae_scores.mean())\n",
|
|||
|
" mae_stds.append(mae_scores.std())\n",
|
|||
|
" r2_means.append(r2_scores.mean())\n",
|
|||
|
" r2_stds.append(r2_scores.std())\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов для задачи регрессии\n",
|
|||
|
"fig, ax = plt.subplots(1, 2, figsize=(12, 6))\n",
|
|||
|
"\n",
|
|||
|
"ax[0].bar(models_reg.keys(), mae_means, yerr=mae_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[0].set_ylabel('MAE')\n",
|
|||
|
"ax[0].set_title('Mean Absolute Error (MAE) for Regression Models')\n",
|
|||
|
"ax[0].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"ax[1].bar(models_reg.keys(), r2_means, yerr=r2_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[1].set_ylabel('R²')\n",
|
|||
|
"ax[1].set_title('R-squared (R²) for Regression Models')\n",
|
|||
|
"ax[1].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Population2020'] > df['Population2020'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Estimate bias and variance via 5-fold cross-validation: mean and std of accuracy, precision, recall and F1 per classification model\n",
|
|||
|
"accuracy_means = []\n",
|
|||
|
"accuracy_stds = []\n",
|
|||
|
"precision_means = []\n",
|
|||
|
"precision_stds = []\n",
|
|||
|
"recall_means = []\n",
|
|||
|
"recall_stds = []\n",
|
|||
|
"f1_means = []\n",
|
|||
|
"f1_stds = []\n",
|
|||
|
"\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
|
|||
|
" precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
|
|||
|
" recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
|
|||
|
" f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
|
|||
|
" accuracy_means.append(accuracy_scores.mean())\n",
|
|||
|
" accuracy_stds.append(accuracy_scores.std())\n",
|
|||
|
" precision_means.append(precision_scores.mean())\n",
|
|||
|
" precision_stds.append(precision_scores.std())\n",
|
|||
|
" recall_means.append(recall_scores.mean())\n",
|
|||
|
" recall_stds.append(recall_scores.std())\n",
|
|||
|
" f1_means.append(f1_scores.mean())\n",
|
|||
|
" f1_stds.append(f1_scores.std())\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов для задачи классификации\n",
|
|||
|
"fig, ax = plt.subplots(2, 2, figsize=(12, 12))\n",
|
|||
|
"\n",
|
|||
|
"ax[0, 0].bar(models_class.keys(), accuracy_means, yerr=accuracy_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[0, 0].set_ylabel('Accuracy')\n",
|
|||
|
"ax[0, 0].set_title('Accuracy for Classification Models')\n",
|
|||
|
"ax[0, 0].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"ax[0, 1].bar(models_class.keys(), precision_means, yerr=precision_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[0, 1].set_ylabel('Precision')\n",
|
|||
|
"ax[0, 1].set_title('Precision for Classification Models')\n",
|
|||
|
"ax[0, 1].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"ax[1, 0].bar(models_class.keys(), recall_means, yerr=recall_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[1, 0].set_ylabel('Recall')\n",
|
|||
|
"ax[1, 0].set_title('Recall for Classification Models')\n",
|
|||
|
"ax[1, 0].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"ax[1, 1].bar(models_class.keys(), f1_means, yerr=f1_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[1, 1].set_ylabel('F1-score')\n",
|
|||
|
"ax[1, 1].set_title('F1-score for Classification Models')\n",
|
|||
|
"ax[1, 1].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aisenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|