|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Начало лабораторной работы"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 17,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"df = pd.read_csv(\"data/starbucks.csv\")\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Бизнес-цели"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование посетителей в магазине:\n",
|
|||
|
"\n",
|
|||
|
"Цель: Разработать модель, которая будет предсказывать объем выкупленного кофе основе: цены открытия, цены закрытия, самой высокой цене, самой низкой цене\n",
|
|||
|
"Применение:\n",
|
|||
|
"Узнать, какие лучше цены выставлять на кофе\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация цен на кофе:\n",
|
|||
|
"Цель: Определить оптимальную цену на кофе, чтобы объем их скупа был больше.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование посетителей в магазине"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 18,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Среднее значение поля 'Volume: 14704589.99726232\n",
|
|||
|
" Date Open High Low Close Adj Close Volume \\\n",
|
|||
|
"0 1992-06-26 0.328125 0.347656 0.320313 0.335938 0.260703 224358400 \n",
|
|||
|
"1 1992-06-29 0.339844 0.367188 0.332031 0.359375 0.278891 58732800 \n",
|
|||
|
"2 1992-06-30 0.367188 0.371094 0.343750 0.347656 0.269797 34777600 \n",
|
|||
|
"3 1992-07-01 0.351563 0.359375 0.339844 0.355469 0.275860 18316800 \n",
|
|||
|
"4 1992-07-02 0.359375 0.359375 0.347656 0.355469 0.275860 13996800 \n",
|
|||
|
"\n",
|
|||
|
" above_average_volume volume_volatility \n",
|
|||
|
"0 1 584004800 \n",
|
|||
|
"1 1 584004800 \n",
|
|||
|
"2 1 584004800 \n",
|
|||
|
"3 1 584004800 \n",
|
|||
|
"4 0 584004800 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 28\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднее значение объема\n",
|
|||
|
"average_count = df['Volume'].mean()\n",
|
|||
|
"print(f\"Среднее значение поля 'Volume: {average_count}\")\n",
|
|||
|
"\n",
|
|||
|
"# Создаем новую переменную, указывающую, превышает ли объемная продажа среднюю\n",
|
|||
|
"df[\"above_average_volume\"] = (df[\"Volume\"] > average_count).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем волатильность (разницу между максимальной и минимальной объемная продажаю)\n",
|
|||
|
"df[\"volume_volatility\"] = df[\"Volume\"].max() - df[\"Volume\"].min()\n",
|
|||
|
"\n",
|
|||
|
"# Выводим первые строки измененной таблицы для проверки\n",
|
|||
|
"print(df.head())"
|
|||
|
]
|
|||
|
},
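{
"cell_type": "markdown",
"metadata": {},
"source": [
"Note: `volume_volatility` above is the global maximum minus the global minimum of `Volume`, so it is the same constant for every row. Below is a minimal sketch of a per-row alternative, reusing `df` from the cell above; the 30-day window length is an assumption, not something fixed by the lab."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: rolling volatility of the traded volume (the window length is an assumption)\n",
"window = 30  # hypothetical window size, in trading days\n",
"\n",
"# Spread between the rolling maximum and rolling minimum of Volume\n",
"rolling_vol = df['Volume'].rolling(window=window, min_periods=1)\n",
"df['volume_volatility_rolling'] = rolling_vol.max() - rolling_vol.min()\n",
"\n",
"print(df[['Date', 'Volume', 'volume_volatility_rolling']].head())"
]
},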
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация параметров магазина:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средняя объемная продажа для 'Open':\n",
|
|||
|
"Open\n",
|
|||
|
"0.328125 224358400.0\n",
|
|||
|
"0.339844 58732800.0\n",
|
|||
|
"0.351563 9331200.0\n",
|
|||
|
"0.355469 13081600.0\n",
|
|||
|
"0.359375 12518400.0\n",
|
|||
|
" ... \n",
|
|||
|
"122.559998 11747000.0\n",
|
|||
|
"122.930000 6618400.0\n",
|
|||
|
"124.550003 7934200.0\n",
|
|||
|
"125.739998 4827500.0\n",
|
|||
|
"126.080002 6110900.0\n",
|
|||
|
"Name: Volume, Length: 5300, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для 'Close':\n",
|
|||
|
"Close\n",
|
|||
|
"0.335938 224358400.0\n",
|
|||
|
"0.347656 25139200.0\n",
|
|||
|
"0.355469 12182400.0\n",
|
|||
|
"0.359375 31328000.0\n",
|
|||
|
"0.363281 11040000.0\n",
|
|||
|
" ... \n",
|
|||
|
"122.410004 11747000.0\n",
|
|||
|
"122.629997 7172300.0\n",
|
|||
|
"125.970001 7934200.0\n",
|
|||
|
"126.029999 6110900.0\n",
|
|||
|
"126.059998 4827500.0\n",
|
|||
|
"Name: Volume, Length: 5440, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для 'High':\n",
|
|||
|
"High\n",
|
|||
|
"0.347656 2.243584e+08\n",
|
|||
|
"0.355469 1.063893e+07\n",
|
|||
|
"0.359375 1.207893e+07\n",
|
|||
|
"0.367188 3.488640e+07\n",
|
|||
|
"0.371094 2.038720e+07\n",
|
|||
|
" ... \n",
|
|||
|
"123.330002 1.174700e+07\n",
|
|||
|
"123.470001 6.618400e+06\n",
|
|||
|
"126.099998 4.827500e+06\n",
|
|||
|
"126.160004 6.110900e+06\n",
|
|||
|
"126.320000 7.934200e+06\n",
|
|||
|
"Name: Volume, Length: 5245, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для 'Low':\n",
|
|||
|
"Low\n",
|
|||
|
"0.320313 224358400.0\n",
|
|||
|
"0.332031 58732800.0\n",
|
|||
|
"0.339844 18316800.0\n",
|
|||
|
"0.343750 25139200.0\n",
|
|||
|
"0.347656 8584000.0\n",
|
|||
|
" ... \n",
|
|||
|
"121.389999 11747000.0\n",
|
|||
|
"122.139999 6618400.0\n",
|
|||
|
"123.919998 7934200.0\n",
|
|||
|
"124.250000 4827500.0\n",
|
|||
|
"124.809998 6110900.0\n",
|
|||
|
"Name: Volume, Length: 5223, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для комбинации 'Open' и 'Close':\n",
|
|||
|
"Open Close \n",
|
|||
|
"0.328125 0.335938 224358400.0\n",
|
|||
|
"0.339844 0.359375 58732800.0\n",
|
|||
|
"0.351563 0.355469 12035200.0\n",
|
|||
|
" 0.359375 3923200.0\n",
|
|||
|
"0.355469 0.347656 15500800.0\n",
|
|||
|
" ... \n",
|
|||
|
"122.559998 122.410004 11747000.0\n",
|
|||
|
"122.930000 122.379997 6618400.0\n",
|
|||
|
"124.550003 125.970001 7934200.0\n",
|
|||
|
"125.739998 126.059998 4827500.0\n",
|
|||
|
"126.080002 126.029999 6110900.0\n",
|
|||
|
"Name: Volume, Length: 7825, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для комбинации 'High' и 'Low':\n",
|
|||
|
"High Low \n",
|
|||
|
"0.347656 0.320313 224358400.0\n",
|
|||
|
"0.355469 0.343750 15500800.0\n",
|
|||
|
" 0.347656 8208000.0\n",
|
|||
|
"0.359375 0.339844 18316800.0\n",
|
|||
|
" 0.347656 8960000.0\n",
|
|||
|
" ... \n",
|
|||
|
"123.330002 121.389999 11747000.0\n",
|
|||
|
"123.470001 122.139999 6618400.0\n",
|
|||
|
"126.099998 124.250000 4827500.0\n",
|
|||
|
"126.160004 124.809998 6110900.0\n",
|
|||
|
"126.320000 123.919998 7934200.0\n",
|
|||
|
"Name: Volume, Length: 7662, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю объемную продажу для каждого значения каждого признака\n",
|
|||
|
"for column in [\"Open\", \"Close\", \"High\", \"Low\"]:\n",
|
|||
|
" print(f\"Средняя объемная продажа для '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Volume\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя объемная продажа для комбинации 'Open' и 'Close':\")\n",
|
|||
|
"print(df.groupby([\"Open\", \"Close\"])[\"Volume\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя объемная продажа для комбинации 'High' и 'Low':\")\n",
|
|||
|
"print(df.groupby([\"High\", \"Low\"])[\"Volume\"].mean())\n",
|
|||
|
"print()\n"
|
|||
|
]
|
|||
|
},
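{
"cell_type": "markdown",
"metadata": {},
"source": [
"The group-by above uses the raw float prices, so most groups contain a single day (about 5,200-7,800 groups in the output) and the mean is just that day's volume. A minimal sketch of binning the price first, assuming an arbitrary choice of 20 bins:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# Sketch: bin the opening price before grouping, so each group aggregates many days\n",
"n_bins = 20  # hypothetical number of price bins\n",
"\n",
"df['open_bin'] = pd.cut(df['Open'], bins=n_bins)\n",
"volume_by_open_bin = df.groupby('open_bin', observed=True)['Volume'].mean()\n",
"print(volume_by_open_bin)"
]
},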
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Выбор ориентира:\n",
|
|||
|
"1. Прогнозирование стоимости акций взносов:\n",
|
|||
|
"Ориентир:\n",
|
|||
|
"\n",
|
|||
|
"R² (коэффициент детерминации): 0.75 - 0.85\n",
|
|||
|
"\n",
|
|||
|
"MAE (средняя абсолютная ошибка): 1000000 - 1500000 продаж\n",
|
|||
|
"\n",
|
|||
|
"RMSE (среднеквадратичная ошибка): 1200000 - 1600000 продаж"
|
|||
|
]
|
|||
|
},
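{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, these benchmark metrics follow the standard definitions, where $y_i$ is the actual traded volume, $\\hat{y}_i$ the predicted volume and $\\bar{y}$ the mean of the actual values:\n",
"\n",
"$$\\mathrm{MAE} = \\frac{1}{n}\\sum_{i=1}^{n} |y_i - \\hat{y}_i|, \\qquad\n",
"\\mathrm{RMSE} = \\sqrt{\\frac{1}{n}\\sum_{i=1}^{n} (y_i - \\hat{y}_i)^2}, \\qquad\n",
"R^2 = 1 - \\frac{\\sum_i (y_i - \\hat{y}_i)^2}{\\sum_i (y_i - \\bar{y})^2}$$"
]
},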
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 20,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"MAE: 3991467.1377542643\n",
|
|||
|
"MSE: 46938455671077.555\n",
|
|||
|
"RMSE: 6851164.548533158\n",
|
|||
|
"R²: 0.5289490019636116\n",
|
|||
|
"Ориентиры для прогнозирования не достигнуты.\n",
|
|||
|
"Средняя объемная продажа 'Open':\n",
|
|||
|
"Open\n",
|
|||
|
"0.328125 224358400.0\n",
|
|||
|
"0.339844 58732800.0\n",
|
|||
|
"0.351563 9331200.0\n",
|
|||
|
"0.355469 13081600.0\n",
|
|||
|
"0.359375 12518400.0\n",
|
|||
|
" ... \n",
|
|||
|
"122.559998 11747000.0\n",
|
|||
|
"122.930000 6618400.0\n",
|
|||
|
"124.550003 7934200.0\n",
|
|||
|
"125.739998 4827500.0\n",
|
|||
|
"126.080002 6110900.0\n",
|
|||
|
"Name: Volume, Length: 5300, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа 'High':\n",
|
|||
|
"High\n",
|
|||
|
"0.347656 2.243584e+08\n",
|
|||
|
"0.355469 1.063893e+07\n",
|
|||
|
"0.359375 1.207893e+07\n",
|
|||
|
"0.367188 3.488640e+07\n",
|
|||
|
"0.371094 2.038720e+07\n",
|
|||
|
" ... \n",
|
|||
|
"123.330002 1.174700e+07\n",
|
|||
|
"123.470001 6.618400e+06\n",
|
|||
|
"126.099998 4.827500e+06\n",
|
|||
|
"126.160004 6.110900e+06\n",
|
|||
|
"126.320000 7.934200e+06\n",
|
|||
|
"Name: Volume, Length: 5245, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа 'Close':\n",
|
|||
|
"Close\n",
|
|||
|
"0.335938 224358400.0\n",
|
|||
|
"0.347656 25139200.0\n",
|
|||
|
"0.355469 12182400.0\n",
|
|||
|
"0.359375 31328000.0\n",
|
|||
|
"0.363281 11040000.0\n",
|
|||
|
" ... \n",
|
|||
|
"122.410004 11747000.0\n",
|
|||
|
"122.629997 7172300.0\n",
|
|||
|
"125.970001 7934200.0\n",
|
|||
|
"126.029999 6110900.0\n",
|
|||
|
"126.059998 4827500.0\n",
|
|||
|
"Name: Volume, Length: 5440, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа 'Low':\n",
|
|||
|
"Low\n",
|
|||
|
"0.320313 224358400.0\n",
|
|||
|
"0.332031 58732800.0\n",
|
|||
|
"0.339844 18316800.0\n",
|
|||
|
"0.343750 25139200.0\n",
|
|||
|
"0.347656 8584000.0\n",
|
|||
|
" ... \n",
|
|||
|
"121.389999 11747000.0\n",
|
|||
|
"122.139999 6618400.0\n",
|
|||
|
"123.919998 7934200.0\n",
|
|||
|
"124.250000 4827500.0\n",
|
|||
|
"124.809998 6110900.0\n",
|
|||
|
"Name: Volume, Length: 5223, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость взносов для комбинации 'Open' и 'Close':\n",
|
|||
|
"Open Close \n",
|
|||
|
"0.328125 0.335938 224358400.0\n",
|
|||
|
"0.339844 0.359375 58732800.0\n",
|
|||
|
"0.351563 0.355469 12035200.0\n",
|
|||
|
" 0.359375 3923200.0\n",
|
|||
|
"0.355469 0.347656 15500800.0\n",
|
|||
|
" ... \n",
|
|||
|
"122.559998 122.410004 11747000.0\n",
|
|||
|
"122.930000 122.379997 6618400.0\n",
|
|||
|
"124.550003 125.970001 7934200.0\n",
|
|||
|
"125.739998 126.059998 4827500.0\n",
|
|||
|
"126.080002 126.029999 6110900.0\n",
|
|||
|
"Name: Volume, Length: 7825, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость взносов для комбинации 'High' и 'Low':\n",
|
|||
|
"High Low \n",
|
|||
|
"0.347656 0.320313 224358400.0\n",
|
|||
|
"0.355469 0.343750 15500800.0\n",
|
|||
|
" 0.347656 8208000.0\n",
|
|||
|
"0.359375 0.339844 18316800.0\n",
|
|||
|
" 0.347656 8960000.0\n",
|
|||
|
" ... \n",
|
|||
|
"123.330002 121.389999 11747000.0\n",
|
|||
|
"123.470001 122.139999 6618400.0\n",
|
|||
|
"126.099998 124.250000 4827500.0\n",
|
|||
|
"126.160004 124.809998 6110900.0\n",
|
|||
|
"126.320000 123.919998 7934200.0\n",
|
|||
|
"Name: Volume, Length: 7662, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y)\n",
|
|||
|
"\n",
|
|||
|
"X = df.drop(columns=[\"Volume\", \"Date\"], axis=1)\n",
|
|||
|
"\n",
|
|||
|
"y = df[\"Volume\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"X_train = scaler.fit_transform(X_train)\n",
|
|||
|
"X_test = scaler.transform(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем модель линейной регрессии\n",
|
|||
|
"model = LinearRegression()\n",
|
|||
|
"model.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Делаем предсказания на тестовой выборке\n",
|
|||
|
"y_pred = model.predict(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Оцениваем качество модели\n",
|
|||
|
"mae = mean_absolute_error(y_test, y_pred)\n",
|
|||
|
"mse = mean_squared_error(y_test, y_pred)\n",
|
|||
|
"rmse = mean_squared_error(y_test, y_pred, squared=False)\n",
|
|||
|
"r2 = r2_score(y_test, y_pred)\n",
|
|||
|
"\n",
|
|||
|
"print(f\"MAE: {mae}\")\n",
|
|||
|
"print(f\"MSE: {mse}\")\n",
|
|||
|
"print(f\"RMSE: {rmse}\")\n",
|
|||
|
"print(f\"R²: {r2}\")\n",
|
|||
|
"\n",
|
|||
|
"# Проверяем, достигнуты ли ориентиры\n",
|
|||
|
"if r2 >= 0.75 and mae <= 1500000 and rmse <= 1700000:\n",
|
|||
|
" print(\"Ориентиры для прогнозирования достигнуты!\")\n",
|
|||
|
"else:\n",
|
|||
|
" print(\"Ориентиры для прогнозирования не достигнуты.\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"columns_to_group = [\n",
|
|||
|
" \"Open\",\n",
|
|||
|
" \"High\",\n",
|
|||
|
" \"Close\", \"Low\"\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю объемная продажа для каждого значения каждого признака\n",
|
|||
|
"for column in columns_to_group:\n",
|
|||
|
" print(f\"Средняя объемная продажа '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Volume\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю объемная продажа для комбинаций признаков\n",
|
|||
|
"\n",
|
|||
|
"print(\n",
|
|||
|
" \"Средняя посещаемость взносов для комбинации 'Open' и 'Close':\"\n",
|
|||
|
")\n",
|
|||
|
"print(df.groupby([\"Open\", \"Close\"])[\"Volume\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"print(\n",
|
|||
|
" \"Средняя посещаемость взносов для комбинации 'High' и 'Low':\"\n",
|
|||
|
")\n",
|
|||
|
"print(df.groupby([\"High\", \"Low\"])[\"Volume\"].mean())\n",
|
|||
|
"print()"
|
|||
|
]
|
|||
|
},
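{
"cell_type": "markdown",
"metadata": {},
"source": [
"The FutureWarning in the output above comes from `mean_squared_error(..., squared=False)`, which scikit-learn deprecated in 1.4 and removes in 1.6. A minimal sketch of the replacement suggested by the warning itself, assuming scikit-learn >= 1.4 and reusing `y_test` and `y_pred` from the cell above:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: RMSE via the dedicated function named in the FutureWarning (scikit-learn >= 1.4)\n",
"from sklearn.metrics import root_mean_squared_error\n",
"\n",
"rmse = root_mean_squared_error(y_test, y_pred)\n",
"print(f'RMSE: {rmse}')"
]
},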
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ применимости алгоритмов обучения с учителем для решения поставленных задач:\n",
|
|||
|
"1. Прогнозирование посещаемости магазинов:\n",
|
|||
|
"Задача: Регрессия\n",
|
|||
|
"\n",
|
|||
|
"Свойства алгоритмов:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия:\n",
|
|||
|
"Применимость: Хорошо подходит для задач, где зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"Преимущества: Проста в реализации, интерпретируема.\n",
|
|||
|
"Недостатки: Может плохо работать, если зависимость нелинейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями.\n",
|
|||
|
"Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n",
|
|||
|
"Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия):\n",
|
|||
|
"Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n",
|
|||
|
"Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n",
|
|||
|
"Недостатки: Менее интерпретируем, чем линейная регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n",
|
|||
|
"Преимущества: Может достигать высокой точности, устойчив к переобучению.\n",
|
|||
|
"Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n",
|
|||
|
"Преимущества: Может моделировать очень сложные зависимости.\n",
|
|||
|
"Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n",
|
|||
|
"\n",
|
|||
|
"Вывод:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно много.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация тарифной сетки:\n",
|
|||
|
"Задача: Классификация (группировка клиентов по группам риска)\n",
|
|||
|
"\n",
|
|||
|
"Свойства алгоритмов:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия:\n",
|
|||
|
"Применимость: Хорошо подходит для задач бинарной классификации, где зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"Преимущества: Проста в реализации, интерпретируема.\n",
|
|||
|
"Недостатки: Может плохо работать, если зависимость нелинейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями.\n",
|
|||
|
"Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n",
|
|||
|
"Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация):\n",
|
|||
|
"Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n",
|
|||
|
"Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n",
|
|||
|
"Недостатки: Менее интерпретируем, чем линейная регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n",
|
|||
|
"Преимущества: Может достигать высокой точности, устойчив к переобучению.\n",
|
|||
|
"Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n",
|
|||
|
"Преимущества: Может моделировать очень сложные зависимости.\n",
|
|||
|
"Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n",
|
|||
|
"\n",
|
|||
|
"Вывод:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно много.\n"
|
|||
|
]
|
|||
|
},
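{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following cells compare the selected models on a single train/test split. As a complementary check, here is a minimal sketch (not part of the original lab) that compares the same three regression model families with 5-fold cross-validation; the feature choice mirrors the pipeline cells further below, and the random seeds are arbitrary."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: compare the candidate regression models with 5-fold cross-validation\n",
"# instead of a single train/test split (features and target as in the cells below)\n",
"from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor\n",
"from sklearn.linear_model import LinearRegression\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.pipeline import make_pipeline\n",
"from sklearn.preprocessing import StandardScaler\n",
"\n",
"X_cv = df[['Open', 'Close', 'High', 'Low']]\n",
"y_cv = df['Volume']\n",
"\n",
"candidates = {\n",
"    'Linear Regression': LinearRegression(),\n",
"    'Random Forest Regression': RandomForestRegressor(random_state=42),\n",
"    'Gradient Boosting Regression': GradientBoostingRegressor(random_state=42)\n",
"}\n",
"\n",
"for name, model in candidates.items():\n",
"    # Scale inside the pipeline so each fold is fitted without leakage from the validation part\n",
"    pipeline = make_pipeline(StandardScaler(), model)\n",
"    scores = cross_val_score(pipeline, X_cv, y_cv, cv=5, scoring='neg_mean_absolute_error')\n",
"    print(f'{name}: mean MAE = {-scores.mean():.0f}')"
]
},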
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование стоимости акций:\n",
|
|||
|
"Выбранные модели:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация тарифной сетки:\n",
|
|||
|
"Выбранные модели:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 21,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 3991467.1377542643\n",
|
|||
|
"MSE: 46938455671077.555\n",
|
|||
|
"RMSE: 6851164.548533158\n",
|
|||
|
"R²: 0.5289490019636116\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 3632387.72405058\n",
|
|||
|
"MSE: 36255672119329.99\n",
|
|||
|
"RMSE: 6021268.314842812\n",
|
|||
|
"R²: 0.6361561050076534\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 3762986.930251514\n",
|
|||
|
"MSE: 32821664355184.965\n",
|
|||
|
"RMSE: 5729019.493350059\n",
|
|||
|
"R²: 0.6706180991537869\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df.drop(columns = [\"Volume\", \"Date\"], axis=1)\n",
|
|||
|
"y_reg = df[\"Volume\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи регрессии\n",
|
|||
|
"scaler_reg = StandardScaler()\n",
|
|||
|
"X_train_reg = scaler_reg.fit_transform(X_train_reg)\n",
|
|||
|
"X_test_reg = scaler_reg.transform(X_test_reg)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" model.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df.drop(columns=[\"Volume\", \"Date\"], axis=1)\n",
|
|||
|
"y_class = (df[\"Volume\"] > df[\"Volume\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи классификации\n",
|
|||
|
"scaler_class = StandardScaler()\n",
|
|||
|
"X_train_class = scaler_class.fit_transform(X_train_class)\n",
|
|||
|
"X_test_class = scaler_class.transform(X_test_class)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" model.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
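{
"cell_type": "markdown",
"metadata": {},
"source": [
"The perfect accuracy of 1.0 above is explained by target leakage: `X_class` drops only `Volume` and `Date`, so it still contains `above_average_volume`, which is exactly the label being predicted. The later pipeline cells, which restrict the features to the four price columns, land at roughly 0.66-0.76 accuracy. A minimal sketch of one possible leakage-free feature set (the exact column list is a choice, not prescribed by the lab):"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: drop the leaked column before classification.\n",
"# 'above_average_volume' was derived from 'Volume' and equals the label itself,\n",
"# which is why the models above reach an accuracy of 1.0.\n",
"leak_free_features = ['Open', 'High', 'Low', 'Close', 'Adj Close']  # assumed feature list\n",
"X_class_clean = df[leak_free_features]\n",
"y_class_clean = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
"\n",
"print(X_class_clean.columns.tolist())"
]
},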
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование стоимости акций:\n",
|
|||
|
"Конвейер для задачи регрессии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 22,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 5864823.098654955\n",
|
|||
|
"MSE: 79729784253194.64\n",
|
|||
|
"RMSE: 8929153.613484016\n",
|
|||
|
"R²: 0.19987153584955009\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 4775882.923204809\n",
|
|||
|
"MSE: 53290061861042.07\n",
|
|||
|
"RMSE: 7300004.237056446\n",
|
|||
|
"R²: 0.4652074409739847\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 5397945.863176441\n",
|
|||
|
"MSE: 62387989562365.266\n",
|
|||
|
"RMSE: 7898606.811480444\n",
|
|||
|
"R²: 0.37390516307625066\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df[\"Volume\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = pipeline.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация характеристик магазина:\n",
|
|||
|
"Конвейер для задачи классификации:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 23,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 0.6648009950248757\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 0.7568407960199005\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 0.7437810945273632\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df[\"Volume\"] > df[\"Volume\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = pipeline.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
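{
"cell_type": "markdown",
"metadata": {},
"source": [
"The two classes (days with above- vs below-average volume) are unlikely to be balanced, since the mean is pulled up by a few very high-volume days. A small sketch, not part of the original lab, that checks the class shares and passes `stratify` to `train_test_split` so both parts keep the same ratio:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Sketch: inspect the class balance and keep it constant across the split (stratified split)\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"y_above_avg = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
"print(y_above_avg.value_counts(normalize=True))  # share of above- vs below-average days\n",
"\n",
"X_train_s, X_test_s, y_train_s, y_test_s = train_test_split(\n",
"    df[numerical_cols], y_above_avg, test_size=0.2, random_state=42, stratify=y_above_avg\n",
")\n",
"print(y_train_s.mean(), y_test_s.mean())  # positive-class share in each part"
]
},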
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование посещения:\n",
|
|||
|
"\n",
|
|||
|
"Настройка гиперпараметров для задачи регрессии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 24,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 5864823.098654955\n",
|
|||
|
"MSE: 79729784253194.64\n",
|
|||
|
"RMSE: 8929153.613484016\n",
|
|||
|
"R²: 0.19987153584955009\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': None, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4765190.037919035\n",
|
|||
|
"MSE: 52229952422553.555\n",
|
|||
|
"RMSE: 7227029.2944302885\n",
|
|||
|
"R²: 0.4758461720930298\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 5, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4991448.210803198\n",
|
|||
|
"MSE: 55277620586398.51\n",
|
|||
|
"RMSE: 7434892.10321162\n",
|
|||
|
"R²: 0.4452612900440134\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Определяем категориальные и числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Volume']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"\n",
|
|||
|
"Настройка гиперпараметров для задачи классификации:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 25,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.6865671641791045\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': 20, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.7562189054726368\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 5, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 0.7475124378109452\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Определяем категориальные и числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование посещаемости::\n",
|
|||
|
"Задача: Регрессия\n",
|
|||
|
"\n",
|
|||
|
"Выбор метрик:\n",
|
|||
|
"\n",
|
|||
|
"MAE (Mean Absolute Error): Средняя абсолютная ошибка. Показывает среднее отклонение предсказанных значений от фактических. Эта метрика легко интерпретируется, так как она измеряется в тех же единицах, что и целевая переменная \n",
|
|||
|
"\n",
|
|||
|
"MSE (Mean Squared Error): Среднеквадратичная ошибка. Показывает среднее квадратичное отклонение предсказанных значений от фактических. Эта метрика чувствительна к выбросам, так как ошибки возводятся в квадрат.\n",
|
|||
|
"\n",
|
|||
|
"RMSE (Root Mean Squared Error): Квадратный корень из среднеквадратичной ошибки. Показывает среднее отклонение предсказанных значений от фактических в тех же единицах, что и целевая переменная. Эта метрика также чувствительна к выбросам, но легче интерпретируется, чем MSE.\n",
|
|||
|
"\n",
|
|||
|
"R² (R-squared): Коэффициент детерминации. Показывает, какую долю дисперсии целевой переменной объясняет модель. Значение R² близкое к 1 указывает на хорошее качество модели.\n",
|
|||
|
"\n",
|
|||
|
"Обоснование:\n",
|
|||
|
"\n",
|
|||
|
"MAE: Хорошо подходит для задач, где важно понимать среднее отклонение предсказаний от фактических значений.\n",
|
|||
|
"\n",
|
|||
|
"MSE и RMSE: Полезны для задач, где важно минимизировать влияние выбросов, так как они возводят ошибки в квадрат.\n",
|
|||
|
"\n",
|
|||
|
"R²: Позволяет оценить, насколько хорошо модель объясняет вариацию целевой переменной.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация характеристик:\n",
|
|||
|
"Задача: Классификация\n",
|
|||
|
"\n",
|
|||
|
"Выбор метрик:\n",
|
|||
|
"\n",
|
|||
|
"Accuracy: Доля правильных предсказаний среди всех предсказаний. Эта метрика показывает общую точность модели.\n",
|
|||
|
"\n",
|
|||
|
"Precision: Доля правильных положительных предсказаний среди всех положительных предсказаний. Эта метрика важна, если важно минимизировать количество ложноположительных результатов.\n",
|
|||
|
"\n",
|
|||
|
"Recall (Sensitivity): Доля правильных положительных предсказаний среди всех фактических положительных случаев. Эта метрика важна, если важно минимизировать количество ложноотрицательных результатов.\n",
|
|||
|
"\n",
|
|||
|
"F1-score: Гармоническое среднее между precision и recall. Эта метрика показывает баланс между precision и recall.\n",
|
|||
|
"\n",
|
|||
|
"Обоснование:\n",
|
|||
|
"\n",
|
|||
|
"Accuracy: Хорошо подходит для задач, где классы сбалансированы.\n",
|
|||
|
"\n",
|
|||
|
"Precision и Recall: Важны для задач, где важно минимизировать ошибки определенного типа (ложноположительные или ложноотрицательные).\n",
|
|||
|
"\n",
|
|||
|
"F1-score: Позволяет оценить баланс между precision и recall."
|
|||
|
]
|
|||
|
},
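{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, in terms of true and false positives and negatives (TP, FP, TN, FN), the classification metrics above have the standard definitions:\n",
"\n",
"$$\\mathrm{Accuracy} = \\frac{TP + TN}{TP + TN + FP + FN}, \\qquad\n",
"\\mathrm{Precision} = \\frac{TP}{TP + FP}, \\qquad\n",
"\\mathrm{Recall} = \\frac{TP}{TP + FN}, \\qquad\n",
"F_1 = 2 \\cdot \\frac{\\mathrm{Precision} \\cdot \\mathrm{Recall}}{\\mathrm{Precision} + \\mathrm{Recall}}$$"
]
},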
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 27,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 5864823.098654955\n",
|
|||
|
"MSE: 79729784253194.64\n",
|
|||
|
"RMSE: 8929153.613484016\n",
|
|||
|
"R²: 0.19987153584955009\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': 20, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4736367.925802057\n",
|
|||
|
"MSE: 49837093302660.92\n",
|
|||
|
"RMSE: 7059539.17070094\n",
|
|||
|
"R²: 0.49985971622163283\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 5, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4996200.504513187\n",
|
|||
|
"MSE: 55572704024621.99\n",
|
|||
|
"RMSE: 7454710.190518608\n",
|
|||
|
"R²: 0.4422999794066711\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.6865671641791045\n",
|
|||
|
"Precision: 0.5378548895899053\n",
|
|||
|
"Recall: 0.6177536231884058\n",
|
|||
|
"F1-score: 0.5750421585160203\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAABU0ElEQVR4nO3dd1xV9f8H8NdlXRC4DGWIAuKGxJ2KqDgQUixNza8bFDXNiTlLS9Gk0NQ09wBnmmhmmgMHmkLmzoEkLnCAg5jKvOf3Bz9OXQEF72Hc7uvZ4zySz/mcz3mfywXe9zPOkQmCIICIiIhIDToVHQARERFpPiYUREREpDYmFERERKQ2JhRERESkNiYUREREpDYmFERERKQ2JhRERESkNiYUREREpDYmFERERKQ2JhRa6tatW/Dy8oKZmRlkMhn27t0rafv37t2DTCZDaGiopO1qso4dO6Jjx46StZeeno4RI0bA1tYWMpkMkyZNkqztyiIiIgIymQwRERGStBcaGgqZTIZ79+5J0h4Bc+bMgUwmq+gwqBJgQlGBbt++jY8//hi1a9eGoaEhFAoF3N3d8d133+Hly5dlem5fX19cvXoVX331FbZs2YKWLVuW6fnKk5+fH2QyGRQKRZGv461btyCTySCTybBo0aJSt//o0SPMmTMHly9fliDat7dgwQKEhoZizJgx2LJlC4YMGVKm56tVqxZ69OhRpueQyoIFCyRPkl9VkJwUbHp6eqhRowb8/Pzw8OHDMj03UaUkUIXYv3+/YGRkJJibmwsTJkwQ1q5dK3z//fdC//79BX19fWHkyJFldu4XL14IAITPP/+8zM6hVCqFly9fCrm5uWV2juL4+voKenp6gq6urrBz585C+7/88kvB0NBQACAsXLiw1O2fO3dOACCEhISU6risrCwhKyur1OcrTuvWrQV3d3fJ2nsTR0dHwcfHp9zOJwiCkJeXJ7x8+VLIy8sr1XHGxsaCr69vofLc3Fzh5cuXglKpVDu2kJAQAYAQGBgobNmyRVi3bp3g7+8v6OrqCnXq1BFevnyp9jk0QU5OjtZcK72eXsWmM9rp7t276N+/PxwdHXH8+HFUr15d3Dd27FjExsbiwIEDZXb+p0+fAgDMzc3L7BwymQyGhoZl1v6byOVyuLu744cffkC/fv1U9m3fvh0+Pj7YvXt3ucTy4sULVKlSBQYGBpK2++TJE7i4uEjWXm5uLpRKpeRxqkNHR0fS95Guri50dXUlaw8AunXrJvbwjRgxAtWqVcM333yDffv2FXrvlSVBEJCZmQkjI6NyOycA6OnpQU+Pf0qIQx4VIjg4GOnp6diwYYNKMlGgbt26mDhxovh1bm4u5s2bhzp16kAul6NWrVr47LPPkJWVpXJcQZf06dOn0apVKxgaGqJ27drYvHmzWGfOnDlwdHQEAEydOhUymQy1atUCkD9UUPDvfytqjDQ8PBzt2rWDubk5TExM0KBBA3z22Wfi/uLmUBw/fhzt27eHsbExzM3N0bNnT0RHRxd5vtjYWPj5+cHc3BxmZmYYNmwYXrx4UfwL+4qBAwfi4MGDSE5OFsvOnTuHW7duYeDAgYXqJyUlYcqUKXB1dYWJiQkUCgW6deuGK1euiHUiIiLw7rvvAgCGDRsmdncXXGfHjh3RqFEjXLhwAR06dECVKlXE1+XVORS+vr4wNDQsdP3e3t6wsLDAo0ePiryugnkFd+/exYEDB8QYCuYFPHnyBP7+/rCxsYGhoSGaNGmCTZs2qbRR8P1ZtGgRli5dKr63bty4UaLXtjglfa8qlUrMmTMHdnZ2qFKlCjp16oQbN26gVq1a8PPzK3St/55DcevWLfTp0we2trYwNDREzZo10b9/f6SkpADIT2YzMjKwadMm8bUpaLO4ORQHDx6Eh4cHTE1NoVAo8O6772L79u1v9Rq0b98eQP6Q5r/dvHkTffv2haWlJQwNDdGyZUvs27ev0PF//vknPDw8YGRkhJo1a2L+/PkICQkpFHfBz/vhw4fRsmVLGBkZYc2aNQCA5ORkTJo0Cfb29pDL5ahbty6++eYbKJVKlXPt2LEDLVq0EK/b1dUV3333nbg/JycHc+fORb169WBoaIiqVauiXbt2CA8PF+sU9ftByt9ZpDmYVlaAX375BbVr10bbtm1LVH/EiBHYtGkT+vbti08//RRnz55FUFAQoqOj8dNPP6nUjY2NRd++feHv7w9fX19s3LgRfn5+aNGiBd555x307t0b5ubmCAgIwIABA9C9e3eYmJiUKv7r16+jR48eaNy4MQIDAyGXyxEbG4szZ8689rijR4+iW7duqF27NubMmYOXL19i+fLlcHd3x8WLFwslM/369YOTkxOCgoJw8eJFrF+/HtbW1vjmm29KFGfv3r0xevRo7NmzB8OHDweQ3zvRsGFDNG/evFD9O3fuYO/evfjoo4/g5OSExMRErFmzBh4eHrhx4wbs7Ozg7OyMwMBAfPHFFxg1apT4x+Pf38vnz5+jW7du6N+/PwYPHgwbG5si4/vuu+9w/Phx+Pr6IioqCrq6ulizZg2OHDmCLVu2wM7OrsjjnJ2dsWXLFgQEBKBmzZr49NNPAQBWVlZ4+fIlOnbsiNjYWIwbNw5OTk7YtWsX/Pz8kJycrJKoAkBISAgyMzMxatQoyOVyWFpalui1LU5J36szZ85EcHAw3n//fXh7e+PKlSvw9vZGZmbma9vPzs6Gt7c3srKyMH78eNja2uLhw4fYv38/kpOTYWZmhi1btmDEiBFo1aoVRo0aBQCoU6dOsW2GhoZi+PDheOeddzBz5kyYm5vj0qVLOHToUJGJ55sU/NG3sLAQy65fvw53d3fUqFEDM2bMgLGxMX788Uf06tULu3fvxocffggAePjwITp16gSZTIaZM2fC2NgY69evh1wuL/JcMTExGDBgAD7++GOMHDkSDRo0wIsXL+Dh4YGHDx/i448/hoODAyIjIzFz5kw8fvwYS5cuBZD/oWDAgAHo0qWL+DMVHR2NM2fOiO+TOXPmICgoSHw9U1NTcf78eVy8eBFdu3Yt9jWQ8ncWaZCKHnPRNikpKQIAoWfPniWqf/nyZQGAMGLECJXyKVOmCACE48ePi2WOjo4CAOHUqVNi2ZMnTwS5XC58+umnYtndu3eLnD/g6+srODo6Forhyy+/FP79VlmyZIkAQHj69GmxcRec49/zDJo2bSpYW1sLz58/F8uuXLki6OjoCEOHDi10vuHDh6u0+eGHHwpVq1Yt9pz/vg5jY2NBEAShb9++QpcuXQRByB+Pt7W1FebOnVvka5CZmVlorP7u3buCXC4XAgMDxbLXzaHw8PAQAAirV68ucp+Hh4dK2eHDhwUAwvz584U7d+4IJiYmQq9evd54jYJQ9JyGpUuXCgCErVu3imXZ2dmCm5ubYGJiIqSmporXBUBQKBTCkydP3vp8/1bS92pCQoKgp6dX6DrnzJkjAFCZ+3DixAkBgHDixAlBEATh0qVLAgBh165dr421uDkUBfMe7t69KwiCICQnJwumpqZC69atC
80DeNM8i4K2jh49Kjx9+lSIj48XwsLCBCsrK0Eulwvx8fFi3S5dugiurq5CZmamSvtt27YV6tWrJ5aNHz9ekMlkwqVLl8Sy58+fC5aWlipxC8I/P++HDh1SiWvevHmCsbGx8Ndff6mUz5gxQ9DV1RXi4uIEQRCEiRMnCgqF4rXznJo0afLGeTOv/n4oi99ZpBk45FHOUlNTAQCmpqYlqv/rr78CACZPnqxSXvCp9NW5Fi4uLuKnZiD/U2uDBg1w586dt475VQVzL37++edCXajFefz4MS5fvgw/Pz+VT8GNGzdG165dxev8t9GjR6t83b59ezx//lx8DUti4MCBiIiIQEJCAo4fP46EhIRiP3XK5XLo6OT/SOTl5eH58+ficM7FixdLfE65XI5hw4aVqK6Xlxc+/vhjBAYGonfv3jA0NBS7rd/Gr7/+CltbWwwYMEAs09fXx4QJE5Ceno6TJ0+q1O/Tpw+srKze+nyvnht483v12LFjyM3NxSeffKJSb/z48W88h5mZGQDg8OHDpRr
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': 20, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.7574626865671642\n",
|
|||
|
"Precision: 0.6483516483516484\n",
|
|||
|
"Recall: 0.6413043478260869\n",
|
|||
|
"F1-score: 0.644808743169399\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/GU6VOAAAACXBIWXMAAA9hAAAPYQGoP6dpAABf9ElEQVR4nO3dd1gUV9sG8HtBWBDYRZQiiohdFHtUgiUqgkqMRo3RWEBRo8GG3Td2EzFootFobIktGhN7YsceBXuJFbuoFCusqPTz/cHHxBXQXXcAV+6f11yXzJw588wy7D57yoxCCCFAREREZACTgg6AiIiIjB8TCiIiIjIYEwoiIiIyGBMKIiIiMhgTCiIiIjIYEwoiIiIyGBMKIiIiMhgTCiIiIjIYEwoiIiIyGBOKPHL16lX4+PhArVZDoVBg06ZNstZ/69YtKBQKLFu2TNZ6jdlHH32Ejz76SLb6EhMT0adPHzg5OUGhUGDo0KGy1W0seJ29296F30/ZsmUREBCgtS6n979ly5ZBoVDg1q1b+R6jQqHApEmT8v24hc17nVBcv34dX375JcqVKwcLCwuoVCp4eXnhxx9/xIsXL/L02P7+/jh37hy+/fZbrFy5EvXq1cvT4+WngIAAKBQKqFSqHF/Hq1evQqFQQKFQYObMmXrXHx0djUmTJuHMmTMyRPv2pk2bhmXLlmHAgAFYuXIlevTokafHK1u2rPS6KRQKWFlZoX79+lixYkWeHtfYvPo6vbwkJSUVdHjZhIeHY9KkSYiPj9drv/3796NDhw5wcnKCubk5HBwc0LZtW2zYsCFvApVRQbz/bdu2jUlDAStS0AHkla1bt+Kzzz6DUqlEz549Ub16daSkpODQoUMYOXIkLly4gEWLFuXJsV+8eIGIiAh8/fXXGDhwYJ4cw9XVFS9evICZmVme1P8mRYoUwfPnz/H333+jc+fOWttWrVoFCwuLt35zj46OxuTJk1G2bFnUqlVL5/127dr1VsfLzd69e9GwYUNMnDhR1npfp1atWhg+fDgAICYmBkuWLIG/vz+Sk5PRt2/ffIvjXffy6/Qyc3PzAojm9cLDwzF58mQEBATA1tZWp30mTpyIKVOmoGLFivjyyy/h6uqKR48eYdu2bejYsSNWrVqFL774Im8D11FkZCRMTP77bprb+1+PHj3QpUsXKJXKPIlj27ZtmDdvXo5JxYsXL1CkyHv7cffOeC9f4Zs3b6JLly5wdXXF3r17UbJkSWlbUFAQrl27hq1bt+bZ8R88eAAAOr95vA2FQgELC4s8q/9NlEolvLy88Pvvv2dLKFavXg0/Pz+sX78+X2J5/vw5ihYtKvuHyf379+Hu7i5bfWlpacjIyHhtnKVKlUL37t2lnwMCAlCuXDnMmjWLCcVLXn2d5JKRkYGUlJQC/dtat24dpkyZgk6dOmH16tVaXxpGjhyJnTt3IjU1tcDie9WrCUJu73+mpqYwNTXNr7C0FOTvs1AR76H+/fsLAOLw4cM6lU9NTRVTpkwR5cqVE+bm5sLV1VWMHTtWJCUlaZVzdXUVfn5+4p9//hEffPCBUCqVws3NTSxfvlwqM3HiRAFAa3F1dRVCCOHv7y/9/2VZ+7xs165dwsvLS6jVamFlZSUqVaokxo4dK22/efOmACCWLl2qtd+ePXtEo0aNRNGiRYVarRaffPKJuHjxYo7Hu3r1qvD39xdqtVqoVCoREBAgnj179sbXy9/fX1hZWYlly5YJpVIpnjx5Im07duyYACDWr18vAIgZM2ZI2x49eiSGDx8uqlevLqysrISNjY1o1aqVOHPmjFRm37592V6/l8+zadOmolq1auLEiROicePGwtLSUgwZMkTa1rRpU6munj17CqVSme38fXx8hK2trbh3716O55dbDDdv3hRCCBEXFyd69+4tHBwchFKpFDVq1BDLli3TqiPr9zNjxgwxa9YsUa5cOWFiYiJOnz6d6+uadX29ql69esLc3Fxr3cGDB0WnTp2Ei4uLMDc3F6VLlxZDhw4Vz58/1yqX9bu6e/euaNeunbCyshIlSpQQw4cPF2lpaVplnzx5Ivz9/YVKpRJqtVr07NlTnD592uDrLDIyUnTr1k2oVCpRokQJMW7cOJGRkSGioqLEJ598ImxsbISjo6OYOXNmrq+NLq/TyxITE8WwYcNE6dKlhbm5uahUqZKYMWOGyMjI0CoHQAQFBYnffvtNuLu7iyJFioiNGzcKIYS4e/eu6NWrl3BwcBDm5ubC3d1d/PLLL9mONWfOHOHu7i4sLS2Fra2tqFu3rli1apXWa5DbtZSTKlWqCDs7O6HRaN74WuT0PnD27Fnh7+8v3NzchFKpFI6OjqJXr17i4cOHWvtqNBoxZMgQ4erqKszNzYW9vb3w9vYWJ0+elMpcuXJFdOjQQTg6OgqlUilKlSolPv/8cxEfHy+VcXV1Ff7+/rmeb9Z73tKlS3M8923btokmTZoIa2trYWNjI+rVqye9fkLodq37+/vn+DpnASAmTpyoddxTp06JVq1aCRsbG2FlZSWaN28uIiIitMpkxXzo0CERHBwsSpQoIYoWLSrat28v7t+//8bfT2HzXrZQ/P333yhXrhw+/PBDncr36dMHy5cvR6dOnTB8+HAcPXoUISEhuHTpEjZu3KhV9tq1a+jUqRMCAwPh7++PX3/9FQEBAahbty6qVauGDh06wNbWFsHBwejatSvatGkDa2trveK/cOECPv74Y9SoUQNTpkyBUqnEtWvXcPjw4dfut3v3brRu3RrlypXDpEmT8OLFC8ydOxdeXl44deoUypYtq1W+c+fOcHNzQ0hICE6dOoUlS5bAwcEB3333nU5xdujQAf3798eGDRvQu3dvAJmtE1WqVEGdOnWylb9x4wY2bdqEzz77DG5uboiLi8PChQvRtGlTXLx4Ec7OzqhatSqmTJmCCRMmoF+/fmjcuDEAaP0uHz16hNatW6NLly7o3r07HB0dc4zvxx9/xN69e+Hv74+IiAiYmppi4cKF2LVrF1auXAlnZ+cc96tatSpWrlyJ4OBglC5dWmpat7e3x4sXL/DRRx/h2rVrGDhwINzc3LB27VoEBAQgPj4eQ4YM0apr6dKlSEpKQr9+/aBUKmFnZ6fTa5slLS0Nd+/eRbFixbTWr127Fs+fP8eAAQNQvHhxHDt2DHPnzsXdu3exdu1arbLp6enw9fVFgwYNMHPmTOzevRvff/89ypcvjwEDBgAAhBBo164dDh06hP79+6Nq1arYuHEj/P39s8Wk73X2+eefo2rVqpg+fTq2bt2Kb775BnZ2dli4cCGaN2+O7777DqtWrcKIESPwwQcfoEmTJm98XVJTU/Hw4UOtdUWLFkXRokUhhMAnn3yCffv2ITAwELVq1cLOnTsxcuRI3Lt3D7NmzdLab+/evfjzzz8xcOBAlChRAmXLlkVcXBwaNmwIhUKBgQMHwt7eHtu3b0dgYCA0Go00QHfx4sUYPHgwOnXqhCFDhiApKQn//vsvjh49ii+++AIdOnTAlStX8Pvvv2PWrFkoUaIEg
MxrKSdXr17F5cuX0bt3b9jY2LzxdchJWFgYbty4gV69esHJyUnq3r1w4QKOHDkChUIBAOjfvz/WrVuHgQMHwt3dHY8ePcKhQ4dw6dIl1KlTBykpKfD19UVycjIGDRoEJycn3Lt3D1u2bEF8fDzUanW2Y+v7/rds2TL07t0b1apVw9ixY2Fra4vTp09jx44dUpeOLtf6l19+iejoaISFhWHlypVvfI0uXLiAxo0bQ6VSYdSoUTAzM8PChQvx0Ucf4cCBA2jQoIFW+UGDBqFYsWKYOHEibt26hdmzZ2PgwIH4448/dP69FAoFndHILSEhQQAQ7dq106n8mTNnBADRp08frfUjRowQAMTevXulda6urgKAOHjwoLTu/v37QqlUiuHDh0vrXv52+jJdWyhmzZolAIgHDx7kGndO30xq1aolHBwcxKNHj6R1Z8+eFSYmJqJnz57Zjte7d2+tOj/99FNRvHjxXI/58nlYWVkJIYTo1KmTaNGihRBCiPT0dOHk5CQmT56c42uQlJQk0tPTs52
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: Gradient Boosting Classification\n",
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 5, 'model__n_estimators': 200}\n",
"Accuracy: 0.7493781094527363\n",
"Precision: 0.6469428007889546\n",
"Recall: 0.5942028985507246\n",
"F1-score: 0.619452313503305\n",
"\n"
]
},
{
"data": {
"image/png": "<base64 PNG data omitted (confusion matrix figure)>",
"text/plain": [
"<Figure size 640x480 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "<base64 PNG data omitted (ROC curve figure)>",
"text/plain": [
"<Figure size 640x480 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"from sklearn import metrics\n",
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
"\n",
"\n",
"# Numeric feature columns used as model inputs\n",
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
"\n",
"# Build the preprocessing transformer (scaling for the numeric columns)\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        ('num', StandardScaler(), numerical_cols)\n",
"    ])\n",
"\n",
"# Models and their hyperparameter grids for the regression task\n",
"models_reg = {\n",
"    \"Linear Regression\": (LinearRegression(), {}),\n",
"    \"Random Forest Regression\": (RandomForestRegressor(), {\n",
"        'model__n_estimators': [100, 200],\n",
"        'model__max_depth': [None, 10, 20]\n",
"    }),\n",
"    \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
"        'model__n_estimators': [100, 200],\n",
"        'model__learning_rate': [0.01, 0.1],\n",
"        'model__max_depth': [3, 5]\n",
"    })\n",
"}\n",
"\n",
"# Features (X) and target (y) for the regression task\n",
"X_reg = df[numerical_cols]\n",
"y_reg = df['Volume']\n",
"\n",
"# Train/test split for the regression task\n",
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
"\n",
"# Train and evaluate the regression models\n",
"print(\"Результаты для задачи регрессии:\")\n",
"for name, (model, params) in models_reg.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
"    grid_search.fit(X_train_reg, y_train_reg)\n",
"    best_model = grid_search.best_estimator_\n",
"    y_pred_reg = best_model.predict(X_test_reg)\n",
"    mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
"    mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
"    rmse = mse ** 0.5  # RMSE; avoids the squared=False argument deprecated in newer scikit-learn\n",
"    r2 = r2_score(y_test_reg, y_pred_reg)\n",
"    print(f\"Model: {name}\")\n",
"    print(f\"Best Parameters: {grid_search.best_params_}\")\n",
"    print(f\"MAE: {mae}\")\n",
"    print(f\"MSE: {mse}\")\n",
"    print(f\"RMSE: {rmse}\")\n",
"    print(f\"R²: {r2}\")\n",
"    print()\n",
"\n",
"# Models and their hyperparameter grids for the classification task\n",
"models_class = {\n",
"    \"Logistic Regression\": (LogisticRegression(), {\n",
"        'model__C': [0.1, 1, 10],\n",
"        'model__solver': ['liblinear', 'lbfgs']\n",
"    }),\n",
"    \"Random Forest Classification\": (RandomForestClassifier(), {\n",
"        'model__n_estimators': [100, 200],\n",
"        'model__max_depth': [None, 10, 20]\n",
"    }),\n",
"    \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
"        'model__n_estimators': [100, 200],\n",
"        'model__learning_rate': [0.01, 0.1],\n",
"        'model__max_depth': [3, 5]\n",
"    })\n",
"}\n",
"\n",
"# Features (X) and binary target (y) for the classification task: volume above its mean\n",
"X_class = df[numerical_cols]\n",
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
"\n",
"# Train/test split for the classification task\n",
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
"\n",
"# Train and evaluate the classification models\n",
"print(\"Результаты для задачи классификации:\")\n",
"for name, (model, params) in models_class.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
"    grid_search.fit(X_train_class, y_train_class)\n",
"    best_model = grid_search.best_estimator_\n",
"    y_pred_class = best_model.predict(X_test_class)\n",
"    accuracy = accuracy_score(y_test_class, y_pred_class)\n",
"    precision = precision_score(y_test_class, y_pred_class)\n",
"    recall = recall_score(y_test_class, y_pred_class)\n",
"    f1 = f1_score(y_test_class, y_pred_class)\n",
"    print(f\"Model: {name}\")\n",
"    print(f\"Best Parameters: {grid_search.best_params_}\")\n",
"    print(f\"Accuracy: {accuracy}\")\n",
"    print(f\"Precision: {precision}\")\n",
"    print(f\"Recall: {recall}\")\n",
"    print(f\"F1-score: {f1}\")\n",
"    print()\n",
"\n",
"    # Visualize the confusion matrix for the current model\n",
"    cm = confusion_matrix(y_test_class, y_pred_class)\n",
"    disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Less', 'More'])\n",
"    disp.plot(cmap=plt.cm.Blues)\n",
"    plt.title(f'Confusion Matrix for {name}')\n",
"    plt.show()\n",
"\n",
"    # ROC points for the current model (only the last iteration's values are plotted below)\n",
"    fpr, tpr, _ = metrics.roc_curve(y_test_class, y_pred_class)\n",
"\n",
"# Plot the ROC curve built from the last model's hard 0/1 predictions\n",
"plt.plot(fpr, tpr)\n",
"plt.ylabel(\"True Positive Rate\")\n",
"plt.xlabel(\"False Positive Rate\")\n",
"plt.show()"
]
},
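{
"cell_type": "markdown",
"metadata": {},
"source": [
"A side note on the ROC curve above: it is built from hard 0/1 predictions, so it contains only a single interior point. Below is a minimal sketch (not executed here, reusing `best_model` from the last grid search) of the more informative version based on predicted probabilities.\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ROC curve from predicted probabilities of the last fitted classifier (Gradient Boosting)\n",
"y_proba_class = best_model.predict_proba(X_test_class)[:, 1]\n",
"fpr, tpr, _ = metrics.roc_curve(y_test_class, y_proba_class)\n",
"auc = metrics.roc_auc_score(y_test_class, y_proba_class)\n",
"\n",
"plt.plot(fpr, tpr, label=f'ROC curve (AUC = {auc:.3f})')\n",
"plt.plot([0, 1], [0, 1], linestyle='--', label='Chance level')\n",
"plt.xlabel(\"False Positive Rate\")\n",
"plt.ylabel(\"True Positive Rate\")\n",
"plt.legend()\n",
"plt.show()"
]
},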
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let us analyse the metric values obtained and decide whether they are acceptable or whether there is room for improvement.\n",
"\n",
"### Conclusions for the regression task:\n",
"\n",
"- **Random Forest Regression** shows the best MAE and R² on the held-out test set, which points to the highest accuracy and stability among the tested models.\n",
"- **Linear Regression** and **Gradient Boosting Regression** also perform reasonably well, but fall short of the random forest.\n",
"\n",
"### Conclusions for the classification task:\n",
"\n",
"- **Random Forest Classification** shows the best results across all metrics (Accuracy, Precision, Recall, F1-score), which points to the highest accuracy and stability among the tested models.\n",
"- **Logistic Regression** and **Gradient Boosting Classification** also perform reasonably well, but fall short of the random forest.\n"
]
},
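{
"cell_type": "markdown",
"metadata": {},
"source": [
"For reference, the textbook bias–variance decomposition of the expected squared error (a standard identity, quoted here as background rather than something computed in this lab):\n",
"\n",
"$$\\mathbb{E}\\big[(y - \\hat{f}(x))^2\\big] = \\underbrace{\\big(\\mathbb{E}[\\hat{f}(x)] - f(x)\\big)^2}_{\\text{bias}^2} + \\underbrace{\\mathrm{Var}\\big[\\hat{f}(x)\\big]}_{\\text{variance}} + \\sigma^2$$\n",
"\n",
"The cross-validation procedure below only approximates these quantities: the mean fold score tracks the bias term, while the standard deviation across folds tracks the variance term.\n"
]
},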
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To estimate the bias and variance of the models we can use cross-validation. It shows how well a model generalises to new data: the mean score across the folds reflects the systematic error (bias), while the spread of the scores across the folds reflects the variance.\n",
"\n",
"Bias and variance for the regression task:\n",
"for the regression task we use the MAE (Mean Absolute Error) and R² (R-squared) metrics.\n",
"\n",
"Bias and variance for the classification task:\n",
"for the classification task we use the Accuracy, Precision, Recall and F1-score metrics.\n",
"\n",
"Example code for estimating bias and variance:"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Оценка смещения и дисперсии для задачи регрессии:\n",
"Model: Linear Regression\n",
"MAE (Cross-Validation): Mean = 7579277.521464063, Std = 1960011.680230822\n",
"R² (Cross-Validation): Mean = -1.1347990784665143, Std = 2.306562184953969\n",
"\n",
"Model: Random Forest Regression\n",
"MAE (Cross-Validation): Mean = 7806347.158011436, Std = 3478782.4486245485\n",
"R² (Cross-Validation): Mean = -0.1365001480998081, Std = 0.12973311964755527\n",
"\n",
"Model: Gradient Boosting Regression\n",
"MAE (Cross-Validation): Mean = 7893683.279353255, Std = 3518932.5109060933\n",
"R² (Cross-Validation): Mean = -0.1057513351347448, Std = 0.08711611953829208\n",
"\n",
"Оценка смещения и дисперсии для задачи классификации:\n",
"Model: Logistic Regression\n",
"Accuracy (Cross-Validation): Mean = 0.5892532514775223, Std = 0.12093239665054567\n",
"Precision (Cross-Validation): Mean = 0.35360976489674945, Std = 0.34878959634336554\n",
"Recall (Cross-Validation): Mean = 0.41634103019538193, Std = 0.47748852647480444\n",
"F1-score (Cross-Validation): Mean = 0.26337058226161625, Std = 0.2700065991354378\n",
"\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Model: Random Forest Classification\n",
"Accuracy (Cross-Validation): Mean = 0.3738201494085268, Std = 0.16711322364727796\n",
"Precision (Cross-Validation): Mean = 0.40131211828076446, Std = 0.30406982088770196\n",
"Recall (Cross-Validation): Mean = 0.48331005101041064, Std = 0.28204866326457984\n",
"F1-score (Cross-Validation): Mean = 0.32735686540449993, Std = 0.09990409789532408\n",
"\n",
"Model: Gradient Boosting Classification\n",
"Accuracy (Cross-Validation): Mean = 0.3266515895940335, Std = 0.17857049420400362\n",
"Precision (Cross-Validation): Mean = 0.36258531023350526, Std = 0.3286759742498122\n",
"Recall (Cross-Validation): Mean = 0.3801836880463708, Std = 0.3324199402098825\n",
"F1-score (Cross-Validation): Mean = 0.23705262823922438, Std = 0.15533217789069204\n",
"\n"
]
}
],
"source": [
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"# Define the numeric feature columns\n",
"\n",
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
"\n",
"# Build the preprocessing transformer (scaling for the numeric columns)\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        ('num', StandardScaler(), numerical_cols)\n",
"    ])\n",
"\n",
"# Features (X) and target (y) for the regression task\n",
"X_reg = df[numerical_cols]\n",
"y_reg = df['Volume']\n",
"\n",
"# Models for the regression task\n",
"models_reg = {\n",
"    \"Linear Regression\": LinearRegression(),\n",
"    \"Random Forest Regression\": RandomForestRegressor(),\n",
"    \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
"}\n",
"\n",
"# Bias/variance assessment for the regression task: cross-validated mean (bias) and spread (variance)\n",
"print(\"Оценка смещения и дисперсии для задачи регрессии:\")\n",
"for name, model in models_reg.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
"    r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
"    print(f\"Model: {name}\")\n",
"    print(f\"MAE (Cross-Validation): Mean = {mae_scores.mean()}, Std = {mae_scores.std()}\")\n",
"    print(f\"R² (Cross-Validation): Mean = {r2_scores.mean()}, Std = {r2_scores.std()}\")\n",
"    print()\n",
"\n",
"# Features (X) and binary target (y) for the classification task\n",
"X_class = df[numerical_cols]\n",
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
"\n",
"# Models for the classification task\n",
"models_class = {\n",
"    \"Logistic Regression\": LogisticRegression(),\n",
"    \"Random Forest Classification\": RandomForestClassifier(),\n",
"    \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
"}\n",
"\n",
"# Bias/variance assessment for the classification task\n",
"print(\"Оценка смещения и дисперсии для задачи классификации:\")\n",
"for name, model in models_class.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
"    precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
"    recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
"    f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
"    print(f\"Model: {name}\")\n",
"    print(f\"Accuracy (Cross-Validation): Mean = {accuracy_scores.mean()}, Std = {accuracy_scores.std()}\")\n",
"    print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")\n",
"    print(f\"Recall (Cross-Validation): Mean = {recall_scores.mean()}, Std = {recall_scores.std()}\")\n",
"    print(f\"F1-score (Cross-Validation): Mean = {f1_scores.mean()}, Std = {f1_scores.std()}\")\n",
"    print()"
]
},
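{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `UndefinedMetricWarning` messages above appear because on some folds the classifier predicts no positive samples at all, so precision is undefined and silently set to 0. A minimal sketch (not executed here) of a scorer that makes this behaviour explicit via `zero_division`:\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.metrics import make_scorer, precision_score\n",
"\n",
"# Precision scorer that returns 0.0 without a warning when a fold has no predicted positives\n",
"precision_scorer = make_scorer(precision_score, zero_division=0)\n",
"\n",
"precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring=precision_scorer)\n",
"print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")"
]
},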
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "<base64 PNG data omitted (regression metric bar charts)>",
"text/plain": [
"<Figure size 1200x600 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n",
"c:\\Users\\ateks\\Courses\\Courses\\.venv\\Lib\\site-packages\\sklearn\\metrics\\_classification.py:1531: UndefinedMetricWarning: Precision is ill-defined and being set to 0.0 due to no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, f\"{metric.capitalize()} is\", len(result))\n"
]
},
{
"data": {
"image/png": "<base64 PNG data omitted (classification metric bar charts)>",
"text/plain": [
"<Figure size 1200x1200 with 4 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
"\n",
"# Define the numeric feature columns\n",
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
"\n",
"# Build the preprocessing transformer (scaling for the numeric columns)\n",
"preprocessor = ColumnTransformer(\n",
"    transformers=[\n",
"        ('num', StandardScaler(), numerical_cols)\n",
"    ])\n",
"\n",
"# Features (X) and target (y) for the regression task\n",
"X_reg = df[numerical_cols]\n",
"y_reg = df['Volume']\n",
"\n",
"# Models for the regression task\n",
"models_reg = {\n",
"    \"Linear Regression\": LinearRegression(),\n",
"    \"Random Forest Regression\": RandomForestRegressor(),\n",
"    \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
"}\n",
"\n",
"# Collect cross-validated scores for the regression models\n",
"mae_means = []\n",
"mae_stds = []\n",
"r2_means = []\n",
"r2_stds = []\n",
"\n",
"for name, model in models_reg.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
"    r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
"    mae_means.append(mae_scores.mean())\n",
"    mae_stds.append(mae_scores.std())\n",
"    r2_means.append(r2_scores.mean())\n",
"    r2_stds.append(r2_scores.std())\n",
"\n",
"# Visualize the results for the regression task\n",
"fig, ax = plt.subplots(1, 2, figsize=(12, 6))\n",
"\n",
"ax[0].bar(models_reg.keys(), mae_means, yerr=mae_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"ax[0].set_ylabel('MAE')\n",
"ax[0].set_title('Mean Absolute Error (MAE) for Regression Models')\n",
"ax[0].yaxis.grid(True)\n",
"\n",
"ax[1].bar(models_reg.keys(), r2_means, yerr=r2_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"ax[1].set_ylabel('R²')\n",
"ax[1].set_title('R-squared (R²) for Regression Models')\n",
"ax[1].yaxis.grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Features (X) and binary target (y) for the classification task\n",
"X_class = df[numerical_cols]\n",
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
"\n",
"# Models for the classification task\n",
"models_class = {\n",
"    \"Logistic Regression\": LogisticRegression(),\n",
"    \"Random Forest Classification\": RandomForestClassifier(),\n",
"    \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
"}\n",
"\n",
"# Collect cross-validated scores for the classification models\n",
"accuracy_means = []\n",
"accuracy_stds = []\n",
"precision_means = []\n",
"precision_stds = []\n",
"recall_means = []\n",
"recall_stds = []\n",
"f1_means = []\n",
"f1_stds = []\n",
"\n",
"for name, model in models_class.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
"    precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
"    recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
"    f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
"    accuracy_means.append(accuracy_scores.mean())\n",
"    accuracy_stds.append(accuracy_scores.std())\n",
"    precision_means.append(precision_scores.mean())\n",
"    precision_stds.append(precision_scores.std())\n",
"    recall_means.append(recall_scores.mean())\n",
"    recall_stds.append(recall_scores.std())\n",
"    f1_means.append(f1_scores.mean())\n",
"    f1_stds.append(f1_scores.std())\n",
"\n",
"# Visualize the results for the classification task\n",
"fig, ax = plt.subplots(2, 2, figsize=(12, 12))\n",
"\n",
"ax[0, 0].bar(models_class.keys(), accuracy_means, yerr=accuracy_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"ax[0, 0].set_ylabel('Accuracy')\n",
"ax[0, 0].set_title('Accuracy for Classification Models')\n",
"ax[0, 0].yaxis.grid(True)\n",
"\n",
"ax[0, 1].bar(models_class.keys(), precision_means, yerr=precision_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"ax[0, 1].set_ylabel('Precision')\n",
"ax[0, 1].set_title('Precision for Classification Models')\n",
"ax[0, 1].yaxis.grid(True)\n",
"\n",
"ax[1, 0].bar(models_class.keys(), recall_means, yerr=recall_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"ax[1, 0].set_ylabel('Recall')\n",
"ax[1, 0].set_title('Recall for Classification Models')\n",
"ax[1, 0].yaxis.grid(True)\n",
"\n",
"ax[1, 1].bar(models_class.keys(), f1_means, yerr=f1_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"ax[1, 1].set_ylabel('F1-score')\n",
"ax[1, 1].set_title('F1-score for Classification Models')\n",
"ax[1, 1].yaxis.grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aisenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.6"
}
},
"nbformat": 4,
"nbformat_minor": 2
}