1802 lines
347 KiB
Plaintext
1802 lines
347 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Начало лабораторной работы"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume'], dtype='object')\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"print(df.columns)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Бизнес-цели"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование объема торгов акциями:\n",
|
|||
|
"\n",
|
|||
|
"Цель: Разработать модель, которая будет предсказывать объем выкупленных акций на основе цены открытия, цены закрытия, самой высокой и самой низкой цены.\n",
|
|||
|
"Применение:\n",
|
|||
|
"Узнать, какие лучше цены выставлять на акции.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация цен на акции:\n",
|
|||
|
"Цель: Определить оптимальную цену акции, при которой объем покупок будет наибольшим.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование объема проданных акций"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 2,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Среднее значение поля 'Volume: 9081991.374023996\n",
|
|||
|
" Date Open High Low Close Adj Close Volume \\\n",
|
|||
|
"0 6/22/2001 3.428571 3.428571 3.428571 3.428571 2.806002 0 \n",
|
|||
|
"1 6/25/2001 3.428571 3.428571 3.428571 3.428571 2.806002 0 \n",
|
|||
|
"2 6/26/2001 3.714286 3.714286 3.714286 3.714286 3.039837 0 \n",
|
|||
|
"3 6/27/2001 3.714286 3.714286 3.714286 3.714286 3.039837 0 \n",
|
|||
|
"4 6/28/2001 3.714286 3.714286 3.714286 3.714286 3.039837 0 \n",
|
|||
|
"\n",
|
|||
|
" above_average_volume volume_volatility \n",
|
|||
|
"0 0 76714000 \n",
|
|||
|
"1 0 76714000 \n",
|
|||
|
"2 0 76714000 \n",
|
|||
|
"3 0 76714000 \n",
|
|||
|
"4 0 76714000 \n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 28\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднее значение объема\n",
|
|||
|
"average_count = df['Volume'].mean()\n",
|
|||
|
"print(f\"Среднее значение поля 'Volume: {average_count}\")\n",
|
|||
|
"\n",
|
|||
|
"# Создаем новую переменную, указывающую, превышает ли объемная продажа среднюю\n",
|
|||
|
"df[\"above_average_volume\"] = (df[\"Volume\"] > average_count).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем волатильность (разницу между максимальной и минимальной объемная продажаю)\n",
|
|||
|
"df[\"volume_volatility\"] = df[\"Volume\"].max() - df[\"Volume\"].min()\n",
|
|||
|
"\n",
|
|||
|
"# Выводим первые строки измененной таблицы для проверки\n",
|
|||
|
"print(df.head())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация цен на акции:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 3,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Средняя объемная продажа для 'Open':\n",
|
|||
|
"Open\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.320000 5.700000e+03\n",
|
|||
|
"1.380000 1.200000e+03\n",
|
|||
|
"1.400000 0.000000e+00\n",
|
|||
|
" ... \n",
|
|||
|
"19.940001 8.288800e+06\n",
|
|||
|
"20.059999 6.851500e+06\n",
|
|||
|
"20.100000 4.836700e+06\n",
|
|||
|
"20.250000 4.154200e+06\n",
|
|||
|
"20.420000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 1421, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для 'Close':\n",
|
|||
|
"Close\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.400000 3.000000e+02\n",
|
|||
|
"1.410000 1.735315e+06\n",
|
|||
|
"1.420000 3.000000e+03\n",
|
|||
|
" ... \n",
|
|||
|
"19.750000 5.734200e+06\n",
|
|||
|
"20.080000 4.836700e+06\n",
|
|||
|
"20.129999 6.276400e+06\n",
|
|||
|
"20.209999 8.799600e+06\n",
|
|||
|
"20.389999 6.851500e+06\n",
|
|||
|
"Name: Volume, Length: 1442, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для 'High':\n",
|
|||
|
"High\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.400000 3.000000e+02\n",
|
|||
|
"1.410000 7.909091e+02\n",
|
|||
|
"1.428571 1.957805e+02\n",
|
|||
|
" ... \n",
|
|||
|
"20.209999 8.799600e+06\n",
|
|||
|
"20.309999 4.154200e+06\n",
|
|||
|
"20.389999 4.836700e+06\n",
|
|||
|
"20.500000 6.851500e+06\n",
|
|||
|
"20.590000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 1423, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для 'Low':\n",
|
|||
|
"Low\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.320000 5.700000e+03\n",
|
|||
|
"1.380000 1.125310e+07\n",
|
|||
|
"1.400000 1.626675e+06\n",
|
|||
|
" ... \n",
|
|||
|
"19.570000 8.288800e+06\n",
|
|||
|
"19.650000 4.154200e+06\n",
|
|||
|
"19.879999 6.851500e+06\n",
|
|||
|
"20.000000 4.836700e+06\n",
|
|||
|
"20.090000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 1410, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для комбинации 'Open' и 'Close':\n",
|
|||
|
"Open Close \n",
|
|||
|
"1.142857 1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.260000 1.500000e+02\n",
|
|||
|
"1.320000 1.410000 5.700000e+03\n",
|
|||
|
"1.380000 1.400000 1.200000e+03\n",
|
|||
|
"1.400000 1.400000 0.000000e+00\n",
|
|||
|
" ... \n",
|
|||
|
"19.940001 20.129999 8.288800e+06\n",
|
|||
|
"20.059999 20.389999 6.851500e+06\n",
|
|||
|
"20.100000 20.080000 4.836700e+06\n",
|
|||
|
"20.250000 19.719999 4.154200e+06\n",
|
|||
|
"20.420000 20.129999 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 4401, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа для комбинации 'High' и 'Low':\n",
|
|||
|
"High Low \n",
|
|||
|
"1.142857 1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.260000 1.500000e+02\n",
|
|||
|
"1.400000 1.380000 1.200000e+03\n",
|
|||
|
" 1.400000 0.000000e+00\n",
|
|||
|
"1.410000 1.320000 5.700000e+03\n",
|
|||
|
" ... \n",
|
|||
|
"20.209999 19.290001 8.799600e+06\n",
|
|||
|
"20.309999 19.650000 4.154200e+06\n",
|
|||
|
"20.389999 20.000000 4.836700e+06\n",
|
|||
|
"20.500000 19.879999 6.851500e+06\n",
|
|||
|
"20.590000 20.090000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 4246, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Устанавливаем случайное состояние\n",
|
|||
|
"random_state = 42\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю объемную продажу для каждого значения каждого признака\n",
|
|||
|
"for column in [\"Open\", \"Close\", \"High\", \"Low\"]:\n",
|
|||
|
" print(f\"Средняя объемная продажа для '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Volume\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя объемная продажа для комбинации 'Open' и 'Close':\")\n",
|
|||
|
"print(df.groupby([\"Open\", \"Close\"])[\"Volume\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(\"Средняя объемная продажа для комбинации 'High' и 'Low':\")\n",
|
|||
|
"print(df.groupby([\"High\", \"Low\"])[\"Volume\"].mean())\n",
|
|||
|
"print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Выбор ориентира:\n",
|
|||
|
"1. Прогнозирование объема торгов акциями:\n",
|
|||
|
"Ориентир:\n",
|
|||
|
"\n",
|
|||
|
"R² (коэффициент детерминации): 0.75 - 0.85\n",
|
|||
|
"\n",
|
|||
|
"MAE (средняя абсолютная ошибка): 2000000 - 3500000 продаж\n",
|
|||
|
"\n",
|
|||
|
"RMSE (среднеквадратичная ошибка): 2200000 - 3600000 продаж"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 4,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"MAE: 4479413.782319424\n",
|
|||
|
"MSE: 33820915982442.465\n",
|
|||
|
"RMSE: 5815575.292474723\n",
|
|||
|
"R²: 0.42948176526183957\n",
|
|||
|
"Ориентиры для прогнозирования не достигнуты.\n",
|
|||
|
"Средняя объемная продажа 'Open':\n",
|
|||
|
"Open\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.320000 5.700000e+03\n",
|
|||
|
"1.380000 1.200000e+03\n",
|
|||
|
"1.400000 0.000000e+00\n",
|
|||
|
" ... \n",
|
|||
|
"19.940001 8.288800e+06\n",
|
|||
|
"20.059999 6.851500e+06\n",
|
|||
|
"20.100000 4.836700e+06\n",
|
|||
|
"20.250000 4.154200e+06\n",
|
|||
|
"20.420000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 1421, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа 'High':\n",
|
|||
|
"High\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.400000 3.000000e+02\n",
|
|||
|
"1.410000 7.909091e+02\n",
|
|||
|
"1.428571 1.957805e+02\n",
|
|||
|
" ... \n",
|
|||
|
"20.209999 8.799600e+06\n",
|
|||
|
"20.309999 4.154200e+06\n",
|
|||
|
"20.389999 4.836700e+06\n",
|
|||
|
"20.500000 6.851500e+06\n",
|
|||
|
"20.590000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 1423, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа 'Close':\n",
|
|||
|
"Close\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.400000 3.000000e+02\n",
|
|||
|
"1.410000 1.735315e+06\n",
|
|||
|
"1.420000 3.000000e+03\n",
|
|||
|
" ... \n",
|
|||
|
"19.750000 5.734200e+06\n",
|
|||
|
"20.080000 4.836700e+06\n",
|
|||
|
"20.129999 6.276400e+06\n",
|
|||
|
"20.209999 8.799600e+06\n",
|
|||
|
"20.389999 6.851500e+06\n",
|
|||
|
"Name: Volume, Length: 1442, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя объемная продажа 'Low':\n",
|
|||
|
"Low\n",
|
|||
|
"1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.500000e+02\n",
|
|||
|
"1.320000 5.700000e+03\n",
|
|||
|
"1.380000 1.125310e+07\n",
|
|||
|
"1.400000 1.626675e+06\n",
|
|||
|
" ... \n",
|
|||
|
"19.570000 8.288800e+06\n",
|
|||
|
"19.650000 4.154200e+06\n",
|
|||
|
"19.879999 6.851500e+06\n",
|
|||
|
"20.000000 4.836700e+06\n",
|
|||
|
"20.090000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 1410, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость взносов для комбинации 'Open' и 'Close':\n",
|
|||
|
"Open Close \n",
|
|||
|
"1.142857 1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.260000 1.500000e+02\n",
|
|||
|
"1.320000 1.410000 5.700000e+03\n",
|
|||
|
"1.380000 1.400000 1.200000e+03\n",
|
|||
|
"1.400000 1.400000 0.000000e+00\n",
|
|||
|
" ... \n",
|
|||
|
"19.940001 20.129999 8.288800e+06\n",
|
|||
|
"20.059999 20.389999 6.851500e+06\n",
|
|||
|
"20.100000 20.080000 4.836700e+06\n",
|
|||
|
"20.250000 19.719999 4.154200e+06\n",
|
|||
|
"20.420000 20.129999 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 4401, dtype: float64\n",
|
|||
|
"\n",
|
|||
|
"Средняя посещаемость взносов для комбинации 'High' и 'Low':\n",
|
|||
|
"High Low \n",
|
|||
|
"1.142857 1.142857 2.815714e+02\n",
|
|||
|
"1.260000 1.260000 1.500000e+02\n",
|
|||
|
"1.400000 1.380000 1.200000e+03\n",
|
|||
|
" 1.400000 0.000000e+00\n",
|
|||
|
"1.410000 1.320000 5.700000e+03\n",
|
|||
|
" ... \n",
|
|||
|
"20.209999 19.290001 8.799600e+06\n",
|
|||
|
"20.309999 19.650000 4.154200e+06\n",
|
|||
|
"20.389999 20.000000 4.836700e+06\n",
|
|||
|
"20.500000 19.879999 6.851500e+06\n",
|
|||
|
"20.590000 20.090000 4.264000e+06\n",
|
|||
|
"Name: Volume, Length: 4246, dtype: float64\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y)\n",
|
|||
|
"\n",
|
|||
|
"X = df.drop(columns=[\"Volume\", \"Date\"], axis=1)\n",
|
|||
|
"\n",
|
|||
|
"y = df[\"Volume\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки\n",
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"X_train = scaler.fit_transform(X_train)\n",
|
|||
|
"X_test = scaler.transform(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем модель линейной регрессии\n",
|
|||
|
"model = LinearRegression()\n",
|
|||
|
"model.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"# Делаем предсказания на тестовой выборке\n",
|
|||
|
"y_pred = model.predict(X_test)\n",
|
|||
|
"\n",
|
|||
|
"# Оцениваем качество модели\n",
|
|||
|
"mae = mean_absolute_error(y_test, y_pred)\n",
|
|||
|
"mse = mean_squared_error(y_test, y_pred)\n",
|
|||
|
"rmse = mean_squared_error(y_test, y_pred, squared=False)\n",
|
|||
|
"r2 = r2_score(y_test, y_pred)\n",
|
|||
|
"\n",
|
|||
|
"print(f\"MAE: {mae}\")\n",
|
|||
|
"print(f\"MSE: {mse}\")\n",
|
|||
|
"print(f\"RMSE: {rmse}\")\n",
|
|||
|
"print(f\"R²: {r2}\")\n",
|
|||
|
"\n",
|
|||
|
"# Проверяем, достигнуты ли ориентиры\n",
|
|||
|
"if r2 >= 0.75 and mae <= 1500000 and rmse <= 1700000:\n",
|
|||
|
" print(\"Ориентиры для прогнозирования достигнуты!\")\n",
|
|||
|
"else:\n",
|
|||
|
" print(\"Ориентиры для прогнозирования не достигнуты.\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"columns_to_group = [\n",
|
|||
|
" \"Open\",\n",
|
|||
|
" \"High\",\n",
|
|||
|
" \"Close\", \"Low\"\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю объемная продажа для каждого значения каждого признака\n",
|
|||
|
"for column in columns_to_group:\n",
|
|||
|
" print(f\"Средняя объемная продажа '{column}':\")\n",
|
|||
|
" print(df.groupby(column)[\"Volume\"].mean())\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Рассчитываем среднюю объемная продажа для комбинаций признаков\n",
|
|||
|
"\n",
|
|||
|
"print(\n",
|
|||
|
" \"Средняя посещаемость взносов для комбинации 'Open' и 'Close':\"\n",
|
|||
|
")\n",
|
|||
|
"print(df.groupby([\"Open\", \"Close\"])[\"Volume\"].mean())\n",
|
|||
|
"print()\n",
|
|||
|
"\n",
|
|||
|
"print(\n",
|
|||
|
" \"Средняя посещаемость взносов для комбинации 'High' и 'Low':\"\n",
|
|||
|
")\n",
|
|||
|
"print(df.groupby([\"High\", \"Low\"])[\"Volume\"].mean())\n",
|
|||
|
"print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"### Анализ применимости алгоритмов обучения с учителем для решения поставленных задач:\n",
|
|||
|
"1. Прогнозирование выкупа акций:\n",
|
|||
|
"Задача: Регрессия\n",
|
|||
|
"\n",
|
|||
|
"Свойства алгоритмов:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия:\n",
|
|||
|
"Применимость: Хорошо подходит для задач, где зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"Преимущества: Проста в реализации, интерпретируема.\n",
|
|||
|
"Недостатки: Может плохо работать, если зависимость нелинейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями.\n",
|
|||
|
"Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n",
|
|||
|
"Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия):\n",
|
|||
|
"Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n",
|
|||
|
"Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n",
|
|||
|
"Недостатки: Менее интерпретируем, чем линейная регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n",
|
|||
|
"Преимущества: Может достигать высокой точности, устойчив к переобучению.\n",
|
|||
|
"Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети (регрессия):\n",
|
|||
|
"Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n",
|
|||
|
"Преимущества: Может моделировать очень сложные зависимости.\n",
|
|||
|
"Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n",
|
|||
|
"\n",
|
|||
|
"Вывод:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация цен на акции:\n",
|
|||
|
"Задача: Классификация \n",
|
|||
|
"\n",
|
|||
|
"Свойства алгоритмов:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия:\n",
|
|||
|
"Применимость: Хорошо подходит для задач бинарной классификации, где зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"Преимущества: Проста в реализации, интерпретируема.\n",
|
|||
|
"Недостатки: Может плохо работать, если зависимость нелинейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями.\n",
|
|||
|
"Преимущества: Может обрабатывать категориальные признаки, не требует масштабирования данных.\n",
|
|||
|
"Недостатки: Подвержены переобучению, могут давать нестабильные результаты.\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация):\n",
|
|||
|
"Применимость: Хорошо подходит для задач с нелинейными зависимостями и большим количеством признаков.\n",
|
|||
|
"Преимущества: Устойчив к переобучению, может обрабатывать категориальные признаки.\n",
|
|||
|
"Недостатки: Менее интерпретируем, чем логистическая регрессия.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с нелинейными зависимостями и сложными взаимосвязями между признаками.\n",
|
|||
|
"Преимущества: Может достигать высокой точности, устойчив к переобучению.\n",
|
|||
|
"Недостатки: Сложнее в настройке, чем случайный лес, менее интерпретируем.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети (классификация):\n",
|
|||
|
"Применимость: Подходит для задач с очень сложными зависимостями и большим количеством данных.\n",
|
|||
|
"Преимущества: Может моделировать очень сложные зависимости.\n",
|
|||
|
"Недостатки: Требует большого количества данных, сложнее в настройке и интерпретации.\n",
|
|||
|
"\n",
|
|||
|
"Вывод:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия: Может быть хорошим выбором для начала, особенно если зависимость между признаками и целевой переменной линейна.\n",
|
|||
|
"\n",
|
|||
|
"Деревья решений и случайный лес: Подходят для задач с нелинейными зависимостями.\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг: Может давать более высокую точность, чем случайный лес, но требует больше времени на настройку.\n",
|
|||
|
"\n",
|
|||
|
"Нейронные сети: Могут быть излишними для этой задачи, если данных недостаточно.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование объема торгов акциями:\n",
|
|||
|
"Выбранные модели:\n",
|
|||
|
"\n",
|
|||
|
"Линейная регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (регрессия)\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация цен на акции:\n",
|
|||
|
"Выбранные модели:\n",
|
|||
|
"\n",
|
|||
|
"Логистическая регрессия\n",
|
|||
|
"\n",
|
|||
|
"Случайный лес (классификация)\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг (классификация)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 5,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 4479413.782319424\n",
|
|||
|
"MSE: 33820915982442.465\n",
|
|||
|
"RMSE: 5815575.292474723\n",
|
|||
|
"R²: 0.42948176526183957\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 2992885.6305958964\n",
|
|||
|
"MSE: 23229080308355.86\n",
|
|||
|
"RMSE: 4819655.621344316\n",
|
|||
|
"R²: 0.6081533126129994\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 3630084.451629419\n",
|
|||
|
"MSE: 28221268640877.676\n",
|
|||
|
"RMSE: 5312369.399889062\n",
|
|||
|
"R²: 0.523941090908852\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 0.7526165556612749\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 0.80209324452902\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 0.7849666983824929\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df.drop(columns = [\"Volume\", \"Date\"], axis=1)\n",
|
|||
|
"y_reg = df[\"Volume\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи регрессии\n",
|
|||
|
"scaler_reg = StandardScaler()\n",
|
|||
|
"X_train_reg = scaler_reg.fit_transform(X_train_reg)\n",
|
|||
|
"X_test_reg = scaler_reg.transform(X_test_reg)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" model.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df.drop(columns=[\"Volume\", \"Date\"], axis=1)\n",
|
|||
|
"y_class = (df[\"Volume\"] > df[\"Volume\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Стандартизируем признаки для задачи классификации\n",
|
|||
|
"scaler_class = StandardScaler()\n",
|
|||
|
"X_train_class = scaler_class.fit_transform(X_train_class)\n",
|
|||
|
"X_test_class = scaler_class.transform(X_test_class)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" model.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование объема торгов акциями:\n",
|
|||
|
"Конвейер для задачи регрессии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 6,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE: 5371500.134804331\n",
|
|||
|
"MSE: 47781112642081.98\n",
|
|||
|
"RMSE: 6912388.345722626\n",
|
|||
|
"R²: 0.19399001338292088\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE: 4408370.880479668\n",
|
|||
|
"MSE: 42119887803881.95\n",
|
|||
|
"RMSE: 6489983.652050439\n",
|
|||
|
"R²: 0.28948807744547955\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE: 4651931.4982183585\n",
|
|||
|
"MSE: 40455010983490.94\n",
|
|||
|
"RMSE: 6360425.377558559\n",
|
|||
|
"R²: 0.3175725499393369\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df[\"Volume\"]\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" y_pred_reg = pipeline.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация цен на акции:\n",
|
|||
|
"Конвейер для задачи классификации:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 7,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy: 0.6355851569933397\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy: 0.6945765937202664\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy: 0.6936251189343482\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df[\"Volume\"] > df[\"Volume\"].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" pipeline.fit(X_train_class, y_train_class)\n",
|
|||
|
" y_pred_class = pipeline.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование объема:\n",
|
|||
|
"\n",
|
|||
|
"Настройка гиперпараметров для задачи регрессии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 8,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 5371500.134804331\n",
|
|||
|
"MSE: 47781112642081.98\n",
|
|||
|
"RMSE: 6912388.345722626\n",
|
|||
|
"R²: 0.19399001338292088\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n",
|
|||
|
"MAE: 4340519.215001971\n",
|
|||
|
"MSE: 39191675053789.43\n",
|
|||
|
"RMSE: 6260325.475068323\n",
|
|||
|
"R²: 0.33888350984559035\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4502839.821286126\n",
|
|||
|
"MSE: 39536963282425.29\n",
|
|||
|
"RMSE: 6287842.498220298\n",
|
|||
|
"R²: 0.333058912109105\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Определяем категориальные и числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Volume']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"2. Оптимизация цен на акции:\n",
|
|||
|
"\n",
|
|||
|
"Настройка гиперпараметров для задачи классификации:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 9,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.6584205518553758\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.69267364414843\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 0.6955280685061845\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import accuracy_score\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Определяем категориальные и числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"1. Прогнозирование объема проданных акций:\n",
|
|||
|
"Задача: Регрессия\n",
|
|||
|
"\n",
|
|||
|
"Выбор метрик:\n",
|
|||
|
"\n",
|
|||
|
"MAE (Mean Absolute Error): Средняя абсолютная ошибка. Показывает среднее отклонение предсказанных значений от фактических. Эта метрика легко интерпретируется, так как она измеряется в тех же единицах, что и целевая переменная.\n",
|
|||
|
"\n",
|
|||
|
"MSE (Mean Squared Error): Среднеквадратичная ошибка. Показывает среднее квадратичное отклонение предсказанных значений от фактических. Эта метрика чувствительна к выбросам, так как ошибки возводятся в квадрат.\n",
|
|||
|
"\n",
|
|||
|
"RMSE (Root Mean Squared Error): Квадратный корень из среднеквадратичной ошибки. Показывает среднее отклонение предсказанных значений от фактических в тех же единицах, что и целевая переменная. Эта метрика также чувствительна к выбросам, но легче интерпретируется, чем MSE.\n",
|
|||
|
"\n",
|
|||
|
"R² (R-squared): Коэффициент детерминации. Показывает, какую долю дисперсии целевой переменной объясняет модель. Значение R² близкое к 1 указывает на хорошее качество модели.\n",
|
|||
|
"\n",
|
|||
|
"Обоснование:\n",
|
|||
|
"\n",
|
|||
|
"MAE: Хорошо подходит для задач, где важно понимать среднее отклонение предсказаний от фактических значений.\n",
|
|||
|
"\n",
|
|||
|
"MSE и RMSE: Полезны для задач, где крупные ошибки особенно нежелательны: возведение ошибок в квадрат сильнее штрафует большие отклонения, поэтому эти метрики чувствительны к выбросам.\n",
|
|||
|
"\n",
|
|||
|
"R²: Позволяет оценить, насколько хорошо модель объясняет вариацию целевой переменной.\n",
|
|||
|
"\n",
|
|||
|
"2. Оптимизация цен на акции:\n",
|
|||
|
"Задача: Классификация\n",
|
|||
|
"\n",
|
|||
|
"Выбор метрик:\n",
|
|||
|
"\n",
|
|||
|
"Accuracy: Доля правильных предсказаний среди всех предсказаний. Эта метрика показывает общую точность модели.\n",
|
|||
|
"\n",
|
|||
|
"Precision: Доля правильных положительных предсказаний среди всех положительных предсказаний. Эта метрика важна, если важно минимизировать количество ложноположительных результатов.\n",
|
|||
|
"\n",
|
|||
|
"Recall (Sensitivity): Доля правильных положительных предсказаний среди всех фактических положительных случаев. Эта метрика важна, если важно минимизировать количество ложноотрицательных результатов.\n",
|
|||
|
"\n",
|
|||
|
"F1-score: Гармоническое среднее между precision и recall. Эта метрика показывает баланс между precision и recall.\n",
|
|||
|
"\n",
|
|||
|
"Обоснование:\n",
|
|||
|
"\n",
|
|||
|
"Accuracy: Хорошо подходит для задач, где классы сбалансированы.\n",
|
|||
|
"\n",
|
|||
|
"Precision и Recall: Важны для задач, где важно минимизировать ошибки определенного типа (ложноположительные или ложноотрицательные).\n",
|
|||
|
"\n",
|
|||
|
"F1-score: Позволяет оценить баланс между precision и recall."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 10,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"Best Parameters: {}\n",
|
|||
|
"MAE: 5371500.134804331\n",
|
|||
|
"MSE: 47781112642081.98\n",
|
|||
|
"RMSE: 6912388.345722626\n",
|
|||
|
"R²: 0.19399001338292088\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4334820.948370353\n",
|
|||
|
"MSE: 38984999673622.87\n",
|
|||
|
"RMSE: 6243796.895609503\n",
|
|||
|
"R²: 0.3423698752981901\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"d:\\3_КУРС_ПИ\\МИИ\\aisenv\\Lib\\site-packages\\sklearn\\metrics\\_regression.py:492: FutureWarning: 'squared' is deprecated in version 1.4 and will be removed in 1.6. To calculate the root mean squared error, use the function'root_mean_squared_error'.\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 200}\n",
|
|||
|
"MAE: 4503234.105751993\n",
|
|||
|
"MSE: 39534389006317.54\n",
|
|||
|
"RMSE: 6287637.7922330685\n",
|
|||
|
"R²: 0.3331023370554158\n",
|
|||
|
"\n",
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.6584205518553758\n",
|
|||
|
"Precision: 0.6464646464646465\n",
|
|||
|
"Recall: 0.4304932735426009\n",
|
|||
|
"F1-score: 0.5168236877523553\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABciUlEQVR4nO3deVxUVf8H8M+wDeuAKDCggIi5kLhvhLmkgohbauYKuKbhhhtamogpPrZomqmpiZqkpWbljgtaSuWGK5LgAiWLYoCA7Pf3Bz/u4wjo4FyWefi8e91XzD3nnnsuDvCd7znnXpkgCAKIiIiINKBT3R0gIiIi7ceAgoiIiDTGgIKIiIg0xoCCiIiINMaAgoiIiDTGgIKIiIg0xoCCiIiINMaAgoiIiDTGgIKIiIg0xoCilrp9+zY8PDxgbm4OmUyG/fv3S9r+vXv3IJPJEBoaKmm72qx79+7o3r27ZO1lZmZiwoQJUCqVkMlkmDlzpmRt1xQRERGQyWSIiIiQpL3Q0FDIZDLcu3dPkvYICAoKgkwmq+5uUA3AgKIaxcXF4b333kOjRo1gaGgIhUIBd3d3fPHFF3j69GmlntvX1xfXrl3DsmXLsGPHDrRv375Sz1eV/Pz8IJPJoFAoyvw+3r59GzKZDDKZDJ9++mmF23/w4AGCgoIQFRUlQW9f3fLlyxEaGoopU6Zgx44dGDNmTKWer2HDhujXr1+lnkMqy5cvlzxIfl5JcFKy6enpoX79+vDz88M///xTqecmqpEEqhYHDhwQjIyMBAsLC2H69OnC119/LXz55ZfC8OHDBX19fWHixImVdu7s7GwBgPDhhx9W2jmKioqEp0+fCgUFBZV2jvL4+voKenp6gq6urrB79+5S5YsXLxYMDQ0FAMInn3xS4fbPnz8vABC2bt1aoeNyc3OF3NzcCp+vPJ06dRLc3d0la+9lHB0dBW9v7yo7nyAIQmFhofD06VOhsLCwQseZmJgIvr6+pfYXFBQIT58+FYqKijTu29atWwUAQnBwsLBjxw5h06ZNwvjx4wVdXV3B2dlZePr0qcbn0Ab5+fm15lrpxfSqN5ypne7evYvhw4fD0dERJ0+ehK2trVjm7++P2NhYHDx4sNLO//DhQwCAhYVFpZ1DJpPB0NCw0tp/GblcDnd3d3z33XcYNmyYSllYWBi8vb2xd+/eKulLdnY2jI2NYWBgIGm7KSkpcHFxkay9goICFBUVSd5PTejo6Ej6PtLV1YWurq5k7QGAl5eXmOGbMGEC6tWrh//85z/4+eefS733KpMgCMjJyYGRkVGVnRMA9PT0oKfHPyXEIY9qsXLlSmRmZmLLli0qwUSJxo0bY8aMGeLrgoICLF26FM7OzpDL5WjYsCE++OAD5ObmqhxXkpL+7bff0LFjRxgaGqJRo0bYvn27WCcoKAiOjo4AgLlz50Imk6Fhw4YAiocKSr5+VlljpOHh4ejSpQssLCxgamqKpk2b4oMPPhDLy5tDcfLkSbz55pswMTGBhYUFBg4ciOjo6DLPFxsbCz8/P1hYWMDc3Bxjx45FdnZ2+d/Y54wcORKHDx9GWlqauO/8+fO4ffs2Ro4cWar+48ePMWfOHLi6usLU1BQKhQJeXl64cuWKWCciIgIdOnQAAIwdO1ZMd5dcZ/fu3dGiRQtcvHgRXbt2hbGxsfh9eX4Oha+vLwwNDUtdv6enJ+rUqYMHDx6UeV0l8wru3r2LgwcPin0omReQkpKC8ePHw8bGBoaGhmjVqhW2bdum0kbJv8+nn36K1atXi++tmzdvqvW9LY+679WioiIEBQXBzs4OxsbG6NGjB27evImGDRvCz8+v1LU+O4fi9u3bGDJkCJRKJQwNDdGgQQMMHz4c6enpAIqD2aysLGzbtk383pS0Wd4cisOHD6Nbt24wMzODQqFAhw4dEBYW9krfgzfffBNA8ZDms27duoWhQ4fC0tIShoaGaN++PX7++edSx1+9ehXdun
WDkZERGjRogI8//hhbt24t1e+Sn/ejR4+iffv2MDIywsaNGwEAaWlpmDlzJuzt7SGXy9G4cWP85z//QVFRkcq5du3ahXbt2onX7erqii+++EIsz8/Px5IlS/Daa6/B0NAQdevWRZcuXRAeHi7WKev3g5S/s0h7MKysBr/88gsaNWqEN954Q636EyZMwLZt2zB06FDMnj0bf/zxB0JCQhAdHY0ff/xRpW5sbCyGDh2K8ePHw9fXF9988w38/PzQrl07vP766xg8eDAsLCwQEBCAESNGoG/fvjA1Na1Q/2/cuIF+/fqhZcuWCA4OhlwuR2xsLM6ePfvC444fPw4vLy80atQIQUFBePr0KdauXQt3d3dcunSpVDAzbNgwODk5ISQkBJcuXcLmzZthbW2N//znP2r1c/DgwZg8eTL27duHcePGASjOTjRr1gxt27YtVf/OnTvYv38/3nnnHTg5OSE5ORkbN25Et27dcPPmTdjZ2aF58+YIDg7GRx99hEmTJol/PJ79t0xNTYWXlxeGDx+O0aNHw8bGpsz+ffHFFzh58iR8fX0RGRkJXV1dbNy4EceOHcOOHTtgZ2dX5nHNmzfHjh07EBAQgAYNGmD27NkAACsrKzx9+hTdu3dHbGwspk6dCicnJ/zwww/w8/NDWlqaSqAKAFu3bkVOTg4mTZoEuVwOS0tLtb635VH3vbpgwQKsXLkS/fv3h6enJ65cuQJPT0/k5OS8sP28vDx4enoiNzcX06ZNg1KpxD///IMDBw4gLS0N5ubm2LFjByZMmICOHTti0qRJAABnZ+dy2wwNDcW4cePw+uuvY8GCBbCwsMDly5dx5MiRMgPPlyn5o1+nTh1x340bN+Du7o769etj/vz5MDExwffff49BgwZh7969ePvttwEA//zzD3r06AGZTIYFCxbAxMQEmzdvhlwuL/NcMTExGDFiBN577z1MnDgRTZs2RXZ2Nrp164Z//vkH7733HhwcHHDu3DksWLAAiYmJWL16NYDiDwUjRoxAz549xZ+p6OhonD17VnyfBAUFISQkRPx+ZmRk4MKFC7h06RJ69+5d7vdAyt9ZpEWqe8yltklPTxcACAMHDlSrflRUlABAmDBhgsr+OXPmCACEkydPivscHR0FAMKZM2fEfSkpKYJcLhdmz54t7rt7926Z8wd8fX0FR0fHUn1YvHix8OxbZdWqVQIA4eHDh+X2u+Qcz84zaN26tWBtbS2kpqaK+65cuSLo6OgIPj4+pc43btw4lTbffvttoW7duuWe89nrMDExEQRBEIYOHSr07NlTEITi8XilUiksWbKkzO9BTk5OqbH6u3fvCnK5XAgODhb3vWgORbdu3QQAwoYNG8os69atm8q+o0ePCgCEjz/+WLhz545gamoqDBo06KXXKAhlz2lYvXq1AED49ttvxX15eXmCm5ubYGpqKmRkZIjXBUBQKBRCSkrKK5/vWeq+V5OSkgQ9Pb1S1xkUFCQAUJn7cOrUKQGAcOrUKUEQBOHy5csCAOGHH354YV/Lm0NRMu/h7t27giAIQlpammBmZiZ06tSp1DyAl82zKGnr+PHjwsOHD4WEhARhz549gpWVlSCXy4WEhASxbs+ePQVXV1chJydHpf033nhDeO2118R906ZNE2QymXD58mVxX2pqqmBpaanSb0H478/7kSNHVPq1dOlSwcTERPjrr79U9s+fP1/Q1dUV4uPjBUEQhBkzZggKheKF85xatWr10nkzz/9+qIzfWaQdOORRxTIyMgAAZmZmatU/dOgQAGDWrFkq+0s+lT4/18LFxUX81AwUf2pt2rQp7ty588p9fl7J3IuffvqpVAq1PImJiYiKioKfn5/Kp+CWLVuid+/e4nU+a/LkySqv33zzTaSmporfQ3WMHDkSERERSEpKwsmTJ5GUlFTup065XA4dneIficLCQqSmporDOZcuXVL7nHK5HGPHjlWrroeHB9577z0EBwdj8ODBMDQ0FNPWr+LQoU
NQKpUYMWKEuE9fXx/Tp09HZmYmTp8+rVJ/yJAhsLKyeuXzPX9u4OXv1RMnTqCgoADvv/++Sr1p06a99Bzm5uYAgKN
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': 10, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 0.6879162702188392\n",
|
|||
|
"Precision: 0.6594594594594595\n",
|
|||
|
"Recall: 0.547085201793722\n",
|
|||
|
"F1-score: 0.5980392156862745\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABdkUlEQVR4nO3deVhU1f8H8PewzLAOiOyKuC8k7mmEayKouKVm7qCoabjhhpa5YIlpZWnulppJVq5pbrhhKpkbai4kuOAGmgaIyH5+f/jjfh0BnXEu4Mj75XOfhzn33HPPHa7DZ852FUIIASIiIiI9GJV2BYiIiMjwMaAgIiIivTGgICIiIr0xoCAiIiK9MaAgIiIivTGgICIiIr0xoCAiIiK9MaAgIiIivTGgICIiIr0xoCgmly9fhq+vL2xsbKBQKLBlyxZZy7927RoUCgVWr14ta7mGrHXr1mjdurVs5aWlpWHIkCFwdnaGQqHA2LFjZSvbUPA+e7W9Cr+fypUrIzAwUCOtsM+/1atXQ6FQ4Nq1ayVeR4VCgRkzZpT4ecua1zqgiI+PxwcffICqVavCzMwMarUa3t7e+Oabb/D48eNiPXdAQADOnTuHzz77DGvXrkWTJk2K9XwlKTAwEAqFAmq1utD38fLly1AoFFAoFPjiiy90Lv/27duYMWMGYmJiZKjty5s9ezZWr16NESNGYO3atRgwYECxnq9y5crS+6ZQKGBpaYmmTZvihx9+KNbzGppn36ent4yMjNKuXgFHjx7FjBkzkJycrNNxBw8eRPfu3eHs7AylUglHR0d07twZmzZtKp6Kyqg0Pv927NjBoKGUmZR2BYrL77//jvfeew8qlQoDBw5E3bp1kZWVhcOHD2PixIk4f/48li9fXiznfvz4MaKjo/Hxxx9j5MiRxXIOd3d3PH78GKampsVS/ouYmJggPT0d27ZtQ69evTT2rVu3DmZmZi/94X779m3MnDkTlStXRoMGDbQ+bs+ePS91vqLs378fb731FqZPny5ruc/ToEEDjB8/HgBw584drFy5EgEBAcjMzMTQoUNLrB6vuqffp6cplcpSqM3zHT16FDNnzkRgYCBsbW21Omb69OkICwtDjRo18MEHH8Dd3R3379/Hjh070KNHD6xbtw59+/Yt3oprKTY2FkZG//tuWtTn34ABA9C7d2+oVKpiqceOHTuwaNGiQoOKx48fw8Tktf1z98p4Ld/hq1evonfv3nB3d8f+/fvh4uIi7QsODkZcXBx+//33Yjv/vXv3AEDrD4+XoVAoYGZmVmzlv4hKpYK3tzd++umnAgFFREQE/P39sXHjxhKpS3p6OiwsLGT/Y3L37l14eHjIVl5OTg7y8vKeW88KFSqgf//+0uvAwEBUrVoV8+fPZ0DxlGffJ7nk5eUhKyurVP9vbdiwAWFhYejZsyciIiI0vjRMnDgRu3fvRnZ2dqnV71nPBghFff4ZGxvD2Ni4pKqloTR/n2WKeA0NHz5cABBHjhzRKn92drYICwsTVatWFUqlUri7u4spU6aIjIwMjXzu7u7C399f/PHHH+LNN98UKpVKVKlSRaxZs0bKM336dAFAY3N3dxdCCBEQECD9/LT8Y562Z88e4e3tLWxsbISlpaWoWbOmmDJlirT/6tWrAoBYtWqVxnH79u0TzZs3FxYWFsLGxkZ06dJFXLhwodDzXb58WQQEBAgbGxuhVqtFYGCgePTo0Qvfr4CAAGFpaSlWr14tVCqV+O+//6R9f/31lwAgNm7cKACIefPmSfvu378vxo8fL+rWrSssLS2FtbW1aN++vYiJiZHyHDhwoMD79/R1tmrVSrzxxhvixIkTokWLFsLc3FyMGTNG2teqVSuprIEDBwqVSlXg+n19fYWtra24detWoddXVB2uXr0qhBAiKSlJDB48WDg6OgqVSiXq1asnVq9erVFG/u9n3rx5Yv78+aJq1arCyMhInD59usj3Nf/+elaTJk2EUq
nUSDt06JDo2bOncHNzE0qlUlSsWFGMHTtWpKena+TL/13dvHlTdO3aVVhaWgp7e3sxfvx4kZOTo5H3v//+EwEBAUKtVgsbGxsxcOBAcfr0ab3vs9jYWNGvXz+hVquFvb29mDp1qsjLyxMJCQmiS5cuwtraWjg5OYkvvviiyPdGm/fpaWlpaWLcuHGiYsWKQqlUipo1a4p58+aJvLw8jXwARHBwsPjxxx+Fh4eHMDExEZs3bxZCCHHz5k0xaNAg4ejoKJRKpfDw8BDfffddgXMtWLBAeHh4CHNzc2FraysaN24s1q1bp/EeFHUvFaZ27drCzs5OpKamvvC9KOxz4MyZMyIgIEBUqVJFqFQq4eTkJAYNGiT+/fdfjWNTU1PFmDFjhLu7u1AqlcLBwUH4+PiIkydPSnn++ecf0b17d+Hk5CRUKpWoUKGCeP/990VycrKUx93dXQQEBBR5vfmfeatWrSr02nfs2CFatmwprKyshLW1tWjSpIn0/gmh3b0eEBBQ6PucD4CYPn26xnlPnTol2rdvL6ytrYWlpaV45513RHR0tEae/DofPnxYhISECHt7e2FhYSG6desm7t69+8LfT1nzWrZQbNu2DVWrVsXbb7+tVf4hQ4ZgzZo16NmzJ8aPH49jx44hPDwcFy9exObNmzXyxsXFoWfPnggKCkJAQAC+//57BAYGonHjxnjjjTfQvXt32NraIiQkBH369EHHjh1hZWWlU/3Pnz+PTp06oV69eggLC4NKpUJcXByOHDny3OP27t2LDh06oGrVqpgxYwYeP36MhQsXwtvbG6dOnULlypU18vfq1QtVqlRBeHg4Tp06hZUrV8LR0RGff/65VvXs3r07hg8fjk2bNmHw4MEAnrRO1K5dG40aNSqQ/8qVK9iyZQvee+89VKlSBUlJSVi2bBlatWqFCxcuwNXVFXXq1EFYWBimTZuGYcOGoUWLFgCg8bu8f/8+OnTogN69e6N///5wcnIqtH7ffPMN9u/fj4CAAERHR8PY2BjLli3Dnj17sHbtWri6uhZ6XJ06dbB27VqEhISgYsWKUtO6g4MDHj9+jNatWyMuLg4jR45ElSpV8OuvvyIwMBDJyckYM2aMRlmrVq1CRkYGhg0bBpVKBTs7O63e23w5OTm4efMmypUrp5H+66+/Ij09HSNGjED58uXx119/YeHChbh58yZ+/fVXjby5ubnw8/NDs2bN8MUXX2Dv3r348ssvUa1aNYwYMQIAIIRA165dcfjwYQwfPhx16tTB5s2bERAQUKBOut5n77//PurUqYM5c+bg999/x6effgo7OzssW7YM77zzDj7//HOsW7cOEyZMwJtvvomWLVu+8H3Jzs7Gv//+q5FmYWEBCwsLCCHQpUsXHDhwAEFBQWjQoAF2796NiRMn4tatW5g/f77Gcfv378cvv/yCkSNHwt7eHpUrV0ZSUhLeeustKBQKjBw5Eg4ODti5cyeCgoKQmpoqDdBdsWIFRo8ejZ49e2LMmDHIyMjA2bNncezYMfTt2xfdu3fHP//8g59++gnz58+Hvb09gCf3UmEuX76MS5cuYfDgwbC2tn7h+1CYyMhIXLlyBYMGDYKzs7PUvXv+/Hn8+eefUCgUAIDhw4djw4YNGDlyJDw8PHD//n0cPnwYFy9eRKNGjZCVlQU/Pz9kZmZi1KhRcHZ2xq1bt7B9+3YkJyfDxsamwLl1/fxbvXo1Bg8ejDfeeANTpkyBra0tTp8+jV27dkldOtrc6x988AFu376NyMhIrF279oXv0fnz59GiRQuo1WpMmjQJpqamWLZsGVq3bo2oqCg0a9ZMI/+oUaNQrlw5TJ8+HdeuXcPXX3+NkSNH4ueff9b691ImlHZEI7eUlBQBQHTt2lWr/DExMQKAGDJkiEb6hAkTBACxf/9+Kc3d3V0AEIcOHZLS7t69K1QqlRg/fryU9vS306dp20Ixf/58AUDcu3evyHoX9s2kQYMGwtHRUdy/f19KO3PmjD
AyMhIDBw4scL7BgwdrlPnuu++K8uXLF3nOp6/D0tJSCCFEz549Rdu2bYUQQuTm5gpnZ2cxc+bMQt+DjIwMkZubW+A
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.1, 'model__max_depth': 3, 'model__n_estimators': 200}\n",
|
|||
|
"Accuracy: 0.6936251189343482\n",
|
|||
|
"Precision: 0.6519607843137255\n",
|
|||
|
"Recall: 0.5964125560538116\n",
|
|||
|
"F1-score: 0.6229508196721312\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABg6klEQVR4nO3deVxUVf8H8M8wMsM6IMqqiIorinspYS6JoJKp6GOaC7g+Gm64oWUumFJaaZmpmSmaZGlqZW64p5K5kYZGggumAq4gIOuc3x/+uI8joINzWSY+b1739WLOPffec2eG4Tvfc869CiGEABEREZEBTMq7AURERGT8GFAQERGRwRhQEBERkcEYUBAREZHBGFAQERGRwRhQEBERkcEYUBAREZHBGFAQERGRwRhQEBERkcEYUOjh0qVL8PX1hY2NDRQKBbZv3y7r/q9evQqFQoF169bJul9j1qlTJ3Tq1Em2/aWnp2PkyJFwcnKCQqHApEmTZNt3RVfU+2vu3LlQKBTl16h/GWN5PivCZ03t2rURFBSkU1bUZ+y6deugUChw9erVMm+jQqHA3Llzy/y4xs5oAoqEhAT897//Rd26dWFmZgaNRgNvb298+umnePToUakeOzAwEOfPn8eCBQuwYcMGtGnTplSPV5aCgoKgUCig0WiKfB4vXboEhUIBhUKBjz76qMT7v3nzJubOnYuYmBgZWvviFi5ciHXr1mHs2LHYsGEDhgwZUurH1Gq1WL9+Pbp27Yrq1avD1NQUDg4O8PX1xZdffons7OxSb0N5KulrX/AP5MnFwcEBnTt3xq5du0q3sXrIzMzE3LlzcejQofJuSpEOHTqEgIAAODk5QaVSwcHBAT179sTWrVvLu2nPVR6fsTt37mTQIDdhBHbs2CHMzc2Fra2tmDBhgvjyyy/F559/LgYMGCBMTU3FqFGjSu3YmZmZAoB49913S+0YWq1WPHr0SOTl5ZXaMYoTGBgoqlSpIpRKpfjuu+8KrZ8zZ44wMzMTAMTixYtLvP+TJ08KAGLt2rUl2i47O1tkZ2eX+HjFadu2rfD29pZtf8+TmZkp/Pz8BADxyiuviPDwcPH111+Ljz76SPTs2VMolUoxfPjwMmnLlStXCr0Gubm54tGjR6V63JK+9mvXrhUARFhYmNiwYYNYv369WLx4sWjSpIkAIH7++edSbe/z3L59WwAQc+bMKbSuLJ7PZ5k9e7YAIOrXry9mz54t1qxZIxYtWiQ6deokAIiNGzcKIYp+L5S1rKwskZOTIz0u7jM2Ly9PPHr0SGi12lJpR3BwsCjuX+CjR49Ebm5uqRz336xKeQQxJXHlyhUMGDAAbm5uOHDgAJydnaV1wcHBiI+Pxy+//FJqx799+zYAwNbWttSOoVAoYGZmVmr7fx61Wg1vb298++236N+/v866yMhI+Pv744cffiiTtmRmZsLCwgIqlUrW/aakpMDDw0O2/eXl5UGr1RbbzpCQEOzZswdLly7FxIkTddZNmTIFly5dQlRUlEHHMESVKlVQpUrF/PPv3r27zjfUESNGwNHREd9++y1ef/31cmxZ8crz+dyyZQvCwsLQr18/REZGwtTUVFo3bdo07NmzB7m5ueXStqKo1Wqdx8V9xiqVSiiVyrJqlo7y/Dw2auUd0TzPmDFjBABx7Ngxvern5uaKsLAwUbduXaFSqYSbm5uYOXOmyMrK0qnn5uYm/P39xa+//ipeeukloVarRZ06dURERIRUZ86cOQKAzuLm5iaEePzNvuD3JxVs86S9e/cKb29vYWNjIywtLUWDBg3EzJkzpfXFfWvYv3+/aN++vbCwsBA2NjbijTfeEBcuXCjyeJcuXRKBgYHCxsZGaDQaERQUJDIyMp77fAUGBgpLS0uxbt06oVarxf3796V1v//+uwAgfvjhh0IZirt374opU6aIpk2bCktLS2FtbS26desmYmJipDoHDx4s9Pw9eZ4dO3YUTZo0EadOnR
KvvvqqMDc3FxMnTpTWdezYUdrX0KFDhVqtLnT+vr6+wtbWVty4caPI8yuuDVeuXBFCCJGcnCyGDx8uHBwchFqtFs2aNRPr1q3T2UfB67N48WKxZMkSUbduXWFiYiLOnj1b5DETExOFUqkU3bp1e8Yzr+tZx8jOzhbvvfeeaNWqldBoNMLCwkK0b99eHDhwoNB+7t+/LwIDA4VGoxE2NjZi6NCh4uzZs4XeX0W9T4UQYsOGDaJVq1bCzMxMVK1aVbz55psiMTFRp07B6xYbGys6deokzM3NhYuLi/jwww+lOs977YtSkKE4efKkTrlWqxUajUYMHTpUpzw9PV1MnjxZ1KxZU6hUKtGgQQOxePHiQt9o9f1MOHnypPD19RXVqlUTZmZmonbt2mLYsGE6r8/TS0G2oqjnE4AIDg4W27ZtE02aNBEqlUp4eHiIXbt2FTr3gwcPitatWwu1Wi3q1q0rVq5cWexr9LRGjRoJOzs7kZaW9ty6RX3W/PHHHyIwMFDUqVNHqNVq4ejoKIYNGybu3Lmjs21aWpqYOHGicHNzEyqVStjb2wsfHx9x+vRpqc7ff/8tAgIChKOjo1Cr1aJGjRrizTffFA8ePJDquLm5icDAQJ3nrajP2IL3Q8HfaoGdO3eKDh06CCsrK2FtbS3atGkjZWCEEOLIkSOiX79+wtXVVahUKlGzZk0xadIkkZmZKdUJDAws8vUs8ORrW+DMmTOiW7duwtraWlhaWorXXntNREdH69QpaPPRo0dFSEiIqF69urCwsBC9e/cWKSkpz319jF3F/IryhJ9//hl169bFK6+8olf9kSNHIiIiAv369cOUKVNw4sQJhIeH4+LFi9i2bZtO3fj4ePTr1w8jRoxAYGAgvv76awQFBaF169Zo0qQJAgICYGtri5CQEAwcOBA9evSAlZVVidofGxuL119/Hc2aNUNYWBjUajXi4+Nx7NixZ263b98+dO/eHXXr1sXcuXPx6NEjLFu2DN7e3jhz5gxq166tU79///6oU6cOwsPDcebMGXz11VdwcHDAhx9+qFc7AwICMGbMGGzduhXDhw8H8Dg70ahRI7Rq1apQ/cuXL2P79u34z3/+gzp16iA5ORmrVq1Cx44dceHCBbi4uKBx48YICwvD7NmzMXr0aLz66qsAoPNa3r17F927d8eAAQMwePBgODo6Ftm+Tz/9FAcOHEBgYCCio6OhVCqxatUq7N27Fxs2bICLi0uR2zVu3BgbNmxASEgIatasiSlTpgAA7O3t8ejRI3Tq1Anx8fEYN24c6tSpg82bNyMoKAgPHjwolFlYu3YtsrKyMHr0aKjVatjZ2RV5zF27diE/Px+DBw9+zrNeWFHHSEtLw1dffYWBAwdi1KhRePjwIdasWQM/Pz/8/vvvaNGiBQBACIFevXrh6NGjGDNmDBo3boxt27YhMDBQr2MvWLAA7733Hvr374+RI0fi9u3bWLZsGTp06ICzZ8/qfIO8f/8+unXrhoCAAPTv3x9btmxBaGgoPD090b17d71e++Kkpqbizp07EEIgJSUFy5YtQ3p6us7zKYTAG2+8gYMHD2LEiBFo0aIF9uzZg2nTpuHGjRtYsmSJVFefz4SUlBT4+vrC3t4eM2bMgK2tLa5evSqNP7C3t8eKFSswduxY9OnTBwEBAQCAZs2aPfNcjh49iq1bt+Ltt9+GtbU1PvvsM/Tt2xeJiYmoVq0aAODs2bPo1q0bnJ2dMW/ePOTn5yMsLAz29vbPfa4uXbqEv/76C8OHD4e1tfVz6xclKioKly9fxrBhw+Dk5ITY2Fh8+eWXiI2NxW+//SYNNh0zZgy2bNmCcePGwcPDA3fv3sXRo0dx8eJFtGrVCjk5OfDz80N2djbGjx8PJycn3LhxAzt27MCDBw9gY2NT6Ngl/Yxdt24dhg8fjiZNmmDmzJmwtbXF2bNnsXv3brz11lsAgM2bNyMzMxNjx45FtWrV8Pvvv2PZsmX4559/sHnzZgDAf/
/7X9y8eRNRUVHYsGHDc5+j2NhYvPrqq9BoNJg+fTpMTU2xatUqdOrUCYcPH0bbtm116o8fPx5Vq1bFnDlzcPXqVSx
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAjcAAAGwCAYAAABVdURTAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABPTklEQVR4nO3deVhU9f4H8PcMwrDIIiL7IK6ouSBrbpmKYpZploJ5lazsVmpeuVZaqdmi/bTMSsuyxeqWoGVlaZjikpoFLmhuIIIysikuDPvAzPf3Bzk6isogzGFm3q/n4XnufDln5j1Hr/PuzJn5yIQQAkREREQWQi51ACIiIqLGxHJDREREFoXlhoiIiCwKyw0RERFZFJYbIiIisigsN0RERGRRWG6IiIjIorSQOoCp6XQ65OXlwdnZGTKZTOo4REREVA9CCJSUlMDX1xdy+a3PzVhducnLy4NSqZQ6BhERETWASqWCv7//LbexunLj7OwMoPbguLi4SJyGiIiI6kOtVkOpVOpfx2/F6srNlbeiXFxcWG6IiIjMTH0uKeEFxURERGRRWG6IiIjIorDcEBERkUVhuSEiIiKLwnJDREREFoXlhoiIiCwKyw0RERFZFJYbIiIisigsN0RERGRRWG6IiIjIokhabn7//XeMHDkSvr6+kMlk+PHHH2+7z44dOxASEgKFQoGOHTti9erVTZ6TiIiIzIek5aasrAy9evXCihUr6rV9dnY27r//fgwaNAhpaWn4z3/+gyeffBKbN29u4qRERERkLiQdnHnffffhvvvuq/f2K1euRLt27fDOO+8AALp27Yrdu3fj3XffRXR0dFPFJCIiono6e6kcldU6dPRsKVkGs5oKvnfvXkRFRRmsRUdH4z//+c9N96mqqkJVVZX+tlqtbqp4REREVklTo0Py8UKsSVVh18nzGNrVC59MCpMsj1mVm4KCAnh5eRmseXl5Qa1Wo6KiAg4ODjfss2jRIixYsMBUEYmIiKxG1vlSJKaq8P2Bsygq1ejXK6q10OoEbOQySXKZVblpiDlz5iA+Pl5/W61WQ6lUSpiIiIjIfFVWa/HrkXysSVEhJfuifr2NswJjQ/0xLkyJQA8nCROaWbnx9vZGYWGhwVphYSFcXFzqPGsDAAqFAgqFwhTxiIiILNbxfDUSUnLww8FcqCtrAAByGXBvkCdiwpUY3MUTtjbN4xtmzKrc9OnTB5s2bTJY27JlC/r06SNRIiIiIstVWlWDnw/lISElB4fOFuvX/dwcEBOuxCOh/vB1q/vkgpQkLTelpaXIzMzU387OzkZaWhrc3d0REBCAOXPmIDc3F1999RUA4Omnn8by5cvxwgsv4PHHH8e2bduwdu1abNy4UaqnQEREZFGEEEhTXUZCigo/H85DuUYLAGghl2HYXV6ICQ9A/44ekl1PUx+Slpt9+/Zh0KBB+ttXro2Ji4vD6tWrkZ+fj5ycHP3v27Vrh40bN2LmzJl477334O/vj08//ZQfAyciIrpDl8s1+OFgLhJSVEgvLNGvt/dwQmyEEmNC/OHR0jwu85AJIYTUIUxJrVbD1dUVxcXFcHFxkToOERGRZIQQ+DPrIhJSc/DrkQJoanQAAEULOe7v4YOYcCUi2rlDJpP+LI0xr99mdc0NERER3blzJZX4fn8uElNzcPpCuX69q48LxkcoMaqXH1wdbSVMeGdYboiIiKyAVifw+8nzSEjJQfLxc6jR1b5x42RngweD/RAbrkRPf9dmcZbmTrHcEBERWbDcyxVYm6rCun0q5BVX6td7B7hhfHgA7u/pAyeFZdUBy3o2REREhGrtP+MQUlT4/eR5XLm61tXBFmNC/BAbHoAgb2dpQzYhlhsiIiILkXW+FIn7VPh+v+E4hD7tWyM2Qonou7xhb2sjYULTYLkhIiIyY5XVWiQdKcCalBz8dc04BI+WCowN80dMMxiHYGosN0RERGboeL4aiakq/HAwF8UV1QBqxyEM7N
wGsREBzWocgqmx3BAREZmJ0qoa/HIoD2tSVTikuqxf93NzwLgwJcaGNc9xCKbGckNERNSMXRmHkJiqws+H8lB2zTiEod28EBvR/MchmBrLDRERUTN0uVyDHw/mIiFVhRMFhuMQYsJrxyG0cTaPcQimxnJDRETUTFwZh5CYmoNN141DGNHDB7HNaBxCc8ZyQ0REJLHzJVX4/sBZJKaqkF1Upl/v4u2M8REBGB1s3uMQTI3lhoiISAJancCuk+eRkKLC1uOF141D8EVseIDFjEMwNZYbIiIiE8q9XIF1+1RYt+8sci9X6NeDlW4YH6HEAz19LW4cgqnx6BERETWx2nEI55CQmoOdGYbjEB7q7YfYCCW6eLtIG9KCsNwQERE1keyiMiSmqvDd/rMoKq3Sr9/d3h3jIwKsZhyCqbHcEBERNaLKai02H60dh/BnluE4hEdC/RETrkQ7KxuHYGosN0RERI3gRIEaCSmG4xBkV8YhhAdgSFfrHYdgaiw3REREDVRWVYOfD+UhIVWFtGvGIfi62mNcuBJjw5Tw4zgEk2O5ISIiMoIQAofOFiMxNQcb0gzHIUR19UJshBIDOrXhOAQJsdwQERHVQ3F5NX5My8WalByDcQjt/hmH8DDHITQbLDdEREQ3IYTAX9kXkZiqwqa/81H1zzgEuxZyjOjujdiIAERyHEKzw3JDRER0nfMlVVj/zziErOvGIcSGK/FQb3+OQ2jGWG6IiIhwdRxCYqoKW45dHYfgaGeDB3v5IjYiAL04DsEssNwQEZFVy7tcgXX7zmLtPpXBOIReSjeMD1figV6+aMlxCGaFf1pERGR1roxDSPxnHMI/J2ngYt8CY0Jqv2ivqw/HIZgrlhsiIrIap4vKkLivdhzC+ZKr4xAi29WOQxjeneMQLAHLDRERWbQr4xASUlTYm3VBv+7R0g4Ph/ojJkyJ9m1aSpiQGhvLDRERWaT0ghIkpObgh4O5uFx+dRzCPZ3aYHyEEkO6enEcgoViuSEiIotRVlWDXw7XjkM4mHNZv+7rao+xYUqMC+c4BGvAckNERGZNCIHDZ4uRkKrChrRcg3EIQ7p6IjYiAPdwHIJVYbkhIiKzdGUcQkKqCsfz1fr1wNaOiAkPwMOhfvB0tpcwIUmF5YaIiMyGEAIp/4xD2HjdOIT7unsjNjwAd7fnOARrx3JDRETNXlFpFb7ff+M4hCAvZ8RGKPFQbz+4OdpJmJCaE5YbIiJqlnQ6gV2ZRUhIyblhHMLInr6IjVAiWOnGszR0A5YbIiJqVvKLa8chJKZeNw7B3xWxEQEYyXEIdBv820FERJKr1uqw7cQ5JKaqsCP9nME4hId6+yEmPADdfDkOgeqH5YaIiCRz5kIZElJvHIcQ0c4d4yOUuK+7D8chkNFYboiIyKSujENITFXhj1NXxyG0drLDI6G1Qys5DoHuBMsNERGZREZhCdak1D0OITa8dhyCXQuOQ6A7x3JDRERNplxTg18O5SMhNQcHrhmH4HNlHEKYP/xbOUoXkCwSyw0RETUqIQT+zi3GmhQVfj6Uh9KqGgCAjVyGqK6eiA0PwD2dOQ6Bmg7LDRERNYriimr8lJaLhBQVjl0zDqFta0fEhCvxSKg/xyGQSbDcEBFRgwkhkHr6EhJSc7Dp73xUVhuOQ4gJV+Ludq0h51kaMiGWGyIiMtqF0ip8f+AsElJVyDp/dRxCZ6+WiA0PwEO9/dDKieMQSBosN0REVC86ncDuzCIkpqrw27ECVGsNxyHERCjRm+MQqBlguSEiolu61TiEmPAAjOzlA2d7WwkTEhliuSEiohvUXDMOYTvHIZCZYbkhIiK9MxfKkPjPOIRz141DiA1XYkQPjkOg5o/lhojIylXVaLH5aCESU3OwJ/PGcQjjwpXowHEIZEZYboiIrNTJwhIkpKqw/sBZXLpmHMKAf8YhRHEcApkplhsiIitSrqnBL4fzkZiqwv4zl/TrV8YhjA31h9Kd4xDIvLHcEBFZgb/PFm
NNag42pBmOQxjSxROxEUoM7OzJcQhkMVhuiIgslLqyGj8dzEVCqgpH866OQwhwrx2HMDbUH54uHIdAloflhojIggg
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Volume']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи регрессии\n",
|
|||
|
"print(\"Результаты для задачи регрессии:\")\n",
|
|||
|
"for name, (model, params) in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" grid_search.fit(X_train_reg, y_train_reg)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_reg = best_model.predict(X_test_reg)\n",
|
|||
|
" mae = mean_absolute_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" mse = mean_squared_error(y_test_reg, y_pred_reg)\n",
|
|||
|
" rmse = mean_squared_error(y_test_reg, y_pred_reg, squared=False)\n",
|
|||
|
" r2 = r2_score(y_test_reg, y_pred_reg)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"MAE: {mae}\")\n",
|
|||
|
" print(f\"MSE: {mse}\")\n",
|
|||
|
" print(f\"RMSE: {rmse}\")\n",
|
|||
|
" print(f\"R²: {r2}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" precision = precision_score(y_test_class, y_pred_class)\n",
|
|||
|
" recall = recall_score(y_test_class, y_pred_class)\n",
|
|||
|
" f1 = f1_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print(f\"Precision: {precision}\")\n",
|
|||
|
" print(f\"Recall: {recall}\")\n",
|
|||
|
" print(f\"F1-score: {f1}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
" # Визуализация матрицы ошибок\n",
|
|||
|
" cm = confusion_matrix(y_test_class, y_pred_class)\n",
|
|||
|
" disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Less', 'More'])\n",
|
|||
|
" disp.plot(cmap=plt.cm.Blues)\n",
|
|||
|
" plt.title(f'Confusion Matrix for {name}')\n",
|
|||
|
" plt.show()\n",
|
|||
|
"\n",
|
|||
|
" fpr, tpr, _ = metrics.roc_curve(y_test_class, y_pred_class)\n",
|
|||
|
"# построение ROC кривой\n",
|
|||
|
"plt.plot(fpr, tpr)\n",
|
|||
|
"plt.ylabel(\"True Positive Rate\")\n",
|
|||
|
"plt.xlabel(\"False Positive Rate\")\n",
|
|||
|
"plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Давайте проанализируем полученные значения метрик и определим, являются ли они нормальными или их можно улучшить.\n",
|
|||
|
"\n",
|
|||
|
"### Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"\n",
|
|||
|
"### Вывод для задачи регрессии:\n",
|
|||
|
"\n",
|
|||
|
"- **Random Forest Regression** демонстрирует наилучшие результаты по метрикам MAE и R², что указывает на высокую точность и стабильность модели.\n",
|
|||
|
"- **Linear Regression** и **Gradient Boosting Regression** также показывают хорошие результаты, но уступают случайному лесу.\n",
|
|||
|
"\n",
|
|||
|
"### Вывод для задачи классификации:\n",
|
|||
|
"\n",
|
|||
|
"- **Random Forest Classification** демонстрирует наилучшие результаты по всем метрикам (Accuracy, Precision, Recall, F1-score), что указывает на высокую точность и стабильность модели.\n",
|
|||
|
"- **Logistic Regression** и **Gradient Boosting Classification** также показывают хорошие результаты, но уступают случайному лесу.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Для оценки смещения (bias) и дисперсии (variance) моделей можно использовать метод перекрестной проверки (cross-validation). Этот метод позволяет оценить, насколько хорошо модель обобщается на новых данных.\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Для задачи регрессии мы будем использовать метрики MAE (Mean Absolute Error) и R² (R-squared) для оценки смещения и дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации:\n",
|
|||
|
"Для задачи классификации мы будем использовать метрики Accuracy, Precision, Recall и F1-score для оценки смещения и дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"Пример кода для оценки смещения и дисперсии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 11,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 7111210.742214432, Std = 3258621.575988359\n",
|
|||
|
"R² (Cross-Validation): Mean = -973.9850485533416, Std = 1947.0230766477396\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 7101311.532316089, Std = 2376379.9714977024\n",
|
|||
|
"R² (Cross-Validation): Mean = -908.3857578796964, Std = 1815.6264123425312\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 7070482.89476308, Std = 2802589.358183748\n",
|
|||
|
"R² (Cross-Validation): Mean = -981.8443733941988, Std = 1962.7525784329255\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.23427139685560236, Std = 0.18945014607379965\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.22756569304610003, Std = 0.27873494187794556\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.1027019915619471, Std = 0.07059717190284222\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.1051367726720347, Std = 0.057537649734717644\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.4147656200444022, Std = 0.1210675807050909\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.3526804705326606, Std = 0.10418270160673734\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.431013927323867, Std = 0.22234885624805578\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.3842515316280531, Std = 0.1459641276080606\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.3256321870327579, Std = 0.16717715703301406\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.2558131803912397, Std = 0.149590186670139\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.35217892301410875, Std = 0.32986484051603016\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.28471602649116273, Std = 0.21174524841246298\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import cross_val_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Определяем категориальные и числовые столбцы\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Volume']\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи регрессии\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE (Cross-Validation): Mean = {mae_scores.mean()}, Std = {mae_scores.std()}\")\n",
|
|||
|
" print(f\"R² (Cross-Validation): Mean = {r2_scores.mean()}, Std = {r2_scores.std()}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи классификации\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
|
|||
|
" precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
|
|||
|
" recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
|
|||
|
" f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy (Cross-Validation): Mean = {accuracy_scores.mean()}, Std = {accuracy_scores.std()}\")\n",
|
|||
|
" print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")\n",
|
|||
|
" print(f\"Recall (Cross-Validation): Mean = {recall_scores.mean()}, Std = {recall_scores.std()}\")\n",
|
|||
|
" print(f\"F1-score (Cross-Validation): Mean = {f1_scores.mean()}, Std = {f1_scores.std()}\")\n",
|
|||
|
" print()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJsAAAI+CAYAAAAb9gt6AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACXn0lEQVR4nOzdfXxP9eP/8ec2u565yGzD2FyUXGs+Zi5C1uYi8vmEUK4SclExEYVtUkKkokaFLii60ucTYYZPydLHVUWIIiUbKsawzXZ+f/jtfPe292bT2d5mj/vt5ub9Pud1znmd83693++z5/t1XsfJMAxDAAAAAAAAgAWcHV0BAAAAAAAA3DwImwAAAAAAAGAZwiYAAAAAAABYhrAJAAAAAAAAliFsAgAAAAAAgGUImwAAAAAAAGAZwiYAAAAAAABYhrAJAAAAAAAAliFsAgAAAAAAgGUIm+BwgwcPlo+PT4lu8+jRo3JyctKyZctKdLs3s1WrVqly5co6f/68o6uSr1atWmnixImFLn/+/Hk9/PDDCggIkJOTk8aOHVt8lYPJyclJsbGxjq5Gifk7n0dbtmyRk5OTtmzZYnm9AKAsKMp3zjfffCM3Nzf98ssvltcjPj5eNWvWVHp6eqGXeeedd1S/fn25urqqYsWKltcJeQ0ePFjBwcGOrkaJ6tChgzp06HBdywYHB2vw4MGW1gelB2HTDWTZsmVycnKSk5OTtm7dmme+YRgKCgqSk5OT7rnnHgfUsOiysrJUrVo1OTk56fPPP3d0dSxx4cIFxcbGFssfdzmvv71/jzzyiOXbs0pWVpZiYmL06KOP2gSHwcHBcnJyUkREhN3lXn/9dXP/duzYYbfMxIkT5eTkpPvvv9/u/Jw/1PP79/zzz5tln3zySS1cuFDJycmF2q/nnntOy5Yt08iRI/XOO+9owIABhVrueuUcr5x/3t7eatmypd5+++1i3S6uiI2NlZOTk5ydnfXrr7/mmZ+amipPT085OTlpzJgxDqghAOSV+/zRyclJ5cqVU/Xq1TV48GAdP37c0dW7qTz99NPq16+fatWqZU7r0KGDzfH39PRUkyZNNH/+fGVnZ9ssP2DAALVt21ZhYWFq37699u/fb84bPHiwMjIytGjRokLV5cCBAxo8eLDq1Kmj119/XYsXL7ZmJ/OR8x2Z88/V1VXBwcF67LHHdObMmWLdNmzPd2fMmGG3zAMPPCAnJ6cS/xEfyE85R1cAeXl4eGjFihVq27atzfT//ve/+u233+Tu7u6gmhXdpk2bdOLECQUHB2v58uXq0qWLo6v0t124cEFxcXGSdN0pf0HuvvtuDRw4MM/0W2+91fJtWeU///mPDh48qOHDh+eZ5+Hhoc2bNys5OVkBAQE285YvXy4PDw9dunTJ7noNw9B7772n4OBg/ec//9G5c+dUvnx5u2X79eunrl275pnevHlz8/G9994rX19fvfrqq5o+ffo192vTpk1q1aqVYmJirlnWKs2aNdP48eMlSSdOnNAbb7yhQYMGKT09XcOGDSuxejjSxYsXVa6c476e3N3d9d577+XpBffxxx87qEYAcG3Tp09XSEiILl26pK+//lrLli3T1q1btXfvXnl4eDi6eqXenj17tHHjRm3bti3PvBo1amjmzJmSpNOnT2vFihUaN26cTp06pWeffdYsN3XqVPN8buzYsRo1apQ2b94s6cr50qBBgzRv3jw9+uijcnJyKrA+W7ZsUXZ2tl566SXVrVvXqt28ptdee00+Pj5KS0tTYmKiXnnlFe3atcvuD+U3o9dffz1PiFiSPDw89N5772nKlCk209PS0vTpp5/yXscNhZ5NN6CuXbvqgw8+0OXLl22mr1ixQqGhoXn+YL+Rvfvuu7rjjjs0btw4rV69WmlpaY6u0g3v1ltv1YMPPpjnX8uWLQtc7sKFC3anX758WRkZGX+rTtd63ZYuXao2bdqoevXqeea1adNGPj4+Wrlypc
303377TV9++aW6deuW73q3bNmi3377TUuWLNHly5cL/GP/jjvusHvcGjZsaJZxdnZWr1699Pbbb8swjAL3SZJOnjxpabf0wrwW1atXN+s+YcIEbd26VT4+PnrxxRctq0dhOer96uHh4dCwqWvXrnrvvffyTF+xYkWB7RUAHKlLly568MEH9fDDD+uNN97QE088oZ9++kn//ve/HV21YpHfeU9xWbp0qWrWrKlWrVrlmVehQgXzu3vs2LH64osvVKtWLb3yyivKysoyy+X+4dAwDDk72/4p1qdPH/3yyy9mAFWQkydPSpKl5ymFOaa9evXSgw8+qBEjRmjVqlW6//779dVXX+mbb76xrB6FkZ2dne+PlcXJ1dXVoT/8d+3aVT/88IO+/fZbm+mffvqpMjIydPfddzuoZkBehE03oH79+umPP/5QQkKCOS0jI0Mffvih+vfvb3eZ7OxszZ8/Xw0bNpSHh4f8/f01YsQI/fXXXzblPv30U3Xr1k3VqlWTu7u76tSpo2eeecbmi1C60mOnUaNG+uGHH9SxY0d5eXmpevXqmj17dqH34+LFi/rkk0/Ut29f9enTRxcvXtSnn36ab/mff/5ZUVFR8vb2VrVq1TR9+vQ8gcD777+v0NBQlS9fXr6+vmrcuLFeeumlPOvp3bu3KleuLC8vL7Vq1Upr1qy5Zn3zux4597XZR48elZ+fnyQpLi7O7M6a+1r/AwcOqFevXqpcubI8PDzUokULy0/0cl6fnTt36s4775SXl5eeeuops4vtCy+8oPnz56tOnTpyd3fXDz/8IOlKT5127drJ29tbFStW1L333mvThVv6v27SP/zwg/r3769KlSrl6WWX26VLl7Ru3bp8L5Xz8PDQv/71L61YscJm+nvvvadKlSopKioq33UvX75cDRo0UMeOHRUREaHly5cX9hDl6+6779Yvv/yiPXv25FsmZxycI0eOaM2aNebrfPToUUlXTvCGDh0qf39/eXh4qGnTpnrrrbds1nGt16Kw/Pz8VL9+ff3000820wv7ns/OzlZsbKyqVasmLy8vdezYUT/88EOea+hzLsP473//q1GjRqlq1aqqUaOGOf/zzz8320758uXVrVs37du3z2ZbycnJGjJkiGrUqCF3d3cFBgbq3nvvNY+bJO3YsUNRUVGqUqWKPD09FRISooceeshmPfbGz9i9e7e6dOkiX19f+fj4qFOnTvr6669tyuTsw1dffaXo6Gj5+fnJ29tb//znP3Xq1KnCHnL1799fe/bs0YEDB2z2bdOmTfl+BhemTUjSmTNnNHjwYFWoUEEVK1bUoEGD8r384Ho/Sw4dOqT77rtPAQEB8vDwUI0aNdS3b1+dPXu2cAcAwE2hXbt2kpTn+8Oewnx+G4ahGTNmqEaNGub3yb59+/J8n+ScR1wt5zM69zqLel569XmPJKWnpysmJkZ169aVu7u7goKCNHHixDxjH6Wnp2vcuHHy8/NT+fLl1aNHD/3222/XPDY5Vq9erbvuuuuaPY6kK+c+//jHP3Tu3DkzFMotMTFRb7zxhs2l/pIUGhqqypUrF3i+LF257D6n17Wfn1+e781XX31VDRs2lLu7u6pVq6bRo0fn+a4p6JgWRX7tbPv27ercubMqVKggLy8vtW/fXl999VWe5bds2aIWLVrIw8NDderU0aJFi+y2oZxL2JcvX27u27p16yRJx48f10MPPSR/f3+5u7urYcOGWrJkSZ5tvfLKK2rYsKG8vLxUqVIltWjRwub89Ny5cxo7dqyCg4Pl7u6uqlWr6u6779auXbvMMvbGbEpLS9P48eMVFBQkd3d33XbbbXrhhRfy/B2Tsw+rV69Wo0aNzLrm7EdhhIeHKyQkJM959fLly9W5c2dVrlzZ7nKFaROStHjxYtWpU0eenp5q2bKlvvzyS7vrK+z77mqZmZmKi4tTvXr15OHhoVtuuUVt27a1+bsXNw8uo7sBBQcHKzw8XO+995552dnnn3+us2
fPqm/fvnr55ZfzLDNixAgtW7ZMQ4YM0WOPPaYjR45owYIF2r17t7766iu5urpKuvJF7+Pjo+joaPn4+GjTpk2aNm2
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABJoAAASlCAYAAADgRbP+AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzde3zP9f//8fuO753MabY57GMORaItc0hOHWRJSUWiMpR8YqXWyTqYKS0lKSmd0KeIj0i+kdOiUj4UhgqhJGxzSmNjZnv+/vDbu73tPTZe23uH2/VycfF+P9+vw+P92nPv93P318nNGGMEAAAAAAAAXCR3VxcAAAAAAACAyoGgCQAAAAAAAJYgaAIAAAAAAIAlCJoAAAAAAABgCYImAAAAAAAAWIKgCQAAAAAAAJYgaAIAAAAAAIAlCJoAAAAAAABgCYImAAAAAAAAWIKgCaikduzYoe7du6t69epyc3PTggULXF2S3aBBgxQeHu6y9c+YMUNubm7avXu3Q/srr7yixo0by8PDQ5GRkZKk8PBwDRo0qMxrHDNmjNzc3Mp8va50Mf3immuu0TXXXGNpPQCAi3chn+2rVq2Sm5ubVq1aVSo1nY+z8UB5sHv3brm5uWnGjBkuq8HZuMjZmLOosVZZcHNz05gxY8p8va5yMf3C1b9rqLwImlCuvfXWW3Jzc1P79u1dXUqFExMToy1btmjcuHH66KOP1KZNm1JfZ0ZGhhITExUREaGAgAD5+vqqZcuWeuqpp7R///5SX//FWLZsmZ588kl17NhR06dP14svvljq68zKytKYMWPK3Ze7m5ub3NzcdP/99zt9/ZlnnrFPc+jQoTKuDgBwLvl/4Of/8/Hx0aWXXqrY2Filp6e7urxyzxXjAenMH/y33367QkND5e3treDgYN1yyy2aP39+maz/YrhizLl48eJyFybl7yR0d3fXn3/+Wej1jIwM+fr6ys3NTbGxsS6oECg7nq4uADiXmTNnKjw8XOvWrdPOnTvVtGlTV5dUIZw4cUJr1qzRM888U2ZfZL/99pu6deumPXv2qG/fvnrggQfk7e2tzZs364MPPtBnn32mX3/9tUxqOZ97771Xd911l2w2m73tq6++kru7uz744AN5e3vb27dv3y5399LJ5LOyspSYmChJhY7GefbZZzVq1KhSWW9x+Pj4aN68eXrrrbcctockffLJJ/Lx8dHJkyddVB0A4HzGjh2rRo0a6eTJk1q9erXefvttLV68WD/99JP8/PzKrI733ntPeXl5JZqnS5cuOnHiRKHvn7JQ1HigNCUkJGjs2LG65JJLNGzYMDVs2FCHDx/W4sWLdccdd2jmzJkaMGBAmdRyPmePi4oaczoba1lp8eLFmjJlitOw6cSJE/L0dN2fuTabTZ988omefPJJh/aKEBoCVuGIJpRbv//+u77//ntNnDhRderU0cyZM11dUpEyMzNdXYKDgwcPSpJq1Khh2TLP9R5Pnz6t22+/Xenp6Vq1apU++eQTjRgxQkOHDtXkyZP122+/qW/fvpbVcrE8PDzk4+PjcGragQMH5OvrW2hQabPZ5OXlVdYlytPTUz4+PmW+3nw33nijMjIy9OWXXzq0f//99/r999/Vs2dPF1UGACiOHj166J577tH999+vGTNm6JFHHtHvv/+uzz//vMh5SmM84+XlVeKwwd3dXT4+PqW2o+dcihoPXChjjE6cOFHk659++qnGjh2rPn366Oeff1ZiYqKGDBmiJ554QitXrtSSJUsUGBhoSS1WOHtcVNSY09lYq6z4+Pi4NGi66aab9MknnxRqnzVrFuMnVBkETSi3Zs6cqZo1a6pnz57q06dPkUHT0aNH9eijjyo8PFw2m00NGjTQwIEDHU7pOXnypMaMGaNLL71UPj4+qlu3rm6//Xbt2rVLUtHnJzs753nQoEEKCAjQrl27dNNNN6latWq6++67JUnffvut+vbtq3/961+y2WwKCw
vTo48+6nSAsW3bNt15552qU6eOfH191axZMz3zzDOSpJUrV8rNzU2fffZZoflmzZolNzc3rVmzxun2GDNmjBo2bChJeuKJJ+Tm5uZwbYSNGzeqR48eCgwMVEBAgK6//nr973//c1hG/mH3X3/9tYYPH67g4GA1aNDA6fokad68edq0aZOeeeYZderUqdDrgYGBGjduXJHzS9KECRN09dVXq3bt2vL19VVUVJQ+/fTTQtMtX75cnTp1Uo0aNRQQEKBmzZrp6aefdphm8uTJuvzyy+Xn56eaNWuqTZs2mjVrVqH3l3/dADc3N02fPl2ZmZn2Uw3yf+bOrkVwvj536tQpjR49WlFRUapevbr8/f3VuXNnrVy50r6M3bt3q06dOpKkxMRE+3rz98w5u0bT6dOn9fzzz6tJkyay2WwKDw/X008/rezsbIfpwsPDdfPNN2v16tVq166dfHx81LhxY/3nP/8558+goPr166tLly4O200683vZqlUrtWzZ0ul8c+fOVVRUlHx9fRUUFKR77rlH+/btKzTdggUL1LJlS/n4+Khly5ZO+7ok5eXladKkSbr88svl4+OjkJAQDRs2TH/99dd538P5+gEAVCXXXXedpDM78qRzj2dK8tn75ZdfqmvXrqpWrZoCAwPVtm1bh89aZ9domj17tqKiouzztGrVSq+//rr99aLGZcX5jsl/X/v27VPv3r0VEBCgOnXq6PHHH1dubu45t9G5xgMl/Q5eunSp2rRpI19fX73zzjtFrvO5555TrVq1NG3aNKc7tqKjo3XzzTcXOf/mzZs1aNAgNW7cWD4+PgoNDdWQIUN0+PBhh+mOHTumRx55xD52CQ4O1g033KANGzbYp9mxY4fuuOMOhYaGysfHRw0aNNBdd92lv//+2+H95Y+LzjXmLOoaTefrL8UZSw8aNEhTpkyRJIfTRPM5u0ZTSca/3333neLi4lSnTh35+/vrtttuswdqxTFgwAClpKRo27Zt9ra0tDR99dVXRR6ZduDAAd13330KCQmRj4+PIiIi9OGHHxaa7ujRoxo0aJCqV6+uGjVqKCYmRkePHnW6zG3btqlPnz6qVauWfHx81KZNGy1cuPC89RenHwDnw6lzKLdmzpyp22+/Xd7e3urfv7/efvtt/fDDD2rbtq19muPHj6tz587aunWrhgwZotatW+vQoUNauHCh9u7dq6CgIOXm5urmm29WcnKy7rrrLo0cOVLHjh3T8uXL9dNPP6lJkyYlru306dOKjo5Wp06dNGHCBPsh6HPnzlVWVpYefPBB1a5dW+vWrdPkyZO1d+9ezZ071z7/5s2b1blzZ3l5eemBBx5QeHi4du3apf/7v//TuHHjdM011ygsLEwzZ87UbbfdVmi7NGnSRB06dHBa2+23364aNWro0UcfVf/+/XXTTTcpICBAkvTzzz+rc+fOCgwM1JNPPikvLy+98847uuaaa/T1118XuhbW8OHDVadOHY0ePfqceznzv7TuvffeEm/LfK+//rp69eqlu+++W6dOndLs2bPVt29fffHFF/a9Pz///LNuvvlmXXHFFRo7dqxsNpt27typ7777zr6c9957Tw8//LD69OmjkSNH6uTJk9q8ebPWrl1b5Jf7Rx99pHfffVfr1q3T+++/L0m6+uqrnU5bnD6XkZGh999/X/3799fQoUN17NgxffDBB4qOjta6desUGRmpOnXq6O2339aDDz6o2267Tbfffrsk6YorrihyG91///368MMP1adPHz322GNau3atkpKStHXr1kJBzc6dO9WnTx/dd999iomJ0bRp0zRo0CBFRUXp8ssvL9bPZMCAARo5cqSOHz+ugIAAnT59WnPnzlVcXJzT0+ZmzJihwYMHq23btkpKSlJ6erpef/11fffdd9q4caN9b+eyZct0xx13qEWLFkpKStLhw4c1ePBgp2HmsGHD7Mt9+OGH9fvvv+vNN9/Uxo0b9d133xV5tNmF9AMAqMzyd67Vrl
3b3lbUeKa4n70zZszQkCFDdPnllys+Pl41atTQxo0btWTJkiI/a5cvX67+/fvr+uuv1/jx4yVJW7du1XfffaeRI0c
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1200x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"from sklearn.model_selection import cross_val_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\".//static//csv//Yamana_Gold_Inc._AUY.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Определяем категориальные и числовые столбцы\n",
|
|||
|
"numerical_cols = [\"Open\", \"Close\", \"High\", \"Low\"]\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для категориальных и числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Volume']\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи регрессии\n",
|
|||
|
"mae_means = []\n",
|
|||
|
"mae_stds = []\n",
|
|||
|
"r2_means = []\n",
|
|||
|
"r2_stds = []\n",
|
|||
|
"\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
|
|||
|
" mae_means.append(mae_scores.mean())\n",
|
|||
|
" mae_stds.append(mae_scores.std())\n",
|
|||
|
" r2_means.append(r2_scores.mean())\n",
|
|||
|
" r2_stds.append(r2_scores.std())\n",
|
|||
|
"\n",
|
|||
|
"# Визуализация результатов для задачи регрессии\n",
|
|||
|
"fig, ax = plt.subplots(1, 2, figsize=(12, 6))\n",
|
|||
|
"\n",
|
|||
|
"ax[0].bar(models_reg.keys(), mae_means, yerr=mae_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[0].set_ylabel('MAE')\n",
|
|||
|
"ax[0].set_title('Mean Absolute Error (MAE) for Regression Models')\n",
|
|||
|
"ax[0].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"ax[1].bar(models_reg.keys(), r2_means, yerr=r2_stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
|
|||
|
"ax[1].set_ylabel('R²')\n",
|
|||
|
"ax[1].set_title('R-squared (R²) for Regression Models')\n",
|
|||
|
"ax[1].yaxis.grid(True)\n",
|
|||
|
"\n",
|
|||
|
"plt.tight_layout()\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Features (X) and binary target (y) for the classification task:\n",
"# y is 1 when a row's Volume is above the mean Volume of the whole dataset, otherwise 0\n",
"# NOTE(review): if numerical_cols contains 'Volume', the target leaks into the features - confirm\n",
"X_class = df[numerical_cols]\n",
"y_class = (df['Volume'] > df['Volume'].mean()).astype(int)\n",
"\n",
"# Candidate classifiers. The tree ensembles are seeded so that re-running the notebook\n",
"# reproduces the same cross-validation scores\n",
"models_class = {\n",
"    \"Logistic Regression\": LogisticRegression(),\n",
"    \"Random Forest Classification\": RandomForestClassifier(random_state=42),\n",
"    \"Gradient Boosting Classification\": GradientBoostingClassifier(random_state=42)\n",
"}\n",
|
|||
|
"\n",
|
|||
|
"# Cross-validated mean and spread (std across folds) of each classification metric per model\n",
"from sklearn.model_selection import cross_validate\n",
"\n",
"accuracy_means = []\n",
"accuracy_stds = []\n",
"precision_means = []\n",
"precision_stds = []\n",
"recall_means = []\n",
"recall_stds = []\n",
"f1_means = []\n",
"f1_stds = []\n",
"\n",
"for name, model in models_class.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    # A single cross_validate call computes all four metrics on the same 5 fitted pipelines,\n",
"    # instead of refitting every model once per metric (20 fits -> 5 fits per model)\n",
"    cv_results = cross_validate(\n",
"        pipeline, X_class, y_class, cv=5,\n",
"        scoring=['accuracy', 'precision', 'recall', 'f1']\n",
"    )\n",
"    accuracy_scores = cv_results['test_accuracy']\n",
"    precision_scores = cv_results['test_precision']\n",
"    recall_scores = cv_results['test_recall']\n",
"    f1_scores = cv_results['test_f1']\n",
"    accuracy_means.append(accuracy_scores.mean())\n",
"    accuracy_stds.append(accuracy_scores.std())\n",
"    precision_means.append(precision_scores.mean())\n",
"    precision_stds.append(precision_scores.std())\n",
"    recall_means.append(recall_scores.mean())\n",
"    recall_stds.append(recall_scores.std())\n",
"    f1_means.append(f1_scores.mean())\n",
"    f1_stds.append(f1_scores.std())\n",
|
|||
|
"\n",
|
|||
|
"# Classification results on a 2x2 grid: bar height = mean CV score, error bar = std across folds\n",
"fig, ax = plt.subplots(2, 2, figsize=(12, 12))\n",
"\n",
"# One row per panel: (axes, means, stds, metric name used for the y-label and the title)\n",
"class_panels = [\n",
"    (ax[0, 0], accuracy_means, accuracy_stds, 'Accuracy'),\n",
"    (ax[0, 1], precision_means, precision_stds, 'Precision'),\n",
"    (ax[1, 0], recall_means, recall_stds, 'Recall'),\n",
"    (ax[1, 1], f1_means, f1_stds, 'F1-score'),\n",
"]\n",
"for panel, means, stds, metric in class_panels:\n",
"    panel.bar(models_class.keys(), means, yerr=stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"    panel.set_ylabel(metric)\n",
"    panel.set_title(f'{metric} for Classification Models')\n",
"    panel.yaxis.grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "aisenv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|