3 changed files with 747 additions and 8 deletions
--- a/.gitignore
+++ b/.gitignore
@ -16,3 +16,5 @@ static/csv/diabetes.csv
 static/csv/healthcare-dataset-stroke-data.csv
 static/csv/heart_2020_cleaned.csv
 static/csv/neo_v2.csv
+static/csv/Yamana_Gold_Inc._AUY.csv
+static/csv/AgeDataset-V1.csv
--- a/Lab_3/lab3.ipynb
+++ b/Lab_3/lab3.ipynb
@ -0,0 +1,730 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " ## Вариант 13 \n",
+    " https://www.kaggle.com/datasets/nancyalaswad90/yamana-gold-inc-stock-price?resource=download\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 105,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',\n",
+       "       'Day_of_week', 'Month', 'Year'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 105,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "import pandas as pd\n",
+    "\n",
+    "# Load the Yamana Gold Inc. (AUY) CSV.\n",
+    "data = pd.read_csv(\"..//static//csv//Yamana_Gold_Inc._AUY.csv\", sep=\",\")\n",
+    "\n",
+    "# Parse the date column so the .dt accessor can be used below.\n",
+    "data['Date'] = pd.to_datetime(data['Date'])\n",
+    "\n",
+    "# Derive calendar features from the parsed date.\n",
+    "trade_dates = data['Date'].dt\n",
+    "data = data.assign(\n",
+    "    Day_of_week=trade_dates.dayofweek,\n",
+    "    Month=trade_dates.month,\n",
+    "    Year=trade_dates.year,\n",
+    ")\n",
+    "data.columns"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# 1. Бизнес-цели для набора данных по акции компании Yamana Gold Inc.\n",
+    "Цель 1: Прогнозирование изменения цены акции компании.\n",
+    "Прогнозирование цен на акции является одной из ключевых задач в области финансов и инвестирования. Задача состоит в предсказании будущих изменений стоимости акции на основе исторических данных, таких как открытие и закрытие торгов, объемы торгов и другие показатели.\n",
+    "\n",
+    "Цель 2: Оценка волатильности акций компании.\n",
+    "Измерение волатильности позволяет инвесторам оценить риск и принять решения по управлению капиталом. Задача заключается в прогнозировании уровня волатильности на основе исторической динамики цен, объемов торгов и других рыночных факторов.\n",
+    "\n",
+    "# 2. Цели технического проекта для каждой бизнес-цели\n",
+    "Цель 1: Прогнозирование изменения цены акции компании\n",
+    "\n",
+    "Разработать модель машинного обучения для прогнозирования будущих цен акций на основе исторических данных.\n",
+    "Использовать регрессионные модели, такие как линейная регрессия или более сложные модели, например, LSTM (долгая краткосрочная память) для временных рядов.\n",
+    "Цель 2: Оценка волатильности акций компании\n",
+    "\n",
+    "Создать модель, которая будет прогнозировать волатильность на основе исторических данных о ценах.\n",
+    "Использовать методы статистического анализа, такие как вычисление стандартного отклонения, или методы машинного обучения для более точной оценки волатильности.\n",
+    "\n",
+    "# 3. Проверим датасет на пропуски и удалим при необходимости строки с недостающими данными"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 106,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Date           0\n",
+       "Open           0\n",
+       "High           0\n",
+       "Low            0\n",
+       "Close          0\n",
+       "Adj Close      0\n",
+       "Volume         0\n",
+       "Day_of_week    0\n",
+       "Month          0\n",
+       "Year           0\n",
+       "dtype: int64"
+      ]
+     },
+     "execution_count": 106,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Count missing values per column (not displayed: only a cell's last expression is shown).\n",
+    "data.isna().sum()\n",
+    "\n",
+    "# Drop every row that contains at least one missing value.\n",
+    "data = data.dropna(axis=0, how='any')\n",
+    "\n",
+    "# Confirm that no missing values remain.\n",
+    "data.isna().sum()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Конструирование признаков"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 107,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Date</th>\n",
+       "      <th>Open</th>\n",
+       "      <th>High</th>\n",
+       "      <th>Low</th>\n",
+       "      <th>Close</th>\n",
+       "      <th>Adj Close</th>\n",
+       "      <th>Volume</th>\n",
+       "      <th>Day_of_week</th>\n",
+       "      <th>Month</th>\n",
+       "      <th>Year</th>\n",
+       "      <th>Price_Change</th>\n",
+       "      <th>SMA_5</th>\n",
+       "      <th>SMA_20</th>\n",
+       "      <th>STD_5</th>\n",
+       "      <th>STD_20</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>2001-06-22</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>2.806002</td>\n",
+       "      <td>0</td>\n",
+       "      <td>4</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2001</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>2001-06-25</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>3.428571</td>\n",
+       "      <td>2.806002</td>\n",
+       "      <td>0</td>\n",
+       "      <td>0</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2001</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>2001-06-26</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.039837</td>\n",
+       "      <td>0</td>\n",
+       "      <td>1</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2001</td>\n",
+       "      <td>0.285715</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>2001-06-27</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.039837</td>\n",
+       "      <td>0</td>\n",
+       "      <td>2</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2001</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>2001-06-28</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.714286</td>\n",
+       "      <td>3.039837</td>\n",
+       "      <td>0</td>\n",
+       "      <td>3</td>\n",
+       "      <td>6</td>\n",
+       "      <td>2001</td>\n",
+       "      <td>0.000000</td>\n",
+       "      <td>3.6</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>0.156493</td>\n",
+       "      <td>NaN</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "        Date      Open      High       Low     Close  Adj Close  Volume  \\\n",
+       "0 2001-06-22  3.428571  3.428571  3.428571  3.428571   2.806002       0   \n",
+       "1 2001-06-25  3.428571  3.428571  3.428571  3.428571   2.806002       0   \n",
+       "2 2001-06-26  3.714286  3.714286  3.714286  3.714286   3.039837       0   \n",
+       "3 2001-06-27  3.714286  3.714286  3.714286  3.714286   3.039837       0   \n",
+       "4 2001-06-28  3.714286  3.714286  3.714286  3.714286   3.039837       0   \n",
+       "\n",
+       "   Day_of_week  Month  Year  Price_Change  SMA_5  SMA_20     STD_5  STD_20  \n",
+       "0            4      6  2001           NaN    NaN     NaN       NaN     NaN  \n",
+       "1            0      6  2001      0.000000    NaN     NaN       NaN     NaN  \n",
+       "2            1      6  2001      0.285715    NaN     NaN       NaN     NaN  \n",
+       "3            2      6  2001      0.000000    NaN     NaN       NaN     NaN  \n",
+       "4            3      6  2001      0.000000    3.6     NaN  0.156493     NaN  "
+      ]
+     },
+     "execution_count": 107,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "close = data['Close']\n",
+    "rolling_5 = close.rolling(window=5)\n",
+    "rolling_20 = close.rolling(window=20)\n",
+    "\n",
+    "# Difference between the current and the previous closing price.\n",
+    "data['Price_Change'] = close.diff()\n",
+    "\n",
+    "# Rolling means of the close over 5 and 20 rows.\n",
+    "data['SMA_5'] = rolling_5.mean()\n",
+    "data['SMA_20'] = rolling_20.mean()\n",
+    "\n",
+    "# Rolling standard deviations of the close over 5 and 20 rows.\n",
+    "data['STD_5'] = rolling_5.std()\n",
+    "data['STD_20'] = rolling_20.std()\n",
+    "\n",
+    "data.head()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    " Разделение данных на обучающую, контрольную и тестовую выборки"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 108,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "((3150, 10), (1050, 10), (1051, 10))"
+      ]
+     },
+     "execution_count": 108,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.model_selection import train_test_split\n",
+    "\n",
+    "# Make sure 'Date' is datetime and order the rows chronologically before a time-ordered split.\n",
+    "data['Date'] = pd.to_datetime(data['Date'])\n",
+    "data = data.sort_values(by='Date')\n",
+    "\n",
+    "# Feature columns and the target (Price_Change).\n",
+    "feature_columns = ['Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume', 'SMA_5', 'SMA_20', 'STD_5', 'STD_20']\n",
+    "X = data[feature_columns]\n",
+    "y = data['Price_Change']\n",
+    "\n",
+    "# 60% train; the remaining 40% is halved into validation and test.\n",
+    "# shuffle=False keeps the rows in date order.\n",
+    "X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, shuffle=False)\n",
+    "X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, shuffle=False)\n",
+    "\n",
+    "# Report the shape of each split.\n",
+    "X_train.shape, X_val.shape, X_test.shape\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Конструирование признаков для решения задач"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 109,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "close = data['Close']\n",
+    "rolling_5 = close.rolling(window=5)\n",
+    "rolling_20 = close.rolling(window=20)\n",
+    "\n",
+    "# Feature for the price-change task: difference between consecutive closing prices.\n",
+    "data['Price_Change'] = close.diff()\n",
+    "\n",
+    "# Rolling means and standard deviations of the close over 5 and 20 rows.\n",
+    "data['SMA_5'] = rolling_5.mean()\n",
+    "data['SMA_20'] = rolling_20.mean()\n",
+    "data['STD_5'] = rolling_5.std()\n",
+    "data['STD_20'] = rolling_20.std()\n",
+    "\n",
+    "# Feature for the volatility task: 5-row rolling standard deviation of the close.\n",
+    "data['Volatility'] = rolling_5.std()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Подготовка признаков: one-hot encoding, дискретизация, синтез признаков, масштабирование\n",
+    "One-hot encoding: Применим для категориальных признаков (например, день недели).\n",
+    "Масштабирование: Стандартизируем числовые признаки."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 110,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "   Day_of_week  Month\n",
+      "0            4      6\n",
+      "1            0      6\n",
+      "2            1      6\n",
+      "3            2      6\n",
+      "4            3      6\n"
+     ]
+    },
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Close</th>\n",
+       "      <th>SMA_5</th>\n",
+       "      <th>SMA_20</th>\n",
+       "      <th>STD_5</th>\n",
+       "      <th>STD_20</th>\n",
+       "      <th>Day_of_week_1</th>\n",
+       "      <th>Day_of_week_2</th>\n",
+       "      <th>Day_of_week_3</th>\n",
+       "      <th>Day_of_week_4</th>\n",
+       "      <th>Month_2</th>\n",
+       "      <th>Month_3</th>\n",
+       "      <th>Month_4</th>\n",
+       "      <th>Month_5</th>\n",
+       "      <th>Month_6</th>\n",
+       "      <th>Month_7</th>\n",
+       "      <th>Month_8</th>\n",
+       "      <th>Month_9</th>\n",
+       "      <th>Month_10</th>\n",
+       "      <th>Month_11</th>\n",
+       "      <th>Month_12</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>-0.721096</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.507666</td>\n",
+       "      <td>-0.507962</td>\n",
+       "      <td>-0.502320</td>\n",
+       "      <td>1.999048</td>\n",
+       "      <td>-0.28793</td>\n",
+       "      <td>-0.309491</td>\n",
+       "      <td>-0.300916</td>\n",
+       "      <td>-0.297137</td>\n",
+       "      <td>3.335719</td>\n",
+       "      <td>-0.30429</td>\n",
+       "      <td>-0.311702</td>\n",
+       "      <td>-0.296377</td>\n",
+       "      <td>-0.311335</td>\n",
+       "      <td>-0.298274</td>\n",
+       "      <td>-0.303543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>-0.721096</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.507666</td>\n",
+       "      <td>-0.507962</td>\n",
+       "      <td>-0.502320</td>\n",
+       "      <td>-0.500238</td>\n",
+       "      <td>-0.28793</td>\n",
+       "      <td>-0.309491</td>\n",
+       "      <td>-0.300916</td>\n",
+       "      <td>-0.297137</td>\n",
+       "      <td>3.335719</td>\n",
+       "      <td>-0.30429</td>\n",
+       "      <td>-0.311702</td>\n",
+       "      <td>-0.296377</td>\n",
+       "      <td>-0.311335</td>\n",
+       "      <td>-0.298274</td>\n",
+       "      <td>-0.303543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>-0.660890</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>1.969800</td>\n",
+       "      <td>-0.507962</td>\n",
+       "      <td>-0.502320</td>\n",
+       "      <td>-0.500238</td>\n",
+       "      <td>-0.28793</td>\n",
+       "      <td>-0.309491</td>\n",
+       "      <td>-0.300916</td>\n",
+       "      <td>-0.297137</td>\n",
+       "      <td>3.335719</td>\n",
+       "      <td>-0.30429</td>\n",
+       "      <td>-0.311702</td>\n",
+       "      <td>-0.296377</td>\n",
+       "      <td>-0.311335</td>\n",
+       "      <td>-0.298274</td>\n",
+       "      <td>-0.303543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>-0.660890</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.507666</td>\n",
+       "      <td>1.968649</td>\n",
+       "      <td>-0.502320</td>\n",
+       "      <td>-0.500238</td>\n",
+       "      <td>-0.28793</td>\n",
+       "      <td>-0.309491</td>\n",
+       "      <td>-0.300916</td>\n",
+       "      <td>-0.297137</td>\n",
+       "      <td>3.335719</td>\n",
+       "      <td>-0.30429</td>\n",
+       "      <td>-0.311702</td>\n",
+       "      <td>-0.296377</td>\n",
+       "      <td>-0.311335</td>\n",
+       "      <td>-0.298274</td>\n",
+       "      <td>-0.303543</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>-0.660890</td>\n",
+       "      <td>-0.686033</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.269917</td>\n",
+       "      <td>NaN</td>\n",
+       "      <td>-0.507666</td>\n",
+       "      <td>-0.507962</td>\n",
+       "      <td>1.990763</td>\n",
+       "      <td>-0.500238</td>\n",
+       "      <td>-0.28793</td>\n",
+       "      <td>-0.309491</td>\n",
+       "      <td>-0.300916</td>\n",
+       "      <td>-0.297137</td>\n",
+       "      <td>3.335719</td>\n",
+       "      <td>-0.30429</td>\n",
+       "      <td>-0.311702</td>\n",
+       "      <td>-0.296377</td>\n",
+       "      <td>-0.311335</td>\n",
+       "      <td>-0.298274</td>\n",
+       "      <td>-0.303543</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "      Close     SMA_5  SMA_20     STD_5  STD_20  Day_of_week_1  Day_of_week_2  \\\n",
+       "0 -0.721096       NaN     NaN       NaN     NaN      -0.507666      -0.507962   \n",
+       "1 -0.721096       NaN     NaN       NaN     NaN      -0.507666      -0.507962   \n",
+       "2 -0.660890       NaN     NaN       NaN     NaN       1.969800      -0.507962   \n",
+       "3 -0.660890       NaN     NaN       NaN     NaN      -0.507666       1.968649   \n",
+       "4 -0.660890 -0.686033     NaN -0.269917     NaN      -0.507666      -0.507962   \n",
+       "\n",
+       "   Day_of_week_3  Day_of_week_4  Month_2   Month_3   Month_4   Month_5  \\\n",
+       "0      -0.502320       1.999048 -0.28793 -0.309491 -0.300916 -0.297137   \n",
+       "1      -0.502320      -0.500238 -0.28793 -0.309491 -0.300916 -0.297137   \n",
+       "2      -0.502320      -0.500238 -0.28793 -0.309491 -0.300916 -0.297137   \n",
+       "3      -0.502320      -0.500238 -0.28793 -0.309491 -0.300916 -0.297137   \n",
+       "4       1.990763      -0.500238 -0.28793 -0.309491 -0.300916 -0.297137   \n",
+       "\n",
+       "    Month_6  Month_7   Month_8   Month_9  Month_10  Month_11  Month_12  \n",
+       "0  3.335719 -0.30429 -0.311702 -0.296377 -0.311335 -0.298274 -0.303543  \n",
+       "1  3.335719 -0.30429 -0.311702 -0.296377 -0.311335 -0.298274 -0.303543  \n",
+       "2  3.335719 -0.30429 -0.311702 -0.296377 -0.311335 -0.298274 -0.303543  \n",
+       "3  3.335719 -0.30429 -0.311702 -0.296377 -0.311335 -0.298274 -0.303543  \n",
+       "4  3.335719 -0.30429 -0.311702 -0.296377 -0.311335 -0.298274 -0.303543  "
+      ]
+     },
+     "execution_count": 110,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.preprocessing import StandardScaler\n",
+    "import pandas as pd\n",
+    "\n",
+    "# Make sure 'Date' is datetime in case it has not been converted yet.\n",
+    "data['Date'] = pd.to_datetime(data['Date'])\n",
+    "\n",
+    "# Calendar features used as categorical inputs.\n",
+    "data['Day_of_week'] = data['Date'].dt.dayofweek\n",
+    "data['Month'] = data['Date'].dt.month\n",
+    "\n",
+    "# Show that the calendar columns are present.\n",
+    "print(data[['Day_of_week', 'Month']].head())\n",
+    "\n",
+    "# Features and target for the price-change task.\n",
+    "X = data[['Close', 'SMA_5', 'SMA_20', 'STD_5', 'STD_20', 'Day_of_week', 'Month']]\n",
+    "y = data['Price_Change']\n",
+    "\n",
+    "# One-hot encode the categorical columns; drop_first=True omits the first level of each.\n",
+    "X = pd.get_dummies(X, columns=['Day_of_week', 'Month'], drop_first=True)\n",
+    "\n",
+    "# Standardize every column of X. Note that this includes the one-hot columns,\n",
+    "# and that the scaler is fitted on all rows rather than on the training split only.\n",
+    "scaler = StandardScaler()\n",
+    "X_scaled = scaler.fit_transform(X)\n",
+    "\n",
+    "# Wrap the scaled array in a DataFrame that keeps X's row labels, so that later\n",
+    "# index-aligned operations (e.g. pd.concat with y_train) pair up the right rows.\n",
+    "X_scaled_df = pd.DataFrame(X_scaled, columns=X.columns, index=X.index)\n",
+    "\n",
+    "# Preview the result.\n",
+    "X_scaled_df.head()\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 111,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "c:\\Users\\alexk\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
+      "  warnings.warn(\n"
+     ]
+    }
+   ],
+   "source": [
+    "import featuretools as ft\n",
+    "\n",
+    "# Build an EntitySet holding the single stock_data dataframe, indexed by 'Date'.\n",
+    "es = ft.EntitySet(id=\"stock_prices\").add_dataframe(\n",
+    "    dataframe_name=\"stock_data\",\n",
+    "    dataframe=data,\n",
+    "    index=\"Date\",\n",
+    ")\n",
+    "\n",
+    "# Generate features automatically for that dataframe.\n",
+    "feature_matrix, feature_defs = ft.dfs(entityset=es, target_dataframe_name=\"stock_data\")\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Оценка качества признаков\n",
+    "Оценка признаков на основе предсказательной способности модели и других критериев."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 112,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "5251\n",
+      "3150\n"
+     ]
+    },
+    {
+     "data": {
+      "text/plain": [
+       "(np.float64(0.05230198011754029), 0.5415652186272203)"
+      ]
+     },
+     "execution_count": 112,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.linear_model import LinearRegression\n",
+    "from sklearn.metrics import mean_squared_error, r2_score\n",
+    "\n",
+    "# Row counts of the scaled features and of the training target before alignment.\n",
+    "print(X_scaled_df.shape[0])\n",
+    "print(y_train.shape[0])\n",
+    "\n",
+    "# Align features and target on their index, then drop rows with any NaN:\n",
+    "# this removes rows that have no y_train value and rows with undefined rolling features.\n",
+    "df = pd.concat([X_scaled_df, y_train], axis=1).dropna()\n",
+    "X_scaled_df = df.drop(columns=y_train.name)\n",
+    "y_train = df[y_train.name]\n",
+    "\n",
+    "# Give both the same positional index.\n",
+    "y_train = y_train.reset_index(drop=True)\n",
+    "X_scaled_df = X_scaled_df.reset_index(drop=True)\n",
+    "\n",
+    "# Fit a linear regression on the aligned data.\n",
+    "model = LinearRegression()\n",
+    "model.fit(X_scaled_df, y_train)\n",
+    "\n",
+    "# Predict on the same rows the model was fitted on.\n",
+    "y_pred = model.predict(X_scaled_df)\n",
+    "\n",
+    "# In-sample (training) metrics: no held-out data is scored here.\n",
+    "mse = mean_squared_error(y_train, y_pred)\n",
+    "r2 = r2_score(y_train, y_pred)\n",
+    "\n",
+    "mse, r2\n"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "MSE = 0.0523: средний квадрат ошибки прогноза на обучающей выборке равен 0.0523, то есть типичная ошибка (RMSE) составляет около 0.23.\n",
+    "\n",
+    "R² = 0.5416: Модель объясняет примерно 54.16% изменчивости целевой переменной на обучающей выборке.\n",
+    "\n",
+    "Визуализируем"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.12.5"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
--- a/lab_1/lab1.ipynb
+++ b/lab_1/lab1.ipynb
@ -10,16 +10,23 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 13,
+   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Index(['Id', 'Name', 'Short description', 'Gender', 'Country', 'Occupation',\n",
-      "       'Birth year', 'Death year', 'Manner of death', 'Age of death'],\n",
-      "      dtype='object')\n"
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: './/static//csv//csvLab1.csv'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[1], line 4\u001b[0m\n\u001b[0;32m      1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mpandas\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mpd\u001b[39;00m\n\u001b[0;32m      2\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[1;32m----> 4\u001b[0m df \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mread_csv\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m.//static//csv//csvLab1.csv\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msep\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43m,\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m      5\u001b[0m \u001b[38;5;28mprint\u001b[39m(df\u001b[38;5;241m.\u001b[39mcolumns)\n",
+      "File \u001b[1;32mc:\\Users\\alexk\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1026\u001b[0m, in \u001b[0;36mread_csv\u001b[1;34m(filepath_or_buffer, sep, delimiter, header, names, index_col, usecols, dtype, engine, converters, true_values, false_values, skipinitialspace, skiprows, skipfooter, nrows, na_values, keep_default_na, na_filter, verbose, skip_blank_lines, parse_dates, infer_datetime_format, keep_date_col, date_parser, date_format, dayfirst, cache_dates, iterator, chunksize, compression, thousands, decimal, lineterminator, quotechar, quoting, doublequote, escapechar, comment, encoding, encoding_errors, dialect, on_bad_lines, delim_whitespace, low_memory, memory_map, float_precision, storage_options, dtype_backend)\u001b[0m\n\u001b[0;32m   1013\u001b[0m kwds_defaults \u001b[38;5;241m=\u001b[39m _refine_defaults_read(\n\u001b[0;32m   1014\u001b[0m     dialect,\n\u001b[0;32m   1015\u001b[0m     delimiter,\n\u001b[1;32m   (...)\u001b[0m\n\u001b[0;32m   1022\u001b[0m     dtype_backend\u001b[38;5;241m=\u001b[39mdtype_backend,\n\u001b[0;32m   1023\u001b[0m )\n\u001b[0;32m   1024\u001b[0m kwds\u001b[38;5;241m.\u001b[39mupdate(kwds_defaults)\n\u001b[1;32m-> 1026\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_read\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\alexk\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:620\u001b[0m, in \u001b[0;36m_read\u001b[1;34m(filepath_or_buffer, kwds)\u001b[0m\n\u001b[0;32m    617\u001b[0m _validate_names(kwds\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnames\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m))\n\u001b[0;32m    619\u001b[0m \u001b[38;5;66;03m# Create the parser.\u001b[39;00m\n\u001b[1;32m--> 620\u001b[0m parser \u001b[38;5;241m=\u001b[39m \u001b[43mTextFileReader\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfilepath_or_buffer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwds\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    622\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m chunksize \u001b[38;5;129;01mor\u001b[39;00m iterator:\n\u001b[0;32m    623\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m parser\n",
+      "File \u001b[1;32mc:\\Users\\alexk\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1620\u001b[0m, in \u001b[0;36mTextFileReader.__init__\u001b[1;34m(self, f, engine, **kwds)\u001b[0m\n\u001b[0;32m   1617\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39moptions[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m kwds[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mhas_index_names\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m   1619\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles: IOHandles \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m-> 1620\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_engine \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_make_engine\u001b[49m\u001b[43m(\u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mengine\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\alexk\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\io\\parsers\\readers.py:1880\u001b[0m, in \u001b[0;36mTextFileReader._make_engine\u001b[1;34m(self, f, engine)\u001b[0m\n\u001b[0;32m   1878\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m mode:\n\u001b[0;32m   1879\u001b[0m         mode \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m-> 1880\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;241m=\u001b[39m \u001b[43mget_handle\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   1881\u001b[0m \u001b[43m    \u001b[49m\u001b[43mf\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1882\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1883\u001b[0m \u001b[43m    \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1884\u001b[0m \u001b[43m    \u001b[49m\u001b[43mcompression\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcompression\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1885\u001b[0m \u001b[43m    \u001b[49m\u001b[43mmemory_map\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmemory_map\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1886\u001b[0m \u001b[43m    \u001b[49m\u001b[43mis_text\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mis_text\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1887\u001b[0m \u001b[43m    \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mencoding_errors\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstrict\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1888\u001b[0m \u001b[43m    \u001b[49m\u001b[43mstorage_options\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstorage_options\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m   1889\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   
1890\u001b[0m \u001b[38;5;28;01massert\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m   1891\u001b[0m f \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandles\u001b[38;5;241m.\u001b[39mhandle\n",
+      "File \u001b[1;32mc:\\Users\\alexk\\AppData\\Local\\Programs\\Python\\Python312\\Lib\\site-packages\\pandas\\io\\common.py:873\u001b[0m, in \u001b[0;36mget_handle\u001b[1;34m(path_or_buf, mode, encoding, compression, memory_map, is_text, errors, storage_options)\u001b[0m\n\u001b[0;32m    868\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(handle, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m    869\u001b[0m     \u001b[38;5;66;03m# Check whether the filename is to be opened in binary mode.\u001b[39;00m\n\u001b[0;32m    870\u001b[0m     \u001b[38;5;66;03m# Binary mode does not support 'encoding' and 'newline'.\u001b[39;00m\n\u001b[0;32m    871\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mencoding \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mb\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;129;01min\u001b[39;00m ioargs\u001b[38;5;241m.\u001b[39mmode:\n\u001b[0;32m    872\u001b[0m         \u001b[38;5;66;03m# Encoding\u001b[39;00m\n\u001b[1;32m--> 873\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\n\u001b[0;32m    874\u001b[0m \u001b[43m            \u001b[49m\u001b[43mhandle\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    875\u001b[0m \u001b[43m            \u001b[49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mmode\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    876\u001b[0m \u001b[43m            \u001b[49m\u001b[43mencoding\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mioargs\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mencoding\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    877\u001b[0m \u001b[43m            \u001b[49m\u001b[43merrors\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merrors\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m    878\u001b[0m \u001b[43m            
\u001b[49m\u001b[43mnewline\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m    879\u001b[0m \u001b[43m        \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    880\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m    881\u001b[0m         \u001b[38;5;66;03m# Binary mode\u001b[39;00m\n\u001b[0;32m    882\u001b[0m         handle \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mopen\u001b[39m(handle, ioargs\u001b[38;5;241m.\u001b[39mmode)\n",
+      "\u001b[1;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: './/static//csv//csvLab1.csv'"
     ]
    }
   ],
@ -186,7 +193,7 @@
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "MIiLabs",
+   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },