6 changed files with 0 additions and 29182 deletions
--- a/.gitignore
+++ b/.gitignore
@ -208,10 +208,3 @@ kernel/share/jupyter/kernels/python3/logo-64x64.png
 kernel/share/jupyter/kernels/python3/logo-svg.svg
 kernel/share/man/man1/ipython.1
 kernel/share/man/man1/ttx.1
-lab_2/datasets/game_reviews.csv
-lab_2/datasets/laptop.csv
-lab_2/datasets/Popular_PL.csv
-lab_2/datasets/coffee.csv
-lab_2/datasets/car_price_prediction.csv
-lab_3/data/house_data.csv
-kernel/Scripts/tqdm.exe
--- a/lab_2/lab_2.ipynb
+++ b/lab_2/lab_2.ipynb
@ -1,821 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Lab2 PIbd-31 Yakovlev\n",
-    "Загрузим три датасета"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 124,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 532 entries, 0 to 531\n",
-      "Data columns (total 18 columns):\n",
-      " #   Column               Non-Null Count  Dtype  \n",
-      "---  ------               --------------  -----  \n",
-      " 0   brand_name           532 non-null    object \n",
-      " 1   price                532 non-null    int64  \n",
-      " 2   rating               532 non-null    int64  \n",
-      " 3   processor_gen        520 non-null    object \n",
-      " 4   processor_brand      532 non-null    object \n",
-      " 5   processor_segment    528 non-null    object \n",
-      " 6   CPU_mark             532 non-null    object \n",
-      " 7   CPU_performance      532 non-null    object \n",
-      " 8   Graphic_card_memory  530 non-null    object \n",
-      " 9   graphic_card_name    530 non-null    object \n",
-      " 10  graphic_card_num     532 non-null    object \n",
-      " 11  Core                 530 non-null    float64\n",
-      " 12  threads              514 non-null    float64\n",
-      " 13  display_inches       532 non-null    object \n",
-      " 14  ram_storage          532 non-null    int64  \n",
-      " 15  ram_type             532 non-null    object \n",
-      " 16  operating_system     502 non-null    float64\n",
-      " 17  SSD_storage          532 non-null    object \n",
-      "dtypes: float64(3), int64(3), object(12)\n",
-      "memory usage: 74.9+ KB\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 8036 entries, 0 to 8035\n",
-      "Data columns (total 7 columns):\n",
-      " #   Column     Non-Null Count  Dtype  \n",
-      "---  ------     --------------  -----  \n",
-      " 0   Date       8036 non-null   object \n",
-      " 1   Open       8036 non-null   float64\n",
-      " 2   High       8036 non-null   float64\n",
-      " 3   Low        8036 non-null   float64\n",
-      " 4   Close      8036 non-null   float64\n",
-      " 5   Adj Close  8036 non-null   float64\n",
-      " 6   Volume     8036 non-null   int64  \n",
-      "dtypes: float64(5), int64(1), object(1)\n",
-      "memory usage: 439.6+ KB\n",
-      "<class 'pandas.core.frame.DataFrame'>\n",
-      "RangeIndex: 19237 entries, 0 to 19236\n",
-      "Data columns (total 18 columns):\n",
-      " #   Column            Non-Null Count  Dtype  \n",
-      "---  ------            --------------  -----  \n",
-      " 0   ID                19237 non-null  int64  \n",
-      " 1   Price             19237 non-null  int64  \n",
-      " 2   Levy              19237 non-null  object \n",
-      " 3   Manufacturer      19237 non-null  object \n",
-      " 4   Model             19237 non-null  object \n",
-      " 5   Prod. year        19237 non-null  int64  \n",
-      " 6   Category          19237 non-null  object \n",
-      " 7   Leather interior  19237 non-null  object \n",
-      " 8   Fuel type         19237 non-null  object \n",
-      " 9   Engine volume     19237 non-null  object \n",
-      " 10  Mileage           19237 non-null  object \n",
-      " 11  Cylinders         19237 non-null  float64\n",
-      " 12  Gear box type     19237 non-null  object \n",
-      " 13  Drive wheels      19237 non-null  object \n",
-      " 14  Doors             19237 non-null  object \n",
-      " 15  Wheel             19237 non-null  object \n",
-      " 16  Color             19237 non-null  object \n",
-      " 17  Airbags           19237 non-null  int64  \n",
-      "dtypes: float64(1), int64(4), object(13)\n",
-      "memory usage: 2.6+ MB\n"
-     ]
-    }
-   ],
-   "source": [
-    "import pandas as pd\n",
-    "\n",
-    "# Load the three CSV datasets (relative paths) into df, df2 and df3.\n",
-    "df, df2, df3 = map(\n",
-    "    pd.read_csv,\n",
-    "    (\n",
-    "        \"datasets/laptop.csv\",\n",
-    "        \"datasets/coffee.csv\",\n",
-    "        \"datasets/car_price_prediction.csv\",\n",
-    "    ),\n",
-    ")\n",
-    "\n",
-    "# Print the column / dtype / non-null summary of each frame in load order.\n",
-    "for loaded_frame in (df, df2, df3):\n",
-    "    loaded_frame.info()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Проблемная область\n",
-    "Первый датасет позволяет проанализировать данные, и понять какие ноутбуки имеют превосходство на рынке техники, и какие чаще всего выбирают пользователи.\n",
-    "Второй датасет позволяет при помощи данных акций за последние 25 лет спрогнозировать будущие показатели акций кофейни Starbucks\n",
-    "Третий датасет позволяет проанализировать данные, и спрогнозировать категорию цены для машины, по ее комплектующим.\n",
-    "#### Анализ набора данных\n",
-    "Объекты наблюдения - игровые ноутбуки, акции, машины\n",
-    "Атрибуты - \n",
-    "1. Имя бренда, цена, рейтинг, поколение процессора, марка процессора, сегмент процессора, специальная метка, производительность процессора, видеокарта, видеопамять, номер видеокарты, кол-во потоков видеокарты, размер дисплея, оперативная память, тип оперативной памяти, ОС, SSD.\n",
-    "2. Дата, начальная цена за день, максимальная цена, минимальная цена, цена на момент закрытия продаж, скорректированная цена на момент закрытия, объем торговли акций за день.\n",
-    "3. Цена обслуживания, производитель, модель, год выпуска, категория, кожаный салон, тип топлива, объем двигателя.\n",
-    "Связи между объектами - нет\n",
-    "#### Бизнес-цели\n",
-    "1. Какие модели игровых ноутбуков более выгодно продавать магазинам техники. Какие комплектующие наиболее популярны у производителей и покупателей, для дальнейшего увеличения производства данных комплектующих.\n",
-    "2. Прогноз цен для дальнейшей покупки и продажи акций. Прогнозирование для предотвращения упадка.\n",
-    "3. Для составления списка лучших моделей автомобилей. Определение наилучшего бюджетного автомобиля, который не будет часто ломаться и приносить убытки.\n",
-    "#### Примеры целей технического проекта. Что поступает на вход, что является целевым признаком.\n",
-    "На входе всегда датасет, целевые признаки:\n",
-    "1. Рейтинг ноутбука\n",
-    "2. Максимальная цена за день\n",
-    "3. Цена обслуживания\n",
-    "#### Проблемы набора данных и их решения\n",
-    "1. Возможны устаревшие данные. Для решения данной проблемы требуется удаление самых старых записей, и добавление более новых.\n",
-    "2. Возможны выбросы. Решить эту проблему помогает удаление таких выбросов. В маленькой выборке не рекомендуется удалять выбросы. В большой выборке выбросы усредняются.\n",
-    "#### Качество набора данных\n",
-    "Наборы данных содержат достаточно примеров и признаков для обучения модели. Учтены различные ситуации проблемной области. Данные соответствуют данным, которые будут\n",
-    "подаваться в производственной среде. Все метки согласованы.\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Поиск аномалий"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 125,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "               price      rating        Core     threads  ram_storage  \\\n",
-      "count     532.000000  532.000000  530.000000  514.000000   532.000000   \n",
-      "mean   107684.492481   67.781955    9.035849   15.089494    15.676692   \n",
-      "std     80187.648965    8.161356    4.413487    5.216162     8.901257   \n",
-      "min     30999.000000   43.000000    2.000000    4.000000     4.000000   \n",
-      "25%     62371.750000   63.000000    6.000000   12.000000     8.000000   \n",
-      "50%     83745.000000   66.000000    8.000000   16.000000    16.000000   \n",
-      "75%    114040.000000   72.000000   10.000000   16.000000    16.000000   \n",
-      "max    599990.000000   98.000000   24.000000   32.000000    64.000000   \n",
-      "\n",
-      "       operating_system  \n",
-      "count        502.000000  \n",
-      "mean          10.842629  \n",
-      "std            0.364513  \n",
-      "min           10.000000  \n",
-      "25%           11.000000  \n",
-      "50%           11.000000  \n",
-      "75%           11.000000  \n",
-      "max           11.000000  \n",
-      "              Open         High          Low        Close    Adj Close  \\\n",
-      "count  8036.000000  8036.000000  8036.000000  8036.000000  8036.000000   \n",
-      "mean     30.054280    30.351487    29.751322    30.058857    26.674025   \n",
-      "std      33.615577    33.906613    33.314569    33.615911    31.728090   \n",
-      "min       0.328125     0.347656     0.320313     0.335938     0.260703   \n",
-      "25%       4.392031     4.531250     4.304922     4.399610     3.414300   \n",
-      "50%      13.325000    13.493750    13.150000    13.330000    10.352452   \n",
-      "75%      55.250000    55.722501    54.852499    55.267499    47.464829   \n",
-      "max     126.080002   126.320000   124.809998   126.059998   118.010414   \n",
-      "\n",
-      "             Volume  \n",
-      "count  8.036000e+03  \n",
-      "mean   1.470459e+07  \n",
-      "std    1.340021e+07  \n",
-      "min    1.504000e+06  \n",
-      "25%    7.817750e+06  \n",
-      "50%    1.169815e+07  \n",
-      "75%    1.778795e+07  \n",
-      "max    5.855088e+08  \n",
-      "                 ID         Price    Prod. year     Cylinders       Airbags\n",
-      "count  1.923700e+04  1.923700e+04  19237.000000  19237.000000  19237.000000\n",
-      "mean   4.557654e+07  1.855593e+04   2010.912824      4.582991      6.582627\n",
-      "std    9.365914e+05  1.905813e+05      5.668673      1.199933      4.320168\n",
-      "min    2.074688e+07  1.000000e+00   1939.000000      1.000000      0.000000\n",
-      "25%    4.569837e+07  5.331000e+03   2009.000000      4.000000      4.000000\n",
-      "50%    4.577231e+07  1.317200e+04   2012.000000      4.000000      6.000000\n",
-      "75%    4.580204e+07  2.207500e+04   2015.000000      4.000000     12.000000\n",
-      "max    4.581665e+07  2.630750e+07   2020.000000     16.000000     16.000000\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Print describe() statistics for each of the three datasets in turn.\n",
-    "for dataset in (df, df2, df3):\n",
-    "    print(dataset.describe())"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "При просмотре вывода не было замечено аномалий в столбцах датасетов."
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Проблема пропущенных данных"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 126,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "DATASET 1\n",
-      "processor_gen процент пустых значений: %2.26\n",
-      "processor_segment процент пустых значений: %0.75\n",
-      "Graphic_card_memory процент пустых значений: %0.38\n",
-      "graphic_card_name процент пустых значений: %0.38\n",
-      "Core процент пустых значений: %0.38\n",
-      "threads процент пустых значений: %3.38\n",
-      "operating_system процент пустых значений: %5.64\n",
-      "DATASET 2\n",
-      "DATASET 3\n"
-     ]
-    }
-   ],
-   "source": [
-    "# For each dataset, list the columns that contain missing values with their share in percent.\n",
-    "for header, dataset in ((\"DATASET 1\", df), (\"DATASET 2\", df2), (\"DATASET 3\", df3)):\n",
-    "    print(header)\n",
-    "    for column in dataset.columns:\n",
-    "        null_rate = dataset[column].isnull().sum() / len(dataset) * 100\n",
-    "        # Columns without gaps are skipped so only problematic columns are reported.\n",
-    "        if null_rate > 0:\n",
-    "            print(f\"{column} процент пустых значений: %{null_rate:.2f}\")"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "В первом датасете были поля с пустыми значениями, в остальных пустых значений не найдено."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 127,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "brand_name             False\n",
-      "price                  False\n",
-      "rating                 False\n",
-      "processor_gen          False\n",
-      "processor_brand        False\n",
-      "processor_segment      False\n",
-      "CPU_mark               False\n",
-      "CPU_performance        False\n",
-      "Graphic_card_memory    False\n",
-      "graphic_card_name      False\n",
-      "graphic_card_num       False\n",
-      "Core                   False\n",
-      "threads                False\n",
-      "display_inches         False\n",
-      "ram_storage            False\n",
-      "ram_type               False\n",
-      "operating_system       False\n",
-      "SSD_storage            False\n",
-      "dtype: bool\n"
-     ]
-    },
-    {
-     "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>brand_name</th>\n",
-       "      <th>price</th>\n",
-       "      <th>rating</th>\n",
-       "      <th>processor_gen</th>\n",
-       "      <th>processor_brand</th>\n",
-       "      <th>processor_segment</th>\n",
-       "      <th>CPU_mark</th>\n",
-       "      <th>CPU_performance</th>\n",
-       "      <th>Graphic_card_memory</th>\n",
-       "      <th>graphic_card_name</th>\n",
-       "      <th>graphic_card_num</th>\n",
-       "      <th>Core</th>\n",
-       "      <th>threads</th>\n",
-       "      <th>display_inches</th>\n",
-       "      <th>ram_storage</th>\n",
-       "      <th>ram_type</th>\n",
-       "      <th>operating_system</th>\n",
-       "      <th>SSD_storage</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>527</th>\n",
-       "      <td>dell</td>\n",
-       "      <td>75500</td>\n",
-       "      <td>63</td>\n",
-       "      <td>4th</td>\n",
-       "      <td>amd</td>\n",
-       "      <td>5</td>\n",
-       "      <td>4600H</td>\n",
-       "      <td>maximum performance</td>\n",
-       "      <td>6 GB</td>\n",
-       "      <td>amd radeon</td>\n",
-       "      <td>other</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>15.6</td>\n",
-       "      <td>8</td>\n",
-       "      <td>DDR4</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>512 GB SSD</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>528</th>\n",
-       "      <td>lenovo</td>\n",
-       "      <td>151990</td>\n",
-       "      <td>75</td>\n",
-       "      <td>10th</td>\n",
-       "      <td>intel</td>\n",
-       "      <td>i7</td>\n",
-       "      <td>10875H</td>\n",
-       "      <td>maximum performance</td>\n",
-       "      <td>8 GB</td>\n",
-       "      <td>nvidia geforce</td>\n",
-       "      <td>other</td>\n",
-       "      <td>8.0</td>\n",
-       "      <td>16.0</td>\n",
-       "      <td>15.6</td>\n",
-       "      <td>16</td>\n",
-       "      <td>DDR4</td>\n",
-       "      <td>10.0</td>\n",
-       "      <td>1 TB SSD</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>529</th>\n",
-       "      <td>lenovo</td>\n",
-       "      <td>46500</td>\n",
-       "      <td>48</td>\n",
-       "      <td>8th</td>\n",
-       "      <td>intel</td>\n",
-       "      <td>i5</td>\n",
-       "      <td>8250U</td>\n",
-       "      <td>ultra-low power</td>\n",
-       "      <td>Integrated</td>\n",
-       "      <td>Intel Integrated</td>\n",
-       "      <td>other</td>\n",
-       "      <td>4.0</td>\n",
-       "      <td>8.0</td>\n",
-       "      <td>other</td>\n",
-       "      <td>4</td>\n",
-       "      <td>DDR4</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>other</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>530</th>\n",
-       "      <td>msi</td>\n",
-       "      <td>109990</td>\n",
-       "      <td>61</td>\n",
-       "      <td>9th</td>\n",
-       "      <td>intel</td>\n",
-       "      <td>i7</td>\n",
-       "      <td>9750H</td>\n",
-       "      <td>maximum performance</td>\n",
-       "      <td>6 GB</td>\n",
-       "      <td>nvidia geforce</td>\n",
-       "      <td>other</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>other</td>\n",
-       "      <td>8</td>\n",
-       "      <td>other</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>other</td>\n",
-       "    </tr>\n",
-       "    <tr>\n",
-       "      <th>531</th>\n",
-       "      <td>hp</td>\n",
-       "      <td>95800</td>\n",
-       "      <td>70</td>\n",
-       "      <td>9th</td>\n",
-       "      <td>intel</td>\n",
-       "      <td>i7</td>\n",
-       "      <td>9750H</td>\n",
-       "      <td>maximum performance</td>\n",
-       "      <td>4 GB</td>\n",
-       "      <td>nvidia geforce</td>\n",
-       "      <td>1650</td>\n",
-       "      <td>6.0</td>\n",
-       "      <td>12.0</td>\n",
-       "      <td>15.6</td>\n",
-       "      <td>8</td>\n",
-       "      <td>DDR4</td>\n",
-       "      <td>0.0</td>\n",
-       "      <td>other</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "</div>"
-      ],
-      "text/plain": [
-       "    brand_name   price  rating processor_gen processor_brand  \\\n",
-       "527       dell   75500      63           4th             amd   \n",
-       "528     lenovo  151990      75          10th           intel   \n",
-       "529     lenovo   46500      48           8th           intel   \n",
-       "530        msi  109990      61           9th           intel   \n",
-       "531         hp   95800      70           9th           intel   \n",
-       "\n",
-       "    processor_segment CPU_mark      CPU_performance Graphic_card_memory  \\\n",
-       "527                 5    4600H  maximum performance                6 GB   \n",
-       "528                i7   10875H  maximum performance                8 GB   \n",
-       "529                i5    8250U      ultra-low power          Integrated   \n",
-       "530                i7    9750H  maximum performance                6 GB   \n",
-       "531                i7    9750H  maximum performance                4 GB   \n",
-       "\n",
-       "    graphic_card_name graphic_card_num  Core  threads display_inches  \\\n",
-       "527        amd radeon            other   6.0     12.0           15.6   \n",
-       "528    nvidia geforce            other   8.0     16.0           15.6   \n",
-       "529  Intel Integrated            other   4.0      8.0          other   \n",
-       "530    nvidia geforce            other   6.0     12.0          other   \n",
-       "531    nvidia geforce             1650   6.0     12.0           15.6   \n",
-       "\n",
-       "     ram_storage ram_type  operating_system SSD_storage  \n",
-       "527            8     DDR4              10.0  512 GB SSD  \n",
-       "528           16     DDR4              10.0    1 TB SSD  \n",
-       "529            4     DDR4               0.0       other  \n",
-       "530            8    other               0.0       other  \n",
-       "531            8     DDR4               0.0       other  "
-      ]
-     },
-     "execution_count": 127,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "# Replace every missing value with 0.\n",
-    "df = df.fillna(value=0)\n",
-    "# Confirm that no column still reports nulls, then preview the last rows.\n",
-    "print(df.isna().any())\n",
-    "df.tail()"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "#### Разбиение на выборки\n",
-    "Для разбиения на выборки сначала уменьшим количество уникальных значений в столбцах с целевыми признаками путем добавления новых столбцов с малым количеством уникальных значений."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 128,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Dataset 1: bucket the 0-100 rating into five equal-width classes labelled 1-5.\n",
-    "rating_edges = list(range(0, 101, 20))\n",
-    "df['new_rating'] = pd.cut(df['rating'], bins=rating_edges, labels=list(range(1, 6)), include_lowest=True)\n",
-    "\n",
-    "# Dataset 2: bucket the High price into ten classes of width 13 labelled 1-10.\n",
-    "high_edges = list(range(0, 131, 13))\n",
-    "df2['new_high'] = pd.cut(df2['High'], bins=high_edges, labels=list(range(1, 11)), include_lowest=True)\n",
-    "\n",
-    "# Dataset 3: keep rows with Price in [10000, 100000], then bucket Price into five classes labelled 1-5.\n",
-    "price_in_range = df3[df3['Price'].between(10000, 100000)]\n",
-    "price_edges = list(range(10000, 100001, 18000))\n",
-    "df3_filtered = price_in_range.assign(\n",
-    "    new_price=pd.cut(price_in_range['Price'], bins=price_edges, labels=list(range(1, 6)), include_lowest=True)\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 129,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from sklearn.model_selection import train_test_split\n",
-    "\n",
-    "def split_stratified_into_train_val_test(\n",
-    "    df_input,\n",
-    "    stratify_colname=\"y\",\n",
-    "    frac_train=0.6,\n",
-    "    frac_val=0.15,\n",
-    "    frac_test=0.25,\n",
-    "    random_state=None,\n",
-    "):\n",
-    "    \"\"\"Split a dataframe into stratified train, validation and test parts.\n",
-    "\n",
-    "    Args:\n",
-    "        df_input: DataFrame to split; all of its columns are kept in the outputs.\n",
-    "        stratify_colname: Name of the column used for stratification.\n",
-    "        frac_train: Fraction of rows for the train part.\n",
-    "        frac_val: Fraction of rows for the validation part.\n",
-    "        frac_test: Fraction of rows for the test part.\n",
-    "        random_state: Seed forwarded to both train_test_split calls.\n",
-    "\n",
-    "    Returns:\n",
-    "        Tuple (df_train, df_val, df_test).\n",
-    "\n",
-    "    Raises:\n",
-    "        ValueError: If the fractions do not sum to 1.0 (within float tolerance)\n",
-    "            or stratify_colname is not a column of df_input.\n",
-    "    \"\"\"\n",
-    "    import math\n",
-    "\n",
-    "    # Exact float equality would reject valid inputs such as 0.7 + 0.2 + 0.1,\n",
-    "    # whose floating-point sum is 0.9999999999999999.\n",
-    "    if not math.isclose(frac_train + frac_val + frac_test, 1.0):\n",
-    "        raise ValueError(\n",
-    "            \"fractions %f, %f, %f do not add up to 1.0\"\n",
-    "            % (frac_train, frac_val, frac_test)\n",
-    "        )\n",
-    "\n",
-    "    if stratify_colname not in df_input.columns:\n",
-    "        raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
-    "\n",
-    "    X = df_input  # Contains all columns.\n",
-    "    y = df_input[\n",
-    "        [stratify_colname]\n",
-    "    ]  # Dataframe of just the column on which to stratify.\n",
-    "\n",
-    "    # Split original dataframe into train and temp dataframes.\n",
-    "    df_train, df_temp, y_train, y_temp = train_test_split(\n",
-    "        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
-    "    )\n",
-    "\n",
-    "    # Split the temp dataframe into val and test dataframes; the test share is\n",
-    "    # expressed relative to the rows that remain after the train part is removed.\n",
-    "    relative_frac_test = frac_test / (frac_val + frac_test)\n",
-    "    df_val, df_test, y_val, y_test = train_test_split(\n",
-    "        df_temp,\n",
-    "        y_temp,\n",
-    "        stratify=y_temp,\n",
-    "        test_size=relative_frac_test,\n",
-    "        random_state=random_state,\n",
-    "    )\n",
-    "\n",
-    "    # Sanity check: no row may be lost or duplicated by the two-stage split.\n",
-    "    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
-    "\n",
-    "    return df_train, df_val, df_test"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "### Выборки датасетов"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 130,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "train df: (319, 19), Val df: (106, 19), Test df:(107, 19)\n",
-      "train df2: (4821, 8), Val df2: (1607, 8), Test df2:(1608, 8)\n",
-      "train df3_filtered: (6931, 19), Val df3_filtered: (2310, 19), Test df3_filtered:(2311, 19)\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Fixed seed so that the 60/20/20 stratified splits are reproducible between runs.\n",
-    "SPLIT_SEED = 42\n",
-    "\n",
-    "df_train1, df_val1, df_test1 = split_stratified_into_train_val_test(\n",
-    "    df, stratify_colname=\"new_rating\", frac_train=0.60, frac_val=0.20, frac_test=0.20,\n",
-    "    random_state=SPLIT_SEED,\n",
-    ")\n",
-    "\n",
-    "df_train2, df_val2, df_test2 = split_stratified_into_train_val_test(\n",
-    "    df2, stratify_colname=\"new_high\", frac_train=0.60, frac_val=0.20, frac_test=0.20,\n",
-    "    random_state=SPLIT_SEED,\n",
-    ")\n",
-    "\n",
-    "df_train3, df_val3, df_test3 = split_stratified_into_train_val_test(\n",
-    "    df3_filtered, stratify_colname=\"new_price\", frac_train=0.60, frac_val=0.20, frac_test=0.20,\n",
-    "    random_state=SPLIT_SEED,\n",
-    ")\n",
-    "\n",
-    "# Report the (rows, columns) shape of every part.\n",
-    "print(f\"train df: {df_train1.shape}, Val df: {df_val1.shape}, Test df:{df_test1.shape}\")\n",
-    "print(f\"train df2: {df_train2.shape}, Val df2: {df_val2.shape}, Test df2:{df_test2.shape}\")\n",
-    "print(f\"train df3_filtered: {df_train3.shape}, Val df3_filtered: {df_val3.shape}, Test df3_filtered:{df_test3.shape}\")\n"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "Было сделано разбиение на три выборки: 60%, 20% и 20% при помощи библиотеки scikit-learn и функции train_test_split. На первый взгляд выборки сбалансированы.\n",
-    "### Приращение методами выборки с избытком (oversampling) и выборки с недостатком (undersampling)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 131,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Выборка до oversampling и undersampling: (319, 6)\n",
-      "new_rating\n",
-      "4    249\n",
-      "3     44\n",
-      "5     26\n",
-      "1      0\n",
-      "2      0\n",
-      "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (750, 6)\n",
-      "new_rating\n",
-      "5    251\n",
-      "3    250\n",
-      "4    249\n",
-      "1      0\n",
-      "2      0\n",
-      "Name: count, dtype: int64\n",
-      "Выборка после undersampling:  (78, 6)\n",
-      "new_rating\n",
-      "3    26\n",
-      "5    26\n",
-      "4    26\n",
-      "1     0\n",
-      "2     0\n",
-      "Name: count, dtype: int64\n"
-     ]
-    }
-   ],
-   "source": [
-    "from imblearn.over_sampling import ADASYN\n",
-    "from imblearn.under_sampling import RandomUnderSampler\n",
-    "\n",
-    "# Keep the numeric columns plus the class label.\n",
-    "# NOTE(review): new_rating is passed both inside the feature frame and as the target - confirm this is intended.\n",
-    "df_train1 = df_train1[['price', 'rating', 'threads', 'ram_storage', 'operating_system', 'new_rating']].copy()\n",
-    "\n",
-    "# Both resamplers are seeded so that the printed class counts are reproducible.\n",
-    "ada = ADASYN(random_state=42)\n",
-    "undersampler = RandomUnderSampler(random_state=42)\n",
-    "\n",
-    "print(\"Выборка до oversampling и undersampling:\", df_train1.shape)\n",
-    "print(df_train1.new_rating.value_counts())\n",
-    "\n",
-    "# Oversampling: ADASYN adds synthetic rows for the under-represented classes.\n",
-    "X_resampled, y_resampled = ada.fit_resample(df_train1, df_train1['new_rating'])\n",
-    "df_train1_adasyn = pd.DataFrame(X_resampled)\n",
-    "\n",
-    "print(\"Выборка после oversampling: \", df_train1_adasyn.shape)\n",
-    "print(df_train1_adasyn.new_rating.value_counts())\n",
-    "\n",
-    "# Undersampling: randomly drop rows of the larger classes.\n",
-    "X_resampled_under, y_resampled_under = undersampler.fit_resample(df_train1, df_train1['new_rating'])\n",
-    "\n",
-    "print(\"Выборка после undersampling: \", pd.DataFrame(X_resampled_under).shape)\n",
-    "print(pd.DataFrame(X_resampled_under).new_rating.value_counts())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 132,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Выборка до oversampling и undersampling: (4821, 7)\n",
-      "new_high\n",
-      "1     2326\n",
-      "2      704\n",
-      "5      519\n",
-      "3      299\n",
-      "8      242\n",
-      "7      222\n",
-      "9      181\n",
-      "4      151\n",
-      "6      146\n",
-      "10      31\n",
-      "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (23144, 7)\n",
-      "new_high\n",
-      "8     2374\n",
-      "6     2368\n",
-      "2     2351\n",
-      "4     2335\n",
-      "1     2326\n",
-      "9     2317\n",
-      "10    2312\n",
-      "5     2256\n",
-      "7     2256\n",
-      "3     2249\n",
-      "Name: count, dtype: int64\n",
-      "Выборка после undersampling:  (310, 7)\n",
-      "new_high\n",
-      "1     31\n",
-      "2     31\n",
-      "3     31\n",
-      "4     31\n",
-      "5     31\n",
-      "6     31\n",
-      "7     31\n",
-      "8     31\n",
-      "9     31\n",
-      "10    31\n",
-      "Name: count, dtype: int64\n"
-     ]
-    }
-   ],
-   "source": [
-    "# Restrict the training split to the price/volume columns and the class label.\n",
-    "df_train2 = df_train2[['Open', 'High', 'new_high', 'Low', 'Close', 'Adj Close', 'Volume']].copy()\n",
-    "target2 = df_train2['new_high']\n",
-    "\n",
-    "print(\"Выборка до oversampling и undersampling:\", df_train2.shape)\n",
-    "print(target2.value_counts())\n",
-    "\n",
-    "# Oversample with the ADASYN instance created in an earlier cell.\n",
-    "X_resampled, y_resampled = ada.fit_resample(df_train2, target2)\n",
-    "df_train2_adasyn = pd.DataFrame(X_resampled)\n",
-    "\n",
-    "print(\"Выборка после oversampling: \", df_train2_adasyn.shape)\n",
-    "print(df_train2_adasyn['new_high'].value_counts())\n",
-    "\n",
-    "# Undersample with the RandomUnderSampler instance created in an earlier cell.\n",
-    "X_resampled_under2, y_resampled_under2 = undersampler.fit_resample(df_train2, target2)\n",
-    "df_train2_under = pd.DataFrame(X_resampled_under2)\n",
-    "\n",
-    "print(\"Выборка после undersampling: \", df_train2_under.shape)\n",
-    "print(df_train2_under['new_high'].value_counts())"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 133,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Выборка до oversampling и undersampling: (6931, 5)\n",
-      "new_price\n",
-      "1    5008\n",
-      "2    1281\n",
-      "3     449\n",
-      "4     136\n",
-      "5      57\n",
-      "Name: count, dtype: int64\n",
-      "Выборка после oversampling:  (25040, 5)\n",
-      "new_price\n",
-      "1    5008\n",
-      "2    5008\n",
-      "3    5008\n",
-      "4    5008\n",
-      "5    5008\n",
-      "Name: count, dtype: int64\n",
-      "Выборка после undersampling:  (285, 5)\n",
-      "new_price\n",
-      "1    57\n",
-      "2    57\n",
-      "3    57\n",
-      "4    57\n",
-      "5    57\n",
-      "Name: count, dtype: int64\n"
-     ]
-    }
-   ],
-   "source": [
-    "from imblearn.over_sampling import SMOTE\n",
-    "\n",
-    "# Restrict the training split to the numeric columns and the class label.\n",
-    "df_train3 = df_train3[['Price', 'new_price','Prod. year' ,'Cylinders' ,'Airbags']].copy()\n",
-    "target3 = df_train3['new_price']\n",
-    "\n",
-    "smote = SMOTE(random_state=42)\n",
-    "\n",
-    "print(\"Выборка до oversampling и undersampling:\", df_train3.shape)\n",
-    "print(target3.value_counts())\n",
-    "\n",
-    "# Oversample with SMOTE.\n",
-    "X_resampled, y_resampled = smote.fit_resample(df_train3, target3)\n",
-    "df_train3_smote = pd.DataFrame(X_resampled)\n",
-    "\n",
-    "print(\"Выборка после oversampling: \", df_train3_smote.shape)\n",
-    "print(df_train3_smote['new_price'].value_counts())\n",
-    "\n",
-    "# Undersample with the RandomUnderSampler instance created in an earlier cell.\n",
-    "X_resampled_under3, y_resampled_under3 = undersampler.fit_resample(df_train3, target3)\n",
-    "df_train3_under = pd.DataFrame(X_resampled_under3)\n",
-    "\n",
-    "print(\"Выборка после undersampling: \", df_train3_under.shape)\n",
-    "print(df_train3_under['new_price'].value_counts())"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "kernel",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
--- a/lab_2/requirements.txt
+++ b/lab_2/requirements.txt
--- a/lab_3/lab_3.ipynb
+++ b/lab_3/lab_3.ipynb
--- a/lab_4/data/house_data.csv
+++ b/lab_4/data/house_data.csv
--- a/lab_4/lab_4.ipynb
+++ b/lab_4/lab_4.ipynb