Compare commits

...

18 Commits
main ... lab5-6

Author SHA1 Message Date
f08c12ac81 lab5-6 2025-02-28 16:02:44 +04:00
338e0b0ad8 feat(lab-4): make grid search 2024-12-14 15:01:59 +04:00
83031d3667 feat(lab-4): finish pipeline 2024-12-14 12:43:37 +04:00
5a6a48e622 feat(lab-4): add prediction output 2024-12-14 10:14:47 +04:00
75b0e0f580 feat(lab-4): r2 0,64 2024-12-12 23:48:52 +04:00
5ab313468c feat(lab-4): make pipeline 2024-12-07 13:00:14 +04:00
bd8c7a6d2b feat(lab3): finish preps 2024-12-07 10:51:50 +04:00
292b43e934 fix(lab3): fix age_create 2024-12-07 10:49:03 +04:00
daa238663b feat(lab3): add oversampling 2024-12-07 01:40:37 +04:00
59b6a164c8 feat(lab3): add featuretools 2024-12-07 00:18:42 +04:00
f77a5e5335 feat(lab3): make lab3 2024-12-07 00:08:27 +04:00
7aa7bd2f42 lab3 2024-12-06 18:43:41 +04:00
11ced38915 feat(lab-2): fix gitignore 2024-11-23 15:40:33 +04:00
c1ec962e77 feat(lab-2): add docs 2024-11-23 15:23:48 +04:00
f7672b7625 feat(lab-2): do lab-2, part 2 2024-11-23 15:06:07 +04:00
f249d643dc Merge branch 'main' into lab2
merge main into lab2
2024-11-10 15:10:02 +04:00
0b9d379e16 feat(lab-2): do lab-2, part 1 2024-11-10 14:56:44 +04:00
e3ad2174f2 feat(lab1): do lab1 2024-10-26 13:07:42 +04:00
25 changed files with 181956 additions and 2 deletions

4
.gitignore vendored
View File

@ -275,4 +275,6 @@ cython_debug/
# JS
node_modules/
test.csv
test.csv
описания_датасетов/.~lock.cars.odt#
описания_датасетов/.~lock.houses.odt#

14331
data/car-price-prediction.csv Normal file

File diff suppressed because it is too large Load Diff

19238
data/car_price_prediction.csv Normal file

File diff suppressed because it is too large Load Diff

244
data/dollar.csv Normal file
View File

@ -0,0 +1,244 @@
"my_date","my_value","bullet","bulletClass","label"
"28.03.2023","76.5662","","",""
"31.03.2023","77.0863","","",""
"01.04.2023","77.3233","","",""
"04.04.2023","77.9510","","",""
"05.04.2023","79.3563","","",""
"06.04.2023","79.4961","","",""
"07.04.2023","80.6713","","",""
"08.04.2023","82.3988","","",""
"11.04.2023","81.7441","","",""
"12.04.2023","82.1799","","",""
"13.04.2023","82.0934","","",""
"14.04.2023","81.6758","","",""
"15.04.2023","81.5045","","",""
"18.04.2023","81.6279","","",""
"19.04.2023","81.6028","","",""
"20.04.2023","81.6549","","",""
"21.04.2023","81.6188","","",""
"22.04.2023","81.4863","","",""
"25.04.2023","81.2745","","",""
"26.04.2023","81.5499","","",""
"27.04.2023","81.6274","","",""
"28.04.2023","81.5601","","",""
"29.04.2023","80.5093","","",""
"03.05.2023","79.9609","","",""
"04.05.2023","79.3071","","",""
"05.05.2023","78.6139","","",""
"06.05.2023","76.8207","","",""
"11.05.2023","76.6929","","",""
"12.05.2023","75.8846","round","min-pulsating-bullet","мин"
"13.05.2023","77.2041","","",""
"16.05.2023","79.1004","","",""
"17.05.2023","79.9798","","",""
"18.05.2023","80.7642","","",""
"19.05.2023","80.0366","","",""
"20.05.2023","79.9093","","",""
"23.05.2023","79.9379","","",""
"24.05.2023","80.1665","","",""
"25.05.2023","79.9669","","",""
"26.05.2023","79.9841","","",""
"27.05.2023","79.9667","","",""
"30.05.2023","80.0555","","",""
"31.05.2023","80.6872","","",""
"01.06.2023","80.9942","","",""
"02.06.2023","80.9657","","",""
"03.06.2023","80.8756","","",""
"06.06.2023","81.3294","","",""
"07.06.2023","81.2502","","",""
"08.06.2023","81.4581","","",""
"09.06.2023","82.0930","","",""
"10.06.2023","82.6417","","",""
"14.06.2023","83.6405","","",""
"15.06.2023","84.3249","","",""
"16.06.2023","83.9611","","",""
"17.06.2023","83.6498","","",""
"20.06.2023","83.9866","","",""
"21.06.2023","84.2336","","",""
"22.06.2023","84.2467","","",""
"23.06.2023","83.6077","","",""
"24.06.2023","84.0793","","",""
"27.06.2023","84.6642","","",""
"28.06.2023","85.0504","","",""
"29.06.2023","85.6192","","",""
"30.06.2023","87.0341","","",""
"01.07.2023","88.3844","","",""
"04.07.2023","89.3255","","",""
"05.07.2023","89.5450","","",""
"06.07.2023","90.3380","","",""
"07.07.2023","92.5695","","",""
"08.07.2023","91.6879","","",""
"11.07.2023","91.4931","","",""
"12.07.2023","90.5045","","",""
"13.07.2023","90.6253","","",""
"14.07.2023","90.1757","","",""
"15.07.2023","90.1190","","",""
"18.07.2023","90.4217","","",""
"19.07.2023","90.6906","","",""
"20.07.2023","91.2046","","",""
"21.07.2023","90.8545","","",""
"22.07.2023","90.3846","","",""
"25.07.2023","90.4890","","",""
"26.07.2023","90.0945","","",""
"27.07.2023","90.0468","","",""
"28.07.2023","90.0225","","",""
"29.07.2023","90.9783","","",""
"01.08.2023","91.5923","","",""
"02.08.2023","91.7755","","",""
"03.08.2023","92.8410","","",""
"04.08.2023","93.7792","","",""
"05.08.2023","94.8076","","",""
"08.08.2023","96.5668","","",""
"09.08.2023","96.0755","","",""
"10.08.2023","97.3999","","",""
"11.08.2023","97.2794","","",""
"12.08.2023","98.2066","","",""
"15.08.2023","101.0399","","",""
"16.08.2023","97.4217","","",""
"17.08.2023","96.7045","","",""
"18.08.2023","93.7460","","",""
"19.08.2023","93.4047","","",""
"22.08.2023","94.1424","","",""
"23.08.2023","94.1185","","",""
"24.08.2023","94.4421","","",""
"25.08.2023","94.4007","","",""
"26.08.2023","94.7117","","",""
"29.08.2023","95.4717","","",""
"30.08.2023","95.7070","","",""
"31.08.2023","95.9283","","",""
"01.09.2023","96.3344","","",""
"02.09.2023","96.3411","","",""
"05.09.2023","96.6199","","",""
"06.09.2023","97.5383","","",""
"07.09.2023","97.8439","","",""
"08.09.2023","98.1961","","",""
"09.09.2023","97.9241","","",""
"12.09.2023","96.5083","","",""
"13.09.2023","94.7035","","",""
"14.09.2023","95.9794","","",""
"15.09.2023","96.1609","","",""
"16.09.2023","96.6338","","",""
"19.09.2023","96.6472","","",""
"20.09.2023","96.2236","","",""
"21.09.2023","96.6172","","",""
"22.09.2023","96.0762","","",""
"23.09.2023","96.0419","","",""
"26.09.2023","96.1456","","",""
"27.09.2023","96.2378","","",""
"28.09.2023","96.5000","","",""
"29.09.2023","97.0018","","",""
"30.09.2023","97.4147","","",""
"03.10.2023","98.4785","","",""
"04.10.2023","99.2677","","",""
"05.10.2023","99.4555","","",""
"06.10.2023","99.6762","","",""
"07.10.2023","100.4911","","",""
"10.10.2023","101.3598","round","max-pulsating-bullet","макс"
"11.10.2023","99.9349","","",""
"12.10.2023","99.9808","","",""
"13.10.2023","96.9948","","",""
"14.10.2023","97.3075","","",""
"17.10.2023","97.2865","","",""
"18.10.2023","97.3458","","",""
"19.10.2023","97.3724","","",""
"20.10.2023","97.3074","","",""
"21.10.2023","95.9053","","",""
"24.10.2023","94.7081","","",""
"25.10.2023","93.5224","","",""
"26.10.2023","93.1507","","",""
"27.10.2023","93.5616","","",""
"28.10.2023","93.2174","","",""
"31.10.2023","93.2435","","",""
"01.11.2023","92.0226","","",""
"02.11.2023","93.2801","","",""
"03.11.2023","93.1730","","",""
"04.11.2023","93.0351","","",""
"08.11.2023","92.4151","","",""
"09.11.2023","92.1973","","",""
"10.11.2023","91.9266","","",""
"11.11.2023","92.0535","","",""
"14.11.2023","92.1185","","",""
"15.11.2023","91.2570","","",""
"16.11.2023","89.4565","","",""
"17.11.2023","88.9466","","",""
"18.11.2023","89.1237","","",""
"21.11.2023","88.4954","","",""
"22.11.2023","87.8701","","",""
"23.11.2023","88.1648","","",""
"24.11.2023","88.1206","","",""
"25.11.2023","88.8133","","",""
"28.11.2023","88.7045","","",""
"29.11.2023","88.6102","","",""
"30.11.2023","88.8841","","",""
"01.12.2023","88.5819","","",""
"02.12.2023","89.7619","","",""
"05.12.2023","90.6728","","",""
"06.12.2023","91.5823","","",""
"07.12.2023","92.7826","","",""
"08.12.2023","92.5654","","",""
"09.12.2023","91.6402","","",""
"12.12.2023","90.9846","","",""
"13.12.2023","90.2158","","",""
"14.12.2023","89.8926","","",""
"15.12.2023","89.6741","","",""
"16.12.2023","89.6966","","",""
"19.12.2023","90.4162","","",""
"20.12.2023","90.0870","","",""
"21.12.2023","90.4056","","",""
"22.12.2023","91.7062","","",""
"23.12.2023","91.9389","","",""
"26.12.2023","91.9690","","",""
"27.12.2023","91.7069","","",""
"28.12.2023","91.7051","","",""
"29.12.2023","90.3041","","",""
"30.12.2023","89.6883","","",""
"10.01.2024","90.4040","","",""
"11.01.2024","89.3939","","",""
"12.01.2024","88.7818","","",""
"13.01.2024","88.1324","","",""
"16.01.2024","87.6772","","",""
"17.01.2024","87.6457","","",""
"18.01.2024","88.3540","","",""
"19.01.2024","88.6610","","",""
"20.01.2024","88.5896","","",""
"23.01.2024","87.9724","","",""
"24.01.2024","87.9199","","",""
"25.01.2024","88.2829","","",""
"26.01.2024","88.6562","","",""
"27.01.2024","89.5159","","",""
"30.01.2024","89.6090","","",""
"31.01.2024","89.2887","","",""
"01.02.2024","89.6678","","",""
"02.02.2024","90.2299","","",""
"03.02.2024","90.6626","","",""
"06.02.2024","91.2434","","",""
"07.02.2024","90.6842","","",""
"08.02.2024","91.1514","","",""
"09.02.2024","91.2561","","",""
"10.02.2024","90.8901","","",""
"13.02.2024","91.0758","","",""
"14.02.2024","91.2057","","",""
"15.02.2024","91.4316","","",""
"16.02.2024","91.8237","","",""
"17.02.2024","92.5492","","",""
"20.02.2024","92.4102","","",""
"21.02.2024","92.3490","","",""
"22.02.2024","92.4387","","",""
"23.02.2024","92.7519","","",""
"27.02.2024","92.6321","","",""
"28.02.2024","92.0425","","",""
"29.02.2024","91.8692","","",""
"01.03.2024","90.8423","","",""
"02.03.2024","91.3336","","",""
"05.03.2024","91.3534","","",""
"06.03.2024","91.1604","","",""
"07.03.2024","90.3412","","",""
"08.03.2024","90.7493","","",""
"12.03.2024","90.6252","","",""
"13.03.2024","90.8818","","",""
"19.03.2024","91.9829","","",""
"20.03.2024","92.2243","","",""
"21.03.2024","92.6861","","",""
"22.03.2024","91.9499","","",""
"23.03.2024","92.6118","","",""
"26.03.2024","92.7761","","",""
1 my_date my_value bullet bulletClass label
2 28.03.2023 76.5662
3 31.03.2023 77.0863
4 01.04.2023 77.3233
5 04.04.2023 77.9510
6 05.04.2023 79.3563
7 06.04.2023 79.4961
8 07.04.2023 80.6713
9 08.04.2023 82.3988
10 11.04.2023 81.7441
11 12.04.2023 82.1799
12 13.04.2023 82.0934
13 14.04.2023 81.6758
14 15.04.2023 81.5045
15 18.04.2023 81.6279
16 19.04.2023 81.6028
17 20.04.2023 81.6549
18 21.04.2023 81.6188
19 22.04.2023 81.4863
20 25.04.2023 81.2745
21 26.04.2023 81.5499
22 27.04.2023 81.6274
23 28.04.2023 81.5601
24 29.04.2023 80.5093
25 03.05.2023 79.9609
26 04.05.2023 79.3071
27 05.05.2023 78.6139
28 06.05.2023 76.8207
29 11.05.2023 76.6929
30 12.05.2023 75.8846 round min-pulsating-bullet мин
31 13.05.2023 77.2041
32 16.05.2023 79.1004
33 17.05.2023 79.9798
34 18.05.2023 80.7642
35 19.05.2023 80.0366
36 20.05.2023 79.9093
37 23.05.2023 79.9379
38 24.05.2023 80.1665
39 25.05.2023 79.9669
40 26.05.2023 79.9841
41 27.05.2023 79.9667
42 30.05.2023 80.0555
43 31.05.2023 80.6872
44 01.06.2023 80.9942
45 02.06.2023 80.9657
46 03.06.2023 80.8756
47 06.06.2023 81.3294
48 07.06.2023 81.2502
49 08.06.2023 81.4581
50 09.06.2023 82.0930
51 10.06.2023 82.6417
52 14.06.2023 83.6405
53 15.06.2023 84.3249
54 16.06.2023 83.9611
55 17.06.2023 83.6498
56 20.06.2023 83.9866
57 21.06.2023 84.2336
58 22.06.2023 84.2467
59 23.06.2023 83.6077
60 24.06.2023 84.0793
61 27.06.2023 84.6642
62 28.06.2023 85.0504
63 29.06.2023 85.6192
64 30.06.2023 87.0341
65 01.07.2023 88.3844
66 04.07.2023 89.3255
67 05.07.2023 89.5450
68 06.07.2023 90.3380
69 07.07.2023 92.5695
70 08.07.2023 91.6879
71 11.07.2023 91.4931
72 12.07.2023 90.5045
73 13.07.2023 90.6253
74 14.07.2023 90.1757
75 15.07.2023 90.1190
76 18.07.2023 90.4217
77 19.07.2023 90.6906
78 20.07.2023 91.2046
79 21.07.2023 90.8545
80 22.07.2023 90.3846
81 25.07.2023 90.4890
82 26.07.2023 90.0945
83 27.07.2023 90.0468
84 28.07.2023 90.0225
85 29.07.2023 90.9783
86 01.08.2023 91.5923
87 02.08.2023 91.7755
88 03.08.2023 92.8410
89 04.08.2023 93.7792
90 05.08.2023 94.8076
91 08.08.2023 96.5668
92 09.08.2023 96.0755
93 10.08.2023 97.3999
94 11.08.2023 97.2794
95 12.08.2023 98.2066
96 15.08.2023 101.0399
97 16.08.2023 97.4217
98 17.08.2023 96.7045
99 18.08.2023 93.7460
100 19.08.2023 93.4047
101 22.08.2023 94.1424
102 23.08.2023 94.1185
103 24.08.2023 94.4421
104 25.08.2023 94.4007
105 26.08.2023 94.7117
106 29.08.2023 95.4717
107 30.08.2023 95.7070
108 31.08.2023 95.9283
109 01.09.2023 96.3344
110 02.09.2023 96.3411
111 05.09.2023 96.6199
112 06.09.2023 97.5383
113 07.09.2023 97.8439
114 08.09.2023 98.1961
115 09.09.2023 97.9241
116 12.09.2023 96.5083
117 13.09.2023 94.7035
118 14.09.2023 95.9794
119 15.09.2023 96.1609
120 16.09.2023 96.6338
121 19.09.2023 96.6472
122 20.09.2023 96.2236
123 21.09.2023 96.6172
124 22.09.2023 96.0762
125 23.09.2023 96.0419
126 26.09.2023 96.1456
127 27.09.2023 96.2378
128 28.09.2023 96.5000
129 29.09.2023 97.0018
130 30.09.2023 97.4147
131 03.10.2023 98.4785
132 04.10.2023 99.2677
133 05.10.2023 99.4555
134 06.10.2023 99.6762
135 07.10.2023 100.4911
136 10.10.2023 101.3598 round max-pulsating-bullet макс
137 11.10.2023 99.9349
138 12.10.2023 99.9808
139 13.10.2023 96.9948
140 14.10.2023 97.3075
141 17.10.2023 97.2865
142 18.10.2023 97.3458
143 19.10.2023 97.3724
144 20.10.2023 97.3074
145 21.10.2023 95.9053
146 24.10.2023 94.7081
147 25.10.2023 93.5224
148 26.10.2023 93.1507
149 27.10.2023 93.5616
150 28.10.2023 93.2174
151 31.10.2023 93.2435
152 01.11.2023 92.0226
153 02.11.2023 93.2801
154 03.11.2023 93.1730
155 04.11.2023 93.0351
156 08.11.2023 92.4151
157 09.11.2023 92.1973
158 10.11.2023 91.9266
159 11.11.2023 92.0535
160 14.11.2023 92.1185
161 15.11.2023 91.2570
162 16.11.2023 89.4565
163 17.11.2023 88.9466
164 18.11.2023 89.1237
165 21.11.2023 88.4954
166 22.11.2023 87.8701
167 23.11.2023 88.1648
168 24.11.2023 88.1206
169 25.11.2023 88.8133
170 28.11.2023 88.7045
171 29.11.2023 88.6102
172 30.11.2023 88.8841
173 01.12.2023 88.5819
174 02.12.2023 89.7619
175 05.12.2023 90.6728
176 06.12.2023 91.5823
177 07.12.2023 92.7826
178 08.12.2023 92.5654
179 09.12.2023 91.6402
180 12.12.2023 90.9846
181 13.12.2023 90.2158
182 14.12.2023 89.8926
183 15.12.2023 89.6741
184 16.12.2023 89.6966
185 19.12.2023 90.4162
186 20.12.2023 90.0870
187 21.12.2023 90.4056
188 22.12.2023 91.7062
189 23.12.2023 91.9389
190 26.12.2023 91.9690
191 27.12.2023 91.7069
192 28.12.2023 91.7051
193 29.12.2023 90.3041
194 30.12.2023 89.6883
195 10.01.2024 90.4040
196 11.01.2024 89.3939
197 12.01.2024 88.7818
198 13.01.2024 88.1324
199 16.01.2024 87.6772
200 17.01.2024 87.6457
201 18.01.2024 88.3540
202 19.01.2024 88.6610
203 20.01.2024 88.5896
204 23.01.2024 87.9724
205 24.01.2024 87.9199
206 25.01.2024 88.2829
207 26.01.2024 88.6562
208 27.01.2024 89.5159
209 30.01.2024 89.6090
210 31.01.2024 89.2887
211 01.02.2024 89.6678
212 02.02.2024 90.2299
213 03.02.2024 90.6626
214 06.02.2024 91.2434
215 07.02.2024 90.6842
216 08.02.2024 91.1514
217 09.02.2024 91.2561
218 10.02.2024 90.8901
219 13.02.2024 91.0758
220 14.02.2024 91.2057
221 15.02.2024 91.4316
222 16.02.2024 91.8237
223 17.02.2024 92.5492
224 20.02.2024 92.4102
225 21.02.2024 92.3490
226 22.02.2024 92.4387
227 23.02.2024 92.7519
228 27.02.2024 92.6321
229 28.02.2024 92.0425
230 29.02.2024 91.8692
231 01.03.2024 90.8423
232 02.03.2024 91.3336
233 05.03.2024 91.3534
234 06.03.2024 91.1604
235 07.03.2024 90.3412
236 08.03.2024 90.7493
237 12.03.2024 90.6252
238 13.03.2024 90.8818
239 19.03.2024 91.9829
240 20.03.2024 92.2243
241 21.03.2024 92.6861
242 22.03.2024 91.9499
243 23.03.2024 92.6118
244 26.03.2024 92.7761

3756
data/ds_salaries.csv Normal file

File diff suppressed because it is too large Load Diff

21614
data/kc_house_data.csv Normal file

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

90837
data/neo.csv Normal file

File diff suppressed because it is too large Load Diff

848
notebooks/lab1.ipynb Normal file

File diff suppressed because one or more lines are too long

312
notebooks/lab2_1.ipynb Normal file
View File

@ -0,0 +1,312 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Загрузка данных в DataFrame"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"../data/kc_house_data.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о пропущенных данных"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(df.isnull().sum())"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"print(df.isnull().any())"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
" print(f\"{i} процент пустых значений: {null_rate:.2f}%\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание выборок данных"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"def split_stratified_into_train_val_test(\n",
" df_input,\n",
" stratify_colname=\"y\",\n",
" frac_train=0.6,\n",
" frac_val=0.15,\n",
" frac_test=0.25,\n",
" random_state=None,\n",
"):\n",
" \"\"\"\n",
" Splits a Pandas dataframe into three subsets (train, val, and test)\n",
" following fractional ratios provided by the user, where each subset is\n",
" stratified by the values in a specific column (that is, each subset has\n",
" the same relative frequency of the values in the column). It performs this\n",
" splitting by running train_test_split() twice.\n",
"\n",
" Parameters\n",
" ----------\n",
" df_input : Pandas dataframe\n",
" Input dataframe to be split.\n",
" stratify_colname : str\n",
" The name of the column that will be used for stratification. Usually\n",
" this column would be for the label.\n",
" frac_train : float\n",
" frac_val : float\n",
" frac_test : float\n",
" The ratios with which the dataframe will be split into train, val, and\n",
" test data. The values should be expressed as float fractions and should\n",
" sum to 1.0.\n",
" random_state : int, None, or RandomStateInstance\n",
" Value to be passed to train_test_split().\n",
"\n",
" Returns\n",
" -------\n",
" df_train, df_val, df_test :\n",
" Dataframes containing the three splits.\n",
" \"\"\"\n",
"\n",
" if frac_train + frac_val + frac_test != 1.0:\n",
" raise ValueError(\n",
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
" % (frac_train, frac_val, frac_test)\n",
" )\n",
"\n",
" if stratify_colname not in df_input.columns:\n",
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
" X = df_input # Contains all columns.\n",
" y = df_input[\n",
" [stratify_colname]\n",
" ] # Dataframe of just the column on which to stratify.\n",
"\n",
" # Split original dataframe into train and temp dataframes.\n",
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
" )\n",
"\n",
" # Split the temp dataframe into val and test dataframes.\n",
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
" df_val, df_test, y_val, y_test = train_test_split(\n",
" df_temp,\n",
" y_temp,\n",
" stratify=y_temp,\n",
" test_size=relative_frac_test,\n",
" random_state=random_state,\n",
" )\n",
"\n",
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
" return df_train, df_val, df_test"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[3 5 4 1 2]\n"
]
}
],
"source": [
"print(df.condition.unique())\n",
"\n",
"data = df[\n",
" [\n",
" \"price\",\n",
" \"bedrooms\",\n",
" \"bathrooms\",\n",
" \"sqft_living\",\n",
" \"sqft_lot\",\n",
" \"floors\",\n",
" \"view\",\n",
" \"condition\",\n",
" \"grade\",\n",
" \"sqft_above\",\n",
" \"sqft_basement\",\n",
" \"yr_built\",\n",
" \"yr_renovated\",\n",
" \"zipcode\",\n",
" \"lat\",\n",
" \"long\",\n",
" ]\n",
"].copy()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка: (12967, 16)\n",
"condition\n",
"3 8418\n",
"4 3407\n",
"5 1021\n",
"2 103\n",
"1 18\n",
"Name: count, dtype: int64\n",
"Контрольная выборка: (4323, 16)\n",
"condition\n",
"3 2806\n",
"4 1136\n",
"5 340\n",
"2 35\n",
"1 6\n",
"Name: count, dtype: int64\n",
"Тестовая выборка: (4323, 16)\n",
"condition\n",
"3 2807\n",
"4 1136\n",
"5 340\n",
"2 34\n",
"1 6\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
" data,\n",
" stratify_colname=\"condition\",\n",
" frac_train=0.60,\n",
" frac_val=0.20,\n",
" frac_test=0.20,\n",
")\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.condition.value_counts())\n",
"\n",
"print(\"Контрольная выборка: \", df_val.shape)\n",
"print(df_val.condition.value_counts())\n",
"\n",
"print(\"Тестовая выборка: \", df_test.shape)\n",
"print(df_test.condition.value_counts())"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка: (12967, 16)\n",
"condition\n",
"3 8418\n",
"4 3407\n",
"5 1021\n",
"2 103\n",
"1 18\n",
"Name: count, dtype: int64\n",
"Обучающая выборка после oversampling: (42073, 16)\n",
"condition\n",
"5 8464\n",
"2 8421\n",
"1 8420\n",
"3 8418\n",
"4 8350\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"\n",
"ada = ADASYN()\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.condition.value_counts())\n",
"\n",
"X_resampled, y_resampled = ada.fit_resample(df_train, df_train[\"condition\"])\n",
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
"\n",
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
"print(df_train_adasyn.condition.value_counts())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

648
notebooks/lab2_2.ipynb Normal file
View File

@ -0,0 +1,648 @@
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Загрузка данных в DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"../data/car_price_prediction.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>ID</th>\n",
" <th>Price</th>\n",
" <th>Levy</th>\n",
" <th>Manufacturer</th>\n",
" <th>Model</th>\n",
" <th>Prod_year</th>\n",
" <th>Category</th>\n",
" <th>Leather interior</th>\n",
" <th>Fuel type</th>\n",
" <th>Engine volume</th>\n",
" <th>Mileage</th>\n",
" <th>Cylinders</th>\n",
" <th>Gear_box_type</th>\n",
" <th>Drive_wheels</th>\n",
" <th>Doors</th>\n",
" <th>Wheel</th>\n",
" <th>Color</th>\n",
" <th>Airbags</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>45654403</td>\n",
" <td>13328</td>\n",
" <td>1399</td>\n",
" <td>LEXUS</td>\n",
" <td>RX 450</td>\n",
" <td>2010</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>3.5</td>\n",
" <td>186005 km</td>\n",
" <td>6.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>12</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>44731507</td>\n",
" <td>16621</td>\n",
" <td>1018</td>\n",
" <td>CHEVROLET</td>\n",
" <td>Equinox</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>3</td>\n",
" <td>192000 km</td>\n",
" <td>6.0</td>\n",
" <td>Tiptronic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Black</td>\n",
" <td>8</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>45774419</td>\n",
" <td>8467</td>\n",
" <td>-</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2006</td>\n",
" <td>Hatchback</td>\n",
" <td>No</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>200000 km</td>\n",
" <td>4.0</td>\n",
" <td>Variator</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Right-hand drive</td>\n",
" <td>Black</td>\n",
" <td>2</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>45769185</td>\n",
" <td>3607</td>\n",
" <td>862</td>\n",
" <td>FORD</td>\n",
" <td>Escape</td>\n",
" <td>2011</td>\n",
" <td>Jeep</td>\n",
" <td>Yes</td>\n",
" <td>Hybrid</td>\n",
" <td>2.5</td>\n",
" <td>168966 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>4x4</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>White</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>45809263</td>\n",
" <td>11726</td>\n",
" <td>446</td>\n",
" <td>HONDA</td>\n",
" <td>FIT</td>\n",
" <td>2014</td>\n",
" <td>Hatchback</td>\n",
" <td>Yes</td>\n",
" <td>Petrol</td>\n",
" <td>1.3</td>\n",
" <td>91901 km</td>\n",
" <td>4.0</td>\n",
" <td>Automatic</td>\n",
" <td>Front</td>\n",
" <td>04-May</td>\n",
" <td>Left wheel</td>\n",
" <td>Silver</td>\n",
" <td>4</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" ID Price Levy Manufacturer Model Prod_year Category \\\n",
"0 45654403 13328 1399 LEXUS RX 450 2010 Jeep \n",
"1 44731507 16621 1018 CHEVROLET Equinox 2011 Jeep \n",
"2 45774419 8467 - HONDA FIT 2006 Hatchback \n",
"3 45769185 3607 862 FORD Escape 2011 Jeep \n",
"4 45809263 11726 446 HONDA FIT 2014 Hatchback \n",
"\n",
" Leather interior Fuel type Engine volume Mileage Cylinders \\\n",
"0 Yes Hybrid 3.5 186005 km 6.0 \n",
"1 No Petrol 3 192000 km 6.0 \n",
"2 No Petrol 1.3 200000 km 4.0 \n",
"3 Yes Hybrid 2.5 168966 km 4.0 \n",
"4 Yes Petrol 1.3 91901 km 4.0 \n",
"\n",
" Gear_box_type Drive_wheels Doors Wheel Color Airbags \n",
"0 Automatic 4x4 04-May Left wheel Silver 12 \n",
"1 Tiptronic 4x4 04-May Left wheel Black 8 \n",
"2 Variator Front 04-May Right-hand drive Black 2 \n",
"3 Automatic 4x4 04-May Left wheel White 0 \n",
"4 Automatic Front 04-May Left wheel Silver 4 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Получение сведений о пропущенных данных"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID 0\n",
"Price 0\n",
"Levy 0\n",
"Manufacturer 0\n",
"Model 0\n",
"Prod_year 0\n",
"Category 0\n",
"Leather interior 0\n",
"Fuel type 0\n",
"Engine volume 0\n",
"Mileage 0\n",
"Cylinders 0\n",
"Gear_box_type 0\n",
"Drive_wheels 0\n",
"Doors 0\n",
"Wheel 0\n",
"Color 0\n",
"Airbags 0\n",
"dtype: int64\n"
]
}
],
"source": [
"print(df.isnull().sum())"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"ID False\n",
"Price False\n",
"Levy False\n",
"Manufacturer False\n",
"Model False\n",
"Prod_year False\n",
"Category False\n",
"Leather interior False\n",
"Fuel type False\n",
"Engine volume False\n",
"Mileage False\n",
"Cylinders False\n",
"Gear_box_type False\n",
"Drive_wheels False\n",
"Doors False\n",
"Wheel False\n",
"Color False\n",
"Airbags False\n",
"dtype: bool\n"
]
}
],
"source": [
"print(df.isnull().any())"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['1399' '1018' '-' '862' '446' '891' '761' '751' '394' '1053' '1055'\n",
" '1079' '810' '2386' '1850' '531' '586' '1249' '2455' '583' '1537' '1288'\n",
" '915' '1750' '707' '1077' '1486' '1091' '650' '382' '1436' '1194' '503'\n",
" '1017' '1104' '639' '629' '919' '781' '530' '640' '765' '777' '779' '934'\n",
" '769' '645' '1185' '1324' '830' '1187' '1111' '760' '642' '1604' '1095'\n",
" '966' '473' '1138' '1811' '988' '917' '1156' '687' '11714' '836' '1347'\n",
" '2866' '1646' '259' '609' '697' '585' '475' '690' '308' '1823' '1361'\n",
" '1273' '924' '584' '2078' '831' '1172' '893' '1872' '1885' '1266' '447'\n",
" '2148' '1730' '730' '289' '502' '333' '1325' '247' '879' '1342' '1327'\n",
" '1598' '1514' '1058' '738' '1935' '481' '1522' '1282' '456' '880' '900'\n",
" '798' '1277' '442' '1051' '790' '1292' '1047' '528' '1211' '1493' '1793'\n",
" '574' '930' '1998' '271' '706' '1481' '1677' '1661' '1286' '1408' '1090'\n",
" '595' '1451' '1267' '993' '1714' '878' '641' '749' '1511' '603' '353'\n",
" '877' '1236' '1141' '397' '784' '1024' '1357' '1301' '770' '922' '1438'\n",
" '753' '607' '1363' '638' '490' '431' '565' '517' '833' '489' '1760' '986'\n",
" '1841' '1620' '1360' '474' '1099' '978' '1624' '1946' '1268' '1307' '696'\n",
" '649' '666' '2151' '551' '800' '971' '1323' '2377' '1845' '1083' '694'\n",
" '463' '419' '345' '1515' '1505' '2056' '1203' '729' '460' '1356' '876'\n",
" '911' '1190' '780' '448' '2410' '1848' '1148' '834' '1275' '1028' '1197'\n",
" '724' '890' '1705' '505' '789' '2959' '518' '461' '1719' '2858' '3156'\n",
" '2225' '2177' '1968' '1888' '1308' '2736' '1103' '557' '2195' '843'\n",
" '1664' '723' '4508' '562' '501' '2018' '1076' '1202' '3301' '691' '1440'\n",
" '1869' '1178' '418' '1820' '1413' '488' '1304' '363' '2108' '521' '1659'\n",
" '87' '1411' '1528' '3292' '7058' '1578' '627' '874' '1996' '1488' '5679'\n",
" '1234' '5603' '400' '889' '3268' '875' '949' '2265' '441' '742' '425'\n",
" '2476' '2971' '614' '1816' '1375' '1405' '2297' '1062' '1113' '420'\n",
" '2469' '658' '1951' '2670' '2578' '1995' '1032' '994' '1011' '2421'\n",
" '1296' '155' '494' '426' '1086' '961' '2236' '1829' '764' '1834' '1054'\n",
" '617' '1529' '2266' '637' '626' '1832' '1016' '2002' '1756' '746' '1285'\n",
" '2690' '1118' '5332' '980' '1807' '970' '1228' '1195' '1132' '1768'\n",
" '1384' '1080' '7063' '1817' '1452' '1975' '1368' '702' '1974' '1781'\n",
" '1036' '944' '663' '364' '1539' '1345' '1680' '2209' '741' '1575' '695'\n",
" '1317' '294' '1525' '424' '997' '1473' '1552' '2819' '2188' '1668' '3057'\n",
" '799' '1502' '2606' '552' '1694' '1759' '1110' '399' '1470' '1174' '5877'\n",
" '1474' '1688' '526' '686' '5908' '1107' '2070' '1468' '1246' '1685' '556'\n",
" '1533' '1917' '1346' '732' '692' '579' '421' '362' '3505' '1855' '2711'\n",
" '1586' '3739' '681' '1708' '2278' '1701' '722' '1482' '928' '827' '832'\n",
" '527' '604' '173' '1341' '3329' '1553' '859' '167' '916' '828' '2082'\n",
" '1176' '1108' '975' '3008' '1516' '2269' '1699' '2073' '1031' '1503'\n",
" '2364' '1030' '1442' '5666' '2715' '1437' '2067' '1426' '2908' '1279'\n",
" '866' '4283' '279' '2658' '3015' '2004' '1391' '4736' '748' '1466' '644'\n",
" '683' '2705' '1297' '731' '1252' '2216' '3141' '3273' '1518' '1723'\n",
" '1588' '972' '682' '1094' '668' '175' '967' '402' '3894' '1960' '1599'\n",
" '2000' '2084' '1621' '714' '1109' '3989' '873' '1572' '1163' '1991'\n",
" '1716' '1673' '2562' '2874' '965' '462' '605' '1948' '1736' '3518' '2054'\n",
" '2467' '1681' '1272' '1205' '750' '2156' '2566' '115' '524' '3184' '676'\n",
" '1678' '612' '328' '955' '1441' '1675' '3965' '2909' '623' '822' '867'\n",
" '3025' '1993' '792' '636' '4057' '3743' '2337' '2570' '2418' '2472'\n",
" '3910' '1662' '2123' '2628' '3208' '2080' '3699' '2913' '864' '2505'\n",
" '870' '7536' '1924' '1671' '1064' '1836' '1866' '4741' '841' '1369'\n",
" '5681' '3112' '1366' '2223' '1198' '1039' '3811' '3571' '1387' '1171'\n",
" '1365' '1531' '1590' '11706' '2308' '4860' '1641' '1045' '1901']\n"
]
}
],
"source": [
"print(df[\"Levy\"].unique())"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"df[\"Levy\"] = df[\"Levy\"].replace({'-' : None})"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Levy процент пустых значений: 30.25%\n"
]
}
],
"source": [
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
" print(f\"{i} процент пустых значений: {null_rate:.2f}%\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Заполнение пропущенных данных"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"df.fillna({\"Levy\": 0}, inplace=True)\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
" print(f\"{i} процент пустых значений: {null_rate:.2f}%\")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Создание выборок данных"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"\n",
"def split_stratified_into_train_val_test(\n",
" df_input,\n",
" stratify_colname=\"y\",\n",
" frac_train=0.6,\n",
" frac_val=0.15,\n",
" frac_test=0.25,\n",
" random_state=None,\n",
"):\n",
" \"\"\"\n",
" Splits a Pandas dataframe into three subsets (train, val, and test)\n",
" following fractional ratios provided by the user, where each subset is\n",
" stratified by the values in a specific column (that is, each subset has\n",
" the same relative frequency of the values in the column). It performs this\n",
" splitting by running train_test_split() twice.\n",
"\n",
" Parameters\n",
" ----------\n",
" df_input : Pandas dataframe\n",
" Input dataframe to be split.\n",
" stratify_colname : str\n",
" The name of the column that will be used for stratification. Usually\n",
" this column would be for the label.\n",
" frac_train : float\n",
" frac_val : float\n",
" frac_test : float\n",
" The ratios with which the dataframe will be split into train, val, and\n",
" test data. The values should be expressed as float fractions and should\n",
" sum to 1.0.\n",
" random_state : int, None, or RandomStateInstance\n",
" Value to be passed to train_test_split().\n",
"\n",
" Returns\n",
" -------\n",
" df_train, df_val, df_test :\n",
" Dataframes containing the three splits.\n",
" \"\"\"\n",
"\n",
" if frac_train + frac_val + frac_test != 1.0:\n",
" raise ValueError(\n",
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
" % (frac_train, frac_val, frac_test)\n",
" )\n",
"\n",
" if stratify_colname not in df_input.columns:\n",
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
"\n",
" X = df_input # Contains all columns.\n",
" y = df_input[\n",
" [stratify_colname]\n",
" ] # Dataframe of just the column on which to stratify.\n",
"\n",
" # Split original dataframe into train and temp dataframes.\n",
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
" )\n",
"\n",
" # Split the temp dataframe into val and test dataframes.\n",
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
" df_val, df_test, y_val, y_test = train_test_split(\n",
" df_temp,\n",
" y_temp,\n",
" stratify=y_temp,\n",
" test_size=relative_frac_test,\n",
" random_state=random_state,\n",
" )\n",
"\n",
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
" return df_train, df_val, df_test"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Automatic' 'Tiptronic' 'Variator' 'Manual']\n"
]
}
],
"source": [
"print(df.Gear_box_type.unique())\n",
"\n",
"data = df[\n",
" [\n",
" \"Price\",\n",
" \"Gear_box_type\",\n",
" ]\n",
"].copy()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка: (11542, 2)\n",
"Gear_box_type\n",
"Automatic 8108\n",
"Tiptronic 1861\n",
"Manual 1125\n",
"Variator 448\n",
"Name: count, dtype: int64\n",
"Контрольная выборка: (3847, 2)\n",
"Gear_box_type\n",
"Automatic 2703\n",
"Tiptronic 620\n",
"Manual 375\n",
"Variator 149\n",
"Name: count, dtype: int64\n",
"Тестовая выборка: (3848, 2)\n",
"Gear_box_type\n",
"Automatic 2703\n",
"Tiptronic 621\n",
"Manual 375\n",
"Variator 149\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
" data,\n",
" stratify_colname=\"Gear_box_type\",\n",
" frac_train=0.60,\n",
" frac_val=0.20,\n",
" frac_test=0.20,\n",
")\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.Gear_box_type.value_counts())\n",
"\n",
"print(\"Контрольная выборка: \", df_val.shape)\n",
"print(df_val.Gear_box_type.value_counts())\n",
"\n",
"print(\"Тестовая выборка: \", df_test.shape)\n",
"print(df_test.Gear_box_type.value_counts())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Выборка с избытком (oversampling)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Обучающая выборка: (11542, 2)\n",
"Gear_box_type\n",
"Automatic 8108\n",
"Tiptronic 1861\n",
"Manual 1125\n",
"Variator 448\n",
"Name: count, dtype: int64\n"
]
},
{
"ename": "ValueError",
"evalue": "could not convert string to float: 'Automatic'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"\u001b[1;32m~\\AppData\\Local\\Temp\\ipykernel_9996\\2277749880.py\u001b[0m in \u001b[0;36m?\u001b[1;34m()\u001b[0m\n\u001b[0;32m 4\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Обучающая выборка: \"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mGear_box_type\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mvalue_counts\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 7\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m----> 8\u001b[1;33m \u001b[0mX_resampled\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my_resampled\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mada\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_resample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mdf_train\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_train\u001b[0m\u001b[1;33m[\u001b[0m\u001b[1;34m\"Gear_box_type\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 9\u001b[0m \u001b[0mdf_train_adasyn\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mpd\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDataFrame\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX_resampled\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 10\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 11\u001b[0m \u001b[0mprint\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"Обучающая выборка после oversampling: \"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdf_train_adasyn\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mshape\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\imblearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 204\u001b[0m \u001b[0my_resampled\u001b[0m \u001b[1;33m:\u001b[0m \u001b[0marray\u001b[0m\u001b[1;33m-\u001b[0m\u001b[0mlike\u001b[0m \u001b[0mof\u001b[0m \u001b[0mshape\u001b[0m \u001b[1;33m(\u001b[0m\u001b[0mn_samples_new\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 205\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mcorresponding\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0mof\u001b[0m \u001b[1;33m`\u001b[0m\u001b[0mX_resampled\u001b[0m\u001b[1;33m`\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 206\u001b[0m \"\"\"\n\u001b[0;32m 207\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_params\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 208\u001b[1;33m \u001b[1;32mreturn\u001b[0m \u001b[0msuper\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mfit_resample\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\imblearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y)\u001b[0m\n\u001b[0;32m 102\u001b[0m \u001b[0mThe\u001b[0m \u001b[0mcorresponding\u001b[0m \u001b[0mlabel\u001b[0m \u001b[0mof\u001b[0m \u001b[1;33m`\u001b[0m\u001b[0mX_resampled\u001b[0m\u001b[1;33m`\u001b[0m\u001b[1;33m.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 103\u001b[0m \"\"\"\n\u001b[0;32m 104\u001b[0m \u001b[0mcheck_classification_targets\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 105\u001b[0m \u001b[0marrays_transformer\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mArraysTransformer\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 106\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinarize_y\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_check_X_y\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 107\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 108\u001b[0m self.sampling_strategy_ = check_sampling_strategy(\n\u001b[0;32m 109\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0msampling_strategy\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_sampling_type\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\imblearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y, accept_sparse)\u001b[0m\n\u001b[0;32m 157\u001b[0m \u001b[1;32mdef\u001b[0m \u001b[0m_check_X_y\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mNone\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 158\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0maccept_sparse\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[0maccept_sparse\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m[\u001b[0m\u001b[1;34m\"csr\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;34m\"csc\"\u001b[0m\u001b[1;33m]\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 160\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinarize_y\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_target_type\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mindicate_one_vs_all\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 161\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_validate_data\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mreset\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mTrue\u001b[0m\u001b[1;33m,\u001b[0m 
\u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 162\u001b[0m \u001b[1;32mreturn\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mbinarize_y\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[0;32m 646\u001b[0m \u001b[1;32mif\u001b[0m \u001b[1;34m\"estimator\"\u001b[0m \u001b[1;32mnot\u001b[0m \u001b[1;32min\u001b[0m \u001b[0mcheck_y_params\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 647\u001b[0m \u001b[0mcheck_y_params\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;33m{\u001b[0m\u001b[1;33m**\u001b[0m\u001b[0mdefault_check_params\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_y_params\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 648\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_array\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0minput_name\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;34m\"y\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_y_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 649\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 650\u001b[1;33m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mcheck_X_y\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;33m**\u001b[0m\u001b[0mcheck_params\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 651\u001b[0m \u001b[0mout\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0my\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 652\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 653\u001b[0m \u001b[1;32mif\u001b[0m 
\u001b[1;32mnot\u001b[0m \u001b[0mno_val_X\u001b[0m \u001b[1;32mand\u001b[0m \u001b[0mcheck_params\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mget\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;34m\"ensure_2d\"\u001b[0m\u001b[1;33m,\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(X, y, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, multi_output, ensure_min_samples, ensure_min_features, y_numeric, estimator)\u001b[0m\n\u001b[0;32m 1297\u001b[0m raise ValueError(\n\u001b[0;32m 1298\u001b[0m \u001b[1;33mf\"\u001b[0m\u001b[1;33m{\u001b[0m\u001b[0mestimator_name\u001b[0m\u001b[1;33m}\u001b[0m\u001b[1;33m requires y to be passed, but the target y is None\u001b[0m\u001b[1;33m\"\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1299\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1300\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1301\u001b[1;33m X = check_array(\n\u001b[0m\u001b[0;32m 1302\u001b[0m \u001b[0mX\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1303\u001b[0m \u001b[0maccept_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_sparse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1304\u001b[0m \u001b[0maccept_large_sparse\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0maccept_large_sparse\u001b[0m\u001b[1;33m,\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\utils\\validation.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[0;32m 1009\u001b[0m \u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1010\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mxp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m=\u001b[0m\u001b[1;32mFalse\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1011\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1012\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0m_asarray_with_order\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mxp\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mxp\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 1013\u001b[1;33m \u001b[1;32mexcept\u001b[0m \u001b[0mComplexWarning\u001b[0m \u001b[1;32mas\u001b[0m \u001b[0mcomplex_warning\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 1014\u001b[0m raise ValueError(\n\u001b[0;32m 1015\u001b[0m \u001b[1;34m\"Complex data not 
supported\\n{}\\n\"\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mformat\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 1016\u001b[0m \u001b[1;33m)\u001b[0m \u001b[1;32mfrom\u001b[0m \u001b[0mcomplex_warning\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(array, dtype, order, copy, xp, device)\u001b[0m\n\u001b[0;32m 741\u001b[0m \u001b[1;31m# Use NumPy API to support order\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 742\u001b[0m \u001b[1;32mif\u001b[0m \u001b[0mcopy\u001b[0m \u001b[1;32mis\u001b[0m \u001b[1;32mTrue\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 743\u001b[0m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 744\u001b[0m \u001b[1;32melse\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m--> 745\u001b[1;33m \u001b[0marray\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnumpy\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0marray\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0morder\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0morder\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 746\u001b[0m \u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 747\u001b[0m \u001b[1;31m# At this point array is a NumPy ndarray. We convert it to an array\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 748\u001b[0m \u001b[1;31m# container that is consistent with the input's namespace.\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;32mc:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\pandas\\core\\generic.py\u001b[0m in \u001b[0;36m?\u001b[1;34m(self, dtype, copy)\u001b[0m\n\u001b[0;32m 2149\u001b[0m def __array__(\n\u001b[0;32m 2150\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mnpt\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mDTypeLike\u001b[0m \u001b[1;33m|\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[1;33m:\u001b[0m \u001b[0mbool_t\u001b[0m \u001b[1;33m|\u001b[0m \u001b[1;32mNone\u001b[0m \u001b[1;33m=\u001b[0m \u001b[1;32mNone\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2151\u001b[0m \u001b[1;33m)\u001b[0m \u001b[1;33m->\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mndarray\u001b[0m\u001b[1;33m:\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2152\u001b[0m \u001b[0mvalues\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mself\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0m_values\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[1;32m-> 2153\u001b[1;33m \u001b[0marr\u001b[0m \u001b[1;33m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0masarray\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[1;33m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0m\u001b[0;32m 2154\u001b[0m if (\n\u001b[0;32m 2155\u001b[0m \u001b[0mastype_is_view\u001b[0m\u001b[1;33m(\u001b[0m\u001b[0mvalues\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m,\u001b[0m \u001b[0marr\u001b[0m\u001b[1;33m.\u001b[0m\u001b[0mdtype\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n\u001b[0;32m 2156\u001b[0m \u001b[1;32mand\u001b[0m 
\u001b[0musing_copy_on_write\u001b[0m\u001b[1;33m(\u001b[0m\u001b[1;33m)\u001b[0m\u001b[1;33m\u001b[0m\u001b[1;33m\u001b[0m\u001b[0m\n",
"\u001b[1;31mValueError\u001b[0m: could not convert string to float: 'Automatic'"
]
}
],
"source": [
"from imblearn.over_sampling import ADASYN\n",
"\n",
"ada = ADASYN()\n",
"\n",
"print(\"Обучающая выборка: \", df_train.shape)\n",
"print(df_train.Gear_box_type.value_counts())\n",
"\n",
"X_resampled, y_resampled = ada.fit_resample(df_train, df_train[\"Gear_box_type\"])\n",
"df_train_adasyn = pd.DataFrame(X_resampled)\n",
"\n",
"print(\"Обучающая выборка после oversampling: \", df_train_adasyn.shape)\n",
"print(df_train_adasyn.Gear_box_type.value_counts())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

2584
notebooks/lab3_1.ipynb Normal file

File diff suppressed because one or more lines are too long

2971
notebooks/lab3_2.ipynb Normal file

File diff suppressed because one or more lines are too long

3418
notebooks/lab4.ipynb Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because it is too large Load Diff

2811
notebooks/lab4_sandbox.ipynb Normal file

File diff suppressed because one or more lines are too long

1195
notebooks/lab5_1.ipynb Normal file

File diff suppressed because one or more lines are too long

13326
notebooks/lab6_1.ipynb Normal file

File diff suppressed because it is too large Load Diff

17
notebooks/transformers.py Normal file
View File

@ -0,0 +1,17 @@
import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin
class CarsFeatures(BaseEstimator, TransformerMixin):
    """Feature-engineering transformer that derives a car's age.

    Adds an ``Age`` column computed as ``current_year - Prod. year``.
    Expects the input frame to carry a ``Prod. year`` column.
    """

    def __init__(self, current_year=2020):
        # Reference year for the age computation. Kept as a constructor
        # parameter (default 2020, the original hard-coded value) so the
        # transformer generalizes to newer data without code changes.
        self.current_year = current_year

    def fit(self, X, y=None):
        # Stateless transformer: nothing to learn from the data.
        return self

    def transform(self, X, y=None):
        # Work on a copy so the caller's DataFrame is not mutated in place
        # (the original wrote the new column directly into X).
        X = X.copy()
        X["Age"] = self.current_year - X["Prod. year"]
        return X

    def get_feature_names_out(self, features_in):
        # Output features are the inputs plus the engineered "Age" column.
        return np.append(features_in, ["Age"], axis=0)

100
notebooks/utils_clusters.py Normal file
View File

@ -0,0 +1,100 @@
import math
from typing import Dict, List, Tuple
import numpy as np
from pandas import DataFrame
from sklearn import cluster
from sklearn.metrics import silhouette_samples, silhouette_score
def run_agglomerative(
    df: DataFrame, num_clusters: int | None = 2
) -> cluster.AgglomerativeClustering:
    """Fit hierarchical (agglomerative) clustering on ``df``.

    ``compute_distances=True`` makes the fitted model expose ``distances_``,
    which ``get_linkage_matrix`` needs for dendrogram construction.
    """
    model = cluster.AgglomerativeClustering(
        n_clusters=num_clusters,
        compute_distances=True,
    )
    return model.fit(df)
def get_linkage_matrix(model: cluster.AgglomerativeClustering) -> np.ndarray:
    """Convert a fitted agglomerative model into a SciPy-style linkage matrix.

    Each row holds (child_a, child_b, merge_distance, subtree_size), the
    format ``scipy.cluster.hierarchy.dendrogram`` consumes.
    """
    n_samples = len(model.labels_)
    counts = np.zeros(model.children_.shape[0])  # type: ignore
    for i, (left, right) in enumerate(model.children_):  # type: ignore
        size = 0
        for child in (left, right):
            # Indices below n_samples are leaves (single observations);
            # larger indices refer to earlier merges, whose size we recorded.
            size += 1 if child < n_samples else counts[child - n_samples]
        counts[i] = size
    return np.column_stack([model.children_, model.distances_, counts]).astype(float)
def print_cluster_result(
    df: DataFrame, clusters_num: int, labels: np.ndarray, separator: str = ", "
):
    """Print, for each cluster, its member count and the members' index labels."""
    for cid in range(clusters_num):
        members = np.flatnonzero(labels == cid)
        names = separator.join(str(df.index[i]) for i in members)
        print(f"Cluster {cid + 1} ({len(members)}):")
        print(names)
        print("")
        print("--------")
def run_kmeans(
    df: DataFrame, num_clusters: int, random_state: int
) -> Tuple[np.ndarray, np.ndarray]:
    """Cluster ``df`` with k-means; return (per-row labels, cluster centroids)."""
    model = cluster.KMeans(n_clusters=num_clusters, random_state=random_state)
    return model.fit_predict(df), model.cluster_centers_
def fit_kmeans(
    reduced_data: np.ndarray, num_clusters: int, random_state: int
) -> cluster.KMeans:
    """Fit k-means on (typically PCA-reduced) data and return the fitted model."""
    return cluster.KMeans(
        n_clusters=num_clusters, random_state=random_state
    ).fit(reduced_data)
def _get_kmeans_range(
    df: DataFrame | np.ndarray, random_state: int
) -> Tuple[List, range]:
    """Fit one k-means model per candidate k in 2..floor(sqrt(n_samples)).

    The sqrt(n) upper bound is a common heuristic for the largest sensible
    cluster count. Returns the fitted models and the k-range they cover.
    """
    upper = int(math.sqrt(len(df)))
    clusters_range = range(2, upper + 1)
    models = []
    for k in clusters_range:
        models.append(cluster.KMeans(n_clusters=k, random_state=random_state).fit(df))
    return models, clusters_range
def get_clusters_inertia(df: DataFrame, random_state: int) -> Tuple[List, range]:
    """Collect each candidate fit's inertia (for drawing an elbow diagram)."""
    models, clusters_range = _get_kmeans_range(df, random_state)
    inertias = [m.inertia_ for m in models]
    return inertias, clusters_range
def get_clusters_silhouette_scores(
    df: DataFrame, random_state: int
) -> Tuple[List, range]:
    """Compute the mean silhouette score of each candidate k-means fit."""
    models, clusters_range = _get_kmeans_range(df, random_state)
    scores = []
    for m in models:
        scores.append(float(silhouette_score(df, m.labels_)))
    return scores, clusters_range
def get_clusters_silhouettes(df: np.ndarray, random_state: int) -> Dict:
    """Map each candidate n_clusters to (avg silhouette, per-sample silhouettes, model).

    The per-sample values feed the silhouette plots in visual.py.
    """
    models, _ = _get_kmeans_range(df, random_state)
    result: Dict = {}
    for model in models:
        avg = silhouette_score(df, model.labels_)
        per_sample = silhouette_samples(df, model.labels_)
        result[model.n_clusters] = (avg, per_sample, model)
    return result

242
notebooks/visual.py Normal file
View File

@ -0,0 +1,242 @@
from typing import Any, Dict, List
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import numpy as np
from pandas import DataFrame
from scipy.cluster import hierarchy
from sklearn.cluster import KMeans
def draw_data_2d(
    df: DataFrame,
    col1: int,
    col2: int,
    y: List | None = None,
    classes: List | None = None,
    subplot: Any | None = None,
):
    """Scatter-plot two dataframe columns (selected by position), colored by ``y``.

    When ``classes`` is given, a legend maps colors to class names.
    Draws onto ``subplot`` if supplied, otherwise creates fresh axes.
    """
    ax = subplot if subplot is not None else plt.subplots()[1]
    x_name, y_name = df.columns[col1], df.columns[col2]
    points = ax.scatter(df[x_name], df[y_name], c=y)
    ax.set(xlabel=x_name, ylabel=y_name)
    if classes is not None:
        handles = points.legend_elements()[0]
        ax.legend(handles, classes, loc="lower right", title="Classes")
def draw_dendrogram(linkage_matrix: np.ndarray):
    """Render the top three merge levels of a hierarchical-clustering dendrogram."""
    hierarchy.dendrogram(linkage_matrix, p=3, truncate_mode="level")
def draw_cluster_results(
    df: DataFrame,
    col1: int,
    col2: int,
    labels: np.ndarray,
    cluster_centers: np.ndarray,
    subplot: Any | None = None,
):
    """Scatter cluster members (one color per label) plus black centroid dots.

    Columns are selected by position; centroids are indexed with the same
    column positions, so they must live in the same feature space as ``df``.
    """
    ax = plt if subplot is None else subplot
    x_name, y_name = df.columns[col1], df.columns[col2]
    for label in np.unique(labels):
        members = df[labels == label]
        ax.scatter(members[x_name], members[y_name], label=label)
    ax.scatter(cluster_centers[:, col1], cluster_centers[:, col2], s=80, color="k")
def draw_clusters(reduced_data: np.ndarray, kmeans: KMeans):
    """Plot k-means decision regions over 2-D (PCA-reduced) data.

    Colors the plane by the predicted cluster on a fine mesh, overlays the
    data points, and marks centroids with white crosses.
    """
    # Mesh step; smaller values give smoother region boundaries at the cost
    # of more predict() calls.
    h = 0.02
    x_min, x_max = reduced_data[:, 0].min() - 1, reduced_data[:, 0].max() + 1
    y_min, y_max = reduced_data[:, 1].min() - 1, reduced_data[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
    # Predict a cluster for every mesh point, then reshape back onto the grid.
    Z = kmeans.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    plt.figure(1)
    plt.clf()
    plt.imshow(
        Z,
        interpolation="nearest",
        extent=(xx.min(), xx.max(), yy.min(), yy.max()),
        cmap=plt.cm.Paired,  # type: ignore
        aspect="auto",
        origin="lower",
    )
    plt.plot(reduced_data[:, 0], reduced_data[:, 1], "k.", markersize=2)
    centroids = kmeans.cluster_centers_
    plt.scatter(
        centroids[:, 0],
        centroids[:, 1],
        marker="x",
        s=169,
        linewidths=3,
        color="w",
        zorder=10,
    )
    plt.title(
        "K-means clustering (PCA-reduced data)\n"
        "Centroids are marked with white cross"
    )
    plt.xlim(x_min, x_max)
    plt.ylim(y_min, y_max)
    # Hide tick labels; the axes are in abstract PCA-component units.
    plt.xticks(())
    plt.yticks(())
def _draw_cluster_scores(
    data: List,
    clusters_range: range,
    score_name: str,
    title: str,
):
    """Line-plot a per-k clustering score against the candidate cluster counts."""
    plt.figure(figsize=(8, 5))
    plt.plot(clusters_range, data, "bo-")
    plt.title(title)
    plt.xlabel("$k$", fontsize=8)
    plt.ylabel(score_name, fontsize=8)
def draw_elbow_diagram(inertias: List, clusters_range: range):
    """Plot inertia vs. k — the 'elbow' suggests a good cluster count."""
    _draw_cluster_scores(
        inertias, clusters_range, score_name="Inertia", title="The Elbow Diagram"
    )
def draw_silhouettes_diagram(silhouette: List, clusters_range: range):
    """Plot the mean silhouette score vs. k; higher indicates better separation."""
    _draw_cluster_scores(
        silhouette,
        clusters_range,
        score_name="Silhouette score",
        title="The Silhouette score",
    )
def _draw_silhouette(
    ax: Any,
    reduced_data: np.ndarray,
    n_clusters: int,
    silhouette_avg: float,
    sample_silhouette_values: List,
    cluster_labels: List,
):
    """Draw a silhouette plot: one horizontal band per cluster.

    Each band spans that cluster's sorted per-sample silhouette coefficients;
    the red dashed line marks the average score over all samples.
    """
    # Silhouette coefficients lie in [-1, 1]; clip the left edge at -0.1
    # to leave room for the cluster-id labels.
    ax.set_xlim([-0.1, 1])
    # Reserve a 10-unit vertical gap between consecutive cluster bands.
    ax.set_ylim([0, len(reduced_data) + (n_clusters + 1) * 10])
    y_lower = 10
    for i in range(n_clusters):
        # NOTE(review): boolean indexing here assumes sample_silhouette_values
        # and cluster_labels are numpy arrays despite the List annotations.
        ith_cluster_silhouette_values = sample_silhouette_values[cluster_labels == i]
        ith_cluster_silhouette_values.sort()
        size_cluster_i = ith_cluster_silhouette_values.shape[0]
        y_upper = y_lower + size_cluster_i
        color = cm.nipy_spectral(float(i) / n_clusters)  # type: ignore
        ax.fill_betweenx(
            np.arange(y_lower, y_upper),
            0,
            ith_cluster_silhouette_values,
            facecolor=color,
            edgecolor=color,
            alpha=0.7,
        )
        # Label each band with its cluster id at the band's vertical midpoint.
        ax.text(-0.05, y_lower + 0.5 * size_cluster_i, str(i))
        y_lower = y_upper + 10  # 10 for the 0 samples
    ax.set_title("The silhouette plot for the various clusters.")
    ax.set_xlabel("The silhouette coefficient values")
    ax.set_ylabel("Cluster label")
    # Vertical reference line at the dataset-wide average silhouette score.
    ax.axvline(x=silhouette_avg, color="red", linestyle="--")
    ax.set_yticks([])
    ax.set_xticks([-0.1, 0, 0.2, 0.4, 0.6, 0.8, 1])
def _draw_cluster_data(
    ax: Any,
    reduced_data: np.ndarray,
    n_clusters: int,
    cluster_labels: np.ndarray,
    cluster_centers: np.ndarray,
):
    """Scatter the 2-D (dimensionality-reduced) samples on ``ax``, colored by
    cluster, with each cluster center drawn as a numbered white disc."""
    point_colors = cm.nipy_spectral(cluster_labels.astype(float) / n_clusters)  # type: ignore
    ax.scatter(
        reduced_data[:, 0],
        reduced_data[:, 1],
        marker=".",
        s=30,
        lw=0,
        alpha=0.7,
        c=point_colors,
        edgecolor="k",
    )
    # White discs behind the numeric labels make the centers stand out.
    ax.scatter(
        cluster_centers[:, 0],
        cluster_centers[:, 1],
        marker="o",
        c="white",
        alpha=1,
        s=200,
        edgecolor="k",
    )
    for center_idx, center in enumerate(cluster_centers):
        ax.scatter(
            center[0],
            center[1],
            marker="$%d$" % center_idx,
            alpha=1,
            s=50,
            edgecolor="k",
        )
    ax.set_title("The visualization of the clustered data.")
    ax.set_xlabel("Feature space for the 1st feature")
    ax.set_ylabel("Feature space for the 2nd feature")
def draw_silhouettes(reduced_data: np.ndarray, silhouettes: Dict):
    """Draw one two-panel figure per entry of ``silhouettes``: the silhouette
    plot on the left and the clustered scatter on the right.

    ``silhouettes`` maps a cluster count to a sequence holding, in order:
    the average silhouette score, the per-sample silhouette values, and the
    fitted clustering model (exposing ``labels_`` and ``cluster_centers_``).
    """
    for n_clusters, payload in silhouettes.items():
        silhouette_avg = payload[0]
        sample_values = payload[1]
        model = payload[2]
        fig, (left_ax, right_ax) = plt.subplots(1, 2)
        fig.set_size_inches(18, 7)
        _draw_silhouette(
            left_ax,
            reduced_data,
            n_clusters,
            silhouette_avg,
            sample_values,
            model.labels_,
        )
        _draw_cluster_data(
            right_ax,
            reduced_data,
            n_clusters,
            model.labels_,
            model.cluster_centers_,
        )
        plt.suptitle(
            "Silhouette analysis for KMeans clustering on sample data with n_clusters = %d"
            % n_clusters,
            fontsize=14,
            fontweight="bold",
        )

206
poetry.lock generated
View File

@ -467,6 +467,17 @@ files = [
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[[package]]
name = "cloudpickle"
version = "3.1.0"
description = "Pickler class to extend the standard pickle.Pickler functionality"
optional = false
python-versions = ">=3.8"
files = [
{file = "cloudpickle-3.1.0-py3-none-any.whl", hash = "sha256:fe11acda67f61aaaec473e3afe030feb131d78a43461b718185363384f1ba12e"},
{file = "cloudpickle-3.1.0.tar.gz", hash = "sha256:81a929b6e3c7335c863c771d673d105f02efdb89dfaba0c90495d1c64796601b"},
]
[[package]]
name = "colorama"
version = "0.4.6"
@ -661,6 +672,17 @@ files = [
[package.extras]
tests = ["asttokens (>=2.1.0)", "coverage", "coverage-enable-subprocess", "ipython", "littleutils", "pytest", "rich"]
[[package]]
name = "farama-notifications"
version = "0.0.4"
description = "Notifications for all Farama Foundation maintained libraries."
optional = false
python-versions = "*"
files = [
{file = "Farama-Notifications-0.0.4.tar.gz", hash = "sha256:13fceff2d14314cf80703c8266462ebf3733c7d165336eee998fc58e545efd18"},
{file = "Farama_Notifications-0.0.4-py3-none-any.whl", hash = "sha256:14de931035a41961f7c056361dc7f980762a143d05791ef5794a751a2caf05ae"},
]
[[package]]
name = "fastjsonschema"
version = "2.20.0"
@ -675,6 +697,41 @@ files = [
[package.extras]
devel = ["colorama", "json-spec", "jsonschema", "pylint", "pytest", "pytest-benchmark", "pytest-cache", "validictory"]
[[package]]
name = "featuretools"
version = "1.31.0"
description = "a framework for automated feature engineering"
optional = false
python-versions = "<4,>=3.9"
files = [
{file = "featuretools-1.31.0-py3-none-any.whl", hash = "sha256:87c94e9ae959c89acd83da96bd2583f3ef0f6daaa9639cbb6e46dbde2c742a18"},
{file = "featuretools-1.31.0.tar.gz", hash = "sha256:01bfb17fcc1715b4c3623c7bc94a8982122c4a0fa03350ed478601bb81f90155"},
]
[package.dependencies]
cloudpickle = ">=1.5.0"
holidays = ">=0.17"
numpy = ">=1.25.0"
packaging = ">=20.0"
pandas = ">=2.0.0"
psutil = ">=5.7.0"
scipy = ">=1.10.0"
tqdm = ">=4.66.3"
woodwork = ">=0.28.0"
[package.extras]
autonormalize = ["autonormalize (>=2.0.1)"]
complete = ["featuretools[dask,nlp,premium]"]
dask = ["dask[dataframe] (>=2023.2.0)", "distributed (>=2023.2.0)"]
dev = ["black[jupyter] (>=23.1.0)", "featuretools[dask,docs,test]", "pre-commit (>=2.20.0)", "ruff (>=0.1.6)"]
docs = ["Sphinx (==5.1.1)", "autonormalize (>=2.0.1)", "click (>=7.0.0)", "featuretools[dask,test]", "ipython (==8.4.0)", "jupyter (==1.0.0)", "jupyter-client (>=8.0.2)", "matplotlib (==3.7.2)", "myst-parser (==0.18.0)", "nbconvert (==6.5.0)", "nbsphinx (==0.8.9)", "pydata-sphinx-theme (==0.9.0)", "sphinx-copybutton (==0.5.0)", "sphinx-inline-tabs (==2022.1.2b11)"]
nlp = ["nlp-primitives (>=2.12.0)"]
premium = ["premium-primitives (>=0.0.3)"]
sklearn = ["featuretools-sklearn-transformer (>=1.0.0)"]
sql = ["featuretools-sql (>=0.0.1)", "psycopg2-binary (>=2.9.3)"]
test = ["boto3 (>=1.34.32)", "composeml (>=0.8.0)", "graphviz (>=0.8.4)", "moto[all] (>=5.0.0)", "pip (>=23.3.0)", "pyarrow (>=14.0.1)", "pympler (>=0.8)", "pytest (>=7.1.2)", "pytest-cov (>=3.0.0)", "pytest-timeout (>=2.1.0)", "pytest-xdist (>=2.5.0)", "smart-open (>=5.0.0)", "urllib3 (>=1.26.18)"]
tsfresh = ["featuretools-tsfresh-primitives (>=1.0.0)"]
[[package]]
name = "flask"
version = "3.0.3"
@ -822,6 +879,36 @@ files = [
{file = "fqdn-1.5.1.tar.gz", hash = "sha256:105ed3677e767fb5ca086a0c1f4bb66ebc3c100be518f0e0d755d9eae164d89f"},
]
[[package]]
name = "gymnasium"
version = "1.0.0"
description = "A standard API for reinforcement learning and a diverse set of reference environments (formerly Gym)."
optional = false
python-versions = ">=3.8"
files = [
{file = "gymnasium-1.0.0-py3-none-any.whl", hash = "sha256:b6f40e1e24c5bd419361e1a5b86a9117d2499baecc3a660d44dfff4c465393ad"},
{file = "gymnasium-1.0.0.tar.gz", hash = "sha256:9d2b66f30c1b34fe3c2ce7fae65ecf365d0e9982d2b3d860235e773328a3b403"},
]
[package.dependencies]
cloudpickle = ">=1.2.0"
farama-notifications = ">=0.0.1"
numpy = ">=1.21.0"
typing-extensions = ">=4.3.0"
[package.extras]
all = ["ale-py (>=0.9)", "box2d-py (==2.3.5)", "cython (<3)", "flax (>=0.5.0)", "imageio (>=2.14.1)", "jax (>=0.4.0)", "jaxlib (>=0.4.0)", "matplotlib (>=3.0)", "moviepy (>=1.0.0)", "mujoco (>=2.1.5)", "mujoco-py (>=2.1,<2.2)", "opencv-python (>=3.0)", "pygame (>=2.1.3)", "swig (==4.*)", "torch (>=1.0.0)"]
atari = ["ale-py (>=0.9)"]
box2d = ["box2d-py (==2.3.5)", "pygame (>=2.1.3)", "swig (==4.*)"]
classic-control = ["pygame (>=2.1.3)", "pygame (>=2.1.3)"]
jax = ["flax (>=0.5.0)", "jax (>=0.4.0)", "jaxlib (>=0.4.0)"]
mujoco = ["imageio (>=2.14.1)", "mujoco (>=2.1.5)"]
mujoco-py = ["cython (<3)", "cython (<3)", "mujoco-py (>=2.1,<2.2)", "mujoco-py (>=2.1,<2.2)"]
other = ["matplotlib (>=3.0)", "moviepy (>=1.0.0)", "opencv-python (>=3.0)"]
testing = ["dill (>=0.3.7)", "pytest (==7.1.3)", "scipy (>=1.7.3)"]
torch = ["torch (>=1.0.0)"]
toy-text = ["pygame (>=2.1.3)", "pygame (>=2.1.3)"]
[[package]]
name = "h11"
version = "0.14.0"
@ -833,6 +920,20 @@ files = [
{file = "h11-0.14.0.tar.gz", hash = "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d"},
]
[[package]]
name = "holidays"
version = "0.60"
description = "World Holidays Framework"
optional = false
python-versions = ">=3.9"
files = [
{file = "holidays-0.60-py3-none-any.whl", hash = "sha256:d857949c5ee35655215a10c5a26e6a856bdc3beccc4fbbc8debef98dfba17b82"},
{file = "holidays-0.60.tar.gz", hash = "sha256:495fc5123f5d92b92673237375eb8c15a03d21c647b089db509b7d9612267556"},
]
[package.dependencies]
python-dateutil = "*"
[[package]]
name = "httpcore"
version = "1.0.5"
@ -914,6 +1015,25 @@ examples = ["keras (>=2.4.3)", "matplotlib (>=3.1.2)", "pandas (>=1.0.5)", "seab
optional = ["keras (>=2.4.3)", "pandas (>=1.0.5)", "tensorflow (>=2.4.3)"]
tests = ["black (>=23.3.0)", "flake8 (>=3.8.2)", "keras (>=2.4.3)", "mypy (>=1.3.0)", "pandas (>=1.0.5)", "pytest (>=5.0.1)", "pytest-cov (>=2.9.0)", "tensorflow (>=2.4.3)"]
[[package]]
name = "importlib-resources"
version = "6.4.5"
description = "Read resources from Python packages"
optional = false
python-versions = ">=3.8"
files = [
{file = "importlib_resources-6.4.5-py3-none-any.whl", hash = "sha256:ac29d5f956f01d5e4bb63102a5a19957f1b9175e45649977264a1416783bb717"},
{file = "importlib_resources-6.4.5.tar.gz", hash = "sha256:980862a1d16c9e147a59603677fa2aa5fd82b87f223b6cb870695bcfce830065"},
]
[package.extras]
check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)"]
cover = ["pytest-cov"]
doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-lint"]
enabler = ["pytest-enabler (>=2.2)"]
test = ["jaraco.test (>=5.4)", "pytest (>=6,!=8.1.*)", "zipp (>=3.17)"]
type = ["pytest-mypy"]
[[package]]
name = "ipykernel"
version = "6.29.5"
@ -2708,6 +2828,11 @@ files = [
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"},
{file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"},
{file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"},
{file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"},
{file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"},
{file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"},
{file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"},
{file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"},
@ -2781,6 +2906,27 @@ dev = ["cython-lint (>=0.12.2)", "doit (>=0.36.0)", "mypy (==1.10.0)", "pycodest
doc = ["jupyterlite-pyodide-kernel", "jupyterlite-sphinx (>=0.13.1)", "jupytext", "matplotlib (>=3.5)", "myst-nb", "numpydoc", "pooch", "pydata-sphinx-theme (>=0.15.2)", "sphinx (>=5.0.0,<=7.3.7)", "sphinx-design (>=0.4.0)"]
test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.30)", "meson", "mpmath", "ninja", "pooch", "pytest", "pytest-cov", "pytest-timeout", "pytest-xdist", "scikit-umfpack", "threadpoolctl"]
[[package]]
name = "seaborn"
version = "0.13.2"
description = "Statistical data visualization"
optional = false
python-versions = ">=3.8"
files = [
{file = "seaborn-0.13.2-py3-none-any.whl", hash = "sha256:636f8336facf092165e27924f223d3c62ca560b1f2bb5dff7ab7fad265361987"},
{file = "seaborn-0.13.2.tar.gz", hash = "sha256:93e60a40988f4d65e9f4885df477e2fdaff6b73a9ded434c1ab356dd57eefff7"},
]
[package.dependencies]
matplotlib = ">=3.4,<3.6.1 || >3.6.1"
numpy = ">=1.20,<1.24.0 || >1.24.0"
pandas = ">=1.2"
[package.extras]
dev = ["flake8", "flit", "mypy", "pandas-stubs", "pre-commit", "pytest", "pytest-cov", "pytest-xdist"]
docs = ["ipykernel", "nbconvert", "numpydoc", "pydata_sphinx_theme (==0.10.0rc2)", "pyyaml", "sphinx (<6.0.0)", "sphinx-copybutton", "sphinx-design", "sphinx-issues"]
stats = ["scipy (>=1.7)", "statsmodels (>=0.12)"]
[[package]]
name = "send2trash"
version = "1.8.3"
@ -2939,6 +3085,27 @@ files = [
{file = "tornado-6.4.1.tar.gz", hash = "sha256:92d3ab53183d8c50f8204a51e6f91d18a15d5ef261e84d452800d4ff6fc504e9"},
]
[[package]]
name = "tqdm"
version = "4.67.0"
description = "Fast, Extensible Progress Meter"
optional = false
python-versions = ">=3.7"
files = [
{file = "tqdm-4.67.0-py3-none-any.whl", hash = "sha256:0cd8af9d56911acab92182e88d763100d4788bdf421d251616040cc4d44863be"},
{file = "tqdm-4.67.0.tar.gz", hash = "sha256:fe5a6f95e6fe0b9755e9469b77b9c3cf850048224ecaa8293d7d2d31f97d869a"},
]
[package.dependencies]
colorama = {version = "*", markers = "platform_system == \"Windows\""}
[package.extras]
dev = ["pytest (>=6)", "pytest-cov", "pytest-timeout", "pytest-xdist"]
discord = ["requests"]
notebook = ["ipywidgets (>=6)"]
slack = ["slack-sdk"]
telegram = ["requests"]
[[package]]
name = "traitlets"
version = "5.14.3"
@ -2965,6 +3132,17 @@ files = [
{file = "types_python_dateutil-2.9.0.20240821-py3-none-any.whl", hash = "sha256:f5889fcb4e63ed4aaa379b44f93c32593d50b9a94c9a60a0c854d8cc3511cd57"},
]
[[package]]
name = "typing-extensions"
version = "4.12.2"
description = "Backported and Experimental Type Hints for Python 3.8+"
optional = false
python-versions = ">=3.8"
files = [
{file = "typing_extensions-4.12.2-py3-none-any.whl", hash = "sha256:04e5ca0351e0f3f85c6853954072df659d0d13fac324d0072316b67d7794700d"},
{file = "typing_extensions-4.12.2.tar.gz", hash = "sha256:1a7ead55c7e559dd4dee8856e3a88b41225abfe1ce8df57b7c13915fe121ffb8"},
]
[[package]]
name = "tzdata"
version = "2024.1"
@ -3110,7 +3288,33 @@ files = [
{file = "widgetsnbextension-4.0.13.tar.gz", hash = "sha256:ffcb67bc9febd10234a362795f643927f4e0c05d9342c727b65d2384f8feacb6"},
]
[[package]]
name = "woodwork"
version = "0.31.0"
description = "a data typing library for machine learning"
optional = false
python-versions = "<4,>=3.9"
files = [
{file = "woodwork-0.31.0-py3-none-any.whl", hash = "sha256:5cb3370553b5f466f8c8599b1bf559584dc0b798cc1f2da26bbd7029d256c6f9"},
{file = "woodwork-0.31.0.tar.gz", hash = "sha256:6ef82af1d5b6525b02efe6417c574c810cfdcc606cb266bd0d7fb17a1d066b67"},
]
[package.dependencies]
importlib-resources = ">=5.10.0"
numpy = ">=1.25.0"
pandas = ">=2.0.0"
python-dateutil = ">=2.8.2"
scikit-learn = ">=1.1.0"
scipy = ">=1.10.0"
[package.extras]
complete = ["woodwork[updater]"]
dev = ["click (>=8.1.7)", "pre-commit (>=2.20.0)", "ruff (>=0.1.6)", "woodwork[docs,test]"]
docs = ["Sphinx (==5.1.1)", "ipython (==8.4.0)", "jupyter (==1.0.0)", "myst-parser (==0.18.0)", "nbconvert (==6.5.0)", "nbsphinx (==0.8.9)", "pyarrow (>=14.0.1)", "pydata-sphinx-theme (==0.9.0)", "sphinx-copybutton (==0.5.0)", "sphinx-inline-tabs (==2022.1.2b11)"]
test = ["boto3 (>=1.34.32)", "moto[all] (>=5.0.0)", "pyarrow (>=14.0.1)", "pytest (>=7.0.1)", "pytest-cov (>=2.10.1)", "pytest-xdist (>=2.1.0)", "smart-open (>=5.0.0)"]
updater = ["alteryx-open-src-update-checker (>=3.1.0)"]
[metadata]
lock-version = "2.0"
python-versions = "^3.12"
content-hash = "a7e3d516bde2d6e4173d8a9770fb5337a0c806dadaeda355084b262c1995f7ea"
content-hash = "76a7ecc0524f2a9a187e4242566cf9813bf2265aa4176553ea4f33c9a4c78f17"

View File

@ -17,8 +17,15 @@ apiflask = "^2.2.0"
flask-cors = "^5.0.0"
scikit-learn = "^1.5.2"
imbalanced-learn = "^0.12.3"
featuretools = "^1.31.0"
seaborn = "^0.13.2"
gymnasium = "^1.0.0"
scipy = "^1.14.1"
[tool.poetry.group.dev.dependencies]
ipykernel = "^6.29.5"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"

Binary file not shown.

Binary file not shown.