{ "cells": [ { "cell_type": "markdown", "id": "ad47d1b8", "metadata": {}, "source": [ "## Лабораторная 12\n", "Классификация набора изображений\n", "\n", "Датасет: Vehicle Type Recognition (https://www.kaggle.com/datasets/kaggleashwin/vehicle-type-recognition)" ] }, { "cell_type": "code", "execution_count": 1, "id": "e9ae4cde", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Всего классов: 4\n", "Список классов: ['Bus', 'Car', 'motorcycle', 'Truck']\n" ] } ], "source": [ "import os\n", "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", "\n", "from keras import backend as K\n", "K.clear_session()\n", "\n", "dataset_path = \"static/Dataset\"\n", "\n", "classes = os.listdir(dataset_path)\n", "num_classes = len(classes)\n", "\n", "print(f\"Всего классов: {num_classes}\")\n", "print(\"Список классов:\", classes)" ] }, { "cell_type": "markdown", "id": "74f394e5", "metadata": {}, "source": [ "Предобработка изображений и разделение их на выборки" ] }, { "cell_type": "code", "execution_count": null, "id": "8ada0d8d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\User\\Desktop\\aim\\aimvenv\\Lib\\site-packages\\PIL\\Image.py:1056: UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Размер обучающей выборки: (320, 224, 224, 3)\n", "Размер тестовой выборки: (80, 224, 224, 3)\n", "Количество классов: 4\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from keras.api.utils import to_categorical\n", "from PIL import Image\n", "\n", "img_width, img_height = 224, 224\n", "input_shape = (img_width, img_height, 3) \n", "\n", "X = []\n", "y = []\n", "\n", "for i, image_class in enumerate(classes):\n", " class_dir = os.path.join(dataset_path, image_class)\n", " for img_file in os.listdir(class_dir):\n", " try:\n", " 
img_path = os.path.join(class_dir, img_file)\n", " img = Image.open(img_path).convert('RGB')\n", " img = img.resize((img_width, img_height))\n", " img_array = np.array(img) / 255.0\n", " X.append(img_array)\n", " y.append(i)\n", " except Exception as e:\n", " print(f\"Ошибка при загрузке {img_path}: {e}\")\n", "\n", "X = np.array(X)\n", "y = np.array(y)\n", "\n", "y = to_categorical(y, num_classes=num_classes)\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42, stratify=y\n", ")\n", "\n", "print(f\"Размер обучающей выборки: {X_train.shape}\")\n", "print(f\"Размер тестовой выборки: {X_test.shape}\")\n", "print(f\"Количество классов: {num_classes}\")" ] }, { "cell_type": "markdown", "id": "5836cc5f", "metadata": {}, "source": [ "Проектированию глубокой сверточной нейронной сети\n", "\n", "используем AlexNet" ] }, { "cell_type": "code", "execution_count": null, "id": "3b721d71", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ conv2d (Conv2D)                 │ (None, 54, 54, 96)     │        34,944 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d (MaxPooling2D)    │ (None, 26, 26, 96)     │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization             │ (None, 26, 26, 96)     │           384 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_1 (Conv2D)               │ (None, 22, 22, 256)    │       614,656 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d_1 (MaxPooling2D)  │ (None, 10, 10, 256)    │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization_1           │ (None, 10, 10, 256)    │         1,024 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_2 (Conv2D)               │ (None, 8, 8, 256)      │       590,080 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_3 (Conv2D)               │ (None, 6, 6, 384)      │       885,120 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_4 (Conv2D)               │ (None, 4, 4, 384)      │     1,327,488 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d_2 (MaxPooling2D)  │ (None, 1, 1, 384)      │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization_2           │ (None, 1, 1, 384)      │         1,536 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ flatten (Flatten)               │ (None, 384)            │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense (Dense)                   │ (None, 4096)           │     1,576,960 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout (Dropout)               │ (None, 4096)           │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_1 (Dense)                 │ (None, 4096)           │    16,781,312 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_1 (Dropout)             │ (None, 4096)           │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_2 (Dense)                 │ (None, 4)              │        16,388 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m34,944\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m384\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m22\u001b[0m, \u001b[38;5;34m22\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m614,656\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m10\u001b[0m, 
\u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m1,024\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_2 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m590,080\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_3 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m885,120\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_4 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,327,488\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d_2 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,536\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ 
\u001b[38;5;34m1,576,960\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m16,781,312\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m16,388\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 21,829,892 (83.27 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m21,829,892\u001b[0m (83.27 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 21,828,420 (83.27 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m21,828,420\u001b[0m (83.27 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 1,472 (5.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m1,472\u001b[0m (5.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.models import Sequential\n", "from keras.api.layers import InputLayer, Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization\n", "\n", "alexnet_model = Sequential()\n", "\n", "alexnet_model.add(InputLayer(shape=(224, 224, 3)))\n", "\n", "alexnet_model.add(Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation=\"relu\"))\n", "alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n", "alexnet_model.add(BatchNormalization())\n", "\n", "alexnet_model.add(Conv2D(256, kernel_size=(5, 5), activation=\"relu\"))\n", "alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n", "alexnet_model.add(BatchNormalization())\n", "\n", "alexnet_model.add(Conv2D(256, kernel_size=(3, 3), activation=\"relu\"))\n", "\n", "alexnet_model.add(Conv2D(384, kernel_size=(3, 3), activation=\"relu\"))\n", "\n", "alexnet_model.add(Conv2D(384, kernel_size=(3, 3), activation=\"relu\"))\n", "alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n", "alexnet_model.add(BatchNormalization())\n", "\n", "alexnet_model.add(Flatten())\n", "alexnet_model.add(Dense(4096, activation=\"tanh\"))\n", "alexnet_model.add(Dropout(0.5))\n", "\n", "alexnet_model.add(Dense(4096, activation=\"tanh\"))\n", "alexnet_model.add(Dropout(0.5))\n", "\n", "alexnet_model.add(Dense(num_classes, activation=\"softmax\"))\n", "\n", "alexnet_model.summary()" ] }, { "cell_type": "markdown", "id": "6aa2fc09", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 4, "id": "8d8931d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 935ms/step - accuracy: 0.3209 - loss: 5.4267 - val_accuracy: 0.2500 - 
val_loss: 9.1606\n", "Epoch 2/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 700ms/step - accuracy: 0.3936 - loss: 7.7186 - val_accuracy: 0.2500 - val_loss: 10.6578\n", "Epoch 3/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 696ms/step - accuracy: 0.3039 - loss: 7.0676 - val_accuracy: 0.2500 - val_loss: 2.7710\n", "Epoch 4/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 702ms/step - accuracy: 0.3167 - loss: 2.5632 - val_accuracy: 0.2500 - val_loss: 1.7581\n", "Epoch 5/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 693ms/step - accuracy: 0.2744 - loss: 2.0231 - val_accuracy: 0.2625 - val_loss: 1.7923\n", "Epoch 6/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 693ms/step - accuracy: 0.2843 - loss: 1.9969 - val_accuracy: 0.2500 - val_loss: 1.6605\n", "Epoch 7/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 694ms/step - accuracy: 0.2986 - loss: 1.8363 - val_accuracy: 0.2500 - val_loss: 1.4946\n", "Epoch 8/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 695ms/step - accuracy: 0.3106 - loss: 1.8001 - val_accuracy: 0.2750 - val_loss: 1.6487\n", "Epoch 9/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 683ms/step - accuracy: 0.3362 - loss: 1.7756 - val_accuracy: 0.2625 - val_loss: 1.4614\n", "Epoch 10/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 696ms/step - accuracy: 0.2665 - loss: 1.9055 - val_accuracy: 0.2500 - val_loss: 1.9306\n", "Epoch 11/100\n", "\u001b[1m10/10\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 684ms/step - accuracy: 0.3043 - loss: 1.9743 - val_accuracy: 0.2125 - val_loss: 2.4484\n", "Epoch 12/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 683ms/step - accuracy: 0.3049 - loss: 1.9582 - val_accuracy: 0.2625 - val_loss: 1.4873\n", "Epoch 13/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 691ms/step - accuracy: 0.4019 - loss: 1.6694 - val_accuracy: 0.2875 - val_loss: 1.9368\n", "Epoch 14/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 697ms/step - accuracy: 0.4397 - loss: 1.5109 - val_accuracy: 0.2625 - val_loss: 2.7897\n", "Epoch 15/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 689ms/step - accuracy: 0.3716 - loss: 1.7504 - val_accuracy: 0.4125 - val_loss: 1.5172\n", "Epoch 16/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 697ms/step - accuracy: 0.4378 - loss: 1.5575 - val_accuracy: 0.3250 - val_loss: 1.8495\n", "Epoch 17/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 700ms/step - accuracy: 0.4742 - loss: 1.5302 - val_accuracy: 0.3000 - val_loss: 2.0239\n", "Epoch 18/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 687ms/step - accuracy: 0.4646 - loss: 1.3401 - val_accuracy: 0.3250 - val_loss: 1.5780\n", "Epoch 19/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 700ms/step - accuracy: 0.4993 - loss: 1.3897 - val_accuracy: 0.2750 - val_loss: 1.9663\n", "Epoch 20/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m7s\u001b[0m 704ms/step - accuracy: 0.5081 - loss: 1.5911 - val_accuracy: 0.2250 - val_loss: 2.3920\n", "Epoch 21/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 690ms/step - accuracy: 0.4561 - loss: 1.4729 - val_accuracy: 0.3125 - val_loss: 2.1290\n", "Epoch 22/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 689ms/step - accuracy: 0.5576 - loss: 1.4146 - val_accuracy: 0.2500 - val_loss: 1.8015\n", "Epoch 23/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 688ms/step - accuracy: 0.4931 - loss: 1.2931 - val_accuracy: 0.2500 - val_loss: 2.0941\n", "Epoch 24/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 685ms/step - accuracy: 0.5758 - loss: 1.1323 - val_accuracy: 0.3625 - val_loss: 1.7523\n", "Epoch 25/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 685ms/step - accuracy: 0.5046 - loss: 1.2337 - val_accuracy: 0.3750 - val_loss: 1.8958\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from keras.api.callbacks import EarlyStopping\n", "\n", "alexnet_model.compile(\n", " loss=\"categorical_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"]\n", ")\n", "\n", "early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)\n", "alexnet_model.fit(\n", " x=X_train, \n", " y=y_train,\n", " epochs=100,\n", " validation_data=(X_test, y_test),\n", " callbacks=[early_stop]\n", ")" ] }, { "cell_type": "markdown", "id": "e2e95800", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 5, "id": "bf31ddc4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 122ms/step - accuracy: 0.4289 - loss: 1.5600\n", "Loss на тестовой выборке: 1.5172\n", "Accuracy на тестовой выборке: 0.4125\n" ] } ], "source": [ "def evaluate_model(model, X_test, y_test):\n", " loss, accuracy = model.evaluate(X_test, y_test)\n", " print(f\"Loss на тестовой выборке: {loss:.4f}\")\n", " print(f\"Accuracy на тестовой выборке: {accuracy:.4f}\")\n", "\n", "evaluate_model(alexnet_model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "09ed1fcf", "metadata": {}, "source": [ "Качество модели - 41.25% это хуже чем в 9 ЛР, но лучше чем в 11 ЛР" ] }, { "cell_type": "markdown", "id": "aaf81b1d", "metadata": {}, "source": [ "Классификация текстов" ] }, { "cell_type": "code", "execution_count": 6, "id": "7f079c87", "metadata": {}, "outputs": [], "source": [ "import spacy\n", "\n", "sp = spacy.load(\"ru_core_news_lg\")" ] }, { "cell_type": "markdown", "id": "155e198a", "metadata": {}, "source": [ "Загрузка текстов из файлов в датафрейм" ] }, { "cell_type": "code", "execution_count": 7, "id": "09f95467", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttype
0tz_01.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0
1tz_02.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0
2tz_03.docx2.2. Техническое задание\\nОбщие сведения:\\nВ д...0
3tz_04.docxТехническое задание\\n2.2.1 Общие сведения\\nИнт...0
4tz_05.docx2.2 Техническое задание\\n2.2.1 Общие сведения....0
\n", "
" ], "text/plain": [ " doc text type\n", "0 tz_01.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0\n", "1 tz_02.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0\n", "2 tz_03.docx 2.2. Техническое задание\\nОбщие сведения:\\nВ д... 0\n", "3 tz_04.docx Техническое задание\\n2.2.1 Общие сведения\\nИнт... 0\n", "4 tz_05.docx 2.2 Техническое задание\\n2.2.1 Общие сведения.... 0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttype
36Этапы разработки проекта2.docxЭтапы разработки проекта: заключительные стади...1
37Этапы разработки проекта3.docxЭтапы разработки проекта: определение стратеги...1
38Этапы разработки проекта4.docxЭтапы разработки проекта: реализация, тестиров...1
39Этапы разработки проекта5.docxЭтапы разработки проекта: стратегия и анализ\\n...1
40Язык манипуляции данными.docx2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма...1
\n", "
" ], "text/plain": [ " doc \\\n", "36 Этапы разработки проекта2.docx \n", "37 Этапы разработки проекта3.docx \n", "38 Этапы разработки проекта4.docx \n", "39 Этапы разработки проекта5.docx \n", "40 Язык манипуляции данными.docx \n", "\n", " text type \n", "36 Этапы разработки проекта: заключительные стади... 1 \n", "37 Этапы разработки проекта: определение стратеги... 1 \n", "38 Этапы разработки проекта: реализация, тестиров... 1 \n", "39 Этапы разработки проекта: стратегия и анализ\\n... 1 \n", "40 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... 1 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "from docx import Document\n", "\n", "def read_docx(file_path):\n", " doc = Document(file_path)\n", " full_text = []\n", " for paragraph in doc.paragraphs:\n", " full_text.append(paragraph.text)\n", " return \"\\n\".join(full_text)\n", "\n", "def load_docs(dataset_path):\n", " df = pd.DataFrame(columns=[\"doc\", \"text\"])\n", " for file_path in os.listdir(dataset_path):\n", " if file_path.startswith(\"~$\"):\n", " continue\n", " text = read_docx(dataset_path + file_path)\n", " df.loc[len(df.index)] = [file_path, text]\n", " return df\n", "\n", "df = load_docs(\"static/text/\")\n", "df[\"type\"] = df.apply(lambda row: 0 if str(row[\"doc\"]).startswith(\"tz_\") else 1, axis=1)\n", "df.sort_values(by=[\"doc\"], inplace=True)\n", "\n", "display(df.head(), df.tail())" ] }, { "cell_type": "markdown", "id": "53819918", "metadata": {}, "source": [ "Предобработка текста" ] }, { "cell_type": "code", "execution_count": 8, "id": "c3009a5b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttypeprep_text
0tz_01.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0[[2.2, технический, задание, 2.2.1, общий, све...
1tz_02.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0[[2.2, технический, задание, 2.2.1, общий, све...
2tz_03.docx2.2. Техническое задание\\nОбщие сведения:\\nВ д...0[[2.2], [технический, задание, общий, сведение...
3tz_04.docxТехническое задание\\n2.2.1 Общие сведения\\nИнт...0[[технический, задание, 2.2.1, общий, сведение...
4tz_05.docx2.2 Техническое задание\\n2.2.1 Общие сведения....0[[2.2, технический, задание, 2.2.1, общий, све...
\n", "
" ], "text/plain": [ " doc text type \\\n", "0 tz_01.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0 \n", "1 tz_02.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0 \n", "2 tz_03.docx 2.2. Техническое задание\\nОбщие сведения:\\nВ д... 0 \n", "3 tz_04.docx Техническое задание\\n2.2.1 Общие сведения\\nИнт... 0 \n", "4 tz_05.docx 2.2 Техническое задание\\n2.2.1 Общие сведения.... 0 \n", "\n", " prep_text \n", "0 [[2.2, технический, задание, 2.2.1, общий, све... \n", "1 [[2.2, технический, задание, 2.2.1, общий, све... \n", "2 [[2.2], [технический, задание, общий, сведение... \n", "3 [[технический, задание, 2.2.1, общий, сведение... \n", "4 [[2.2, технический, задание, 2.2.1, общий, све... " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttypeprep_text
36Этапы разработки проекта2.docxЭтапы разработки проекта: заключительные стади...1[[этап, разработка, проект, заключительные, ст...
37Этапы разработки проекта3.docxЭтапы разработки проекта: определение стратеги...1[[этап, разработка, проект, определение, страт...
38Этапы разработки проекта4.docxЭтапы разработки проекта: реализация, тестиров...1[[этап_разработка, проект, реализация, тестиро...
39Этапы разработки проекта5.docxЭтапы разработки проекта: стратегия и анализ\\n...1[[этап, разработка_проект, стратегия, анализ, ...
40Язык манипуляции данными.docx2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма...1[[2.1.3], [язык, манипуляция, данными, ямд, яз...
\n", "
" ], "text/plain": [ " doc \\\n", "36 Этапы разработки проекта2.docx \n", "37 Этапы разработки проекта3.docx \n", "38 Этапы разработки проекта4.docx \n", "39 Этапы разработки проекта5.docx \n", "40 Язык манипуляции данными.docx \n", "\n", " text type \\\n", "36 Этапы разработки проекта: заключительные стади... 1 \n", "37 Этапы разработки проекта: определение стратеги... 1 \n", "38 Этапы разработки проекта: реализация, тестиров... 1 \n", "39 Этапы разработки проекта: стратегия и анализ\\n... 1 \n", "40 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... 1 \n", "\n", " prep_text \n", "36 [[этап, разработка, проект, заключительные, ст... \n", "37 [[этап, разработка, проект, определение, страт... \n", "38 [[этап_разработка, проект, реализация, тестиро... \n", "39 [[этап, разработка_проект, стратегия, анализ, ... \n", "40 [[2.1.3], [язык, манипуляция, данными, ямд, яз... " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from gensim.models.phrases import Phraser, Phrases\n", "\n", "def prep_text(text):\n", " doc = sp(text)\n", " lower_sents = []\n", " for sent in doc.sents:\n", " lower_sents.append([word.lemma_.lower() for word in sent if not word.is_punct and not word.is_stop and not word.is_space])\n", " lower_bigram = Phraser(Phrases(lower_sents))\n", " clean_sents = []\n", " for sent in lower_sents:\n", " clean_sents.append(lower_bigram[sent])\n", " return clean_sents\n", "\n", "df[\"prep_text\"] = df.apply(lambda row: prep_text(row[\"text\"]), axis=1)\n", "display(df.head(), df.tail())" ] }, { "cell_type": "markdown", "id": "0fe192fb", "metadata": {}, "source": [ "Векторизация текстовых данных" ] }, { "cell_type": "code", "execution_count": 9, "id": "df567cd7", "metadata": {}, "outputs": [], "source": [ "from gensim.models.word2vec import Word2Vec\n", "\n", "word2vec = Word2Vec(\n", " sentences=df[\"prep_text\"].explode().tolist(),\n", " vector_size=64,\n", " sg=1,\n", " window=10,\n", " epochs=5,\n", " min_count=10,\n", " workers=4,\n", " 
seed=9,\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "id": "c4d02e19", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('pad', 0),\n", " ('система', 1),\n", " ('работа', 2),\n", " ('требование', 3),\n", " ('база', 4),\n", " ('пользователь', 5),\n", " ('разработка', 6),\n", " ('модель', 7),\n", " ('информация', 8),\n", " ('субд', 9),\n", " ('этап', 10),\n", " ('ошибка', 11),\n", " ('являться', 12),\n", " ('функция', 13),\n", " ('таблица', 14),\n", " ('средство', 15),\n", " ('проект', 16),\n", " ('сервер', 17),\n", " ('процесс', 18),\n", " ('документ', 19)]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "word_to_id = word2vec.wv.key_to_index\n", "word_to_id = {'pad': 0, **{k: v+1 for k, v in word2vec.wv.key_to_index.items()}}\n", "list(word_to_id.items())[:20]" ] }, { "cell_type": "markdown", "id": "810ad794", "metadata": {}, "source": [ "Преобразуем тексты в списки индексов" ] }, { "cell_type": "code", "execution_count": 11, "id": "e58e8c22", "metadata": {}, "outputs": [], "source": [ "def text_to_ids(sentences, word_to_id):\n", " flat_words = [word for sent in sentences for word in sent]\n", " return [word_to_id.get(word, 0) for word in flat_words]\n", "\n", "df[\"ids\"] = df[\"prep_text\"].apply(lambda doc: text_to_ids(doc, word_to_id))" ] }, { "cell_type": "markdown", "id": "cef5601c", "metadata": {}, "source": [ "padding и truncating" ] }, { "cell_type": "code", "execution_count": 12, "id": "db753071", "metadata": {}, "outputs": [], "source": [ "from keras.api.preprocessing.sequence import pad_sequences\n", "\n", "max_length = 1500\n", "X = pad_sequences(df[\"ids\"].tolist(), maxlen=max_length, padding=\"pre\", truncating=\"pre\", value=0)" ] }, { "cell_type": "markdown", "id": "7b42f1e6", "metadata": {}, "source": [ "Тренировочная и тестовая выборки" ] }, { "cell_type": "code", "execution_count": 13, "id": "220b7c2d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 0, 
0, ..., 134, 108, 148],\n", " [ 29, 0, 0, ..., 0, 0, 294],\n", " [ 0, 0, 0, ..., 134, 108, 148],\n", " ...,\n", " [ 0, 45, 251, ..., 0, 225, 30],\n", " [ 0, 0, 0, ..., 219, 0, 0],\n", " [ 0, 0, 0, ..., 194, 134, 5]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "y = df[\"type\"].values\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42, stratify=y\n", ")\n", "X_train" ] }, { "cell_type": "code", "execution_count": 14, "id": "bd8aa1d0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,\n", " 0, 1, 1, 1, 0, 1, 0, 1, 0, 0], dtype=int64)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train" ] }, { "cell_type": "markdown", "id": "ee8643c7", "metadata": {}, "source": [ "Архитектура глубокой полносвязанной сети" ] }, { "cell_type": "code", "execution_count": 15, "id": "faa6a6c8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential_1\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential_1\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ embedding (Embedding)           │ (None, 1500, 64)       │        66,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ flatten_1 (Flatten)             │ (None, 96000)          │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_3 (Dense)                 │ (None, 64)             │     6,144,064 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_2 (Dropout)             │ (None, 64)             │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_4 (Dense)                 │ (None, 1)              │            65 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ embedding (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ flatten_1 (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m96000\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_3 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m6,144,064\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_2 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_4 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 6,210,625 (23.69 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m6,210,625\u001b[0m (23.69 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 6,210,625 (23.69 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m6,210,625\u001b[0m (23.69 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.layers import Embedding\n", "\n", "vocab_size = len(word_to_id)\n", "\n", "model = Sequential()\n", "model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n", "\n", "model.add(Embedding(input_dim=vocab_size, output_dim=64))\n", "\n", "model.add(Flatten())\n", "\n", "model.add(Dense(64, activation=\"relu\"))\n", "model.add(Dropout(0.5))\n", "\n", "model.add(Dense(1, activation=\"sigmoid\"))\n", "\n", "model.summary()" ] }, { "cell_type": "markdown", "id": "d5b1eb98", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 16, "id": "ba6a9986", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 605ms/step - accuracy: 0.3750 - loss: 0.7053 - val_accuracy: 0.4444 - val_loss: 0.8328\n", "Epoch 2/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 359ms/step - accuracy: 0.6562 - loss: 0.5666 - val_accuracy: 0.7778 - val_loss: 0.5512\n", "Epoch 3/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.8125 - loss: 0.3735 - val_accuracy: 0.7778 - val_loss: 0.5157\n", "Epoch 4/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.9375 - loss: 0.1711 - val_accuracy: 0.7778 - val_loss: 0.5147\n", "Epoch 5/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.9062 - loss: 0.2508 - val_accuracy: 0.7778 - val_loss: 0.5128\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "model.compile(\n", " loss=\"binary_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", "\n", "model.fit(\n", " X_train,\n", " y_train,\n", " batch_size=128,\n", " epochs=5,\n", " validation_data=(X_test, y_test)\n", ")" ] }, { "cell_type": "markdown", "id": "025165b0", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 17, "id": "445a5445", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.7778 - loss: 0.5128\n", "Loss на тестовой выборке: 0.5128\n", "Accuracy на тестовой выборке: 0.7778\n" ] } ], "source": [ "evaluate_model(model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "343261c5", "metadata": {}, "source": [ "Сверточная сеть" ] }, { "cell_type": "code", "execution_count": 18, "id": "c8636b3d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential_2\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential_2\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ embedding_1 (Embedding)         │ (None, 1500, 64)       │        66,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ spatial_dropout1d               │ (None, 1500, 64)       │             0 │\n",
       "│ (SpatialDropout1D)              │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv1d (Conv1D)                 │ (None, 1498, 256)      │        49,408 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ global_max_pooling1d            │ (None, 256)            │             0 │\n",
       "│ (GlobalMaxPooling1D)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_5 (Dense)                 │ (None, 256)            │        65,792 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_3 (Dropout)             │ (None, 256)            │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_6 (Dense)                 │ (None, 1)              │           257 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ spatial_dropout1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mSpatialDropout1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv1d (\u001b[38;5;33mConv1D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1498\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m49,408\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ global_max_pooling1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mGlobalMaxPooling1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_5 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m65,792\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_3 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_6 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, 
\u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m257\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 181,953 (710.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m181,953\u001b[0m (710.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 181,953 (710.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m181,953\u001b[0m (710.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.layers import SpatialDropout1D, Conv1D, GlobalMaxPooling1D\n", "\n", "conv_model = Sequential()\n", "conv_model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n", "\n", "conv_model.add(Embedding(input_dim=vocab_size, output_dim=64))\n", "\n", "conv_model.add(SpatialDropout1D(0.2))\n", "\n", "conv_model.add(Conv1D(filters=256, kernel_size=3, activation=\"relu\"))\n", "conv_model.add(GlobalMaxPooling1D())\n", "\n", "conv_model.add(Dense(256, activation=\"relu\"))\n", "conv_model.add(Dropout(0.3)) \n", "\n", "conv_model.add(Dense(1, activation=\"sigmoid\"))\n", "\n", "conv_model.summary()" ] }, { "cell_type": "markdown", "id": "cc48d842", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 19, "id": "435886a6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.5938 - loss: 0.6891 - val_accuracy: 0.4444 - val_loss: 0.6881\n", "Epoch 2/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 656ms/step - accuracy: 0.5938 - loss: 0.6901 - val_accuracy: 1.0000 - val_loss: 0.6837\n", "Epoch 3/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 187ms/step - accuracy: 0.6250 - loss: 0.6826 - val_accuracy: 1.0000 - val_loss: 0.6792\n", "Epoch 4/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 125ms/step - accuracy: 0.7188 - loss: 0.6766 - val_accuracy: 0.8889 - val_loss: 0.6733\n", "Epoch 5/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 156ms/step 
- accuracy: 0.6875 - loss: 0.6777 - val_accuracy: 1.0000 - val_loss: 0.6678\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conv_model.compile(\n", " loss=\"binary_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", "\n", "conv_model.fit(\n", " X_train,\n", " y_train,\n", " batch_size=128,\n", " epochs=5,\n", " validation_data=(X_test, y_test)\n", ")" ] }, { "cell_type": "markdown", "id": "7fdce8fa", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 20, "id": "b30ff838", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 1.0000 - loss: 0.6678\n", "Loss на тестовой выборке: 0.6678\n", "Accuracy на тестовой выборке: 1.0000\n" ] } ], "source": [ "evaluate_model(conv_model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "fbbe8152", "metadata": {}, "source": [ "Рекуррентная сеть" ] }, { "cell_type": "code", "execution_count": 21, "id": "74751a0d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential_3\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential_3\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ embedding_2 (Embedding)         │ (None, 1500, 64)       │        66,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ spatial_dropout1d_1             │ (None, 1500, 64)       │             0 │\n",
       "│ (SpatialDropout1D)              │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ simple_rnn (SimpleRNN)          │ (None, 128)            │        24,704 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_7 (Dense)                 │ (None, 1)              │           129 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ embedding_2 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ spatial_dropout1d_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mSpatialDropout1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ simple_rnn (\u001b[38;5;33mSimpleRNN\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m24,704\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_7 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m129\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 91,329 (356.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m91,329\u001b[0m (356.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 91,329 (356.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m91,329\u001b[0m (356.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.layers import SimpleRNN\n", "\n", "rnn_model = Sequential()\n", "rnn_model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n", "\n", "rnn_model.add(Embedding(input_dim=vocab_size, output_dim=64))\n", "\n", "rnn_model.add(SpatialDropout1D(0.2))\n", "\n", "rnn_model.add(SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2))\n", "\n", "rnn_model.add(Dense(1, activation=\"sigmoid\"))\n", "\n", "rnn_model.summary()" ] }, { "cell_type": "markdown", "id": "44765c76", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 22, "id": "93f45751", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.5000 - loss: 0.7441 - val_accuracy: 0.8889 - val_loss: 0.6012\n", "Epoch 2/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 797ms/step - accuracy: 0.5312 - loss: 0.7488 - val_accuracy: 0.8889 - val_loss: 0.6016\n", "Epoch 3/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5000 - loss: 0.7157 - val_accuracy: 0.8889 - val_loss: 0.6043\n", "Epoch 4/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.6250 - loss: 0.6322 - val_accuracy: 0.8889 - val_loss: 0.6083\n", "Epoch 5/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 234ms/step - accuracy: 0.5000 - loss: 0.6860 - val_accuracy: 0.8889 - val_loss: 0.6114\n", "Epoch 6/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4062 - loss: 0.7382 - val_accuracy: 0.8889 - val_loss: 0.6095\n", "Epoch 7/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6835 - val_accuracy: 0.8889 - val_loss: 0.6095\n", "Epoch 8/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5312 - loss: 0.7079 - val_accuracy: 0.8889 - val_loss: 0.6057\n", "Epoch 9/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 234ms/step - accuracy: 0.5000 - loss: 0.6995 - val_accuracy: 1.0000 - val_loss: 0.6025\n", "Epoch 10/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4688 - loss: 0.7459 - val_accuracy: 0.8889 - val_loss: 0.6156\n", "Epoch 11/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.7364 - val_accuracy: 1.0000 - val_loss: 0.5995\n", "Epoch 12/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.7188 - loss: 0.6552 - val_accuracy: 0.8889 - val_loss: 0.6118\n", "Epoch 13/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6433 - val_accuracy: 1.0000 - val_loss: 0.6036\n", "Epoch 14/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5625 - loss: 0.7378 - val_accuracy: 1.0000 - val_loss: 0.6088\n", "Epoch 15/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6532 - val_accuracy: 1.0000 - val_loss: 
0.6128\n", "Epoch 16/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6827 - val_accuracy: 0.8889 - val_loss: 0.6190\n", "Epoch 17/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5312 - loss: 0.7088 - val_accuracy: 0.8889 - val_loss: 0.6045\n", "Epoch 18/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5625 - loss: 0.6746 - val_accuracy: 0.7778 - val_loss: 0.6072\n", "Epoch 19/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4375 - loss: 0.7517 - val_accuracy: 0.7778 - val_loss: 0.6159\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rnn_model.compile(\n", " loss=\"binary_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", "\n", "early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)\n", "rnn_model.fit(\n", " X_train,\n", " y_train,\n", " batch_size=128,\n", " epochs=40,\n", " validation_data=(X_test, y_test),\n", " callbacks=[early_stop]\n", ")" ] }, { "cell_type": "markdown", "id": "ae9e9d80", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 23, "id": "8c567bef", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 144ms/step - accuracy: 1.0000 - loss: 0.6025\n", "Loss на тестовой выборке: 0.6025\n", "Accuracy на тестовой выборке: 1.0000\n" ] } ], "source": [ "evaluate_model(rnn_model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "36c37a9c", "metadata": {}, "source": [ "Лучший 
результат по accuracy на тестовой выборке показали сверточная и рекуррентная сети — 100,00% (отметим, что тестовая выборка очень мала — 9 примеров, поэтому оценки неустойчивы).\n", "\n", "Полносвязанная сеть — accuracy 77,78%, loss 0.5128\n", "\n", "Сверточная сеть — accuracy 100,00%, loss 0.6678\n", "\n", "Рекуррентная сеть — accuracy 100,00%, loss 0.6025" ] } ], "metadata": { "kernelspec": { "display_name": "aimvenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 5 }