{ "cells": [ { "cell_type": "markdown", "id": "ad47d1b8", "metadata": {}, "source": [ "## Лабораторная 12\n", "Классификация набора изображений\n", "\n", "Датасет: Vehicle Type Recognition (https://www.kaggle.com/datasets/kaggleashwin/vehicle-type-recognition)" ] }, { "cell_type": "code", "execution_count": 1, "id": "e9ae4cde", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Всего классов: 4\n", "Список классов: ['Bus', 'Car', 'motorcycle', 'Truck']\n" ] } ], "source": [ "import os\n", "os.environ[\"KERAS_BACKEND\"] = \"jax\"\n", "\n", "from keras import backend as K\n", "K.clear_session()\n", "\n", "dataset_path = \"static/Dataset\"\n", "\n", "classes = os.listdir(dataset_path)\n", "num_classes = len(classes)\n", "\n", "print(f\"Всего классов: {num_classes}\")\n", "print(\"Список классов:\", classes)" ] }, { "cell_type": "markdown", "id": "74f394e5", "metadata": {}, "source": [ "Предобработка изображений и разделение их на выборки" ] }, { "cell_type": "code", "execution_count": null, "id": "8ada0d8d", "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\User\\Desktop\\aim\\aimvenv\\Lib\\site-packages\\PIL\\Image.py:1056: UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Размер обучающей выборки: (320, 224, 224, 3)\n", "Размер тестовой выборки: (80, 224, 224, 3)\n", "Количество классов: 4\n" ] } ], "source": [ "import numpy as np\n", "from sklearn.model_selection import train_test_split\n", "from keras.api.utils import to_categorical\n", "from PIL import Image\n", "\n", "img_width, img_height = 224, 224\n", "input_shape = (img_width, img_height, 3) \n", "\n", "X = []\n", "y = []\n", "\n", "for i, image_class in enumerate(classes):\n", " class_dir = os.path.join(dataset_path, image_class)\n", " for img_file in os.listdir(class_dir):\n", " try:\n", " 
img_path = os.path.join(class_dir, img_file)\n", " img = Image.open(img_path).convert('RGB')\n", " img = img.resize((img_width, img_height))\n", " img_array = np.array(img) / 255.0\n", " X.append(img_array)\n", " y.append(i)\n", " except Exception as e:\n", " print(f\"Ошибка при загрузке {img_path}: {e}\")\n", "\n", "X = np.array(X)\n", "y = np.array(y)\n", "\n", "y = to_categorical(y, num_classes=num_classes)\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42, stratify=y\n", ")\n", "\n", "print(f\"Размер обучающей выборки: {X_train.shape}\")\n", "print(f\"Размер тестовой выборки: {X_test.shape}\")\n", "print(f\"Количество классов: {num_classes}\")" ] }, { "cell_type": "markdown", "id": "5836cc5f", "metadata": {}, "source": [ "Проектированию глубокой сверточной нейронной сети\n", "\n", "используем AlexNet" ] }, { "cell_type": "code", "execution_count": null, "id": "3b721d71", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ conv2d (Conv2D)                 │ (None, 54, 54, 96)     │        34,944 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d (MaxPooling2D)    │ (None, 26, 26, 96)     │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization             │ (None, 26, 26, 96)     │           384 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_1 (Conv2D)               │ (None, 22, 22, 256)    │       614,656 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d_1 (MaxPooling2D)  │ (None, 10, 10, 256)    │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization_1           │ (None, 10, 10, 256)    │         1,024 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_2 (Conv2D)               │ (None, 8, 8, 256)      │       590,080 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_3 (Conv2D)               │ (None, 6, 6, 384)      │       885,120 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv2d_4 (Conv2D)               │ (None, 4, 4, 384)      │     1,327,488 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ max_pooling2d_2 (MaxPooling2D)  │ (None, 1, 1, 384)      │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ batch_normalization_2           │ (None, 1, 1, 384)      │         1,536 │\n",
       "│ (BatchNormalization)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ flatten (Flatten)               │ (None, 384)            │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense (Dense)                   │ (None, 4096)           │     1,576,960 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout (Dropout)               │ (None, 4096)           │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_1 (Dense)                 │ (None, 4096)           │    16,781,312 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_1 (Dropout)             │ (None, 4096)           │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_2 (Dense)                 │ (None, 4)              │        16,388 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m34,944\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m384\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m22\u001b[0m, \u001b[38;5;34m22\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m614,656\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m10\u001b[0m, 
\u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m1,024\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_2 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m590,080\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_3 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m885,120\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv2d_4 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,327,488\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ max_pooling2d_2 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ batch_normalization_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,536\u001b[0m │\n", "│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ 
\u001b[38;5;34m1,576,960\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m16,781,312\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m16,388\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 21,829,892 (83.27 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m21,829,892\u001b[0m (83.27 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 21,828,420 (83.27 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m21,828,420\u001b[0m (83.27 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 1,472 (5.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m1,472\u001b[0m (5.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.models import Sequential\n", "from keras.api.layers import InputLayer, Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization\n", "\n", "alexnet_model = Sequential()\n", "\n", "alexnet_model.add(InputLayer(shape=(224, 224, 3)))\n", "\n", "alexnet_model.add(Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation=\"relu\"))\n", "alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n", "alexnet_model.add(BatchNormalization())\n", "\n", "alexnet_model.add(Conv2D(256, kernel_size=(5, 5), activation=\"relu\"))\n", "alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n", "alexnet_model.add(BatchNormalization())\n", "\n", "alexnet_model.add(Conv2D(256, kernel_size=(3, 3), activation=\"relu\"))\n", "\n", "alexnet_model.add(Conv2D(384, kernel_size=(3, 3), activation=\"relu\"))\n", "\n", "alexnet_model.add(Conv2D(384, kernel_size=(3, 3), activation=\"relu\"))\n", "alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n", "alexnet_model.add(BatchNormalization())\n", "\n", "alexnet_model.add(Flatten())\n", "alexnet_model.add(Dense(4096, activation=\"tanh\"))\n", "alexnet_model.add(Dropout(0.5))\n", "\n", "alexnet_model.add(Dense(4096, activation=\"tanh\"))\n", "alexnet_model.add(Dropout(0.5))\n", "\n", "alexnet_model.add(Dense(num_classes, activation=\"softmax\"))\n", "\n", "alexnet_model.summary()" ] }, { "cell_type": "markdown", "id": "6aa2fc09", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 4, "id": "8d8931d2", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 935ms/step - accuracy: 0.3209 - loss: 5.4267 - val_accuracy: 0.2500 - 
val_loss: 9.1606\n", "Epoch 2/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 700ms/step - accuracy: 0.3936 - loss: 7.7186 - val_accuracy: 0.2500 - val_loss: 10.6578\n", "Epoch 3/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 696ms/step - accuracy: 0.3039 - loss: 7.0676 - val_accuracy: 0.2500 - val_loss: 2.7710\n", "Epoch 4/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 702ms/step - accuracy: 0.3167 - loss: 2.5632 - val_accuracy: 0.2500 - val_loss: 1.7581\n", "Epoch 5/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 693ms/step - accuracy: 0.2744 - loss: 2.0231 - val_accuracy: 0.2625 - val_loss: 1.7923\n", "Epoch 6/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 693ms/step - accuracy: 0.2843 - loss: 1.9969 - val_accuracy: 0.2500 - val_loss: 1.6605\n", "Epoch 7/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 694ms/step - accuracy: 0.2986 - loss: 1.8363 - val_accuracy: 0.2500 - val_loss: 1.4946\n", "Epoch 8/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 695ms/step - accuracy: 0.3106 - loss: 1.8001 - val_accuracy: 0.2750 - val_loss: 1.6487\n", "Epoch 9/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 683ms/step - accuracy: 0.3362 - loss: 1.7756 - val_accuracy: 0.2625 - val_loss: 1.4614\n", "Epoch 10/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 696ms/step - accuracy: 0.2665 - loss: 1.9055 - val_accuracy: 0.2500 - val_loss: 1.9306\n", "Epoch 11/100\n", "\u001b[1m10/10\u001b[0m 
\u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 684ms/step - accuracy: 0.3043 - loss: 1.9743 - val_accuracy: 0.2125 - val_loss: 2.4484\n", "Epoch 12/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 683ms/step - accuracy: 0.3049 - loss: 1.9582 - val_accuracy: 0.2625 - val_loss: 1.4873\n", "Epoch 13/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 691ms/step - accuracy: 0.4019 - loss: 1.6694 - val_accuracy: 0.2875 - val_loss: 1.9368\n", "Epoch 14/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 697ms/step - accuracy: 0.4397 - loss: 1.5109 - val_accuracy: 0.2625 - val_loss: 2.7897\n", "Epoch 15/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 689ms/step - accuracy: 0.3716 - loss: 1.7504 - val_accuracy: 0.4125 - val_loss: 1.5172\n", "Epoch 16/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 697ms/step - accuracy: 0.4378 - loss: 1.5575 - val_accuracy: 0.3250 - val_loss: 1.8495\n", "Epoch 17/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 700ms/step - accuracy: 0.4742 - loss: 1.5302 - val_accuracy: 0.3000 - val_loss: 2.0239\n", "Epoch 18/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 687ms/step - accuracy: 0.4646 - loss: 1.3401 - val_accuracy: 0.3250 - val_loss: 1.5780\n", "Epoch 19/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 700ms/step - accuracy: 0.4993 - loss: 1.3897 - val_accuracy: 0.2750 - val_loss: 1.9663\n", "Epoch 20/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m7s\u001b[0m 704ms/step - accuracy: 0.5081 - loss: 1.5911 - val_accuracy: 0.2250 - val_loss: 2.3920\n", "Epoch 21/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 690ms/step - accuracy: 0.4561 - loss: 1.4729 - val_accuracy: 0.3125 - val_loss: 2.1290\n", "Epoch 22/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 689ms/step - accuracy: 0.5576 - loss: 1.4146 - val_accuracy: 0.2500 - val_loss: 1.8015\n", "Epoch 23/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 688ms/step - accuracy: 0.4931 - loss: 1.2931 - val_accuracy: 0.2500 - val_loss: 2.0941\n", "Epoch 24/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 685ms/step - accuracy: 0.5758 - loss: 1.1323 - val_accuracy: 0.3625 - val_loss: 1.7523\n", "Epoch 25/100\n", "\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 685ms/step - accuracy: 0.5046 - loss: 1.2337 - val_accuracy: 0.3750 - val_loss: 1.8958\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from keras.api.callbacks import EarlyStopping\n", "\n", "alexnet_model.compile(\n", " loss=\"categorical_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"]\n", ")\n", "\n", "early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)\n", "alexnet_model.fit(\n", " x=X_train, \n", " y=y_train,\n", " epochs=100,\n", " validation_data=(X_test, y_test),\n", " callbacks=[early_stop]\n", ")" ] }, { "cell_type": "markdown", "id": "e2e95800", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 5, "id": "bf31ddc4", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ 
"\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 122ms/step - accuracy: 0.4289 - loss: 1.5600\n", "Loss на тестовой выборке: 1.5172\n", "Accuracy на тестовой выборке: 0.4125\n" ] } ], "source": [ "def evaluate_model(model, X_test, y_test):\n", " loss, accuracy = model.evaluate(X_test, y_test)\n", " print(f\"Loss на тестовой выборке: {loss:.4f}\")\n", " print(f\"Accuracy на тестовой выборке: {accuracy:.4f}\")\n", "\n", "evaluate_model(alexnet_model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "09ed1fcf", "metadata": {}, "source": [ "Качество модели - 41.25% это хуже чем в 9 ЛР, но лучше чем в 11 ЛР" ] }, { "cell_type": "markdown", "id": "aaf81b1d", "metadata": {}, "source": [ "Классификация текстов" ] }, { "cell_type": "code", "execution_count": 6, "id": "7f079c87", "metadata": {}, "outputs": [], "source": [ "import spacy\n", "\n", "sp = spacy.load(\"ru_core_news_lg\")" ] }, { "cell_type": "markdown", "id": "155e198a", "metadata": {}, "source": [ "Загрузка текстов из файлов в датафрейм" ] }, { "cell_type": "code", "execution_count": 7, "id": "09f95467", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttype
0tz_01.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0
1tz_02.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0
2tz_03.docx2.2. Техническое задание\\nОбщие сведения:\\nВ д...0
3tz_04.docxТехническое задание\\n2.2.1 Общие сведения\\nИнт...0
4tz_05.docx2.2 Техническое задание\\n2.2.1 Общие сведения....0
\n", "
" ], "text/plain": [ " doc text type\n", "0 tz_01.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0\n", "1 tz_02.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0\n", "2 tz_03.docx 2.2. Техническое задание\\nОбщие сведения:\\nВ д... 0\n", "3 tz_04.docx Техническое задание\\n2.2.1 Общие сведения\\nИнт... 0\n", "4 tz_05.docx 2.2 Техническое задание\\n2.2.1 Общие сведения.... 0" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttype
36Этапы разработки проекта2.docxЭтапы разработки проекта: заключительные стади...1
37Этапы разработки проекта3.docxЭтапы разработки проекта: определение стратеги...1
38Этапы разработки проекта4.docxЭтапы разработки проекта: реализация, тестиров...1
39Этапы разработки проекта5.docxЭтапы разработки проекта: стратегия и анализ\\n...1
40Язык манипуляции данными.docx2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма...1
\n", "
" ], "text/plain": [ " doc \\\n", "36 Этапы разработки проекта2.docx \n", "37 Этапы разработки проекта3.docx \n", "38 Этапы разработки проекта4.docx \n", "39 Этапы разработки проекта5.docx \n", "40 Язык манипуляции данными.docx \n", "\n", " text type \n", "36 Этапы разработки проекта: заключительные стади... 1 \n", "37 Этапы разработки проекта: определение стратеги... 1 \n", "38 Этапы разработки проекта: реализация, тестиров... 1 \n", "39 Этапы разработки проекта: стратегия и анализ\\n... 1 \n", "40 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... 1 " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import pandas as pd\n", "from docx import Document\n", "\n", "def read_docx(file_path):\n", " doc = Document(file_path)\n", " full_text = []\n", " for paragraph in doc.paragraphs:\n", " full_text.append(paragraph.text)\n", " return \"\\n\".join(full_text)\n", "\n", "def load_docs(dataset_path):\n", " df = pd.DataFrame(columns=[\"doc\", \"text\"])\n", " for file_path in os.listdir(dataset_path):\n", " if file_path.startswith(\"~$\"):\n", " continue\n", " text = read_docx(dataset_path + file_path)\n", " df.loc[len(df.index)] = [file_path, text]\n", " return df\n", "\n", "df = load_docs(\"static/text/\")\n", "df[\"type\"] = df.apply(lambda row: 0 if str(row[\"doc\"]).startswith(\"tz_\") else 1, axis=1)\n", "df.sort_values(by=[\"doc\"], inplace=True)\n", "\n", "display(df.head(), df.tail())" ] }, { "cell_type": "markdown", "id": "53819918", "metadata": {}, "source": [ "Предобработка текста" ] }, { "cell_type": "code", "execution_count": 8, "id": "c3009a5b", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttypeprep_text
0tz_01.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0[[2.2, технический, задание, 2.2.1, общий, све...
1tz_02.docx2.2 Техническое задание\\n2.2.1 Общие сведения\\...0[[2.2, технический, задание, 2.2.1, общий, све...
2tz_03.docx2.2. Техническое задание\\nОбщие сведения:\\nВ д...0[[2.2], [технический, задание, общий, сведение...
3tz_04.docxТехническое задание\\n2.2.1 Общие сведения\\nИнт...0[[технический, задание, 2.2.1, общий, сведение...
4tz_05.docx2.2 Техническое задание\\n2.2.1 Общие сведения....0[[2.2, технический, задание, 2.2.1, общий, све...
\n", "
" ], "text/plain": [ " doc text type \\\n", "0 tz_01.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0 \n", "1 tz_02.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0 \n", "2 tz_03.docx 2.2. Техническое задание\\nОбщие сведения:\\nВ д... 0 \n", "3 tz_04.docx Техническое задание\\n2.2.1 Общие сведения\\nИнт... 0 \n", "4 tz_05.docx 2.2 Техническое задание\\n2.2.1 Общие сведения.... 0 \n", "\n", " prep_text \n", "0 [[2.2, технический, задание, 2.2.1, общий, све... \n", "1 [[2.2, технический, задание, 2.2.1, общий, све... \n", "2 [[2.2], [технический, задание, общий, сведение... \n", "3 [[технический, задание, 2.2.1, общий, сведение... \n", "4 [[2.2, технический, задание, 2.2.1, общий, све... " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
doctexttypeprep_text
36Этапы разработки проекта2.docxЭтапы разработки проекта: заключительные стади...1[[этап, разработка, проект, заключительные, ст...
37Этапы разработки проекта3.docxЭтапы разработки проекта: определение стратеги...1[[этап, разработка, проект, определение, страт...
38Этапы разработки проекта4.docxЭтапы разработки проекта: реализация, тестиров...1[[этап_разработка, проект, реализация, тестиро...
39Этапы разработки проекта5.docxЭтапы разработки проекта: стратегия и анализ\\n...1[[этап, разработка_проект, стратегия, анализ, ...
40Язык манипуляции данными.docx2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма...1[[2.1.3], [язык, манипуляция, данными, ямд, яз...
\n", "
" ], "text/plain": [ " doc \\\n", "36 Этапы разработки проекта2.docx \n", "37 Этапы разработки проекта3.docx \n", "38 Этапы разработки проекта4.docx \n", "39 Этапы разработки проекта5.docx \n", "40 Язык манипуляции данными.docx \n", "\n", " text type \\\n", "36 Этапы разработки проекта: заключительные стади... 1 \n", "37 Этапы разработки проекта: определение стратеги... 1 \n", "38 Этапы разработки проекта: реализация, тестиров... 1 \n", "39 Этапы разработки проекта: стратегия и анализ\\n... 1 \n", "40 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... 1 \n", "\n", " prep_text \n", "36 [[этап, разработка, проект, заключительные, ст... \n", "37 [[этап, разработка, проект, определение, страт... \n", "38 [[этап_разработка, проект, реализация, тестиро... \n", "39 [[этап, разработка_проект, стратегия, анализ, ... \n", "40 [[2.1.3], [язык, манипуляция, данными, ямд, яз... " ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from gensim.models.phrases import Phraser, Phrases\n", "\n", "def prep_text(text):\n", " doc = sp(text)\n", " lower_sents = []\n", " for sent in doc.sents:\n", " lower_sents.append([word.lemma_.lower() for word in sent if not word.is_punct and not word.is_stop and not word.is_space])\n", " lower_bigram = Phraser(Phrases(lower_sents))\n", " clean_sents = []\n", " for sent in lower_sents:\n", " clean_sents.append(lower_bigram[sent])\n", " return clean_sents\n", "\n", "df[\"prep_text\"] = df.apply(lambda row: prep_text(row[\"text\"]), axis=1)\n", "display(df.head(), df.tail())" ] }, { "cell_type": "markdown", "id": "0fe192fb", "metadata": {}, "source": [ "Векторизация текстовых данных" ] }, { "cell_type": "code", "execution_count": 9, "id": "df567cd7", "metadata": {}, "outputs": [], "source": [ "from gensim.models.word2vec import Word2Vec\n", "\n", "word2vec = Word2Vec(\n", " sentences=df[\"prep_text\"].explode().tolist(),\n", " vector_size=64,\n", " sg=1,\n", " window=10,\n", " epochs=5,\n", " min_count=10,\n", " workers=4,\n", " 
seed=9,\n", ")" ] }, { "cell_type": "code", "execution_count": 10, "id": "c4d02e19", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[('pad', 0),\n", " ('система', 1),\n", " ('работа', 2),\n", " ('требование', 3),\n", " ('база', 4),\n", " ('пользователь', 5),\n", " ('разработка', 6),\n", " ('модель', 7),\n", " ('информация', 8),\n", " ('субд', 9),\n", " ('этап', 10),\n", " ('ошибка', 11),\n", " ('являться', 12),\n", " ('функция', 13),\n", " ('таблица', 14),\n", " ('средство', 15),\n", " ('проект', 16),\n", " ('сервер', 17),\n", " ('процесс', 18),\n", " ('документ', 19)]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "word_to_id = word2vec.wv.key_to_index\n", "word_to_id = {'pad': 0, **{k: v+1 for k, v in word2vec.wv.key_to_index.items()}}\n", "list(word_to_id.items())[:20]" ] }, { "cell_type": "markdown", "id": "810ad794", "metadata": {}, "source": [ "Преобразуем тексты в списки индексов" ] }, { "cell_type": "code", "execution_count": 11, "id": "e58e8c22", "metadata": {}, "outputs": [], "source": [ "def text_to_ids(sentences, word_to_id):\n", " flat_words = [word for sent in sentences for word in sent]\n", " return [word_to_id.get(word, 0) for word in flat_words]\n", "\n", "df[\"ids\"] = df[\"prep_text\"].apply(lambda doc: text_to_ids(doc, word_to_id))" ] }, { "cell_type": "markdown", "id": "cef5601c", "metadata": {}, "source": [ "padding и truncating" ] }, { "cell_type": "code", "execution_count": 12, "id": "db753071", "metadata": {}, "outputs": [], "source": [ "from keras.api.preprocessing.sequence import pad_sequences\n", "\n", "max_length = 1500\n", "X = pad_sequences(df[\"ids\"].tolist(), maxlen=max_length, padding=\"pre\", truncating=\"pre\", value=0)" ] }, { "cell_type": "markdown", "id": "7b42f1e6", "metadata": {}, "source": [ "Тренировочная и тестовая выборки" ] }, { "cell_type": "code", "execution_count": 13, "id": "220b7c2d", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 0, 
0, ..., 134, 108, 148],\n", " [ 29, 0, 0, ..., 0, 0, 294],\n", " [ 0, 0, 0, ..., 134, 108, 148],\n", " ...,\n", " [ 0, 45, 251, ..., 0, 225, 30],\n", " [ 0, 0, 0, ..., 219, 0, 0],\n", " [ 0, 0, 0, ..., 194, 134, 5]])" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.model_selection import train_test_split\n", "\n", "y = df[\"type\"].values\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(\n", " X, y, test_size=0.2, random_state=42, stratify=y\n", ")\n", "X_train" ] }, { "cell_type": "code", "execution_count": 14, "id": "bd8aa1d0", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,\n", " 0, 1, 1, 1, 0, 1, 0, 1, 0, 0], dtype=int64)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_train" ] }, { "cell_type": "markdown", "id": "ee8643c7", "metadata": {}, "source": [ "Архитектура глубокой полносвязанной сети" ] }, { "cell_type": "code", "execution_count": 15, "id": "faa6a6c8", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential_1\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential_1\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ embedding (Embedding)           │ (None, 1500, 64)       │        66,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ flatten_1 (Flatten)             │ (None, 96000)          │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_3 (Dense)                 │ (None, 64)             │     6,144,064 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_2 (Dropout)             │ (None, 64)             │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_4 (Dense)                 │ (None, 1)              │            65 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ embedding (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ flatten_1 (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m96000\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_3 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m6,144,064\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_2 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_4 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 6,210,625 (23.69 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m6,210,625\u001b[0m (23.69 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 6,210,625 (23.69 MB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m6,210,625\u001b[0m (23.69 MB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.layers import Embedding\n", "\n", "vocab_size = len(word_to_id)\n", "\n", "model = Sequential()\n", "model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n", "\n", "model.add(Embedding(input_dim=vocab_size, output_dim=64))\n", "\n", "model.add(Flatten())\n", "\n", "model.add(Dense(64, activation=\"relu\"))\n", "model.add(Dropout(0.5))\n", "\n", "model.add(Dense(1, activation=\"sigmoid\"))\n", "\n", "model.summary()" ] }, { "cell_type": "markdown", "id": "d5b1eb98", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 16, "id": "ba6a9986", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 605ms/step - accuracy: 0.3750 - loss: 0.7053 - val_accuracy: 0.4444 - val_loss: 0.8328\n", "Epoch 2/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 359ms/step - accuracy: 0.6562 - loss: 0.5666 - val_accuracy: 0.7778 - val_loss: 0.5512\n", "Epoch 3/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.8125 - loss: 0.3735 - val_accuracy: 0.7778 - val_loss: 0.5157\n", "Epoch 4/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.9375 - loss: 0.1711 - val_accuracy: 0.7778 - val_loss: 0.5147\n", "Epoch 5/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.9062 - loss: 0.2508 - val_accuracy: 0.7778 - val_loss: 0.5128\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "model.compile(\n", " loss=\"binary_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", "\n", "model.fit(\n", " X_train,\n", " y_train,\n", " batch_size=128,\n", " epochs=5,\n", " validation_data=(X_test, y_test)\n", ")" ] }, { "cell_type": "markdown", "id": "025165b0", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 17, "id": "445a5445", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.7778 - loss: 0.5128\n", "Loss на тестовой выборке: 0.5128\n", "Accuracy на тестовой выборке: 0.7778\n" ] } ], "source": [ "evaluate_model(model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "343261c5", "metadata": {}, "source": [ "Сверточная сеть" ] }, { "cell_type": "code", "execution_count": 18, "id": "c8636b3d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential_2\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential_2\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ embedding_1 (Embedding)         │ (None, 1500, 64)       │        66,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ spatial_dropout1d               │ (None, 1500, 64)       │             0 │\n",
       "│ (SpatialDropout1D)              │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ conv1d (Conv1D)                 │ (None, 1498, 256)      │        49,408 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ global_max_pooling1d            │ (None, 256)            │             0 │\n",
       "│ (GlobalMaxPooling1D)            │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_5 (Dense)                 │ (None, 256)            │        65,792 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dropout_3 (Dropout)             │ (None, 256)            │             0 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_6 (Dense)                 │ (None, 1)              │           257 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ spatial_dropout1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mSpatialDropout1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ conv1d (\u001b[38;5;33mConv1D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1498\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m49,408\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ global_max_pooling1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mGlobalMaxPooling1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_5 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m65,792\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dropout_3 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_6 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, 
\u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m257\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 181,953 (710.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m181,953\u001b[0m (710.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 181,953 (710.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m181,953\u001b[0m (710.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.layers import SpatialDropout1D, Conv1D, GlobalMaxPooling1D\n", "\n", "conv_model = Sequential()\n", "conv_model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n", "\n", "conv_model.add(Embedding(input_dim=vocab_size, output_dim=64))\n", "\n", "conv_model.add(SpatialDropout1D(0.2))\n", "\n", "conv_model.add(Conv1D(filters=256, kernel_size=3, activation=\"relu\"))\n", "conv_model.add(GlobalMaxPooling1D())\n", "\n", "conv_model.add(Dense(256, activation=\"relu\"))\n", "conv_model.add(Dropout(0.3)) \n", "\n", "conv_model.add(Dense(1, activation=\"sigmoid\"))\n", "\n", "conv_model.summary()" ] }, { "cell_type": "markdown", "id": "cc48d842", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 19, "id": "435886a6", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.5938 - loss: 0.6891 - val_accuracy: 0.4444 - val_loss: 0.6881\n", "Epoch 2/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 656ms/step - accuracy: 0.5938 - loss: 0.6901 - val_accuracy: 1.0000 - val_loss: 0.6837\n", "Epoch 3/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 187ms/step - accuracy: 0.6250 - loss: 0.6826 - val_accuracy: 1.0000 - val_loss: 0.6792\n", "Epoch 4/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 125ms/step - accuracy: 0.7188 - loss: 0.6766 - val_accuracy: 0.8889 - val_loss: 0.6733\n", "Epoch 5/5\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 156ms/step 
- accuracy: 0.6875 - loss: 0.6777 - val_accuracy: 1.0000 - val_loss: 0.6678\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "conv_model.compile(\n", " loss=\"binary_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", "\n", "conv_model.fit(\n", " X_train,\n", " y_train,\n", " batch_size=128,\n", " epochs=5,\n", " validation_data=(X_test, y_test)\n", ")" ] }, { "cell_type": "markdown", "id": "7fdce8fa", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 20, "id": "b30ff838", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 1.0000 - loss: 0.6678\n", "Loss на тестовой выборке: 0.6678\n", "Accuracy на тестовой выборке: 1.0000\n" ] } ], "source": [ "evaluate_model(conv_model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "fbbe8152", "metadata": {}, "source": [ "Рекуррентная сеть" ] }, { "cell_type": "code", "execution_count": 21, "id": "74751a0d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
Model: \"sequential_3\"\n",
       "
\n" ], "text/plain": [ "\u001b[1mModel: \"sequential_3\"\u001b[0m\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
       "┃ Layer (type)                     Output Shape                  Param # ┃\n",
       "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
       "│ embedding_2 (Embedding)         │ (None, 1500, 64)       │        66,496 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ spatial_dropout1d_1             │ (None, 1500, 64)       │             0 │\n",
       "│ (SpatialDropout1D)              │                        │               │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ simple_rnn (SimpleRNN)          │ (None, 128)            │        24,704 │\n",
       "├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
       "│ dense_7 (Dense)                 │ (None, 1)              │           129 │\n",
       "└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
       "
\n" ], "text/plain": [ "┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n", "┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n", "┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n", "│ embedding_2 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ spatial_dropout1d_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n", "│ (\u001b[38;5;33mSpatialDropout1D\u001b[0m) │ │ │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ simple_rnn (\u001b[38;5;33mSimpleRNN\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m24,704\u001b[0m │\n", "├─────────────────────────────────┼────────────────────────┼───────────────┤\n", "│ dense_7 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m129\u001b[0m │\n", "└─────────────────────────────────┴────────────────────────┴───────────────┘\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Total params: 91,329 (356.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Total params: \u001b[0m\u001b[38;5;34m91,329\u001b[0m (356.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Trainable params: 91,329 (356.75 KB)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m91,329\u001b[0m (356.75 KB)\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/html": [ "
 Non-trainable params: 0 (0.00 B)\n",
       "
\n" ], "text/plain": [ "\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from keras.api.layers import SimpleRNN\n", "\n", "rnn_model = Sequential()\n", "rnn_model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n", "\n", "rnn_model.add(Embedding(input_dim=vocab_size, output_dim=64))\n", "\n", "rnn_model.add(SpatialDropout1D(0.2))\n", "\n", "rnn_model.add(SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2))\n", "\n", "rnn_model.add(Dense(1, activation=\"sigmoid\"))\n", "\n", "rnn_model.summary()" ] }, { "cell_type": "markdown", "id": "44765c76", "metadata": {}, "source": [ "Обучение модели" ] }, { "cell_type": "code", "execution_count": 22, "id": "93f45751", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Epoch 1/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.5000 - loss: 0.7441 - val_accuracy: 0.8889 - val_loss: 0.6012\n", "Epoch 2/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 797ms/step - accuracy: 0.5312 - loss: 0.7488 - val_accuracy: 0.8889 - val_loss: 0.6016\n", "Epoch 3/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5000 - loss: 0.7157 - val_accuracy: 0.8889 - val_loss: 0.6043\n", "Epoch 4/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.6250 - loss: 0.6322 - val_accuracy: 0.8889 - val_loss: 0.6083\n", "Epoch 5/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 234ms/step - accuracy: 0.5000 - loss: 0.6860 - val_accuracy: 0.8889 - val_loss: 0.6114\n", "Epoch 6/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m 
\u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4062 - loss: 0.7382 - val_accuracy: 0.8889 - val_loss: 0.6095\n", "Epoch 7/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6835 - val_accuracy: 0.8889 - val_loss: 0.6095\n", "Epoch 8/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5312 - loss: 0.7079 - val_accuracy: 0.8889 - val_loss: 0.6057\n", "Epoch 9/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 234ms/step - accuracy: 0.5000 - loss: 0.6995 - val_accuracy: 1.0000 - val_loss: 0.6025\n", "Epoch 10/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4688 - loss: 0.7459 - val_accuracy: 0.8889 - val_loss: 0.6156\n", "Epoch 11/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.7364 - val_accuracy: 1.0000 - val_loss: 0.5995\n", "Epoch 12/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.7188 - loss: 0.6552 - val_accuracy: 0.8889 - val_loss: 0.6118\n", "Epoch 13/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6433 - val_accuracy: 1.0000 - val_loss: 0.6036\n", "Epoch 14/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5625 - loss: 0.7378 - val_accuracy: 1.0000 - val_loss: 0.6088\n", "Epoch 15/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6532 - val_accuracy: 1.0000 - val_loss: 
0.6128\n", "Epoch 16/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6827 - val_accuracy: 0.8889 - val_loss: 0.6190\n", "Epoch 17/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5312 - loss: 0.7088 - val_accuracy: 0.8889 - val_loss: 0.6045\n", "Epoch 18/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5625 - loss: 0.6746 - val_accuracy: 0.7778 - val_loss: 0.6072\n", "Epoch 19/40\n", "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4375 - loss: 0.7517 - val_accuracy: 0.7778 - val_loss: 0.6159\n" ] }, { "data": { "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "rnn_model.compile(\n", " loss=\"binary_crossentropy\",\n", " optimizer=\"adam\",\n", " metrics=[\"accuracy\"],\n", ")\n", "\n", "early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)\n", "rnn_model.fit(\n", " X_train,\n", " y_train,\n", " batch_size=128,\n", " epochs=40,\n", " validation_data=(X_test, y_test),\n", " callbacks=[early_stop]\n", ")" ] }, { "cell_type": "markdown", "id": "ae9e9d80", "metadata": {}, "source": [ "Оценка качества" ] }, { "cell_type": "code", "execution_count": 23, "id": "8c567bef", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 144ms/step - accuracy: 1.0000 - loss: 0.6025\n", "Loss на тестовой выборке: 0.6025\n", "Accuracy на тестовой выборке: 1.0000\n" ] } ], "source": [ "evaluate_model(rnn_model, X_test, y_test)" ] }, { "cell_type": "markdown", "id": "36c37a9c", "metadata": {}, "source": [ "Лучший 
результат по accuracy на тестовой выборке показали сверточная и рекуррентная сети — 100,00% (отметим, что тестовая выборка очень мала — 9 примеров, поэтому оценки неустойчивы).\n", "\n", "Полносвязанная сеть — accuracy 77,78%, loss 0.5128\n", "\n", "Сверточная сеть — accuracy 100,00%, loss 0.6678\n", "\n", "Рекуррентная сеть — accuracy 100,00%, loss 0.6025" ] } ], "metadata": { "kernelspec": { "display_name": "aimvenv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.5" } }, "nbformat": 4, "nbformat_minor": 5 }