{
"cells": [
{
"cell_type": "markdown",
"id": "ad47d1b8",
"metadata": {},
"source": [
"## Лабораторная 12\n",
"Классификация набора изображений\n",
"\n",
"Датасет: Vehicle Type Recognition (https://www.kaggle.com/datasets/kaggleashwin/vehicle-type-recognition)"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "e9ae4cde",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Всего классов: 4\n",
"Список классов: ['Bus', 'Car', 'motorcycle', 'Truck']\n"
]
}
],
"source": [
"import os\n",
"os.environ[\"KERAS_BACKEND\"] = \"jax\"\n",
"\n",
"from keras import backend as K\n",
"K.clear_session()\n",
"\n",
"dataset_path = \"static/Dataset\"\n",
"\n",
"classes = os.listdir(dataset_path)\n",
"num_classes = len(classes)\n",
"\n",
"print(f\"Всего классов: {num_classes}\")\n",
"print(\"Список классов:\", classes)"
]
},
{
"cell_type": "markdown",
"id": "74f394e5",
"metadata": {},
"source": [
"Предобработка изображений и разделение их на выборки"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8ada0d8d",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\User\\Desktop\\aim\\aimvenv\\Lib\\site-packages\\PIL\\Image.py:1056: UserWarning: Palette images with Transparency expressed in bytes should be converted to RGBA images\n",
" warnings.warn(\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размер обучающей выборки: (320, 224, 224, 3)\n",
"Размер тестовой выборки: (80, 224, 224, 3)\n",
"Количество классов: 4\n"
]
}
],
"source": [
"import numpy as np\n",
"from sklearn.model_selection import train_test_split\n",
"from keras.api.utils import to_categorical\n",
"from PIL import Image\n",
"\n",
"img_width, img_height = 224, 224\n",
"input_shape = (img_width, img_height, 3) \n",
"\n",
"X = []\n",
"y = []\n",
"\n",
"for i, image_class in enumerate(classes):\n",
" class_dir = os.path.join(dataset_path, image_class)\n",
" for img_file in os.listdir(class_dir):\n",
" try:\n",
" img_path = os.path.join(class_dir, img_file)\n",
" img = Image.open(img_path).convert('RGB')\n",
" img = img.resize((img_width, img_height))\n",
" img_array = np.array(img) / 255.0\n",
" X.append(img_array)\n",
" y.append(i)\n",
" except Exception as e:\n",
" print(f\"Ошибка при загрузке {img_path}: {e}\")\n",
"\n",
"X = np.array(X)\n",
"y = np.array(y)\n",
"\n",
"y = to_categorical(y, num_classes=num_classes)\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"\n",
"print(f\"Размер обучающей выборки: {X_train.shape}\")\n",
"print(f\"Размер тестовой выборки: {X_test.shape}\")\n",
"print(f\"Количество классов: {num_classes}\")"
]
},
{
"cell_type": "markdown",
"id": "5836cc5f",
"metadata": {},
"source": [
"Проектированию глубокой сверточной нейронной сети\n",
"\n",
"используем AlexNet"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3b721d71",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
Model: \"sequential\"\n",
"\n"
],
"text/plain": [
"\u001b[1mModel: \"sequential\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ conv2d (Conv2D) │ (None, 54, 54, 96) │ 34,944 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d (MaxPooling2D) │ (None, 26, 26, 96) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization │ (None, 26, 26, 96) │ 384 │\n",
"│ (BatchNormalization) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_1 (Conv2D) │ (None, 22, 22, 256) │ 614,656 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_1 (MaxPooling2D) │ (None, 10, 10, 256) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization_1 │ (None, 10, 10, 256) │ 1,024 │\n",
"│ (BatchNormalization) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_2 (Conv2D) │ (None, 8, 8, 256) │ 590,080 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_3 (Conv2D) │ (None, 6, 6, 384) │ 885,120 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_4 (Conv2D) │ (None, 4, 4, 384) │ 1,327,488 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_2 (MaxPooling2D) │ (None, 1, 1, 384) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization_2 │ (None, 1, 1, 384) │ 1,536 │\n",
"│ (BatchNormalization) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten (Flatten) │ (None, 384) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense (Dense) │ (None, 4096) │ 1,576,960 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout (Dropout) │ (None, 4096) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_1 (Dense) │ (None, 4096) │ 16,781,312 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_1 (Dropout) │ (None, 4096) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_2 (Dense) │ (None, 4) │ 16,388 │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ conv2d (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m54\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m34,944\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m26\u001b[0m, \u001b[38;5;34m96\u001b[0m) │ \u001b[38;5;34m384\u001b[0m │\n",
"│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_1 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m22\u001b[0m, \u001b[38;5;34m22\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m614,656\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_1 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m10\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m1,024\u001b[0m │\n",
"│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_2 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m8\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m590,080\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_3 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m6\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m885,120\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv2d_4 (\u001b[38;5;33mConv2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m4\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,327,488\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ max_pooling2d_2 (\u001b[38;5;33mMaxPooling2D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ batch_normalization_2 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m1\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m1,536\u001b[0m │\n",
"│ (\u001b[38;5;33mBatchNormalization\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m384\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m1,576,960\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_1 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m16,781,312\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_1 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4096\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_2 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m4\u001b[0m) │ \u001b[38;5;34m16,388\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Total params: 21,829,892 (83.27 MB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m21,829,892\u001b[0m (83.27 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Trainable params: 21,828,420 (83.27 MB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m21,828,420\u001b[0m (83.27 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Non-trainable params: 1,472 (5.75 KB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m1,472\u001b[0m (5.75 KB)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from keras.api.models import Sequential\n",
"from keras.api.layers import InputLayer, Conv2D, MaxPooling2D, Dropout, Flatten, Dense, BatchNormalization\n",
"\n",
"alexnet_model = Sequential()\n",
"\n",
"alexnet_model.add(InputLayer(shape=(224, 224, 3)))\n",
"\n",
"alexnet_model.add(Conv2D(96, kernel_size=(11, 11), strides=(4, 4), activation=\"relu\"))\n",
"alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n",
"alexnet_model.add(BatchNormalization())\n",
"\n",
"alexnet_model.add(Conv2D(256, kernel_size=(5, 5), activation=\"relu\"))\n",
"alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n",
"alexnet_model.add(BatchNormalization())\n",
"\n",
"alexnet_model.add(Conv2D(256, kernel_size=(3, 3), activation=\"relu\"))\n",
"\n",
"alexnet_model.add(Conv2D(384, kernel_size=(3, 3), activation=\"relu\"))\n",
"\n",
"alexnet_model.add(Conv2D(384, kernel_size=(3, 3), activation=\"relu\"))\n",
"alexnet_model.add(MaxPooling2D(pool_size=(3, 3), strides=(2, 2)))\n",
"alexnet_model.add(BatchNormalization())\n",
"\n",
"alexnet_model.add(Flatten())\n",
"alexnet_model.add(Dense(4096, activation=\"tanh\"))\n",
"alexnet_model.add(Dropout(0.5))\n",
"\n",
"alexnet_model.add(Dense(4096, activation=\"tanh\"))\n",
"alexnet_model.add(Dropout(0.5))\n",
"\n",
"alexnet_model.add(Dense(num_classes, activation=\"softmax\"))\n",
"\n",
"alexnet_model.summary()"
]
},
{
"cell_type": "markdown",
"id": "6aa2fc09",
"metadata": {},
"source": [
"Обучение модели"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "8d8931d2",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m11s\u001b[0m 935ms/step - accuracy: 0.3209 - loss: 5.4267 - val_accuracy: 0.2500 - val_loss: 9.1606\n",
"Epoch 2/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m8s\u001b[0m 700ms/step - accuracy: 0.3936 - loss: 7.7186 - val_accuracy: 0.2500 - val_loss: 10.6578\n",
"Epoch 3/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 696ms/step - accuracy: 0.3039 - loss: 7.0676 - val_accuracy: 0.2500 - val_loss: 2.7710\n",
"Epoch 4/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 702ms/step - accuracy: 0.3167 - loss: 2.5632 - val_accuracy: 0.2500 - val_loss: 1.7581\n",
"Epoch 5/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 693ms/step - accuracy: 0.2744 - loss: 2.0231 - val_accuracy: 0.2625 - val_loss: 1.7923\n",
"Epoch 6/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 693ms/step - accuracy: 0.2843 - loss: 1.9969 - val_accuracy: 0.2500 - val_loss: 1.6605\n",
"Epoch 7/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 694ms/step - accuracy: 0.2986 - loss: 1.8363 - val_accuracy: 0.2500 - val_loss: 1.4946\n",
"Epoch 8/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 695ms/step - accuracy: 0.3106 - loss: 1.8001 - val_accuracy: 0.2750 - val_loss: 1.6487\n",
"Epoch 9/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 683ms/step - accuracy: 0.3362 - loss: 1.7756 - val_accuracy: 0.2625 - val_loss: 1.4614\n",
"Epoch 10/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 696ms/step - accuracy: 0.2665 - loss: 1.9055 - val_accuracy: 0.2500 - val_loss: 1.9306\n",
"Epoch 11/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 684ms/step - accuracy: 0.3043 - loss: 1.9743 - val_accuracy: 0.2125 - val_loss: 2.4484\n",
"Epoch 12/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 683ms/step - accuracy: 0.3049 - loss: 1.9582 - val_accuracy: 0.2625 - val_loss: 1.4873\n",
"Epoch 13/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 691ms/step - accuracy: 0.4019 - loss: 1.6694 - val_accuracy: 0.2875 - val_loss: 1.9368\n",
"Epoch 14/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 697ms/step - accuracy: 0.4397 - loss: 1.5109 - val_accuracy: 0.2625 - val_loss: 2.7897\n",
"Epoch 15/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 689ms/step - accuracy: 0.3716 - loss: 1.7504 - val_accuracy: 0.4125 - val_loss: 1.5172\n",
"Epoch 16/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 697ms/step - accuracy: 0.4378 - loss: 1.5575 - val_accuracy: 0.3250 - val_loss: 1.8495\n",
"Epoch 17/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 700ms/step - accuracy: 0.4742 - loss: 1.5302 - val_accuracy: 0.3000 - val_loss: 2.0239\n",
"Epoch 18/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 687ms/step - accuracy: 0.4646 - loss: 1.3401 - val_accuracy: 0.3250 - val_loss: 1.5780\n",
"Epoch 19/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 700ms/step - accuracy: 0.4993 - loss: 1.3897 - val_accuracy: 0.2750 - val_loss: 1.9663\n",
"Epoch 20/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 704ms/step - accuracy: 0.5081 - loss: 1.5911 - val_accuracy: 0.2250 - val_loss: 2.3920\n",
"Epoch 21/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 690ms/step - accuracy: 0.4561 - loss: 1.4729 - val_accuracy: 0.3125 - val_loss: 2.1290\n",
"Epoch 22/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 689ms/step - accuracy: 0.5576 - loss: 1.4146 - val_accuracy: 0.2500 - val_loss: 1.8015\n",
"Epoch 23/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 688ms/step - accuracy: 0.4931 - loss: 1.2931 - val_accuracy: 0.2500 - val_loss: 2.0941\n",
"Epoch 24/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 685ms/step - accuracy: 0.5758 - loss: 1.1323 - val_accuracy: 0.3625 - val_loss: 1.7523\n",
"Epoch 25/100\n",
"\u001b[1m10/10\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m7s\u001b[0m 685ms/step - accuracy: 0.5046 - loss: 1.2337 - val_accuracy: 0.3750 - val_loss: 1.8958\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from keras.api.callbacks import EarlyStopping\n",
"\n",
"alexnet_model.compile(\n",
" loss=\"categorical_crossentropy\",\n",
" optimizer=\"adam\",\n",
" metrics=[\"accuracy\"]\n",
")\n",
"\n",
"early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)\n",
"alexnet_model.fit(\n",
" x=X_train, \n",
" y=y_train,\n",
" epochs=100,\n",
" validation_data=(X_test, y_test),\n",
" callbacks=[early_stop]\n",
")"
]
},
{
"cell_type": "markdown",
"id": "e2e95800",
"metadata": {},
"source": [
"Оценка качества"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "bf31ddc4",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m3/3\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 122ms/step - accuracy: 0.4289 - loss: 1.5600\n",
"Loss на тестовой выборке: 1.5172\n",
"Accuracy на тестовой выборке: 0.4125\n"
]
}
],
"source": [
"def evaluate_model(model, X_test, y_test):\n",
" loss, accuracy = model.evaluate(X_test, y_test)\n",
" print(f\"Loss на тестовой выборке: {loss:.4f}\")\n",
" print(f\"Accuracy на тестовой выборке: {accuracy:.4f}\")\n",
"\n",
"evaluate_model(alexnet_model, X_test, y_test)"
]
},
{
"cell_type": "markdown",
"id": "09ed1fcf",
"metadata": {},
"source": [
"Качество модели - 41.25% это хуже чем в 9 ЛР, но лучше чем в 11 ЛР"
]
},
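{
"cell_type": "markdown",
"id": "1a2b3c4d",
"metadata": {},
"source": [
"The accuracy figure alone hides which vehicle classes are confused. The sketch below (not executed here) computes a confusion matrix and per-class report; it assumes `alexnet_model`, `X_test`, `y_test` and `classes` still refer to the image data from the cells above."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2b3c4d5e",
"metadata": {},
"outputs": [],
"source": [
"# Sketch (not executed): per-class metrics for the image classifier above.\n",
"import numpy as np\n",
"from sklearn.metrics import classification_report, confusion_matrix\n",
"\n",
"y_pred = alexnet_model.predict(X_test, verbose=0).argmax(axis=1)\n",
"y_true = np.argmax(y_test, axis=1)\n",
"\n",
"print(confusion_matrix(y_true, y_pred))\n",
"print(classification_report(y_true, y_pred, target_names=classes, zero_division=0))"
]
},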
{
"cell_type": "markdown",
"id": "aaf81b1d",
"metadata": {},
"source": [
"Классификация текстов"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "7f079c87",
"metadata": {},
"outputs": [],
"source": [
"import spacy\n",
"\n",
"sp = spacy.load(\"ru_core_news_lg\")"
]
},
{
"cell_type": "markdown",
"id": "155e198a",
"metadata": {},
"source": [
"Загрузка текстов из файлов в датафрейм"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "09f95467",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" doc | \n",
" text | \n",
" type | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" tz_01.docx | \n",
" 2.2 Техническое задание\\n2.2.1 Общие сведения\\... | \n",
" 0 | \n",
"
\n",
" \n",
" | 1 | \n",
" tz_02.docx | \n",
" 2.2 Техническое задание\\n2.2.1 Общие сведения\\... | \n",
" 0 | \n",
"
\n",
" \n",
" | 2 | \n",
" tz_03.docx | \n",
" 2.2. Техническое задание\\nОбщие сведения:\\nВ д... | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" tz_04.docx | \n",
" Техническое задание\\n2.2.1 Общие сведения\\nИнт... | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" tz_05.docx | \n",
" 2.2 Техническое задание\\n2.2.1 Общие сведения.... | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" doc text type\n",
"0 tz_01.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0\n",
"1 tz_02.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0\n",
"2 tz_03.docx 2.2. Техническое задание\\nОбщие сведения:\\nВ д... 0\n",
"3 tz_04.docx Техническое задание\\n2.2.1 Общие сведения\\nИнт... 0\n",
"4 tz_05.docx 2.2 Техническое задание\\n2.2.1 Общие сведения.... 0"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" doc | \n",
" text | \n",
" type | \n",
"
\n",
" \n",
" \n",
" \n",
" | 36 | \n",
" Этапы разработки проекта2.docx | \n",
" Этапы разработки проекта: заключительные стади... | \n",
" 1 | \n",
"
\n",
" \n",
" | 37 | \n",
" Этапы разработки проекта3.docx | \n",
" Этапы разработки проекта: определение стратеги... | \n",
" 1 | \n",
"
\n",
" \n",
" | 38 | \n",
" Этапы разработки проекта4.docx | \n",
" Этапы разработки проекта: реализация, тестиров... | \n",
" 1 | \n",
"
\n",
" \n",
" | 39 | \n",
" Этапы разработки проекта5.docx | \n",
" Этапы разработки проекта: стратегия и анализ\\n... | \n",
" 1 | \n",
"
\n",
" \n",
" | 40 | \n",
" Язык манипуляции данными.docx | \n",
" 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... | \n",
" 1 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" doc \\\n",
"36 Этапы разработки проекта2.docx \n",
"37 Этапы разработки проекта3.docx \n",
"38 Этапы разработки проекта4.docx \n",
"39 Этапы разработки проекта5.docx \n",
"40 Язык манипуляции данными.docx \n",
"\n",
" text type \n",
"36 Этапы разработки проекта: заключительные стади... 1 \n",
"37 Этапы разработки проекта: определение стратеги... 1 \n",
"38 Этапы разработки проекта: реализация, тестиров... 1 \n",
"39 Этапы разработки проекта: стратегия и анализ\\n... 1 \n",
"40 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... 1 "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"from docx import Document\n",
"\n",
"def read_docx(file_path):\n",
" doc = Document(file_path)\n",
" full_text = []\n",
" for paragraph in doc.paragraphs:\n",
" full_text.append(paragraph.text)\n",
" return \"\\n\".join(full_text)\n",
"\n",
"def load_docs(dataset_path):\n",
" df = pd.DataFrame(columns=[\"doc\", \"text\"])\n",
" for file_path in os.listdir(dataset_path):\n",
" if file_path.startswith(\"~$\"):\n",
" continue\n",
" text = read_docx(dataset_path + file_path)\n",
" df.loc[len(df.index)] = [file_path, text]\n",
" return df\n",
"\n",
"df = load_docs(\"static/text/\")\n",
"df[\"type\"] = df.apply(lambda row: 0 if str(row[\"doc\"]).startswith(\"tz_\") else 1, axis=1)\n",
"df.sort_values(by=[\"doc\"], inplace=True)\n",
"\n",
"display(df.head(), df.tail())"
]
},
{
"cell_type": "markdown",
"id": "53819918",
"metadata": {},
"source": [
"Предобработка текста"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "c3009a5b",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" doc | \n",
" text | \n",
" type | \n",
" prep_text | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" tz_01.docx | \n",
" 2.2 Техническое задание\\n2.2.1 Общие сведения\\... | \n",
" 0 | \n",
" [[2.2, технический, задание, 2.2.1, общий, све... | \n",
"
\n",
" \n",
" | 1 | \n",
" tz_02.docx | \n",
" 2.2 Техническое задание\\n2.2.1 Общие сведения\\... | \n",
" 0 | \n",
" [[2.2, технический, задание, 2.2.1, общий, све... | \n",
"
\n",
" \n",
" | 2 | \n",
" tz_03.docx | \n",
" 2.2. Техническое задание\\nОбщие сведения:\\nВ д... | \n",
" 0 | \n",
" [[2.2], [технический, задание, общий, сведение... | \n",
"
\n",
" \n",
" | 3 | \n",
" tz_04.docx | \n",
" Техническое задание\\n2.2.1 Общие сведения\\nИнт... | \n",
" 0 | \n",
" [[технический, задание, 2.2.1, общий, сведение... | \n",
"
\n",
" \n",
" | 4 | \n",
" tz_05.docx | \n",
" 2.2 Техническое задание\\n2.2.1 Общие сведения.... | \n",
" 0 | \n",
" [[2.2, технический, задание, 2.2.1, общий, све... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" doc text type \\\n",
"0 tz_01.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0 \n",
"1 tz_02.docx 2.2 Техническое задание\\n2.2.1 Общие сведения\\... 0 \n",
"2 tz_03.docx 2.2. Техническое задание\\nОбщие сведения:\\nВ д... 0 \n",
"3 tz_04.docx Техническое задание\\n2.2.1 Общие сведения\\nИнт... 0 \n",
"4 tz_05.docx 2.2 Техническое задание\\n2.2.1 Общие сведения.... 0 \n",
"\n",
" prep_text \n",
"0 [[2.2, технический, задание, 2.2.1, общий, све... \n",
"1 [[2.2, технический, задание, 2.2.1, общий, све... \n",
"2 [[2.2], [технический, задание, общий, сведение... \n",
"3 [[технический, задание, 2.2.1, общий, сведение... \n",
"4 [[2.2, технический, задание, 2.2.1, общий, све... "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" doc | \n",
" text | \n",
" type | \n",
" prep_text | \n",
"
\n",
" \n",
" \n",
" \n",
" | 36 | \n",
" Этапы разработки проекта2.docx | \n",
" Этапы разработки проекта: заключительные стади... | \n",
" 1 | \n",
" [[этап, разработка, проект, заключительные, ст... | \n",
"
\n",
" \n",
" | 37 | \n",
" Этапы разработки проекта3.docx | \n",
" Этапы разработки проекта: определение стратеги... | \n",
" 1 | \n",
" [[этап, разработка, проект, определение, страт... | \n",
"
\n",
" \n",
" | 38 | \n",
" Этапы разработки проекта4.docx | \n",
" Этапы разработки проекта: реализация, тестиров... | \n",
" 1 | \n",
" [[этап_разработка, проект, реализация, тестиро... | \n",
"
\n",
" \n",
" | 39 | \n",
" Этапы разработки проекта5.docx | \n",
" Этапы разработки проекта: стратегия и анализ\\n... | \n",
" 1 | \n",
" [[этап, разработка_проект, стратегия, анализ, ... | \n",
"
\n",
" \n",
" | 40 | \n",
" Язык манипуляции данными.docx | \n",
" 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... | \n",
" 1 | \n",
" [[2.1.3], [язык, манипуляция, данными, ямд, яз... | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" doc \\\n",
"36 Этапы разработки проекта2.docx \n",
"37 Этапы разработки проекта3.docx \n",
"38 Этапы разработки проекта4.docx \n",
"39 Этапы разработки проекта5.docx \n",
"40 Язык манипуляции данными.docx \n",
"\n",
" text type \\\n",
"36 Этапы разработки проекта: заключительные стади... 1 \n",
"37 Этапы разработки проекта: определение стратеги... 1 \n",
"38 Этапы разработки проекта: реализация, тестиров... 1 \n",
"39 Этапы разработки проекта: стратегия и анализ\\n... 1 \n",
"40 2.1.3. Язык манипуляции данными (ЯМД)\\nЯзык ма... 1 \n",
"\n",
" prep_text \n",
"36 [[этап, разработка, проект, заключительные, ст... \n",
"37 [[этап, разработка, проект, определение, страт... \n",
"38 [[этап_разработка, проект, реализация, тестиро... \n",
"39 [[этап, разработка_проект, стратегия, анализ, ... \n",
"40 [[2.1.3], [язык, манипуляция, данными, ямд, яз... "
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from gensim.models.phrases import Phraser, Phrases\n",
"\n",
"def prep_text(text):\n",
" doc = sp(text)\n",
" lower_sents = []\n",
" for sent in doc.sents:\n",
" lower_sents.append([word.lemma_.lower() for word in sent if not word.is_punct and not word.is_stop and not word.is_space])\n",
" lower_bigram = Phraser(Phrases(lower_sents))\n",
" clean_sents = []\n",
" for sent in lower_sents:\n",
" clean_sents.append(lower_bigram[sent])\n",
" return clean_sents\n",
"\n",
"df[\"prep_text\"] = df.apply(lambda row: prep_text(row[\"text\"]), axis=1)\n",
"display(df.head(), df.tail())"
]
},
{
"cell_type": "markdown",
"id": "0fe192fb",
"metadata": {},
"source": [
"Векторизация текстовых данных"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "df567cd7",
"metadata": {},
"outputs": [],
"source": [
"from gensim.models.word2vec import Word2Vec\n",
"\n",
"word2vec = Word2Vec(\n",
" sentences=df[\"prep_text\"].explode().tolist(),\n",
" vector_size=64,\n",
" sg=1,\n",
" window=10,\n",
" epochs=5,\n",
" min_count=10,\n",
" workers=4,\n",
" seed=9,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c4d02e19",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[('pad', 0),\n",
" ('система', 1),\n",
" ('работа', 2),\n",
" ('требование', 3),\n",
" ('база', 4),\n",
" ('пользователь', 5),\n",
" ('разработка', 6),\n",
" ('модель', 7),\n",
" ('информация', 8),\n",
" ('субд', 9),\n",
" ('этап', 10),\n",
" ('ошибка', 11),\n",
" ('являться', 12),\n",
" ('функция', 13),\n",
" ('таблица', 14),\n",
" ('средство', 15),\n",
" ('проект', 16),\n",
" ('сервер', 17),\n",
" ('процесс', 18),\n",
" ('документ', 19)]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"word_to_id = word2vec.wv.key_to_index\n",
"word_to_id = {'pad': 0, **{k: v+1 for k, v in word2vec.wv.key_to_index.items()}}\n",
"list(word_to_id.items())[:20]"
]
},
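{
"cell_type": "markdown",
"id": "3c4d5e6f",
"metadata": {},
"source": [
"Note that the Word2Vec model above is only used to build the vocabulary; the Embedding layers below learn their weights from scratch. The sketch below (not executed here) shows how the trained vectors could instead be copied into a Keras Embedding layer, keeping row 0 as zeros for the `pad` token."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d5e6f7a",
"metadata": {},
"outputs": [],
"source": [
"# Sketch (not executed): initialise an Embedding layer with the trained Word2Vec vectors.\n",
"# Row 0 stays all zeros for 'pad'; words missing from word2vec.wv also keep zero vectors.\n",
"import numpy as np\n",
"from keras.api.layers import Embedding\n",
"\n",
"embedding_matrix = np.zeros((len(word_to_id), word2vec.vector_size))\n",
"for word, idx in word_to_id.items():\n",
"    if word in word2vec.wv:\n",
"        embedding_matrix[idx] = word2vec.wv[word]\n",
"\n",
"pretrained_embedding = Embedding(input_dim=len(word_to_id), output_dim=word2vec.vector_size)\n",
"pretrained_embedding.build((None,))  # create the weight matrix\n",
"pretrained_embedding.set_weights([embedding_matrix])"
]
},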
{
"cell_type": "markdown",
"id": "810ad794",
"metadata": {},
"source": [
"Преобразуем тексты в списки индексов"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "e58e8c22",
"metadata": {},
"outputs": [],
"source": [
"def text_to_ids(sentences, word_to_id):\n",
" flat_words = [word for sent in sentences for word in sent]\n",
" return [word_to_id.get(word, 0) for word in flat_words]\n",
"\n",
"df[\"ids\"] = df[\"prep_text\"].apply(lambda doc: text_to_ids(doc, word_to_id))"
]
},
{
"cell_type": "markdown",
"id": "cef5601c",
"metadata": {},
"source": [
"padding и truncating"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "db753071",
"metadata": {},
"outputs": [],
"source": [
"from keras.api.preprocessing.sequence import pad_sequences\n",
"\n",
"max_length = 1500\n",
"X = pad_sequences(df[\"ids\"].tolist(), maxlen=max_length, padding=\"pre\", truncating=\"pre\", value=0)"
]
},
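{
"cell_type": "markdown",
"id": "5e6f7a8b",
"metadata": {},
"source": [
"A small illustration (not executed here) of what `padding=\"pre\"` and `truncating=\"pre\"` do: short sequences are left-padded with the `pad` index 0, and sequences longer than `maxlen` lose tokens from the beginning."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f7a8b9c",
"metadata": {},
"outputs": [],
"source": [
"# Toy illustration of pre-padding and pre-truncating with maxlen=5\n",
"from keras.api.preprocessing.sequence import pad_sequences\n",
"\n",
"demo = [[1, 2, 3], [4, 5, 6, 7, 8, 9]]\n",
"print(pad_sequences(demo, maxlen=5, padding=\"pre\", truncating=\"pre\", value=0))\n",
"# Expected:\n",
"# [[0 0 1 2 3]\n",
"#  [5 6 7 8 9]]"
]
},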
{
"cell_type": "markdown",
"id": "7b42f1e6",
"metadata": {},
"source": [
"Тренировочная и тестовая выборки"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "220b7c2d",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[ 0, 0, 0, ..., 134, 108, 148],\n",
" [ 29, 0, 0, ..., 0, 0, 294],\n",
" [ 0, 0, 0, ..., 134, 108, 148],\n",
" ...,\n",
" [ 0, 45, 251, ..., 0, 225, 30],\n",
" [ 0, 0, 0, ..., 219, 0, 0],\n",
" [ 0, 0, 0, ..., 194, 134, 5]])"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.model_selection import train_test_split\n",
"\n",
"y = df[\"type\"].values\n",
"\n",
"X_train, X_test, y_train, y_test = train_test_split(\n",
" X, y, test_size=0.2, random_state=42, stratify=y\n",
")\n",
"X_train"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "bd8aa1d0",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1,\n",
" 0, 1, 1, 1, 0, 1, 0, 1, 0, 0], dtype=int64)"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"y_train"
]
},
{
"cell_type": "markdown",
"id": "ee8643c7",
"metadata": {},
"source": [
"Архитектура глубокой полносвязанной сети"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "faa6a6c8",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Model: \"sequential_1\"\n",
"\n"
],
"text/plain": [
"\u001b[1mModel: \"sequential_1\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding (Embedding) │ (None, 1500, 64) │ 66,496 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten_1 (Flatten) │ (None, 96000) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_3 (Dense) │ (None, 64) │ 6,144,064 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_2 (Dropout) │ (None, 64) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_4 (Dense) │ (None, 1) │ 65 │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ flatten_1 (\u001b[38;5;33mFlatten\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m96000\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_3 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m6,144,064\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_2 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_4 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m65\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Total params: 6,210,625 (23.69 MB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m6,210,625\u001b[0m (23.69 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Trainable params: 6,210,625 (23.69 MB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m6,210,625\u001b[0m (23.69 MB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Non-trainable params: 0 (0.00 B)\n",
"\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from keras.api.layers import Embedding\n",
"\n",
"vocab_size = len(word_to_id)\n",
"\n",
"model = Sequential()\n",
"model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n",
"\n",
"model.add(Embedding(input_dim=vocab_size, output_dim=64))\n",
"\n",
"model.add(Flatten())\n",
"\n",
"model.add(Dense(64, activation=\"relu\"))\n",
"model.add(Dropout(0.5))\n",
"\n",
"model.add(Dense(1, activation=\"sigmoid\"))\n",
"\n",
"model.summary()"
]
},
{
"cell_type": "markdown",
"id": "d5b1eb98",
"metadata": {},
"source": [
"Обучение модели"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "ba6a9986",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 605ms/step - accuracy: 0.3750 - loss: 0.7053 - val_accuracy: 0.4444 - val_loss: 0.8328\n",
"Epoch 2/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 359ms/step - accuracy: 0.6562 - loss: 0.5666 - val_accuracy: 0.7778 - val_loss: 0.5512\n",
"Epoch 3/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 31ms/step - accuracy: 0.8125 - loss: 0.3735 - val_accuracy: 0.7778 - val_loss: 0.5157\n",
"Epoch 4/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.9375 - loss: 0.1711 - val_accuracy: 0.7778 - val_loss: 0.5147\n",
"Epoch 5/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 47ms/step - accuracy: 0.9062 - loss: 0.2508 - val_accuracy: 0.7778 - val_loss: 0.5128\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.compile(\n",
" loss=\"binary_crossentropy\",\n",
" optimizer=\"adam\",\n",
" metrics=[\"accuracy\"],\n",
")\n",
"\n",
"model.fit(\n",
" X_train,\n",
" y_train,\n",
" batch_size=128,\n",
" epochs=5,\n",
" validation_data=(X_test, y_test)\n",
")"
]
},
{
"cell_type": "markdown",
"id": "025165b0",
"metadata": {},
"source": [
"Оценка качества"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "445a5445",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 16ms/step - accuracy: 0.7778 - loss: 0.5128\n",
"Loss на тестовой выборке: 0.5128\n",
"Accuracy на тестовой выборке: 0.7778\n"
]
}
],
"source": [
"evaluate_model(model, X_test, y_test)"
]
},
{
"cell_type": "markdown",
"id": "343261c5",
"metadata": {},
"source": [
"Сверточная сеть"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "c8636b3d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Model: \"sequential_2\"\n",
"\n"
],
"text/plain": [
"\u001b[1mModel: \"sequential_2\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_1 (Embedding) │ (None, 1500, 64) │ 66,496 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ spatial_dropout1d │ (None, 1500, 64) │ 0 │\n",
"│ (SpatialDropout1D) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv1d (Conv1D) │ (None, 1498, 256) │ 49,408 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ global_max_pooling1d │ (None, 256) │ 0 │\n",
"│ (GlobalMaxPooling1D) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_5 (Dense) │ (None, 256) │ 65,792 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_3 (Dropout) │ (None, 256) │ 0 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_6 (Dense) │ (None, 1) │ 257 │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_1 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ spatial_dropout1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"│ (\u001b[38;5;33mSpatialDropout1D\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ conv1d (\u001b[38;5;33mConv1D\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1498\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m49,408\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ global_max_pooling1d │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"│ (\u001b[38;5;33mGlobalMaxPooling1D\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_5 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m65,792\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dropout_3 (\u001b[38;5;33mDropout\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m256\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_6 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m257\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Total params: 181,953 (710.75 KB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m181,953\u001b[0m (710.75 KB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Trainable params: 181,953 (710.75 KB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m181,953\u001b[0m (710.75 KB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Non-trainable params: 0 (0.00 B)\n",
"\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from keras.api.layers import SpatialDropout1D, Conv1D, GlobalMaxPooling1D\n",
"\n",
"conv_model = Sequential()\n",
"conv_model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n",
"\n",
"conv_model.add(Embedding(input_dim=vocab_size, output_dim=64))\n",
"\n",
"conv_model.add(SpatialDropout1D(0.2))\n",
"\n",
"conv_model.add(Conv1D(filters=256, kernel_size=3, activation=\"relu\"))\n",
"conv_model.add(GlobalMaxPooling1D())\n",
"\n",
"conv_model.add(Dense(256, activation=\"relu\"))\n",
"conv_model.add(Dropout(0.3)) \n",
"\n",
"conv_model.add(Dense(1, activation=\"sigmoid\"))\n",
"\n",
"conv_model.summary()"
]
},
{
"cell_type": "markdown",
"id": "cc48d842",
"metadata": {},
"source": [
"Обучение модели"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "435886a6",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.5938 - loss: 0.6891 - val_accuracy: 0.4444 - val_loss: 0.6881\n",
"Epoch 2/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 656ms/step - accuracy: 0.5938 - loss: 0.6901 - val_accuracy: 1.0000 - val_loss: 0.6837\n",
"Epoch 3/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 187ms/step - accuracy: 0.6250 - loss: 0.6826 - val_accuracy: 1.0000 - val_loss: 0.6792\n",
"Epoch 4/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 125ms/step - accuracy: 0.7188 - loss: 0.6766 - val_accuracy: 0.8889 - val_loss: 0.6733\n",
"Epoch 5/5\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 156ms/step - accuracy: 0.6875 - loss: 0.6777 - val_accuracy: 1.0000 - val_loss: 0.6678\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"conv_model.compile(\n",
" loss=\"binary_crossentropy\",\n",
" optimizer=\"adam\",\n",
" metrics=[\"accuracy\"],\n",
")\n",
"\n",
"conv_model.fit(\n",
" X_train,\n",
" y_train,\n",
" batch_size=128,\n",
" epochs=5,\n",
" validation_data=(X_test, y_test)\n",
")"
]
},
{
"cell_type": "markdown",
"id": "7fdce8fa",
"metadata": {},
"source": [
"Оценка качества"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "b30ff838",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 39ms/step - accuracy: 1.0000 - loss: 0.6678\n",
"Loss на тестовой выборке: 0.6678\n",
"Accuracy на тестовой выборке: 1.0000\n"
]
}
],
"source": [
"evaluate_model(conv_model, X_test, y_test)"
]
},
{
"cell_type": "markdown",
"id": "fbbe8152",
"metadata": {},
"source": [
"Рекуррентная сеть"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "74751a0d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Model: \"sequential_3\"\n",
"\n"
],
"text/plain": [
"\u001b[1mModel: \"sequential_3\"\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃ Layer (type) ┃ Output Shape ┃ Param # ┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_2 (Embedding) │ (None, 1500, 64) │ 66,496 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ spatial_dropout1d_1 │ (None, 1500, 64) │ 0 │\n",
"│ (SpatialDropout1D) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ simple_rnn (SimpleRNN) │ (None, 128) │ 24,704 │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_7 (Dense) │ (None, 1) │ 129 │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n",
"\n"
],
"text/plain": [
"┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓\n",
"┃\u001b[1m \u001b[0m\u001b[1mLayer (type) \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1mOutput Shape \u001b[0m\u001b[1m \u001b[0m┃\u001b[1m \u001b[0m\u001b[1m Param #\u001b[0m\u001b[1m \u001b[0m┃\n",
"┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩\n",
"│ embedding_2 (\u001b[38;5;33mEmbedding\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m66,496\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ spatial_dropout1d_1 │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1500\u001b[0m, \u001b[38;5;34m64\u001b[0m) │ \u001b[38;5;34m0\u001b[0m │\n",
"│ (\u001b[38;5;33mSpatialDropout1D\u001b[0m) │ │ │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ simple_rnn (\u001b[38;5;33mSimpleRNN\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m128\u001b[0m) │ \u001b[38;5;34m24,704\u001b[0m │\n",
"├─────────────────────────────────┼────────────────────────┼───────────────┤\n",
"│ dense_7 (\u001b[38;5;33mDense\u001b[0m) │ (\u001b[38;5;45mNone\u001b[0m, \u001b[38;5;34m1\u001b[0m) │ \u001b[38;5;34m129\u001b[0m │\n",
"└─────────────────────────────────┴────────────────────────┴───────────────┘\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Total params: 91,329 (356.75 KB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Total params: \u001b[0m\u001b[38;5;34m91,329\u001b[0m (356.75 KB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Trainable params: 91,329 (356.75 KB)\n",
"\n"
],
"text/plain": [
"\u001b[1m Trainable params: \u001b[0m\u001b[38;5;34m91,329\u001b[0m (356.75 KB)\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" Non-trainable params: 0 (0.00 B)\n",
"\n"
],
"text/plain": [
"\u001b[1m Non-trainable params: \u001b[0m\u001b[38;5;34m0\u001b[0m (0.00 B)\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"from keras.api.layers import SimpleRNN\n",
"\n",
"rnn_model = Sequential()\n",
"rnn_model.add(InputLayer(shape=(max_length,), dtype=\"int32\"))\n",
"\n",
"rnn_model.add(Embedding(input_dim=vocab_size, output_dim=64))\n",
"\n",
"rnn_model.add(SpatialDropout1D(0.2))\n",
"\n",
"rnn_model.add(SimpleRNN(128, dropout=0.2, recurrent_dropout=0.2))\n",
"\n",
"rnn_model.add(Dense(1, activation=\"sigmoid\"))\n",
"\n",
"rnn_model.summary()"
]
},
{
"cell_type": "markdown",
"id": "44765c76",
"metadata": {},
"source": [
"Обучение модели"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "93f45751",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Epoch 1/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 1s/step - accuracy: 0.5000 - loss: 0.7441 - val_accuracy: 0.8889 - val_loss: 0.6012\n",
"Epoch 2/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m1s\u001b[0m 797ms/step - accuracy: 0.5312 - loss: 0.7488 - val_accuracy: 0.8889 - val_loss: 0.6016\n",
"Epoch 3/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5000 - loss: 0.7157 - val_accuracy: 0.8889 - val_loss: 0.6043\n",
"Epoch 4/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.6250 - loss: 0.6322 - val_accuracy: 0.8889 - val_loss: 0.6083\n",
"Epoch 5/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 234ms/step - accuracy: 0.5000 - loss: 0.6860 - val_accuracy: 0.8889 - val_loss: 0.6114\n",
"Epoch 6/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4062 - loss: 0.7382 - val_accuracy: 0.8889 - val_loss: 0.6095\n",
"Epoch 7/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6835 - val_accuracy: 0.8889 - val_loss: 0.6095\n",
"Epoch 8/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5312 - loss: 0.7079 - val_accuracy: 0.8889 - val_loss: 0.6057\n",
"Epoch 9/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 234ms/step - accuracy: 0.5000 - loss: 0.6995 - val_accuracy: 1.0000 - val_loss: 0.6025\n",
"Epoch 10/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4688 - loss: 0.7459 - val_accuracy: 0.8889 - val_loss: 0.6156\n",
"Epoch 11/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.7364 - val_accuracy: 1.0000 - val_loss: 0.5995\n",
"Epoch 12/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.7188 - loss: 0.6552 - val_accuracy: 0.8889 - val_loss: 0.6118\n",
"Epoch 13/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6433 - val_accuracy: 1.0000 - val_loss: 0.6036\n",
"Epoch 14/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5625 - loss: 0.7378 - val_accuracy: 1.0000 - val_loss: 0.6088\n",
"Epoch 15/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6532 - val_accuracy: 1.0000 - val_loss: 0.6128\n",
"Epoch 16/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5938 - loss: 0.6827 - val_accuracy: 0.8889 - val_loss: 0.6190\n",
"Epoch 17/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5312 - loss: 0.7088 - val_accuracy: 0.8889 - val_loss: 0.6045\n",
"Epoch 18/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.5625 - loss: 0.6746 - val_accuracy: 0.7778 - val_loss: 0.6072\n",
"Epoch 19/40\n",
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 219ms/step - accuracy: 0.4375 - loss: 0.7517 - val_accuracy: 0.7778 - val_loss: 0.6159\n"
]
},
{
"data": {
"text/plain": [
""
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"rnn_model.compile(\n",
" loss=\"binary_crossentropy\",\n",
" optimizer=\"adam\",\n",
" metrics=[\"accuracy\"],\n",
")\n",
"\n",
"early_stop = EarlyStopping(monitor='val_accuracy', patience=10, restore_best_weights=True)\n",
"rnn_model.fit(\n",
" X_train,\n",
" y_train,\n",
" batch_size=128,\n",
" epochs=40,\n",
" validation_data=(X_test, y_test),\n",
" callbacks=[early_stop]\n",
")"
]
},
{
"cell_type": "markdown",
"id": "ae9e9d80",
"metadata": {},
"source": [
"Оценка качества"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "8c567bef",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[1m1/1\u001b[0m \u001b[32m━━━━━━━━━━━━━━━━━━━━\u001b[0m\u001b[37m\u001b[0m \u001b[1m0s\u001b[0m 144ms/step - accuracy: 1.0000 - loss: 0.6025\n",
"Loss на тестовой выборке: 0.6025\n",
"Accuracy на тестовой выборке: 1.0000\n"
]
}
],
"source": [
"evaluate_model(rnn_model, X_test, y_test)"
]
},
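{
"cell_type": "markdown",
"id": "7a8b9c0d",
"metadata": {},
"source": [
"To classify a new document end to end, the same preprocessing steps have to be applied before calling a model. The sketch below (not executed here) is built from the helpers defined above; label 0 corresponds to the \"tz_\" specification documents and label 1 to the other texts, as in the labelling cell earlier."
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8b9c0d1e",
"metadata": {},
"outputs": [],
"source": [
"# Sketch (not executed): full text pipeline for a new document.\n",
"# Relies on prep_text, text_to_ids, word_to_id, max_length and pad_sequences from the cells above.\n",
"def classify_text(raw_text, model, threshold=0.5):\n",
"    sents = prep_text(raw_text)  # lemmatise, drop stop words, detect bigrams\n",
"    ids = text_to_ids(sents, word_to_id)  # map tokens to vocabulary indices\n",
"    padded = pad_sequences([ids], maxlen=max_length, padding=\"pre\", truncating=\"pre\", value=0)\n",
"    prob = float(model.predict(padded, verbose=0)[0][0])  # sigmoid estimate of P(label 1)\n",
"    return int(prob >= threshold), prob\n",
"\n",
"# Example usage with any of the trained text models:\n",
"# label, prob = classify_text(df[\"text\"].iloc[0], conv_model)"
]
},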
{
"cell_type": "markdown",
"id": "36c37a9c",
"metadata": {},
"source": [
"Лучший результат - полносвязанная сеть - 77,78%\n",
"\n",
"Сверточная сеть - 66,78%\n",
"\n",
"Рекуррентная сеть - 60,25%"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimvenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}