Merge pull request 'malkova_anastasia_lab_7 ready' (#165) from malkova_anastasia_lab_7 into main
Reviewed-on: http://student.git.athene.tech/Alexey/IIS_2023_1/pulls/165
Commit 3eb7a12e19
31
malkova_anastasia_lab_7/README.md
Normal file
@@ -0,0 +1,31 @@
# Laboratory Work No. 7

> Recurrent neural network and the text generation task
### How to run the lab

1. Install Python, conda, numpy, and torch
2. Run `python main.py` in the project root
### Technologies used

* Programming language: `python`
* Environment manager `conda`; libraries `numpy`, `torch`
* IDE: `PyCharm`
### What does the program do?

* Reads the training data from a file
* Builds a vocabulary of all characters and converts the text to indices (numbers), as illustrated in the sketch after this list
* Generates batches from the text
* Creates and trains a recurrent neural network (RNN)
* Generates new text
* Measures the loss
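A minimal sketch of the character-indexing step on a toy string (the string and names here are illustrative, not taken from the submission):

```python
from collections import Counter

text = "abracadabra"  # toy stand-in for the training text

# As in data.py, more frequent characters receive smaller indices
chars = [char for char, _ in sorted(Counter(text).items(), key=lambda x: x[1], reverse=True)]
char_to_idx = {char: index for index, char in enumerate(chars)}

print([char_to_idx[char] for char in text])  # the text as a sequence of indices
```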
#### Running

* On the first iterations the loss is high and, accordingly, the quality of the generated text is poor

![starter result](starter_result.png)

* After 10-15 minutes of training the loss came down to 0.6; at that loss the text quality is acceptable

![progress result](progress_result.png)
10
malkova_anastasia_lab_7/config.py
Normal file
@@ -0,0 +1,10 @@
SEQ_LEN = 256                # characters per training window
BATCH_SIZE = 16              # windows per batch
NUMBER_OF_LINES = 300        # number of training-text lines (not imported by the other modules shown)
PREDICTION_LEN = 200         # sample length printed during training
PREDICTION_LEN_START = 1000  # sample length for the final generation
N_EPOCHS = 50000             # training iterations
LOSS_AVG_MAX = 50            # iterations between loss reports / scheduler steps
HIDDEN_SIZE = 128            # LSTM hidden-state size
EMBEDDING_SIZE = 128         # character-embedding size
N_LAYERS = 2                 # stacked LSTM layers
23
malkova_anastasia_lab_7/data.py
Normal file
@@ -0,0 +1,23 @@
import numpy as np
from collections import Counter


def create_data():
    # Read the training text and join all lines into a single string
    with open('train_text.txt', encoding="utf8") as text_file:
        text_sample = text_file.readlines()
    text_sample = ' '.join(text_sample)

    return text_sample


def text_to_seq(text_sample=create_data()):
    # Note: the default argument is evaluated once, when the module is
    # imported, so train_text.txt is read at import time.
    char_counts = Counter(text_sample)
    char_counts = sorted(char_counts.items(), key=lambda x: x[1], reverse=True)

    sorted_chars = [char for char, _ in char_counts]

    # Character <-> index lookup tables; more frequent characters get smaller indices
    char_to_idx = {char: index for index, char in enumerate(sorted_chars)}
    idx_to_char = {v: k for k, v in char_to_idx.items()}
    sequence = np.array([char_to_idx[char] for char in text_sample])

    return sequence, char_to_idx, idx_to_char
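A quick round-trip check for `text_to_seq` (illustrative only; note that importing `data` already reads `train_text.txt` once, because the default argument is evaluated at import time):

```python
from data import text_to_seq

# Pass an explicit toy string instead of relying on the default
sequence, char_to_idx, idx_to_char = text_to_seq("abracadabra")

# Decoding the indices must reproduce the original text
assert ''.join(idx_to_char[i] for i in sequence) == "abracadabra"
```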
39
malkova_anastasia_lab_7/generation.py
Normal file
@@ -0,0 +1,39 @@
import torch
import torch.nn.functional as F
import numpy as np
from config import BATCH_SIZE, SEQ_LEN, PREDICTION_LEN


def get_batch(sequence):
    # Sample BATCH_SIZE random windows of SEQ_LEN characters;
    # the target is the input shifted forward by one character
    trains = []
    targets = []
    for _ in range(BATCH_SIZE):
        batch_start = np.random.randint(0, len(sequence) - SEQ_LEN)
        chunk = sequence[batch_start: batch_start + SEQ_LEN]
        train = torch.LongTensor(chunk[:-1]).view(-1, 1)
        target = torch.LongTensor(chunk[1:]).view(-1, 1)
        trains.append(train)
        targets.append(target)
    return torch.stack(trains, dim=0), torch.stack(targets, dim=0)


def evaluate(model, char_to_idx, idx_to_char, device, start_text=' ', prediction_len=PREDICTION_LEN, temp=0.3):
    # Warm up the hidden state on the start text
    hidden = model.init_hidden()
    idx_input = [char_to_idx[char] for char in start_text]
    train = torch.LongTensor(idx_input).view(-1, 1, 1).to(device)
    predicted_text = start_text

    _, hidden = model(train, hidden)

    inp = train[-1].view(-1, 1, 1)

    # Generate one character at a time, feeding each sample back in
    for i in range(prediction_len):
        output, hidden = model(inp.to(device), hidden)
        output_logits = output.cpu().data.view(-1)
        # Temperature-scaled softmax over the vocabulary
        p_next = F.softmax(output_logits / temp, dim=-1).detach().cpu().data.numpy()
        top_index = np.random.choice(len(char_to_idx), p=p_next)
        inp = torch.LongTensor([top_index]).view(-1, 1, 1).to(device)
        predicted_char = idx_to_char[top_index]
        predicted_text += predicted_char

    return predicted_text
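The `temp` parameter rescales the logits before the softmax; a standalone illustration with arbitrary toy logits of how a lower temperature concentrates the sampling distribution:

```python
import torch
import torch.nn.functional as F

logits = torch.tensor([2.0, 1.0, 0.5])  # arbitrary example logits

for temp in (1.0, 0.3):
    # Lower temperature -> sharper distribution -> more conservative sampling
    print(temp, F.softmax(logits / temp, dim=-1))
```

At `temp=1.0` the probabilities stay comparatively flat; at the `temp=0.3` used here, almost all of the mass moves to the largest logit.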
24
malkova_anastasia_lab_7/main.py
Normal file
@@ -0,0 +1,24 @@
from config import PREDICTION_LEN_START
from data import text_to_seq
from generation import evaluate
from train import create_parameters, training

if __name__ == '__main__':
    sequence, char_to_idx, idx_to_char = text_to_seq()
    criterion, scheduler, n_epochs, loss_avg, device, model, optimizer = create_parameters(idx_to_char)

    training(n_epochs, model, sequence, device, criterion, optimizer, loss_avg, scheduler, char_to_idx, idx_to_char)

    # Generate a long sample from the trained model
    model.eval()

    print(evaluate(
        model,
        char_to_idx,
        idx_to_char,
        device=device,
        temp=0.3,
        prediction_len=PREDICTION_LEN_START,
        start_text='. '
    ))
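One optional refinement, an assumption on my part rather than something the submission does: wrapping the final generation in `torch.no_grad()` so no gradient graph is built during sampling. A drop-in variant of the tail of `main.py`, reusing the names already in scope there:

```python
import torch

model.eval()
with torch.no_grad():  # gradients are not needed when only sampling
    print(evaluate(
        model,
        char_to_idx,
        idx_to_char,
        device=device,
        temp=0.3,
        prediction_len=PREDICTION_LEN_START,
        start_text='. '
    ))
```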
BIN
malkova_anastasia_lab_7/progress_result.png
Normal file
Binary file not shown. Size: 67 KiB
BIN
malkova_anastasia_lab_7/starter_result.png
Normal file
Binary file not shown. Size: 59 KiB
30
malkova_anastasia_lab_7/text_rnn.py
Normal file
@@ -0,0 +1,30 @@
import torch
import torch.nn as nn


class TextRNN(nn.Module):

    def __init__(self, input_size, hidden_size, embedding_size, device, n_layers=1):
        super(TextRNN, self).__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size
        self.embedding_size = embedding_size
        self.n_layers = n_layers
        self.device = device

        # char index -> embedding -> LSTM -> dropout -> logits over the vocabulary
        self.encoder = nn.Embedding(self.input_size, self.embedding_size)
        self.lstm = nn.LSTM(self.embedding_size, self.hidden_size, self.n_layers)
        self.dropout = nn.Dropout(0.2)
        self.fc = nn.Linear(self.hidden_size, self.input_size)

    def forward(self, x, hidden):
        # x: (seq_len, batch, 1); drop the trailing dim after embedding
        x = self.encoder(x).squeeze(2)
        out, (ht1, ct1) = self.lstm(x, hidden)
        out = self.dropout(out)
        x = self.fc(out)
        return x, (ht1, ct1)

    def init_hidden(self, batch_size=1):
        # Zero-initialised (h, c) pair for the LSTM
        return (torch.zeros(self.n_layers, batch_size, self.hidden_size, requires_grad=True).to(self.device),
                torch.zeros(self.n_layers, batch_size, self.hidden_size, requires_grad=True).to(self.device))
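A shape check for `TextRNN.forward` under the sequence-first layout it expects (the vocabulary size of 10 is an arbitrary stand-in; CPU only):

```python
import torch
from text_rnn import TextRNN

device = torch.device('cpu')
model = TextRNN(input_size=10, hidden_size=128, embedding_size=128, n_layers=2, device=device)

x = torch.randint(0, 10, (255, 16, 1))  # (seq_len, batch, 1), as get_batch yields after the permute in train.py
hidden = model.init_hidden(batch_size=16)
out, hidden = model(x, hidden)
print(out.shape)  # torch.Size([255, 16, 10]): per-step logits over the vocabulary
```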
58
malkova_anastasia_lab_7/train.py
Normal file
@@ -0,0 +1,58 @@
import torch
import torch.nn as nn
import numpy as np

from text_rnn import TextRNN
from config import BATCH_SIZE, N_EPOCHS, LOSS_AVG_MAX, HIDDEN_SIZE, EMBEDDING_SIZE, N_LAYERS
from generation import get_batch, evaluate


def create_parameters(idx_to_char):
    device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
    model = TextRNN(input_size=len(idx_to_char), hidden_size=HIDDEN_SIZE, embedding_size=EMBEDDING_SIZE, n_layers=N_LAYERS, device=device)
    model.to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-2, amsgrad=True)
    # Halve the learning rate when the mean loss stops improving
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
        optimizer,
        patience=5,
        verbose=True,
        factor=0.5
    )

    n_epochs = N_EPOCHS
    loss_avg = []

    return criterion, scheduler, n_epochs, loss_avg, device, model, optimizer


def check_loss(loss_avg, scheduler, model, char_to_idx, idx_to_char, device):
    # Every LOSS_AVG_MAX iterations: report the mean loss, step the
    # scheduler, and print a sample of generated text
    if len(loss_avg) >= LOSS_AVG_MAX:
        mean_loss = np.mean(loss_avg)
        print(f'Loss: {mean_loss}')
        scheduler.step(mean_loss)
        loss_avg = []
        model.eval()
        predicted_text = evaluate(model, char_to_idx, idx_to_char, device=device)
        print(predicted_text)
    return loss_avg


def training(n_epochs, model, sequence, device, criterion, optimizer, loss_avg, scheduler, char_to_idx, idx_to_char):
    for epoch in range(n_epochs):
        model.train()
        train, target = get_batch(sequence)
        # Reorder to (seq_len, batch, 1), the layout the LSTM expects
        train = train.permute(1, 0, 2).to(device)
        target = target.permute(1, 0, 2).to(device)
        hidden = model.init_hidden(BATCH_SIZE)

        output, hidden = model(train, hidden)
        loss = criterion(output.permute(1, 2, 0), target.squeeze(-1).permute(1, 0))

        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        loss_avg.append(loss.item())
        loss_avg = check_loss(loss_avg, scheduler, model, char_to_idx, idx_to_char, device)
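The double `permute` in the loss line encodes `nn.CrossEntropyLoss`'s shape contract: logits as `(batch, classes, seq_len)` against targets as `(batch, seq_len)`. A standalone illustration with arbitrary toy sizes:

```python
import torch
import torch.nn as nn

criterion = nn.CrossEntropyLoss()

seq_len, batch, vocab = 255, 16, 10                    # arbitrary toy sizes
output = torch.randn(seq_len, batch, vocab)            # model output: (seq_len, batch, vocab)
target = torch.randint(0, vocab, (seq_len, batch, 1))  # shifted targets: (seq_len, batch, 1)

loss = criterion(output.permute(1, 2, 0),              # -> (batch, vocab, seq_len)
                 target.squeeze(-1).permute(1, 0))     # -> (batch, seq_len)
print(loss.item())
```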
1143
malkova_anastasia_lab_7/train_text.txt
Normal file
File diff suppressed because it is too large