112 lines
4.4 KiB
Python
112 lines
4.4 KiB
Python
|
from flask import Flask, render_template, request
|
||
|
from keras.models import Sequential
|
||
|
from keras.layers import LSTM, Dense, Embedding
|
||
|
from keras.preprocessing.text import Tokenizer
|
||
|
from keras.preprocessing.sequence import pad_sequences
|
||
|
import numpy as np
|
||
|
|
||
|
# Flask application instance serving the text-generation UI.
app = Flask(__name__)
def load_text_data(file_path):
    """Read a UTF-8 text file and return its contents lower-cased."""
    with open(file_path, encoding='utf-8') as handle:
        return handle.read().lower()
|
||
|
def create_sequences(text, sequence_length=100):
    """Slide a window of ``sequence_length + 1`` items over *text*.

    Each produced slice holds ``sequence_length`` context items plus the
    item that follows them (the prediction target). Returns an empty list
    when *text* is shorter than the window.
    """
    window = sequence_length + 1
    return [text[end - window:end] for end in range(window, len(text) + 1)]
|
||
|
def prepare_data(sequences):
    """Split each sequence into (context, target) and stack as numpy arrays.

    For every sequence longer than one item, the context is everything but
    the last element and the target is the last element. Returns ``(x, y)``.
    """
    inputs = [seq[:-1] for seq in sequences if len(seq) > 1]
    targets = [seq[-1] for seq in sequences if len(seq) > 1]
    return np.array(inputs), np.array(targets)
|
||
|
def build_model(total_words):
    """Assemble and compile a word-level LSTM language model.

    Architecture: Embedding -> two stacked LSTMs -> ReLU Dense -> softmax
    over the vocabulary of size *total_words*. Targets are integer word
    ids, hence the sparse categorical loss.
    """
    layers = [
        Embedding(total_words, 50, input_length=100, trainable=True),
        LSTM(100, return_sequences=True),
        LSTM(100),
        Dense(100, activation='relu'),
        Dense(total_words, activation='softmax'),
    ]
    model = Sequential(layers)
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model
|
||
|
def generate_text(seed_text, model, tokenizer, max_sequence_len=100, temperature=1.0):
    """Append up to *max_sequence_len* generated words to *seed_text*.

    Args:
        seed_text: starting text; generated words are appended to it.
        model: model exposing ``predict`` that returns a probability
            distribution over token ids for a batch of padded sequences.
        tokenizer: tokenizer exposing ``texts_to_sequences`` and
            ``index_word`` (id -> word; id 0 is the padding id).
        max_sequence_len: number of words to generate.
        temperature: softmax temperature; 0 means greedy (argmax) decoding,
            values < 1 sharpen the distribution, values > 1 flatten it.

    Returns:
        The seed text with the generated words appended.
    """
    for _ in range(max_sequence_len):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]

        # Pre-pad / post-truncate to the model's fixed input length of 100
        # (matches Embedding(input_length=100) in build_model). Done inline
        # with numpy instead of the deprecated keras.preprocessing
        # pad_sequences(padding='pre', truncating='post').
        token_list = token_list[:100]
        padded = np.zeros(100, dtype=np.int64)
        if token_list:
            padded[-len(token_list):] = token_list

        predicted_probs = model.predict(np.array([padded]), verbose=0)[0]
        predicted_probs = np.asarray(predicted_probs).astype('float64')

        if temperature != 0:
            # Bug fix: the original accepted `temperature` but never applied
            # it. Rescale the log-probabilities by 1/temperature; at the
            # default T=1 the distribution is (numerically) unchanged.
            logits = np.log(predicted_probs + 1e-10) / temperature
            scaled = np.exp(logits - np.max(logits))
            scaled /= np.sum(scaled)
            predicted_id = int(np.argmax(np.random.multinomial(1, scaled, 1)))
        else:
            # temperature == 0: deterministic greedy decoding.
            predicted_id = int(np.argmax(predicted_probs))

        # Id 0 is the padding index and has no entry in index_word; the
        # original raised KeyError here — stop generating instead.
        output_word = tokenizer.index_word.get(predicted_id)
        if output_word is None:
            break
        seed_text += " " + output_word
    return seed_text
|
||
|
# --- Russian model: corpus load, data prep, and training ---
# NOTE(review): this trains at import time on every app start, which is
# slow — consider persisting the trained weights and loading them instead.

# Load and tokenize the Russian corpus.
russian_text = load_text_data('russian_text.txt')
tokenizer_russian = Tokenizer()
tokenizer_russian.fit_on_texts([russian_text])
# +1 because Keras reserves token id 0 for padding.
total_words_russian = len(tokenizer_russian.word_index) + 1

# Build sliding-window (context, next-word) sequences over the token ids.
russian_sequences = create_sequences(tokenizer_russian.texts_to_sequences([russian_text])[0])
russian_x, russian_y = prepare_data(russian_sequences)

# Build and train the model for Russian text.
model_russian = build_model(total_words_russian)
model_russian.fit(russian_x, russian_y, epochs=5, batch_size=32)
|
||
|
# --- English model: corpus load, data prep, and training ---
# NOTE(review): like the Russian model above, this trains at import time
# on every app start — consider saving/loading weights instead.

# Load and tokenize the English corpus.
english_text = load_text_data('english_text.txt')
tokenizer_english = Tokenizer()
tokenizer_english.fit_on_texts([english_text])
# +1 because Keras reserves token id 0 for padding.
total_words_english = len(tokenizer_english.word_index) + 1

# Build sliding-window (context, next-word) sequences over the token ids.
english_sequences = create_sequences(tokenizer_english.texts_to_sequences([english_text])[0])
english_x, english_y = prepare_data(english_sequences)

# Build and train the model for English text.
model_english = build_model(total_words_english)
model_english.fit(english_x, english_y, epochs=5, batch_size=32)
|
@app.route('/')
def index():
    """Serve the main page containing the text-generation form."""
    return render_template('index.html')
|
||
|
@app.route('/generate', methods=['POST'])
def generate():
    """Handle a POST generation request.

    Reads the seed text, target language, and sampling temperature from
    the form, runs the matching language model, and re-renders the page
    with the generated continuation.
    """
    seed_text = request.form['seed_text']
    language = request.form['language']
    temperature = float(request.form['temperature'])

    # Pick the model/tokenizer pair for the requested language; anything
    # other than 'russian' falls back to the English model.
    if language == 'russian':
        selected_model, selected_tokenizer = model_russian, tokenizer_russian
    else:
        selected_model, selected_tokenizer = model_english, tokenizer_english

    generated_text = generate_text(
        seed_text, selected_model, selected_tokenizer, temperature=temperature
    )
    return render_template('index.html', seed_text=seed_text, generated_text=generated_text)
|
||
|
if __name__ == '__main__':
    # NOTE(review): debug=True enables the interactive Werkzeug debugger —
    # development only; never expose it in production.
    app.run(debug=True)