from flask import Flask, render_template, request
from keras.models import Sequential
from keras.layers import LSTM, Dense, Embedding
from keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences
import numpy as np

app = Flask(__name__)


def load_text_data(file_path):
    """Read a text file and return its contents lower-cased."""
    with open(file_path, 'r', encoding='utf-8') as file:
        text = file.read().lower()
    return text


def create_sequences(tokens, sequence_length=100):
    """Slide a window over the token list, yielding sequences of
    sequence_length input tokens plus one target token."""
    sequences = []
    for i in range(sequence_length, len(tokens)):
        seq = tokens[i - sequence_length:i + 1]
        sequences.append(seq)
    return sequences


def prepare_data(sequences):
    """Split each sequence into input tokens (all but the last)
    and the target token (the last one)."""
    input_sequences = []
    output_sequences = []
    for sequence in sequences:
        input_sequences.append(sequence[:-1])
        output_sequences.append(sequence[-1])
    x = np.array(input_sequences)
    y = np.array(output_sequences)
    return x, y


def build_model(total_words):
    """Build a two-layer LSTM language model over the vocabulary."""
    model = Sequential()
    model.add(Embedding(total_words, 50, input_length=100, trainable=True))
    model.add(LSTM(100, return_sequences=True))
    model.add(LSTM(100))
    model.add(Dense(100, activation='relu'))
    model.add(Dense(total_words, activation='softmax'))
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    return model


def generate_text(seed_text, model, tokenizer, num_words=100, temperature=1.0):
    """Extend seed_text by num_words words, sampling each next word from
    the model's softmax output at the given temperature."""
    for _ in range(num_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        # Keep the most recent 100 tokens so the context matches the model's
        # input length (truncating='pre' drops the oldest words, not the newest).
        token_list = pad_sequences([token_list], maxlen=100,
                                   padding='pre', truncating='pre')[0]
        predicted_probs = model.predict(np.array([token_list]), verbose=0)[0]
        predicted_probs = np.asarray(predicted_probs).astype('float64')

        if temperature > 0:
            # Apply temperature scaling in log space, renormalize, then sample.
            predicted_probs = np.exp(np.log(predicted_probs + 1e-10) / temperature)
            predicted_probs /= np.sum(predicted_probs)
            predicted_id = np.argmax(np.random.multinomial(1, predicted_probs, 1))
        else:
            # Zero temperature: greedy decoding, always take the top word.
            predicted_id = np.argmax(predicted_probs)

        # Index 0 is reserved for padding and has no word, so fall back to ''.
        output_word = tokenizer.index_word.get(predicted_id, '')
        seed_text += " " + output_word
    return seed_text


# Load and tokenize the Russian corpus.
russian_text = load_text_data('russian_text.txt')
tokenizer_russian = Tokenizer()
tokenizer_russian.fit_on_texts([russian_text])
total_words_russian = len(tokenizer_russian.word_index) + 1

# Build sliding-window training sequences from the Russian text.
russian_sequences = create_sequences(
    tokenizer_russian.texts_to_sequences([russian_text])[0])
russian_x, russian_y = prepare_data(russian_sequences)

# Build and train the Russian model.
model_russian = build_model(total_words_russian)
model_russian.fit(russian_x, russian_y, epochs=5, batch_size=32)

# Load and tokenize the English corpus.
english_text = load_text_data('english_text.txt')
tokenizer_english = Tokenizer()
tokenizer_english.fit_on_texts([english_text])
total_words_english = len(tokenizer_english.word_index) + 1

# Build sliding-window training sequences from the English text.
english_sequences = create_sequences(
    tokenizer_english.texts_to_sequences([english_text])[0])
english_x, english_y = prepare_data(english_sequences)

# Build and train the English model.
model_english = build_model(total_words_english)
model_english.fit(english_x, english_y, epochs=5, batch_size=32)


@app.route('/')
def index():
    return render_template('index.html')


@app.route('/generate', methods=['POST'])
def generate():
    seed_text = request.form['seed_text']
    language = request.form['language']
    temperature = float(request.form['temperature'])
    if language == 'russian':
        generated_text = generate_text(seed_text, model_russian,
                                       tokenizer_russian,
                                       temperature=temperature)
    else:
        generated_text = generate_text(seed_text, model_english,
                                       tokenizer_english,
                                       temperature=temperature)
    return render_template('index.html', seed_text=seed_text,
                           generated_text=generated_text)


if __name__ == '__main__':
    app.run(debug=True)
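
# The routes above render 'index.html', which is not included in this file.
# Below is a minimal sketch of what templates/index.html could look like; it
# is an assumption for illustration, built only from the form fields the
# /generate route reads and the variables it passes back:
#
#   <form action="/generate" method="post">
#     <input name="seed_text" value="{{ seed_text or '' }}">
#     <select name="language">
#       <option value="russian">Russian</option>
#       <option value="english">English</option>
#     </select>
#     <input name="temperature" value="1.0">
#     <button type="submit">Generate</button>
#   </form>
#   <p>{{ generated_text }}</p>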
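
# A minimal client sketch for exercising the /generate route without a
# browser, assuming the app is running locally on Flask's default port 5000.
# It posts the same three form fields the route reads. Run the app first,
# then execute this snippet in a separate process:
#
#   import requests
#   response = requests.post(
#       'http://127.0.0.1:5000/generate',
#       data={'seed_text': 'once upon a time',
#             'language': 'english',
#             'temperature': '0.8'},
#   )
#   print(response.text)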