From 1c94583cabeec56ffc8c73d54a49db0bf44552fa Mon Sep 17 00:00:00 2001
From: Sosees04ka <112947786+Sosees04ka@users.noreply.github.com>
Date: Fri, 21 Jun 2024 01:04:42 +0400
Subject: [PATCH] So this is basically a joke commit; the write-up still needs
 refactoring. In the commit itself I just added nltk.download so it runs on
 new machines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 neural_network/tokenization/tokenizer.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/neural_network/tokenization/tokenizer.py b/neural_network/tokenization/tokenizer.py
index 18c6c29..0d3c39a 100644
--- a/neural_network/tokenization/tokenizer.py
+++ b/neural_network/tokenization/tokenizer.py
@@ -1,3 +1,4 @@
+import nltk
 import pandas as pd
 from keras.src.legacy.preprocessing.text import Tokenizer
 from nltk import word_tokenize, SnowballStemmer
@@ -68,6 +69,9 @@ def process_and_save_tokenizer(dataset_path, save_path):
     tokenizer_saver.save_tokenizer(save_path)
 
 def main():
+    nltk.download('stopwords')
+    nltk.download('punkt')
+
     positive_dataset_path = '../dataset/filtered/filtered_dataset_positive.csv'
     negative_dataset_path = '../dataset/filtered/filtered_dataset_negative.csv'
     positive_tokenizer_path = './tokenizer_positive.pickle'
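As the subject admits, the unconditional nltk.download calls above hit the network check on every run of main(). A minimal guarded sketch of an alternative (the ensure_nltk_data helper is hypothetical, not part of this patch) would probe the local NLTK data path first and download only what is missing:

    import nltk

    def ensure_nltk_data():
        # nltk.data.find raises LookupError when a resource is absent,
        # so the download runs only on machines that actually need it.
        for resource, path in (("stopwords", "corpora/stopwords"),
                               ("punkt", "tokenizers/punkt")):
            try:
                nltk.data.find(path)
            except LookupError:
                nltk.download(resource)

Calling ensure_nltk_data() at the top of main() keeps the new-machine bootstrap behavior while making repeat runs offline-friendly.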