Так-то это полностью шуточный коммит: надо делать рефакторинг для записки. А в самом коммите просто добавил nltk.download для запуска на новых компьютерах.

This commit is contained in:
Sosees04ka 2024-06-21 01:04:42 +04:00
parent 0cfc3d36aa
commit 1c94583cab

View File

@@ -1,3 +1,4 @@
import nltk
import pandas as pd import pandas as pd
from keras.src.legacy.preprocessing.text import Tokenizer from keras.src.legacy.preprocessing.text import Tokenizer
from nltk import word_tokenize, SnowballStemmer from nltk import word_tokenize, SnowballStemmer
@@ -68,6 +69,9 @@ def process_and_save_tokenizer(dataset_path, save_path):
tokenizer_saver.save_tokenizer(save_path) tokenizer_saver.save_tokenizer(save_path)
def main(): def main():
nltk.download('stopwords')
nltk.download('punkt')
positive_dataset_path = '../dataset/filtered/filtered_dataset_positive.csv' positive_dataset_path = '../dataset/filtered/filtered_dataset_positive.csv'
negative_dataset_path = '../dataset/filtered/filtered_dataset_negative.csv' negative_dataset_path = '../dataset/filtered/filtered_dataset_negative.csv'
positive_tokenizer_path = './tokenizer_positive.pickle' positive_tokenizer_path = './tokenizer_positive.pickle'