Так-то это полностью шуточный коммит: надо делать рефакторинг для записки. А в самом коммите просто добавлен вызов nltk.download для запуска на новых компьютерах.
This commit is contained in:
parent
0cfc3d36aa
commit
1c94583cab
@ -1,3 +1,4 @@
|
||||
import nltk
|
||||
import pandas as pd
|
||||
from keras.src.legacy.preprocessing.text import Tokenizer
|
||||
from nltk import word_tokenize, SnowballStemmer
|
||||
@ -68,6 +69,9 @@ def process_and_save_tokenizer(dataset_path, save_path):
|
||||
tokenizer_saver.save_tokenizer(save_path)
|
||||
|
||||
def main():
|
||||
nltk.download('stopwords')
|
||||
nltk.download('punkt')
|
||||
|
||||
positive_dataset_path = '../dataset/filtered/filtered_dataset_positive.csv'
|
||||
negative_dataset_path = '../dataset/filtered/filtered_dataset_negative.csv'
|
||||
positive_tokenizer_path = './tokenizer_positive.pickle'
|
||||
|
Loading…
Reference in New Issue
Block a user