Так-то фулл шутка коммит, надо делать рефакторинг для записки. А в самом коммите просто написал nltk.download для запуска на новых компах.
This commit is contained in:
parent
0cfc3d36aa
commit
1c94583cab
@ -1,3 +1,4 @@
|
|||||||
|
import nltk
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from keras.src.legacy.preprocessing.text import Tokenizer
|
from keras.src.legacy.preprocessing.text import Tokenizer
|
||||||
from nltk import word_tokenize, SnowballStemmer
|
from nltk import word_tokenize, SnowballStemmer
|
||||||
@ -68,6 +69,9 @@ def process_and_save_tokenizer(dataset_path, save_path):
|
|||||||
tokenizer_saver.save_tokenizer(save_path)
|
tokenizer_saver.save_tokenizer(save_path)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
|
nltk.download('stopwords')
|
||||||
|
nltk.download('punkt')
|
||||||
|
|
||||||
positive_dataset_path = '../dataset/filtered/filtered_dataset_positive.csv'
|
positive_dataset_path = '../dataset/filtered/filtered_dataset_positive.csv'
|
||||||
negative_dataset_path = '../dataset/filtered/filtered_dataset_negative.csv'
|
negative_dataset_path = '../dataset/filtered/filtered_dataset_negative.csv'
|
||||||
positive_tokenizer_path = './tokenizer_positive.pickle'
|
positive_tokenizer_path = './tokenizer_positive.pickle'
|
||||||
|
Loading…
Reference in New Issue
Block a user