"""Morphological statistics for a Russian text using Yandex Mystem.

The script lemmatises ``input.txt`` with :mod:`pymystem3`, drops function
words, counts feminine nouns and ranks adjacent lemma pairs (bigrams) by a
frequency-weighted mutual-information score.
"""

import math
import re
import time
from collections import Counter, defaultdict

import pymystem3

# Part-of-speech tags treated as stop words: prepositions, interjections,
# numerals and particles.
_STOP_POS = frozenset({'PR', 'INTJ', 'NUM', 'PART'})

# Mystem packs grammemes into one string such as
# "S,жен,неод=(вин,мн|род,ед|им,мн)"; these characters separate the tags.
_GR_SEPARATORS = re.compile(r'[,=()|]')


def _first_parse(token):
    """Return the first Mystem parse of *token*, or ``None`` if it has none.

    Tokens without an ``'analysis'`` key and tokens whose ``'analysis'`` list
    is empty both yield ``None``.
    """
    parses = token.get('analysis')
    return parses[0] if parses else None


def _grammemes(parse):
    """Split the ``'gr'`` string of *parse* into a list of grammeme tags.

    The first element, when present, is the part-of-speech tag.
    """
    return [tag for tag in _GR_SEPARATORS.split(parse.get('gr', '')) if tag]


def filter_stop_words(text):
    """Analyse *text* with Mystem and drop stop words and unparsed tokens.

    Args:
        text: Raw text to analyse.

    Returns:
        A list of Mystem token dicts (same order as in the text), keeping
        only tokens that have at least one parse and whose first parse is
        not a preposition, interjection, numeral or particle.
    """
    m = pymystem3.Mystem()
    kept = []
    for token in m.analyze(text):
        parse = _first_parse(token)
        if parse is None:
            continue
        tags = _grammemes(parse)
        # Compare the part-of-speech tag only: the full 'gr' string (e.g.
        # "PR=") never equals a bare tag, so matching it whole filters nothing.
        if tags and tags[0] in _STOP_POS:
            continue
        kept.append(token)
    return kept


def count_feminine_nouns(analysis):
    """Count tokens whose first parse is a feminine noun.

    Args:
        analysis: Iterable of Mystem token dicts.

    Returns:
        The number of tokens whose first parse has part of speech ``S`` and
        carries the ``жен`` grammeme. Tokens without a parse are ignored.
    """
    feminine_nouns = 0
    for token in analysis:
        parse = _first_parse(token)
        if parse is None:
            continue
        tags = _grammemes(parse)
        # Exact tag match: a substring test for 'S' would also accept
        # pronoun-nouns tagged 'SPRO'.
        if tags and tags[0] == 'S' and 'жен' in tags:
            feminine_nouns += 1
    return feminine_nouns


def find_significant_bigrams(analysis):
    """Rank adjacent lemma pairs by ``count * log2(count / expected)``.

    ``expected`` is ``count(w1) * count(w2) / N`` where ``N`` is the number
    of parsed tokens. Tokens without a parse are skipped, so the neighbours
    on either side of such a token are treated as adjacent.

    Args:
        analysis: Sequence of Mystem token dicts.

    Returns:
        A list of ``((lemma1, lemma2), score)`` tuples sorted by score in
        descending order; an empty list if there are fewer than two lemmas.
    """
    lemmas = []
    for token in analysis:
        parse = _first_parse(token)
        if parse is not None:
            lemmas.append(parse['lex'])

    # Count every lemma once up front instead of rescanning the token list
    # for each bigram.
    lemma_counts = Counter(lemmas)
    bigram_counts = Counter(zip(lemmas, lemmas[1:]))
    total = len(lemmas)

    significant_bigrams = []
    for bigram, count in bigram_counts.items():
        first, second = bigram
        expected_count = lemma_counts[first] * lemma_counts[second] / total
        mi = count * math.log2(count / expected_count)
        significant_bigrams.append((bigram, mi))
    return sorted(significant_bigrams, key=lambda item: item[1], reverse=True)


def main():
    """Read ``input.txt``, time the analysis and print the statistics."""
    with open('input.txt', encoding='utf8') as source:
        text = source.read()
    start = time.time()
    analysis = filter_stop_words(text)
    print(f"{time.time() - start} sec.")
    print(count_feminine_nouns(analysis))
    print(find_significant_bigrams(analysis))


if __name__ == '__main__':
    main()