From 5fd49b30afc2662c80402fd5b2ddf29ca7360002 Mon Sep 17 00:00:00 2001 From: ElEgEv <112943269+ElEgEv@users.noreply.github.com> Date: Thu, 26 Oct 2023 16:05:43 +0400 Subject: [PATCH] Completed LabWork4. --- LabWork01/LabWork4/BloomFilter.py | 35 +-------------------------- LabWork01/LabWork4/SiteSearch.py | 3 +++ LabWork01/LoadDB.py | 38 ++++++++++++++++++++++-------- LabWork01/templates/findUrl.html | 16 +++++++++---- LabWork01/templates/showLinks.html | 19 +++++++++++++++ 5 files changed, 63 insertions(+), 48 deletions(-) create mode 100644 LabWork01/templates/showLinks.html diff --git a/LabWork01/LabWork4/BloomFilter.py b/LabWork01/LabWork4/BloomFilter.py index f3c0317..d34ceab 100644 --- a/LabWork01/LabWork4/BloomFilter.py +++ b/LabWork01/LabWork4/BloomFilter.py @@ -1,4 +1,3 @@ -import hashlib import mmh3 from bitarray import bitarray @@ -20,36 +19,4 @@ class BloomFilter(object): index = mmh3.hash(item, seed) % self.size if self.bit_array[index] == 0: return False - return True - - # def __init__(self, size, number_expected_elements=100000): - # self.size = size - # self.number_expected_elements = number_expected_elements - # - # self.bloom_filter = bitarray(self.size) - # self.bloom_filter.setall(0) - # - # self.number_hash_functions = round((self.size / self.number_expected_elements) * math.log(2)) - # - # # основная хеш-функция - # def _hash_djb2(self, s): - # hash = 5381 - # for x in s: - # hash = ((hash << 5) + hash) + ord(x) - # return hash % self.size - # - # # симулируем создания множества хеш-функций - # def _hash(self, item, K): - # return self._hash_djb2(str(K) + item) - # - # # добавление нового элемента в фильтр Блума - # def add_to_filter(self, item): - # for i in range(self.number_hash_functions): - # self.bloom_filter[self._hash(item, i)] = 1 - # - # # проверка н наличие элемента в фильторе Блума - # def check_is_not_in_filter(self, item): - # for i in range(self.number_hash_functions): - # if self.bloom_filter[self._hash(item, i)] == 0: - # return True - # return False \ No newline at end of file + return True \ No newline at end of file diff --git a/LabWork01/LabWork4/SiteSearch.py b/LabWork01/LabWork4/SiteSearch.py index 0052eca..b5a30ce 100644 --- a/LabWork01/LabWork4/SiteSearch.py +++ b/LabWork01/LabWork4/SiteSearch.py @@ -5,6 +5,7 @@ class SiteSearch: self.filter: BloomFilter = BloomFilter(100000, 5) self.keyword_urls: dict[str, list[str]] = {} + # для всех слов-ассоциаций проводим добавление в фильтр Блума и назначаем к каждому нужную ссылку на ресурс def add(self, url: str, keywords: list[str]) -> None: for keyword in keywords: lowercase_string = keyword.lower() @@ -13,6 +14,7 @@ class SiteSearch: self.keyword_urls[lowercase_string] = [] self.keyword_urls[lowercase_string].append(url) + # проводим поиск по слову-ключу в словаре def find_url(self, keyword: str) -> list[str]: lowercase_string = keyword.lower() if self.filter.contains(lowercase_string): @@ -20,6 +22,7 @@ class SiteSearch: else: return [] + # проверка на содержание слова-ключа в словаре def contains(self, keyword: str) -> list[str]: lowercase_string = keyword.lower() if self.filter.contains(lowercase_string): diff --git a/LabWork01/LoadDB.py b/LabWork01/LoadDB.py index 698fd19..6e0eec2 100644 --- a/LabWork01/LoadDB.py +++ b/LabWork01/LoadDB.py @@ -1,6 +1,6 @@ import os - -from flask import Flask, redirect, url_for, request, render_template +import secrets +from flask import Flask, redirect, url_for, request, render_template, session from matplotlib import pyplot as plt from LabWork01.AnalysCustomers import analysCustomersDataFrame @@ -16,6 +16,10 @@ from LabWork01.LabWork4.SiteSearch import SiteSearch app = Flask(__name__) +# Для работы session +secret = secrets.token_urlsafe(32) +app.secret_key = secret + #сразу загружаем весь док, чтобы потом просто прыгать по нему listShops = createDataFrame() @@ -30,6 +34,14 @@ search_engine = SiteSearch() search_engine.add("https://www.kaggle.com/datasets/ankanhore545/100-highest-valued-unicorns", ["Company", "Valuation", "Country", "State", "City", "Industries", "Founded Year", "Name of Founders", "Total Funding", "Number of Employees"]) search_engine.add("https://www.kaggle.com/datasets/ilyaryabov/tesla-insider-trading", ["Insider Trading", "Relationship", "Date", "Transaction", "Cost", "Shares", "Value", "Shares Total", "SEC Form 4"]) search_engine.add("https://www.kaggle.com/datasets/sameepvani/nasa-nearest-earth-objects", ["NASA", "est_diameter_min", "est_diameter_max", "relative_velocity", "miss_distance", "orbiting_body", "sentry_object", "absolute_magnitude", "hazardous"]) +search_engine.add("https://www.kaggle.com/datasets/surajjha101/stores-area-and-sales-data", ["Store", "Area", "Available", "Daily", "Customer", "Sales"]) +search_engine.add("https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database", ["Health", "Diabetes", "India"]) +search_engine.add("https://www.kaggle.com/datasets/mirichoi0218/insurance", ["age", "sex", "bmi"]) +search_engine.add("https://www.kaggle.com/datasets/muhammedtausif/world-population-by-countries", ["Country", "Population", "Continent", "Capital", "Yearly Change", "Land Area", "Fertility","Density"]) +search_engine.add("https://www.kaggle.com/datasets/deepcontractor/car-price-prediction-challenge", ["car", "price", "manufacturer"]) +search_engine.add("https://www.kaggle.com/datasets/surajjha101/forbes-billionaires-data-preprocessed", ["Name", "Networth", "Source"]) +search_engine.add("https://www.kaggle.com/datasets/fedesoriano/stroke-prediction-dataset", [ "heart_disease" , "bmi", "stroke" ]) + @app.route("/") def home(): @@ -151,20 +163,26 @@ def analysis(): @app.route('/findURL', methods=['GET']) def get_page_findURL(): - return render_template('findURL.html') + return render_template('findURL.html', find=False, notFind=False) @app.route('/findURL', methods=['POST']) def findURL(): word = request.form["word"] if (search_engine.contains(word)): - links = search_engine.find_url(word) - word_links = [] - for item in links: - word_links.append({item, word}) - print(word_links) + session["new_word"] = word + return render_template('findURL.html', find=True, notFind=False) + return render_template('findURL.html', find=False, notFind=True) - return render_template('findURL.html', word_links=word_links) - return render_template('findURL.html') +@app.route('/showFindLinks', methods=['POST']) +def get_page_showFindURL(): + word = session.get("new_word", None) + + links = search_engine.find_url(word) + word_links = [] + for item in links: + word_links.append({item, word}) + + return render_template('showLinks.html', links=links) if __name__=="__main__": app.run(debug=True) diff --git a/LabWork01/templates/findUrl.html b/LabWork01/templates/findUrl.html index 0212333..ac0824a 100644 --- a/LabWork01/templates/findUrl.html +++ b/LabWork01/templates/findUrl.html @@ -16,16 +16,24 @@
+
+ +
- +
- {% for link, word in word_links %} - {{ word }} - {% endfor %} + {% if find: %} +
+ +
+ {% endif %} + {% if notFind: %} +

Смысла нет

+ {% endif %}
diff --git a/LabWork01/templates/showLinks.html b/LabWork01/templates/showLinks.html new file mode 100644 index 0000000..8bf5b7a --- /dev/null +++ b/LabWork01/templates/showLinks.html @@ -0,0 +1,19 @@ + + + + + Найденные ссылки + + +
+
+ +
+
+
+ {% for link in links %} + {{ link }} + {% endfor %} +
+ + \ No newline at end of file