Completed LabWork4.
This commit is contained in:
parent
05a41ff145
commit
5fd49b30af
@ -1,4 +1,3 @@
|
|||||||
import hashlib
|
|
||||||
import mmh3
|
import mmh3
|
||||||
from bitarray import bitarray
|
from bitarray import bitarray
|
||||||
|
|
||||||
@ -20,36 +19,4 @@ class BloomFilter(object):
|
|||||||
index = mmh3.hash(item, seed) % self.size
|
index = mmh3.hash(item, seed) % self.size
|
||||||
if self.bit_array[index] == 0:
|
if self.bit_array[index] == 0:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# def __init__(self, size, number_expected_elements=100000):
|
|
||||||
# self.size = size
|
|
||||||
# self.number_expected_elements = number_expected_elements
|
|
||||||
#
|
|
||||||
# self.bloom_filter = bitarray(self.size)
|
|
||||||
# self.bloom_filter.setall(0)
|
|
||||||
#
|
|
||||||
# self.number_hash_functions = round((self.size / self.number_expected_elements) * math.log(2))
|
|
||||||
#
|
|
||||||
# # основная хеш-функция
|
|
||||||
# def _hash_djb2(self, s):
|
|
||||||
# hash = 5381
|
|
||||||
# for x in s:
|
|
||||||
# hash = ((hash << 5) + hash) + ord(x)
|
|
||||||
# return hash % self.size
|
|
||||||
#
|
|
||||||
# # симулируем создание множества хеш-функций
|
|
||||||
# def _hash(self, item, K):
|
|
||||||
# return self._hash_djb2(str(K) + item)
|
|
||||||
#
|
|
||||||
# # добавление нового элемента в фильтр Блума
|
|
||||||
# def add_to_filter(self, item):
|
|
||||||
# for i in range(self.number_hash_functions):
|
|
||||||
# self.bloom_filter[self._hash(item, i)] = 1
|
|
||||||
#
|
|
||||||
# # проверка на наличие элемента в фильтре Блума
|
|
||||||
# def check_is_not_in_filter(self, item):
|
|
||||||
# for i in range(self.number_hash_functions):
|
|
||||||
# if self.bloom_filter[self._hash(item, i)] == 0:
|
|
||||||
# return True
|
|
||||||
# return False
|
|
@ -5,6 +5,7 @@ class SiteSearch:
|
|||||||
self.filter: BloomFilter = BloomFilter(100000, 5)
|
self.filter: BloomFilter = BloomFilter(100000, 5)
|
||||||
self.keyword_urls: dict[str, list[str]] = {}
|
self.keyword_urls: dict[str, list[str]] = {}
|
||||||
|
|
||||||
|
# для всех слов-ассоциаций проводим добавление в фильтр Блума и назначаем к каждому нужную ссылку на ресурс
|
||||||
def add(self, url: str, keywords: list[str]) -> None:
|
def add(self, url: str, keywords: list[str]) -> None:
|
||||||
for keyword in keywords:
|
for keyword in keywords:
|
||||||
lowercase_string = keyword.lower()
|
lowercase_string = keyword.lower()
|
||||||
@ -13,6 +14,7 @@ class SiteSearch:
|
|||||||
self.keyword_urls[lowercase_string] = []
|
self.keyword_urls[lowercase_string] = []
|
||||||
self.keyword_urls[lowercase_string].append(url)
|
self.keyword_urls[lowercase_string].append(url)
|
||||||
|
|
||||||
|
# проводим поиск по слову-ключу в словаре
|
||||||
def find_url(self, keyword: str) -> list[str]:
|
def find_url(self, keyword: str) -> list[str]:
|
||||||
lowercase_string = keyword.lower()
|
lowercase_string = keyword.lower()
|
||||||
if self.filter.contains(lowercase_string):
|
if self.filter.contains(lowercase_string):
|
||||||
@ -20,6 +22,7 @@ class SiteSearch:
|
|||||||
else:
|
else:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
|
# проверка на содержание слова-ключа в словаре
|
||||||
def contains(self, keyword: str) -> list[str]:
|
def contains(self, keyword: str) -> list[str]:
|
||||||
lowercase_string = keyword.lower()
|
lowercase_string = keyword.lower()
|
||||||
if self.filter.contains(lowercase_string):
|
if self.filter.contains(lowercase_string):
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
import os
|
import os
|
||||||
|
import secrets
|
||||||
from flask import Flask, redirect, url_for, request, render_template
|
from flask import Flask, redirect, url_for, request, render_template, session
|
||||||
from matplotlib import pyplot as plt
|
from matplotlib import pyplot as plt
|
||||||
|
|
||||||
from LabWork01.AnalysCustomers import analysCustomersDataFrame
|
from LabWork01.AnalysCustomers import analysCustomersDataFrame
|
||||||
@ -16,6 +16,10 @@ from LabWork01.LabWork4.SiteSearch import SiteSearch
|
|||||||
|
|
||||||
app = Flask(__name__)
|
app = Flask(__name__)
|
||||||
|
|
||||||
|
# Для работы session
|
||||||
|
secret = secrets.token_urlsafe(32)
|
||||||
|
app.secret_key = secret
|
||||||
|
|
||||||
#сразу загружаем весь док, чтобы потом просто прыгать по нему
|
#сразу загружаем весь док, чтобы потом просто прыгать по нему
|
||||||
listShops = createDataFrame()
|
listShops = createDataFrame()
|
||||||
|
|
||||||
@ -30,6 +34,14 @@ search_engine = SiteSearch()
|
|||||||
search_engine.add("https://www.kaggle.com/datasets/ankanhore545/100-highest-valued-unicorns", ["Company", "Valuation", "Country", "State", "City", "Industries", "Founded Year", "Name of Founders", "Total Funding", "Number of Employees"])
|
search_engine.add("https://www.kaggle.com/datasets/ankanhore545/100-highest-valued-unicorns", ["Company", "Valuation", "Country", "State", "City", "Industries", "Founded Year", "Name of Founders", "Total Funding", "Number of Employees"])
|
||||||
search_engine.add("https://www.kaggle.com/datasets/ilyaryabov/tesla-insider-trading", ["Insider Trading", "Relationship", "Date", "Transaction", "Cost", "Shares", "Value", "Shares Total", "SEC Form 4"])
|
search_engine.add("https://www.kaggle.com/datasets/ilyaryabov/tesla-insider-trading", ["Insider Trading", "Relationship", "Date", "Transaction", "Cost", "Shares", "Value", "Shares Total", "SEC Form 4"])
|
||||||
search_engine.add("https://www.kaggle.com/datasets/sameepvani/nasa-nearest-earth-objects", ["NASA", "est_diameter_min", "est_diameter_max", "relative_velocity", "miss_distance", "orbiting_body", "sentry_object", "absolute_magnitude", "hazardous"])
|
search_engine.add("https://www.kaggle.com/datasets/sameepvani/nasa-nearest-earth-objects", ["NASA", "est_diameter_min", "est_diameter_max", "relative_velocity", "miss_distance", "orbiting_body", "sentry_object", "absolute_magnitude", "hazardous"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/surajjha101/stores-area-and-sales-data", ["Store", "Area", "Available", "Daily", "Customer", "Sales"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/uciml/pima-indians-diabetes-database", ["Health", "Diabetes", "India"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/mirichoi0218/insurance", ["age", "sex", "bmi"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/muhammedtausif/world-population-by-countries", ["Country", "Population", "Continent", "Capital", "Yearly Change", "Land Area", "Fertility","Density"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/deepcontractor/car-price-prediction-challenge", ["car", "price", "manufacturer"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/surajjha101/forbes-billionaires-data-preprocessed", ["Name", "Networth", "Source"])
|
||||||
|
search_engine.add("https://www.kaggle.com/datasets/fedesoriano/stroke-prediction-dataset", [ "heart_disease" , "bmi", "stroke" ])
|
||||||
|
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
def home():
|
def home():
|
||||||
@ -151,20 +163,26 @@ def analysis():
|
|||||||
|
|
||||||
@app.route('/findURL', methods=['GET'])
|
@app.route('/findURL', methods=['GET'])
|
||||||
def get_page_findURL():
|
def get_page_findURL():
|
||||||
return render_template('findURL.html')
|
return render_template('findURL.html', find=False, notFind=False)
|
||||||
|
|
||||||
@app.route('/findURL', methods=['POST'])
|
@app.route('/findURL', methods=['POST'])
|
||||||
def findURL():
|
def findURL():
|
||||||
word = request.form["word"]
|
word = request.form["word"]
|
||||||
if (search_engine.contains(word)):
|
if (search_engine.contains(word)):
|
||||||
links = search_engine.find_url(word)
|
session["new_word"] = word
|
||||||
word_links = []
|
return render_template('findURL.html', find=True, notFind=False)
|
||||||
for item in links:
|
return render_template('findURL.html', find=False, notFind=True)
|
||||||
word_links.append({item, word})
|
|
||||||
print(word_links)
|
|
||||||
|
|
||||||
return render_template('findURL.html', word_links=word_links)
|
@app.route('/showFindLinks', methods=['POST'])
|
||||||
return render_template('findURL.html')
|
def get_page_showFindURL():
|
||||||
|
word = session.get("new_word", None)
|
||||||
|
|
||||||
|
links = search_engine.find_url(word)
|
||||||
|
word_links = []
|
||||||
|
for item in links:
|
||||||
|
word_links.append({item, word})
|
||||||
|
|
||||||
|
return render_template('showLinks.html', links=links)
|
||||||
|
|
||||||
if __name__=="__main__":
|
if __name__=="__main__":
|
||||||
app.run(debug=True)
|
app.run(debug=True)
|
||||||
|
@ -16,16 +16,24 @@
|
|||||||
<div class="mb-5 mt-3">
|
<div class="mb-5 mt-3">
|
||||||
<label class="form-label">Поиск</label>
|
<label class="form-label">Поиск</label>
|
||||||
<div class="row">
|
<div class="row">
|
||||||
|
<div class="col" style="width: 100%">
|
||||||
|
<input type="text" name="word" placeholder="Слово">
|
||||||
|
</div>
|
||||||
<div class="col">
|
<div class="col">
|
||||||
<input class="form-control" type="text" name="word" id="word" placeholder="Слово">
|
<input type=submit value='Вывод диапазона'>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
<div class="">
|
<div class="">
|
||||||
{% for link, word in word_links %}
|
{% if find: %}
|
||||||
<a href="{{ link }}">{{ word }}</a>
|
<form action='/showFindLinks' method=post>
|
||||||
{% endfor %}
|
<input type=submit value='Показать результаты'>
|
||||||
|
</form>
|
||||||
|
{% endif %}
|
||||||
|
{% if notFind: %}
|
||||||
|
<h1>Смысла нет</h1>
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
19
LabWork01/templates/showLinks.html
Normal file
19
LabWork01/templates/showLinks.html
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
<!DOCTYPE html>
|
||||||
|
<html lang="en">
|
||||||
|
<head>
|
||||||
|
<meta charset="UTF-8">
|
||||||
|
<title>Найденные ссылки</title>
|
||||||
|
</head>
|
||||||
|
<body>
|
||||||
|
<form action="/findURL" method="get">
|
||||||
|
<div class="mb-3">
|
||||||
|
<button type="submit" class="btn btn-primary mb-3">Вернуться к фильтру</button>
|
||||||
|
</div>
|
||||||
|
</form>
|
||||||
|
<div class="">
|
||||||
|
{% for link in links %}
|
||||||
|
<a href="{{ link }}">{{ link }}</a>
|
||||||
|
{% endfor %}
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
Loading…
x
Reference in New Issue
Block a user