Спасибо Коле и ребятам
This commit is contained in:
parent
630cf8a4cd
commit
05a41ff145
55
LabWork01/LabWork4/BloomFilter.py
Normal file
55
LabWork01/LabWork4/BloomFilter.py
Normal file
@ -0,0 +1,55 @@
|
||||
import hashlib
|
||||
import mmh3
|
||||
from bitarray import bitarray
|
||||
|
||||
class BloomFilter:
    """Bloom filter backed by a ``bitarray`` and seeded ``mmh3`` hashes.

    Membership answers are probabilistic: ``contains`` may report an item
    that was never added (a false positive), but an item that was added is
    always reported, because bits are only ever set and never cleared.
    """

    def __init__(self, size, hash_count):
        """Create an empty filter.

        :param size: number of bits in the underlying bit array.
        :param hash_count: number of bit positions computed per item, one
            for each seed in ``range(hash_count)``.
        """
        self.size = size
        self.hash_count = hash_count
        bits = bitarray(size)
        bits.setall(0)  # start with every bit cleared
        self.bit_array = bits

    def _positions(self, item):
        """Yield the bit index of *item* for every seed."""
        for seed in range(self.hash_count):
            yield mmh3.hash(item, seed) % self.size

    def add(self, item):
        """Mark *item* as present by setting each of its bits."""
        for position in self._positions(item):
            self.bit_array[position] = 1

    def contains(self, item):
        """Return ``True`` if every bit of *item* is set, else ``False``."""
        return all(self.bit_array[position] for position in self._positions(item))
|
||||
|
||||
# def __init__(self, size, number_expected_elements=100000):
|
||||
# self.size = size
|
||||
# self.number_expected_elements = number_expected_elements
|
||||
#
|
||||
# self.bloom_filter = bitarray(self.size)
|
||||
# self.bloom_filter.setall(0)
|
||||
#
|
||||
# self.number_hash_functions = round((self.size / self.number_expected_elements) * math.log(2))
|
||||
#
|
||||
# # основная хеш-функция
|
||||
# def _hash_djb2(self, s):
|
||||
# hash = 5381
|
||||
# for x in s:
|
||||
# hash = ((hash << 5) + hash) + ord(x)
|
||||
# return hash % self.size
|
||||
#
|
||||
# # симулируем создание множества хеш-функций
|
||||
# def _hash(self, item, K):
|
||||
# return self._hash_djb2(str(K) + item)
|
||||
#
|
||||
# # добавление нового элемента в фильтр Блума
|
||||
# def add_to_filter(self, item):
|
||||
# for i in range(self.number_hash_functions):
|
||||
# self.bloom_filter[self._hash(item, i)] = 1
|
||||
#
|
||||
# # проверка на наличие элемента в фильтре Блума
|
||||
# def check_is_not_in_filter(self, item):
|
||||
# for i in range(self.number_hash_functions):
|
||||
# if self.bloom_filter[self._hash(item, i)] == 0:
|
||||
# return True
|
||||
# return False
|
28
LabWork01/LabWork4/SiteSearch.py
Normal file
28
LabWork01/LabWork4/SiteSearch.py
Normal file
@ -0,0 +1,28 @@
|
||||
from LabWork01.LabWork4.BloomFilter import BloomFilter
|
||||
|
||||
class SiteSearch:
    """Case-insensitive keyword-to-URL index guarded by a Bloom filter.

    Keywords are lower-cased on insertion and on lookup. The Bloom filter
    gives a quick negative answer; the dictionary holds the actual URLs.
    """

    def __init__(self):
        """Create an empty index with a 100000-bit filter and 5 hashes."""
        self.filter: BloomFilter = BloomFilter(100000, 5)
        # Maps a lower-cased keyword to the URLs registered for it.
        self.keyword_urls: dict[str, list[str]] = {}

    def add(self, url: str, keywords: list[str]) -> None:
        """Register *url* under every keyword in *keywords*.

        :param url: address to return when one of the keywords is searched.
        :param keywords: words describing the URL; case is ignored.
        """
        for keyword in keywords:
            key = keyword.lower()
            self.filter.add(key)
            self.keyword_urls.setdefault(key, []).append(url)

    def find_url(self, keyword: str) -> list[str]:
        """Return the URLs registered for *keyword*, or an empty list.

        :param keyword: word to look up; case is ignored.
        :return: list of URLs; never ``None``.
        """
        key = keyword.lower()
        if not self.filter.contains(key):
            return []
        # The filter may report a keyword that was never added (false
        # positive), so the dictionary lookup needs a default as well.
        return self.keyword_urls.get(key, [])

    def contains(self, keyword: str) -> bool:
        """Return whether the Bloom filter reports *keyword* as present.

        The answer comes from the filter alone, so it may be a false
        positive; ``find_url`` still returns an empty list in that case.
        """
        return bool(self.filter.contains(keyword.lower()))
|
@ -12,7 +12,7 @@ from LabWork01.LabWork3.AddData import addData
|
||||
from LabWork01.LabWork3.CreateGraphics import createGraphics
|
||||
from LabWork01.LabWork3.CustomGraphics import createCusGraphics
|
||||
from LabWork01.LabWork3.DeletePng import deleteAllPng
|
||||
|
||||
from LabWork01.LabWork4.SiteSearch import SiteSearch
|
||||
|
||||
app = Flask(__name__)
|
||||
|
||||
@ -25,6 +25,12 @@ listTypes = listShops.dtypes.to_list()
|
||||
#формируем записи о кол-ве пустых ячеек в каждом столбце
|
||||
countNull = listShops.isnull().sum()
|
||||
|
||||
# Keyword index for the Bloom-filter search page.
search_engine = SiteSearch()
search_engine.add(
    "https://www.kaggle.com/datasets/ankanhore545/100-highest-valued-unicorns",
    [
        "Company",
        "Valuation",
        "Country",
        "State",
        "City",
        "Industries",
        "Founded Year",
        "Name of Founders",
        "Total Funding",
        "Number of Employees",
    ],
)
search_engine.add(
    "https://www.kaggle.com/datasets/ilyaryabov/tesla-insider-trading",
    [
        "Insider Trading",
        "Relationship",
        "Date",
        "Transaction",
        "Cost",
        "Shares",
        "Value",
        "Shares Total",
        "SEC Form 4",
    ],
)
search_engine.add(
    "https://www.kaggle.com/datasets/sameepvani/nasa-nearest-earth-objects",
    [
        "NASA",
        "est_diameter_min",
        "est_diameter_max",
        "relative_velocity",
        "miss_distance",
        "orbiting_body",
        "sentry_object",
        "absolute_magnitude",
        "hazardous",
    ],
)
|
||||
|
||||
@app.route("/")
def home():
    """Render ``main_page.html`` with empty result lists.

    The module-level ``listTypes`` and ``countNull`` are passed through, and
    the row/column bounds are set to 1 and 4.
    """
    page_values = {
        "context": [],
        "main_img": [],
        "image_names": [],
        "tableAnalys": [],
        "titles": [''],
        "listTypes": listTypes,
        "countNull": countNull,
        "firstRow": 1,
        "secondRow": 4,
        "firstColumn": 1,
        "secondColumn": 4,
    }
    return render_template('main_page.html', **page_values)
|
||||
@ -143,6 +149,23 @@ def analysis():
|
||||
listTypes=listTypes, countNull=countNull, firstRow=1,
|
||||
secondRow=4, firstColumn=1, secondColumn=4)
|
||||
|
||||
@app.route('/findURL', methods=['GET'])
def get_page_findURL():
    """Render the empty keyword-search page."""
    # The template is committed as ``templates/findUrl.html``; the name must
    # match its case exactly or the lookup fails on case-sensitive filesystems.
    return render_template('findUrl.html')
|
||||
|
||||
@app.route('/findURL', methods=['POST'])
def findURL():
    """Look up the submitted word and render the matching links.

    Reads the ``word`` form field. When the search index reports the word,
    the page is rendered with ``word_links``, a list of ``(link, word)``
    tuples; otherwise the page is rendered without results.
    """
    word = request.form["word"]
    # The template is committed as ``templates/findUrl.html``; the name must
    # match its case exactly or the lookup fails on case-sensitive filesystems.
    if search_engine.contains(word):
        # ``or []`` guards against a lookup that yields no list at all
        # (e.g. a Bloom-filter false positive).
        links = search_engine.find_url(word) or []
        # Tuples, not sets: the template unpacks each entry positionally as
        # ``link, word``, which needs a fixed order and exactly two items.
        word_links = [(link, word) for link in links]
        return render_template('findUrl.html', word_links=word_links)
    return render_template('findUrl.html')
|
||||
|
||||
# Start the Flask development server (debug mode on) when run as a script.
if __name__ == "__main__":
    app.run(debug=True)
|
||||
|
||||
|
33
LabWork01/templates/findUrl.html
Normal file
33
LabWork01/templates/findUrl.html
Normal file
@ -0,0 +1,33 @@
|
||||
<!DOCTYPE html>
<html lang="ru">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <link rel="stylesheet" type="text/css" href="{{ url_for( 'static', filename='index.css', v=1)}}">
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
          integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
    <title>Поиск</title>
</head>
<body>
<div class="container">
    {# Search form: posts the entered word to /findURL. #}
    <form action="/findURL" method="post">
        <div class="mb-5 mt-3">
            <label class="form-label" for="word">Поиск</label>
            <div class="row">
                <div class="col">
                    <input class="form-control" type="text" name="word" id="word" placeholder="Слово">
                </div>
            </div>
        </div>
    </form>
    {# word_links: optional iterable of (link, word) pairs supplied by the view. #}
    <div>
        {% for link, word in word_links %}
        <a href="{{ link }}">{{ word }}</a>
        {% endfor %}
    </div>
</div>

</body>
</html>
|
@ -23,6 +23,11 @@
|
||||
<form action='http://127.0.0.1:5000/analysis' method=get>
|
||||
<input type=submit value='Анализ данных'>
|
||||
</form>
|
||||
<form action="/findURL" method="get">
|
||||
<div class="mb-3">
|
||||
<button type="submit" class="btn btn-primary mb-3">Запуск фильтра</button>
|
||||
</div>
|
||||
</form>
|
||||
</div>
|
||||
<div>
|
||||
<table>
|
||||
|
Loading…
x
Reference in New Issue
Block a user