Спасибо Коле и ребятам

This commit is contained in:
ElEgEv 2023-10-25 22:39:10 +04:00
parent 630cf8a4cd
commit 05a41ff145
5 changed files with 145 additions and 1 deletions

View File

@ -0,0 +1,55 @@
import hashlib
import mmh3
from bitarray import bitarray
class BloomFilter(object):
    """Fixed-size Bloom filter backed by a ``bitarray`` and seeded ``mmh3`` hashes.

    An item is mapped to ``hash_count`` bit positions, one per hash seed in
    ``range(hash_count)``.  ``add`` sets those bits; ``contains`` reports
    whether all of them are set.  A ``False`` answer therefore means the item
    was never added, while a ``True`` answer may be a false positive caused
    by bits set for other items.
    """

    def __init__(self, size, hash_count):
        """Create an empty filter.

        Args:
            size: Number of bits in the filter; must be positive.
            hash_count: Number of seeded hashes computed per item; must be
                positive.

        Raises:
            ValueError: If ``size`` or ``hash_count`` is not positive.
        """
        # Validate before allocating anything: size == 0 would make the
        # modulo in _indexes divide by zero, and hash_count <= 0 would make
        # contains() vacuously true for every item.
        if size <= 0:
            raise ValueError("size must be a positive integer")
        if hash_count <= 0:
            raise ValueError("hash_count must be a positive integer")
        self.size = size
        self.hash_count = hash_count
        self.bit_array = bitarray(size)
        self.bit_array.setall(0)

    def _indexes(self, item):
        """Yield the bit position selected for ``item`` by each hash seed."""
        for seed in range(self.hash_count):
            # Python's % with a positive modulus is never negative, so the
            # result is a valid index even if the hash value is negative.
            yield mmh3.hash(item, seed) % self.size

    def add(self, item):
        """Mark ``item`` as present by setting all of its bit positions."""
        for index in self._indexes(item):
            self.bit_array[index] = 1

    def contains(self, item):
        """Return True if every bit position of ``item`` is set.

        Stops at the first unset bit.
        """
        return all(self.bit_array[index] for index in self._indexes(item))

    # Allow the idiomatic ``item in bloom_filter`` spelling as well.
    __contains__ = contains
# def __init__(self, size, number_expected_elements=100000):
# self.size = size
# self.number_expected_elements = number_expected_elements
#
# self.bloom_filter = bitarray(self.size)
# self.bloom_filter.setall(0)
#
# self.number_hash_functions = round((self.size / self.number_expected_elements) * math.log(2))
#
# # base hash function (djb2)
# def _hash_djb2(self, s):
# hash = 5381
# for x in s:
# hash = ((hash << 5) + hash) + ord(x)
# return hash % self.size
#
# # simulate a family of hash functions by prefixing the item with the function index K
# def _hash(self, item, K):
# return self._hash_djb2(str(K) + item)
#
# # add a new item to the Bloom filter
# def add_to_filter(self, item):
# for i in range(self.number_hash_functions):
# self.bloom_filter[self._hash(item, i)] = 1
#
# # check whether the item is absent from the Bloom filter (True means at least one of its bits is unset)
# def check_is_not_in_filter(self, item):
# for i in range(self.number_hash_functions):
# if self.bloom_filter[self._hash(item, i)] == 0:
# return True
# return False

View File

@ -0,0 +1,28 @@
from LabWork01.LabWork4.BloomFilter import BloomFilter
class SiteSearch:
    """Keyword-to-URL index with a Bloom filter as a fast pre-check.

    Keywords are lower-cased before they are stored or looked up, so
    matching is case-insensitive.
    """

    def __init__(self):
        """Create an empty index with a 100000-bit, 5-hash Bloom filter."""
        self.filter: BloomFilter = BloomFilter(100000, 5)
        # Lower-cased keyword -> URLs registered for it, in insertion order.
        self.keyword_urls: dict[str, list[str]] = {}

    def add(self, url: str, keywords: list[str]) -> None:
        """Register ``url`` under each of ``keywords``.

        Args:
            url: The URL to associate with the keywords.
            keywords: Keywords describing the URL; case is ignored.
        """
        for keyword in keywords:
            lowercase_string = keyword.lower()
            self.filter.add(lowercase_string)
            self.keyword_urls.setdefault(lowercase_string, []).append(url)

    def find_url(self, keyword: str) -> list[str]:
        """Return the URLs registered for ``keyword``.

        Always returns a list.  It is empty when the filter rejects the
        keyword, and also when the filter accepts it but no URL is stored
        for it (a Bloom filter false positive).
        """
        lowercase_string = keyword.lower()
        if not self.filter.contains(lowercase_string):
            return []
        # Default to [] so a false positive never leaks None to the caller.
        return self.keyword_urls.get(lowercase_string, [])

    def contains(self, keyword: str) -> bool:
        """Return True if the Bloom filter reports ``keyword`` as present.

        This consults only the filter, so True may be a false positive.
        """
        return bool(self.filter.contains(keyword.lower()))

View File

@ -12,7 +12,7 @@ from LabWork01.LabWork3.AddData import addData
from LabWork01.LabWork3.CreateGraphics import createGraphics from LabWork01.LabWork3.CreateGraphics import createGraphics
from LabWork01.LabWork3.CustomGraphics import createCusGraphics from LabWork01.LabWork3.CustomGraphics import createCusGraphics
from LabWork01.LabWork3.DeletePng import deleteAllPng from LabWork01.LabWork3.DeletePng import deleteAllPng
from LabWork01.LabWork4.SiteSearch import SiteSearch
# Flask application object; the route decorators in this module register on it.
app = Flask(__name__)
@ -25,6 +25,12 @@ listTypes = listShops.dtypes.to_list()
# Count the empty (null) cells in each column.
countNull = listShops.isnull().sum()
# Keyword search backed by the Bloom filter: each dataset URL is registered
# under the keywords listed next to it.
search_engine = SiteSearch()
_DATASET_KEYWORDS = (
    (
        "https://www.kaggle.com/datasets/ankanhore545/100-highest-valued-unicorns",
        ["Company", "Valuation", "Country", "State", "City", "Industries", "Founded Year", "Name of Founders", "Total Funding", "Number of Employees"],
    ),
    (
        "https://www.kaggle.com/datasets/ilyaryabov/tesla-insider-trading",
        ["Insider Trading", "Relationship", "Date", "Transaction", "Cost", "Shares", "Value", "Shares Total", "SEC Form 4"],
    ),
    (
        "https://www.kaggle.com/datasets/sameepvani/nasa-nearest-earth-objects",
        ["NASA", "est_diameter_min", "est_diameter_max", "relative_velocity", "miss_distance", "orbiting_body", "sentry_object", "absolute_magnitude", "hazardous"],
    ),
)
for _dataset_url, _dataset_keywords in _DATASET_KEYWORDS:
    search_engine.add(_dataset_url, _dataset_keywords)
@app.route("/")
def home():
    """Render the main page with empty result placeholders and the default row/column bounds."""
    return render_template('main_page.html', context=[], main_img=[], image_names=[], tableAnalys=[], titles=[''], listTypes=listTypes, countNull=countNull, firstRow=1, secondRow=4, firstColumn=1, secondColumn=4)
@ -143,6 +149,23 @@ def analysis():
listTypes=listTypes, countNull=countNull, firstRow=1, listTypes=listTypes, countNull=countNull, firstRow=1,
secondRow=4, firstColumn=1, secondColumn=4) secondRow=4, firstColumn=1, secondColumn=4)
@app.route('/findURL', methods=['GET'])
def get_page_findURL():
    """Serve the keyword search page without any results."""
    page = render_template('findURL.html')
    return page
@app.route('/findURL', methods=['POST'])
def findURL():
    """Look up the submitted word and render the URLs registered for it.

    Reads the ``word`` form field.  When the search engine reports the word
    as present, the template receives ``word_links``: a list of
    ``(link, word)`` tuples, one per matching URL.  Otherwise the page is
    rendered without results.
    """
    word = request.form["word"]
    if search_engine.contains(word):
        # find_url can come back empty-handed on a Bloom filter false
        # positive; treat a missing result as "no links".
        links = search_engine.find_url(word) or []
        # Tuples, not sets: the template unpacks each entry positionally as
        # (link, word), and a set has no defined order and collapses equal
        # strings into a single element.
        word_links = [(link, word) for link in links]
        return render_template('findURL.html', word_links=word_links)
    return render_template('findURL.html')
# Start the Flask server in debug mode only when this module is run directly.
if __name__ == "__main__":
    app.run(debug=True)

View File

@ -0,0 +1,33 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <meta name="viewport"
          content="width=device-width, user-scalable=no, initial-scale=1.0, maximum-scale=1.0, minimum-scale=1.0">
    <meta http-equiv="X-UA-Compatible" content="ie=edge">
    <link rel="stylesheet" type="text/css" href="{{ url_for( 'static', filename='index.css', v=1)}}">
    <link href="https://cdn.jsdelivr.net/npm/bootstrap@5.0.2/dist/css/bootstrap.min.css" rel="stylesheet"
          integrity="sha384-EVSTQN3/azprG1Anm3QDgpJLIm9Nao0Yz1ztcQTwFspd3yD65VohhpuuCOmLASjC" crossorigin="anonymous">
    <title>Document</title>
</head>
<body>
<div class="container">
    <!-- Keyword search form: posts the "word" field to /findURL. -->
    <form action="/findURL" method="post">
        <div class="mb-5 mt-3">
            <!-- "for" ties the label to the input so clicking it focuses the field. -->
            <label class="form-label" for="word">Поиск</label>
            <div class="row">
                <div class="col">
                    <input class="form-control" type="text" name="word" id="word" placeholder="Слово" required>
                </div>
                <div class="col-auto">
                    <!-- Explicit submit control; previously the form could only be sent with Enter. -->
                    <button type="submit" class="btn btn-primary">Поиск</button>
                </div>
            </div>
        </div>
    </form>
    <!-- Results: word_links is a sequence of (link, word) pairs; nothing is rendered when it is not passed. -->
    <div class="">
        {% for link, word in word_links %}
            <a href="{{ link }}">{{ word }}</a>
        {% endfor %}
    </div>
</div>
</body>
</html>

View File

@ -23,6 +23,11 @@
<form action='http://127.0.0.1:5000/analysis' method=get> <form action='http://127.0.0.1:5000/analysis' method=get>
<input type=submit value='Анализ данных'> <input type=submit value='Анализ данных'>
</form> </form>
<!-- Opens the keyword search page (GET /findURL). -->
<form action="/findURL" method="get">
<div class="mb-3">
<button type="submit" class="btn btn-primary mb-3">Запуск фильтра</button>
</div>
</form>
</div> </div>
<div> <div>
<table> <table>