11 Commits
lab_9 ... main

Author SHA1 Message Date
Илья
110f79e4f5 cleaned comments 2025-06-17 12:27:34 +04:00
639e381daa Merge pull request 'lab_12' (#11) from lab_12 into main
Reviewed-on: #11
2025-05-15 16:08:15 +04:00
a610d16a7f Merge pull request 'lab_11' (#10) from lab_11 into main
Reviewed-on: #10
2025-05-15 16:08:07 +04:00
08bd1f76c0 minor design changes 2025-05-12 15:46:01 +04:00
95f9913c6f lab 12 done 2025-05-12 15:26:05 +04:00
061214e244 small fixes and requirements 2025-05-03 17:56:58 +04:00
80e6ee0e8f lab 11 done 2025-05-03 17:38:40 +04:00
3312b4f4d2 Merge pull request 'lab_10' (#9) from lab_10 into main
Reviewed-on: #9
2025-04-12 10:48:55 +04:00
b817368d6c Merge pull request 'lab_9' (#8) from lab_9 into main
Reviewed-on: #8
2025-04-12 10:48:49 +04:00
37103ea009 add requirements for lab 10 2025-04-12 10:03:19 +04:00
9dd4777138 lab 10 done 2025-04-12 09:59:20 +04:00
12 changed files with 4385 additions and 225 deletions

930
lab_10/lab10.ipynb Normal file
View File

@@ -0,0 +1,930 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "ae6b4270",
"metadata": {},
"source": [
"# Лабораторная работа 10"
]
},
{
"cell_type": "markdown",
"id": "b4b9ee35",
"metadata": {},
"source": [
"В качестве задачи оптимизации была выбрана классическая вариация задачи о рюкзаке: дан набор предметов, каждый с определенным весом и ценностью. Требуется определить, какие предметы взять с собой в рюкзак, чтобы их суммарная ценность была максимальной, а суммарный вес не превышал заданную грузоподъемность рюкзака. При этом каждый предмет можно взять только один раз или не брать вовсе (0/1).\n",
"\n",
"Используем соответствующий датасет, в котором имеется большое число вариантов задачи с различными параметрами: https://www.kaggle.com/datasets/warcoder/knapsack-problem?select=knapsack_5_items.csv"
]
},
{
"cell_type": "code",
"execution_count": 70,
"id": "80d638c3",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "Weights",
"rawType": "object",
"type": "string"
},
{
"name": "Prices",
"rawType": "object",
"type": "string"
},
{
"name": "Capacity",
"rawType": "int64",
"type": "integer"
},
{
"name": "Best picks",
"rawType": "object",
"type": "string"
},
{
"name": "Best price",
"rawType": "float64",
"type": "float"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "f950228b-7bd0-4f67-b42e-48be43e881b9",
"rows": [
[
"0",
"[46 40 42 38 10]",
"[12 19 19 15 8]",
"40",
"[0. 1. 0. 0. 0.]",
"19.0"
],
[
"1",
"[11 31 4 6 7]",
"[ 2 8 18 16 3]",
"64",
"[1. 1. 1. 1. 1.]",
"47.0"
],
[
"2",
"[32 49 27 37 24]",
"[19 16 16 4 1]",
"87",
"[1. 0. 1. 0. 1.]",
"36.0"
],
[
"3",
"[20 35 22 23 16]",
"[19 17 19 9 1]",
"21",
"[1. 0. 0. 0. 0.]",
"19.0"
],
[
"4",
"[ 7 12 19 13 20]",
"[10 11 18 15 5]",
"50",
"[0. 1. 1. 1. 0.]",
"44.0"
],
[
"9995",
"[18 12 11 49 32]",
"[12 3 17 19 7]",
"41",
"[1. 1. 1. 0. 0.]",
"32.0"
],
[
"9996",
"[20 2 24 7 7]",
"[17 12 4 3 8]",
"17",
"[0. 1. 0. 1. 1.]",
"23.0"
],
[
"9997",
"[43 43 5 15 23]",
"[15 5 7 2 7]",
"62",
"[1. 0. 1. 0. 0.]",
"22.0"
],
[
"9998",
"[49 9 15 21 39]",
"[11 15 3 12 19]",
"65",
"[0. 1. 1. 0. 1.]",
"37.0"
],
[
"9999",
"[25 36 42 19 39]",
"[15 12 7 18 12]",
"79",
"[1. 0. 0. 1. 0.]",
"33.0"
]
],
"shape": {
"columns": 5,
"rows": 10
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Weights</th>\n",
" <th>Prices</th>\n",
" <th>Capacity</th>\n",
" <th>Best picks</th>\n",
" <th>Best price</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[46 40 42 38 10]</td>\n",
" <td>[12 19 19 15 8]</td>\n",
" <td>40</td>\n",
" <td>[0. 1. 0. 0. 0.]</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[11 31 4 6 7]</td>\n",
" <td>[ 2 8 18 16 3]</td>\n",
" <td>64</td>\n",
" <td>[1. 1. 1. 1. 1.]</td>\n",
" <td>47.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>[32 49 27 37 24]</td>\n",
" <td>[19 16 16 4 1]</td>\n",
" <td>87</td>\n",
" <td>[1. 0. 1. 0. 1.]</td>\n",
" <td>36.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>[20 35 22 23 16]</td>\n",
" <td>[19 17 19 9 1]</td>\n",
" <td>21</td>\n",
" <td>[1. 0. 0. 0. 0.]</td>\n",
" <td>19.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[ 7 12 19 13 20]</td>\n",
" <td>[10 11 18 15 5]</td>\n",
" <td>50</td>\n",
" <td>[0. 1. 1. 1. 0.]</td>\n",
" <td>44.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9995</th>\n",
" <td>[18 12 11 49 32]</td>\n",
" <td>[12 3 17 19 7]</td>\n",
" <td>41</td>\n",
" <td>[1. 1. 1. 0. 0.]</td>\n",
" <td>32.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9996</th>\n",
" <td>[20 2 24 7 7]</td>\n",
" <td>[17 12 4 3 8]</td>\n",
" <td>17</td>\n",
" <td>[0. 1. 0. 1. 1.]</td>\n",
" <td>23.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9997</th>\n",
" <td>[43 43 5 15 23]</td>\n",
" <td>[15 5 7 2 7]</td>\n",
" <td>62</td>\n",
" <td>[1. 0. 1. 0. 0.]</td>\n",
" <td>22.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9998</th>\n",
" <td>[49 9 15 21 39]</td>\n",
" <td>[11 15 3 12 19]</td>\n",
" <td>65</td>\n",
" <td>[0. 1. 1. 0. 1.]</td>\n",
" <td>37.0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9999</th>\n",
" <td>[25 36 42 19 39]</td>\n",
" <td>[15 12 7 18 12]</td>\n",
" <td>79</td>\n",
" <td>[1. 0. 0. 1. 0.]</td>\n",
" <td>33.0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Weights Prices Capacity Best picks \\\n",
"0 [46 40 42 38 10] [12 19 19 15 8] 40 [0. 1. 0. 0. 0.] \n",
"1 [11 31 4 6 7] [ 2 8 18 16 3] 64 [1. 1. 1. 1. 1.] \n",
"2 [32 49 27 37 24] [19 16 16 4 1] 87 [1. 0. 1. 0. 1.] \n",
"3 [20 35 22 23 16] [19 17 19 9 1] 21 [1. 0. 0. 0. 0.] \n",
"4 [ 7 12 19 13 20] [10 11 18 15 5] 50 [0. 1. 1. 1. 0.] \n",
"9995 [18 12 11 49 32] [12 3 17 19 7] 41 [1. 1. 1. 0. 0.] \n",
"9996 [20 2 24 7 7] [17 12 4 3 8] 17 [0. 1. 0. 1. 1.] \n",
"9997 [43 43 5 15 23] [15 5 7 2 7] 62 [1. 0. 1. 0. 0.] \n",
"9998 [49 9 15 21 39] [11 15 3 12 19] 65 [0. 1. 1. 0. 1.] \n",
"9999 [25 36 42 19 39] [15 12 7 18 12] 79 [1. 0. 0. 1. 0.] \n",
"\n",
" Best price \n",
"0 19.0 \n",
"1 47.0 \n",
"2 36.0 \n",
"3 19.0 \n",
"4 44.0 \n",
"9995 32.0 \n",
"9996 23.0 \n",
"9997 22.0 \n",
"9998 37.0 \n",
"9999 33.0 "
]
},
"execution_count": 70,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"\n",
"df = pd.read_csv(\"..//..//static//csv//knapsack_5_items.csv\")\n",
"\n",
"pd.concat([df.head(5), df.tail(5)])"
]
},
{
"cell_type": "markdown",
"id": "884f0cd1",
"metadata": {},
"source": [
"### Структура хромосомы и тип данных гена\n",
"\n",
"В данном случае хромосома будет представлять собой список длины n (количество предметов в конкретной задаче), который кодирует решение задачи рюкзака — то есть указывает, какие предметы включить в рюкзак.\n",
"\n",
"Пример: [1, 0, 1, 0, 0]. В примере выбраны первый и третий предметы.\n",
"\n",
"Ген же — это одно значение в хромосоме. \n",
"\n",
"Тип данных: int.\n",
" \n",
"Возможные значения:\n",
"* 1 — предмет в рюкзаке;\n",
"* 0 — предмет не в рюкзаке."
]
},
{
"cell_type": "markdown",
"id": "92661ad8",
"metadata": {},
"source": [
"### Реализация функции генерации начальной популяции и ее тест:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "9a7b0970",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"[[0, 1, 0, 0, 0],\n",
" [1, 0, 0, 0, 1],\n",
" [1, 0, 0, 0, 0],\n",
" [1, 0, 1, 0, 1],\n",
" [1, 1, 1, 1, 0],\n",
" [0, 1, 0, 0, 1],\n",
" [1, 0, 1, 1, 1],\n",
" [1, 0, 0, 0, 1],\n",
" [1, 0, 1, 1, 0],\n",
" [1, 1, 0, 1, 1]]"
]
},
"execution_count": 71,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import random\n",
"\n",
"def create_individual(elements_num):\n",
"    \"\"\"Build one random chromosome: a list of 0/1 genes of the given length.\"\"\"\n",
"    genes = []\n",
"    for _ in range(elements_num):\n",
"        genes.append(random.randint(0, 1))\n",
"    return genes\n",
"\n",
"def create_population(elements_num, population_size):\n",
"    \"\"\"Create the initial population: `population_size` random individuals.\"\"\"\n",
"    return [create_individual(elements_num) for _ in range(population_size)]\n",
"\n",
"create_population(5, 10)"
]
},
{
"cell_type": "markdown",
"id": "bc114201",
"metadata": {},
"source": [
"### Реализация фитнес-функции и ее тест:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0809a0b1",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"44"
]
},
"execution_count": 72,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def evaluate_fitness(individual, weights, prices, capacity):\n",
"    \"\"\"Fitness of a knapsack solution: total price of the chosen items,\n",
"    or 0 when their total weight exceeds the capacity (infeasible).\"\"\"\n",
"    total_weight = sum(w for gene, w in zip(individual, weights) if gene == 1)\n",
"    total_value = sum(p for gene, p in zip(individual, prices) if gene == 1)\n",
"    if total_weight > capacity:\n",
"        return 0\n",
"    return total_value\n",
"\n",
"evaluate_fitness([0, 1, 1, 1, 0], [7, 12, 19, 13, 20], [10, 11, 18, 15, 5], 50)"
]
},
{
"cell_type": "markdown",
"id": "cccc6557",
"metadata": {},
"source": [
"### Реализация оператора кроссинговера и его тест:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4d5a13d7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"([0, 1, 1, 0, 0], [1, 0, 1, 1, 0])"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def crossover(parent1, parent2):\n",
"    \"\"\"One-point crossover; returns two new child chromosomes.\n",
"\n",
"    Fix: parents of length 1 cannot be cut — random.randint(1, 0) would\n",
"    raise ValueError — so copies of the parents are returned instead.\n",
"    \"\"\"\n",
"    if len(parent1) < 2:\n",
"        return parent1[:], parent2[:]\n",
"    point = random.randint(1, len(parent1) - 1)\n",
"    child1 = parent1[:point] + parent2[point:]\n",
"    child2 = parent2[:point] + parent1[point:]\n",
"    return child1, child2\n",
"\n",
"crossover([0, 1, 1, 1, 0], [1, 0, 1, 0, 0])"
]
},
{
"cell_type": "markdown",
"id": "08c626b5",
"metadata": {},
"source": [
"### Реализация двух операторов мутации и их тест:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "66021b53",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[0, 1, 1, 1, 0]\n",
"================\n",
"[0, 1, 0, 1, 0]\n",
"================\n",
"[0, 0, 0, 1, 1]\n"
]
}
],
"source": [
"# Mutation 1: independent bit-flip of every gene with probability mutation_rate\n",
"def mutate_flip_bits(individual, mutation_rate):\n",
"    for i, gene in enumerate(individual):\n",
"        if random.random() < mutation_rate:\n",
"            individual[i] = 1 - gene\n",
"\n",
"# Mutation 2: with probability mutation_rate, swap two randomly chosen genes\n",
"def mutate_swap_genes(individual, mutation_rate):\n",
"    if random.random() < mutation_rate:\n",
"        i, j = random.sample(range(len(individual)), 2)\n",
"        individual[i], individual[j] = individual[j], individual[i]\n",
"\n",
"individual = [0, 1, 1, 1, 0]\n",
"print(individual)\n",
"mutate_flip_bits(individual, 0.5)\n",
"print(\"================\")\n",
"print(individual)\n",
"print(\"================\")\n",
"mutate_swap_genes(individual, 1)\n",
"print(individual)"
]
},
{
"cell_type": "markdown",
"id": "d199e789",
"metadata": {},
"source": [
"### И наконец реализуем сам генетический алгоритм:"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "17093d62",
"metadata": {},
"outputs": [],
"source": [
"population_size = 100\n",
"num_generations = 10\n",
"mutation_rate = 0.1\n",
"mutation_strategy = 'flip'\n",
"\n",
"def select_parents(population, weights, prices, capacity):\n",
"    \"\"\"Roulette-wheel selection: pick two parents with probability proportional to fitness.\"\"\"\n",
"    fitness_values = [evaluate_fitness(ind, weights, prices, capacity) for ind in population]\n",
"    total_fitness = sum(fitness_values)\n",
"    # If every individual is infeasible (all fitness 0), fall back to uniform choice\n",
"    if total_fitness == 0:\n",
"        return random.choice(population), random.choice(population)\n",
"    probabilities = [f / total_fitness for f in fitness_values]\n",
"    return random.choices(population, weights=probabilities, k=2)\n",
"\n",
"def genetic_algorithm(weights, prices, capacity, population_size = 100, num_generations = 10, mutation_rate = 0.1, mutation_strategy='flip'):\n",
"    \"\"\"Solve one 0/1 knapsack instance with a generational GA.\n",
"\n",
"    Returns (best_individual, best_value). Fix: the best solution seen in\n",
"    ANY generation is tracked, so the answer can no longer be lost when the\n",
"    final generation happens to be worse than an earlier one.\n",
"    \"\"\"\n",
"    elements_num = len(weights)\n",
"    population = create_population(elements_num, population_size)\n",
"\n",
"    def fitness(ind):\n",
"        return evaluate_fitness(ind, weights, prices, capacity)\n",
"\n",
"    best = list(max(population, key=fitness))\n",
"    best_value = fitness(best)\n",
"\n",
"    for _ in range(num_generations):\n",
"        new_population = []\n",
"        for _ in range(population_size // 2):\n",
"            p1, p2 = select_parents(population, weights, prices, capacity)\n",
"            c1, c2 = crossover(p1, p2)\n",
"            if mutation_strategy == 'flip':\n",
"                mutate_flip_bits(c1, mutation_rate)\n",
"                mutate_flip_bits(c2, mutation_rate)\n",
"            elif mutation_strategy == 'swap':\n",
"                mutate_swap_genes(c1, mutation_rate)\n",
"                mutate_swap_genes(c2, mutation_rate)\n",
"            new_population.extend([c1, c2])\n",
"        population = new_population\n",
"\n",
"        # Elitism on the returned answer: remember the best individual ever seen\n",
"        gen_best = max(population, key=fitness)\n",
"        gen_value = fitness(gen_best)\n",
"        if gen_value > best_value:\n",
"            best, best_value = list(gen_best), gen_value\n",
"\n",
"    return best, best_value"
]
},
{
"cell_type": "markdown",
"id": "f241602a",
"metadata": {},
"source": [
"Применим его для всех случаев из датасета:"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "9ef718ac",
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.microsoft.datawrangler.viewer.v0+json": {
"columns": [
{
"name": "index",
"rawType": "int64",
"type": "integer"
},
{
"name": "Weights",
"rawType": "object",
"type": "string"
},
{
"name": "Prices",
"rawType": "object",
"type": "string"
},
{
"name": "Capacity",
"rawType": "int64",
"type": "integer"
},
{
"name": "Best picks",
"rawType": "object",
"type": "string"
},
{
"name": "Best price",
"rawType": "float64",
"type": "float"
},
{
"name": "algorithmPicks",
"rawType": "object",
"type": "unknown"
},
{
"name": "algorithmPrice",
"rawType": "int64",
"type": "integer"
}
],
"conversionMethod": "pd.DataFrame",
"ref": "767fbcfd-78f3-4b7e-a80d-6735c17b7fbd",
"rows": [
[
"0",
"[46 40 42 38 10]",
"[12 19 19 15 8]",
"40",
"[0. 1. 0. 0. 0.]",
"19.0",
"[0, 1, 0, 0, 0]",
"19"
],
[
"1",
"[11 31 4 6 7]",
"[ 2 8 18 16 3]",
"64",
"[1. 1. 1. 1. 1.]",
"47.0",
"[1, 1, 1, 1, 1]",
"47"
],
[
"2",
"[32 49 27 37 24]",
"[19 16 16 4 1]",
"87",
"[1. 0. 1. 0. 1.]",
"36.0",
"[1, 0, 1, 0, 1]",
"36"
],
[
"3",
"[20 35 22 23 16]",
"[19 17 19 9 1]",
"21",
"[1. 0. 0. 0. 0.]",
"19.0",
"[1, 0, 0, 0, 0]",
"19"
],
[
"4",
"[ 7 12 19 13 20]",
"[10 11 18 15 5]",
"50",
"[0. 1. 1. 1. 0.]",
"44.0",
"[0, 1, 1, 1, 0]",
"44"
],
[
"9995",
"[18 12 11 49 32]",
"[12 3 17 19 7]",
"41",
"[1. 1. 1. 0. 0.]",
"32.0",
"[1, 1, 1, 0, 0]",
"32"
],
[
"9996",
"[20 2 24 7 7]",
"[17 12 4 3 8]",
"17",
"[0. 1. 0. 1. 1.]",
"23.0",
"[0, 1, 0, 1, 1]",
"23"
],
[
"9997",
"[43 43 5 15 23]",
"[15 5 7 2 7]",
"62",
"[1. 0. 1. 0. 0.]",
"22.0",
"[1, 0, 1, 0, 0]",
"22"
],
[
"9998",
"[49 9 15 21 39]",
"[11 15 3 12 19]",
"65",
"[0. 1. 1. 0. 1.]",
"37.0",
"[0, 1, 1, 0, 1]",
"37"
],
[
"9999",
"[25 36 42 19 39]",
"[15 12 7 18 12]",
"79",
"[1. 0. 0. 1. 0.]",
"33.0",
"[1, 0, 0, 1, 0]",
"33"
]
],
"shape": {
"columns": 7,
"rows": 10
}
},
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Weights</th>\n",
" <th>Prices</th>\n",
" <th>Capacity</th>\n",
" <th>Best picks</th>\n",
" <th>Best price</th>\n",
" <th>algorithmPicks</th>\n",
" <th>algorithmPrice</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>[46 40 42 38 10]</td>\n",
" <td>[12 19 19 15 8]</td>\n",
" <td>40</td>\n",
" <td>[0. 1. 0. 0. 0.]</td>\n",
" <td>19.0</td>\n",
" <td>[0, 1, 0, 0, 0]</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>[11 31 4 6 7]</td>\n",
" <td>[ 2 8 18 16 3]</td>\n",
" <td>64</td>\n",
" <td>[1. 1. 1. 1. 1.]</td>\n",
" <td>47.0</td>\n",
" <td>[1, 1, 1, 1, 1]</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>[32 49 27 37 24]</td>\n",
" <td>[19 16 16 4 1]</td>\n",
" <td>87</td>\n",
" <td>[1. 0. 1. 0. 1.]</td>\n",
" <td>36.0</td>\n",
" <td>[1, 0, 1, 0, 1]</td>\n",
" <td>36</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>[20 35 22 23 16]</td>\n",
" <td>[19 17 19 9 1]</td>\n",
" <td>21</td>\n",
" <td>[1. 0. 0. 0. 0.]</td>\n",
" <td>19.0</td>\n",
" <td>[1, 0, 0, 0, 0]</td>\n",
" <td>19</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>[ 7 12 19 13 20]</td>\n",
" <td>[10 11 18 15 5]</td>\n",
" <td>50</td>\n",
" <td>[0. 1. 1. 1. 0.]</td>\n",
" <td>44.0</td>\n",
" <td>[0, 1, 1, 1, 0]</td>\n",
" <td>44</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9995</th>\n",
" <td>[18 12 11 49 32]</td>\n",
" <td>[12 3 17 19 7]</td>\n",
" <td>41</td>\n",
" <td>[1. 1. 1. 0. 0.]</td>\n",
" <td>32.0</td>\n",
" <td>[1, 1, 1, 0, 0]</td>\n",
" <td>32</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9996</th>\n",
" <td>[20 2 24 7 7]</td>\n",
" <td>[17 12 4 3 8]</td>\n",
" <td>17</td>\n",
" <td>[0. 1. 0. 1. 1.]</td>\n",
" <td>23.0</td>\n",
" <td>[0, 1, 0, 1, 1]</td>\n",
" <td>23</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9997</th>\n",
" <td>[43 43 5 15 23]</td>\n",
" <td>[15 5 7 2 7]</td>\n",
" <td>62</td>\n",
" <td>[1. 0. 1. 0. 0.]</td>\n",
" <td>22.0</td>\n",
" <td>[1, 0, 1, 0, 0]</td>\n",
" <td>22</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9998</th>\n",
" <td>[49 9 15 21 39]</td>\n",
" <td>[11 15 3 12 19]</td>\n",
" <td>65</td>\n",
" <td>[0. 1. 1. 0. 1.]</td>\n",
" <td>37.0</td>\n",
" <td>[0, 1, 1, 0, 1]</td>\n",
" <td>37</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9999</th>\n",
" <td>[25 36 42 19 39]</td>\n",
" <td>[15 12 7 18 12]</td>\n",
" <td>79</td>\n",
" <td>[1. 0. 0. 1. 0.]</td>\n",
" <td>33.0</td>\n",
" <td>[1, 0, 0, 1, 0]</td>\n",
" <td>33</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Weights Prices Capacity Best picks \\\n",
"0 [46 40 42 38 10] [12 19 19 15 8] 40 [0. 1. 0. 0. 0.] \n",
"1 [11 31 4 6 7] [ 2 8 18 16 3] 64 [1. 1. 1. 1. 1.] \n",
"2 [32 49 27 37 24] [19 16 16 4 1] 87 [1. 0. 1. 0. 1.] \n",
"3 [20 35 22 23 16] [19 17 19 9 1] 21 [1. 0. 0. 0. 0.] \n",
"4 [ 7 12 19 13 20] [10 11 18 15 5] 50 [0. 1. 1. 1. 0.] \n",
"9995 [18 12 11 49 32] [12 3 17 19 7] 41 [1. 1. 1. 0. 0.] \n",
"9996 [20 2 24 7 7] [17 12 4 3 8] 17 [0. 1. 0. 1. 1.] \n",
"9997 [43 43 5 15 23] [15 5 7 2 7] 62 [1. 0. 1. 0. 0.] \n",
"9998 [49 9 15 21 39] [11 15 3 12 19] 65 [0. 1. 1. 0. 1.] \n",
"9999 [25 36 42 19 39] [15 12 7 18 12] 79 [1. 0. 0. 1. 0.] \n",
"\n",
" Best price algorithmPicks algorithmPrice \n",
"0 19.0 [0, 1, 0, 0, 0] 19 \n",
"1 47.0 [1, 1, 1, 1, 1] 47 \n",
"2 36.0 [1, 0, 1, 0, 1] 36 \n",
"3 19.0 [1, 0, 0, 0, 0] 19 \n",
"4 44.0 [0, 1, 1, 1, 0] 44 \n",
"9995 32.0 [1, 1, 1, 0, 0] 32 \n",
"9996 23.0 [0, 1, 0, 1, 1] 23 \n",
"9997 22.0 [1, 0, 1, 0, 0] 22 \n",
"9998 37.0 [0, 1, 1, 0, 1] 37 \n",
"9999 33.0 [1, 0, 0, 1, 0] 33 "
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import ast\n",
"import re\n",
"\n",
"picks = []\n",
"best_prices = []\n",
"\n",
"def fix_list_string(s):\n",
"    \"\"\"Turn a numpy-style array repr like '[ 7 12 19]' into '[7,12,19]'\n",
"    so ast.literal_eval can parse it as a Python list.\"\"\"\n",
"    s = re.sub(r'\\[\\s*', '[', s)   # drop whitespace right after '['\n",
"    s = re.sub(r'\\s*\\]', ']', s)   # drop whitespace right before ']'\n",
"    return re.sub(r'\\s+', ',', s)  # remaining whitespace runs become commas\n",
"\n",
"for _, row in df.iterrows():\n",
"    weights = ast.literal_eval(fix_list_string(row['Weights']))\n",
"    prices = ast.literal_eval(fix_list_string(row['Prices']))\n",
"    capacity = row['Capacity']\n",
"\n",
"    best_individual, best_value = genetic_algorithm(weights, prices, capacity, population_size, num_generations, mutation_rate, mutation_strategy)\n",
"\n",
"    picks.append(best_individual)\n",
"    best_prices.append(best_value)\n",
"\n",
"df['algorithmPicks'] = picks\n",
"df['algorithmPrice'] = best_prices\n",
"\n",
"pd.concat([df.head(5), df.tail(5)])"
]
},
{
"cell_type": "markdown",
"id": "72958862",
"metadata": {},
"source": [
"По полученным результатам видно, что ответы алгоритма совпадают с теми ответами, которые уже имелись в наборе данных. Поэтому можно сказать, что для таких условий задачи алгоритм работает успешно даже с 10 поколениями"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "aimenv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.5"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

BIN
lab_10/requirements.txt Normal file

Binary file not shown.

1345
lab_11/lab11.ipynb Normal file

File diff suppressed because one or more lines are too long

BIN
lab_11/requirements.txt Normal file

Binary file not shown.

2061
lab_12/lab12.ipynb Normal file

File diff suppressed because one or more lines are too long

BIN
lab_12/requirements.txt Normal file

Binary file not shown.

View File

@@ -15,7 +15,7 @@
},
{
"cell_type": "code",
"execution_count": 532,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -35,7 +35,6 @@
"import seaborn as sns\n",
"from sklearn.model_selection import train_test_split\n",
"\n",
"# вывод всех столбцов\n",
"df = pd.read_csv(\"..//..//static//csv//flavors_of_cacao.csv\")\n",
"df.columns = df.columns.str.replace('\\n', '')\n",
"print(df.columns)"
@@ -85,7 +84,7 @@
},
{
"cell_type": "code",
"execution_count": 533,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -108,13 +107,10 @@
}
],
"source": [
"# Удаляем символ '%' и преобразуем столбец CocoaPercent в числовой формат\n",
"df['CocoaPercent'] = df['CocoaPercent'].str.replace('%', '').astype(float)\n",
"\n",
"# Выбираем столбцы для анализа\n",
"columns_to_check = ['CocoaPercent', 'Rating']\n",
"\n",
"# Функция для подсчета выбросов\n",
"def count_outliers(df, columns):\n",
" outliers_count = {}\n",
" for col in columns:\n",
@@ -123,21 +119,17 @@
" IQR = Q3 - Q1\n",
" lower_bound = Q1 - 1.5 * IQR\n",
" upper_bound = Q3 + 1.5 * IQR\n",
" \n",
" # Считаем количество выбросов\n",
" \n",
" outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]\n",
" outliers_count[col] = len(outliers)\n",
" \n",
" return outliers_count\n",
"\n",
"# Подсчитываем выбросы\n",
"outliers_count = count_outliers(df, columns_to_check)\n",
"\n",
"# Выводим количество выбросов для каждого столбца\n",
"for col, count in outliers_count.items():\n",
" print(f\"Количество выбросов в столбце '{col}': {count}\")\n",
"\n",
"# Создаем диаграммы размахов\n",
"plt.figure(figsize=(15, 10))\n",
"for i, col in enumerate(columns_to_check, 1):\n",
" plt.subplot(2, 2, i)\n",
@@ -158,7 +150,7 @@
},
{
"cell_type": "code",
"execution_count": 534,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -180,10 +172,8 @@
}
],
"source": [
"# Выбираем столбцы для очистки\n",
"columns_to_clean = ['CocoaPercent']\n",
"\n",
"# Функция для удаления выбросов\n",
"def remove_outliers(df, columns):\n",
" for col in columns:\n",
" Q1 = df[col].quantile(0.25)\n",
@@ -192,21 +182,15 @@
" lower_bound = Q1 - 1.5 * IQR\n",
" upper_bound = Q3 + 1.5 * IQR\n",
" \n",
" # Удаляем строки, содержащие выбросы\n",
" df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]\n",
" \n",
" return df\n",
"\n",
"# Удаляем выбросы\n",
"df_cleaned = remove_outliers(df, columns_to_clean)\n",
"\n",
"# Выводим количество удаленных строк\n",
"print(f\"Количество удаленных строк: {len(df) - len(df_cleaned)}\")\n",
"\n",
"# Создаем диаграммы размаха для очищенных данных\n",
"plt.figure(figsize=(15, 6))\n",
"\n",
"# Диаграмма размаха для CocoaPercent\n",
"plt.subplot(1, 2, 1)\n",
"sns.boxplot(x=df_cleaned['CocoaPercent'])\n",
"plt.title('Box Plot of CocoaPercent (Cleaned)')\n",
@@ -215,7 +199,6 @@
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Сохраняем очищенный датасет\n",
"df_cleaned.to_csv(\"..//..//static//csv//flavors_of_cacao_cleaned.csv\", index=False)\n",
"df = df_cleaned"
]
@@ -231,7 +214,7 @@
},
{
"cell_type": "code",
"execution_count": 535,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -266,17 +249,14 @@
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
@@ -292,7 +272,7 @@
},
{
"cell_type": "code",
"execution_count": 536,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -315,13 +295,10 @@
}
],
"source": [
"# Удаление пропущенных значений в столбцах BeanType и Broad BeanOrigin\n",
"df = df.dropna(subset=['BeanType', 'Broad BeanOrigin'])\n",
"\n",
"# Проверка на пропущенные значения после удаления\n",
"missing_values_after_drop = df.isnull().sum()\n",
"\n",
"# Вывод результатов после удаления\n",
"print(\"\\nКоличество пустых значений в каждом столбце после удаления:\")\n",
"print(missing_values_after_drop)"
]
@@ -337,7 +314,7 @@
},
{
"cell_type": "code",
"execution_count": 537,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -351,18 +328,13 @@
}
],
"source": [
"# Разделение на признаки (X) и целевую переменную (y)\n",
"# Предположим, что Rating - это целевая переменная\n",
"X = df.drop('Rating', axis=1)\n",
"y = df['Rating']\n",
"\n",
"# Разбиение на обучающую и остальную выборку (контрольную + тестовую)\n",
"X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size=0.6, random_state=42)\n",
"\n",
"# Разбиение остатка на контрольную и тестовую выборки\n",
"X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=42)\n",
"\n",
"# Вывод размеров выборок\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер контрольной выборки:\", X_val.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)"
@@ -370,7 +342,7 @@
},
{
"cell_type": "code",
"execution_count": 538,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -426,7 +398,6 @@
}
],
"source": [
"# Функция для анализа сбалансированности\n",
"def analyze_balance(y_train, y_val, y_test):\n",
" print(\"Распределение классов в обучающей выборке:\")\n",
" print(y_train.value_counts(normalize=True))\n",
@@ -437,7 +408,6 @@
" print(\"\\nРаспределение классов в тестовой выборке:\")\n",
" print(y_test.value_counts(normalize=True))\n",
"\n",
"# Анализ сбалансированности\n",
"analyze_balance(y_train, y_val, y_test)"
]
},
@@ -465,7 +435,7 @@
},
{
"cell_type": "code",
"execution_count": 539,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -485,7 +455,6 @@
"from sklearn.model_selection import train_test_split\n",
"from imblearn.over_sampling import SMOTE\n",
"\n",
"# вывод всех столбцов\n",
"df = pd.read_csv(\"..//..//static//csv//water_potability.csv\")\n",
"print(df.columns)"
]
@@ -538,7 +507,7 @@
},
{
"cell_type": "code",
"execution_count": 540,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -562,10 +531,8 @@
}
],
"source": [
"# Выбираем столбцы для анализа\n",
"columns_to_check = ['Hardness', 'Solids', 'Organic_carbon']\n",
"\n",
"# Функция для подсчета выбросов\n",
"def count_outliers(df, columns):\n",
" outliers_count = {}\n",
" for col in columns:\n",
@@ -575,20 +542,16 @@
" lower_bound = Q1 - 1.5 * IQR\n",
" upper_bound = Q3 + 1.5 * IQR\n",
" \n",
" # Считаем количество выбросов\n",
" outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]\n",
" outliers_count[col] = len(outliers)\n",
" \n",
" return outliers_count\n",
"\n",
"# Подсчитываем выбросы\n",
"outliers_count = count_outliers(df, columns_to_check)\n",
"\n",
"# Выводим количество выбросов для каждого столбца\n",
"for col, count in outliers_count.items():\n",
" print(f\"Количество выбросов в столбце '{col}': {count}\")\n",
"\n",
"# Создаем диаграммы размахов\n",
"plt.figure(figsize=(15, 10))\n",
"for i, col in enumerate(columns_to_check, 1):\n",
" plt.subplot(2, 2, i)\n",
@@ -607,7 +570,7 @@
},
{
"cell_type": "code",
"execution_count": 541,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -629,10 +592,8 @@
}
],
"source": [
"# Выбираем столбцы для очистки\n",
"columns_to_clean = ['Hardness', 'Solids', 'Organic_carbon']\n",
"\n",
"# Функция для удаления выбросов\n",
"def remove_outliers(df, columns):\n",
" for col in columns:\n",
" Q1 = df[col].quantile(0.25)\n",
@@ -641,33 +602,25 @@
" lower_bound = Q1 - 1.5 * IQR\n",
" upper_bound = Q3 + 1.5 * IQR\n",
" \n",
" # Удаляем строки, содержащие выбросы\n",
" df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]\n",
" \n",
" return df\n",
"\n",
"# Удаляем выбросы\n",
"df_cleaned = remove_outliers(df, columns_to_clean)\n",
"\n",
"# Выводим количество удаленных строк\n",
"print(f\"Количество удаленных строк: {len(df) - len(df_cleaned)}\")\n",
"\n",
"# Создаем диаграммы размаха для очищенных данных\n",
"plt.figure(figsize=(15, 6))\n",
"\n",
"# Диаграмма размаха для Hardness\n",
"plt.subplot(1, 3, 1)\n",
"sns.boxplot(x=df_cleaned['Hardness'])\n",
"plt.title('Box Plot of Hardness (Cleaned)')\n",
"plt.xlabel('Hardness')\n",
"\n",
"# Диаграмма размаха для Solids\n",
"plt.subplot(1, 3, 2)\n",
"sns.boxplot(x=df_cleaned['Solids'])\n",
"plt.title('Box Plot of Solids (Cleaned)')\n",
"plt.xlabel('Solids')\n",
"\n",
"# Диаграмма размаха для Organic_carbon\n",
"plt.subplot(1, 3, 3)\n",
"sns.boxplot(x=df_cleaned['Organic_carbon'])\n",
"plt.title('Box Plot of Organic_carbon (Cleaned)')\n",
@@ -676,7 +629,6 @@
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Сохраняем очищенный датасет\n",
"df_cleaned.to_csv(\"..//..//static//csv//water_potability_cleaned.csv\", index=False)\n",
"df = df_cleaned"
]
@@ -692,7 +644,7 @@
},
{
"cell_type": "code",
"execution_count": 542,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -730,17 +682,14 @@
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
@@ -756,7 +705,7 @@
},
{
"cell_type": "code",
"execution_count": 543,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -780,15 +729,12 @@
}
],
"source": [
"# Замена значений\n",
"df[\"ph\"] = df[\"ph\"].fillna(df[\"ph\"].median())\n",
"df[\"Sulfate\"] = df[\"Sulfate\"].fillna(df[\"Sulfate\"].median())\n",
"df[\"Trihalomethanes\"] = df[\"Trihalomethanes\"].fillna(df[\"Trihalomethanes\"].median())\n",
"\n",
"# Проверка на пропущенные значения после замены\n",
"missing_values_after_drop = df.isnull().sum()\n",
"\n",
"# Вывод результатов после замены\n",
"print(\"\\nКоличество пустых значений в каждом столбце после замены:\")\n",
"print(missing_values_after_drop)"
]
@@ -804,7 +750,7 @@
},
{
"cell_type": "code",
"execution_count": 544,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -818,18 +764,13 @@
}
],
"source": [
"# Разделение на признаки (X) и целевую переменную (y)\n",
"# Предположим, что Potability - это целевая переменная\n",
"X = df.drop('Potability', axis=1)\n",
"y = df['Potability']\n",
"\n",
"# Разбиение на обучающую и остальную выборку (контрольную + тестовую)\n",
"X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size=0.6, random_state=42)\n",
"\n",
"# Разбиение остатка на контрольную и тестовую выборки\n",
"X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=42)\n",
"\n",
"# Вывод размеров выборок\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер контрольной выборки:\", X_val.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)"
@@ -837,7 +778,7 @@
},
{
"cell_type": "code",
"execution_count": 545,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -865,7 +806,6 @@
}
],
"source": [
"# Функция для анализа сбалансированности\n",
"def analyze_balance(y_train, y_val, y_test):\n",
" print(\"Распределение классов в обучающей выборке:\")\n",
" print(y_train.value_counts(normalize=True))\n",
@@ -876,7 +816,6 @@
" print(\"\\nРаспределение классов в тестовой выборке:\")\n",
" print(y_test.value_counts(normalize=True))\n",
"\n",
"# Анализ сбалансированности\n",
"analyze_balance(y_train, y_val, y_test)"
]
},
@@ -889,7 +828,7 @@
},
{
"cell_type": "code",
"execution_count": 546,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -907,10 +846,8 @@
"source": [
"smote = SMOTE(random_state=42)\n",
"\n",
"# Применение SMOTE для балансировки обучающей выборки\n",
"X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)\n",
"\n",
"# Проверка сбалансированности после SMOTE\n",
"print(\"Сбалансированность обучающей выборки после SMOTE:\")\n",
"print(y_train_resampled.value_counts(normalize=True))"
]
@@ -938,7 +875,7 @@
},
{
"cell_type": "code",
"execution_count": 547,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -958,7 +895,6 @@
"from sklearn.model_selection import train_test_split\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"\n",
"# вывод всех столбцов\n",
"df = pd.read_csv(\"..//..//static//csv//diabetes.csv\")\n",
"print(df.columns)"
]
@@ -1009,7 +945,7 @@
},
{
"cell_type": "code",
"execution_count": 548,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1033,10 +969,8 @@
}
],
"source": [
"# Выбираем столбцы для анализа\n",
"columns_to_check = ['Age', 'BloodPressure', 'BMI']\n",
"\n",
"# Функция для подсчета выбросов\n",
"def count_outliers(df, columns):\n",
" outliers_count = {}\n",
" for col in columns:\n",
@@ -1046,20 +980,16 @@
" lower_bound = Q1 - 1.5 * IQR\n",
" upper_bound = Q3 + 1.5 * IQR\n",
" \n",
" # Считаем количество выбросов\n",
" outliers = df[(df[col] < lower_bound) | (df[col] > upper_bound)]\n",
" outliers_count[col] = len(outliers)\n",
" \n",
" return outliers_count\n",
"\n",
"# Подсчитываем выбросы\n",
"outliers_count = count_outliers(df, columns_to_check)\n",
"\n",
"# Выводим количество выбросов для каждого столбца\n",
"for col, count in outliers_count.items():\n",
" print(f\"Количество выбросов в столбце '{col}': {count}\")\n",
"\n",
"# Создаем диаграммы размахов\n",
"plt.figure(figsize=(15, 10))\n",
"for i, col in enumerate(columns_to_check, 1):\n",
" plt.subplot(2, 2, i)\n",
@@ -1078,7 +1008,7 @@
},
{
"cell_type": "code",
"execution_count": 549,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1100,10 +1030,8 @@
}
],
"source": [
"# Выбираем столбцы для очистки\n",
"columns_to_clean = ['Age', 'BloodPressure', 'BMI']\n",
"\n",
"# Функция для удаления выбросов\n",
"def remove_outliers(df, columns):\n",
" for col in columns:\n",
" Q1 = df[col].quantile(0.25)\n",
@@ -1112,33 +1040,26 @@
" lower_bound = Q1 - 1.5 * IQR\n",
" upper_bound = Q3 + 1.5 * IQR\n",
" \n",
" # Удаляем строки, содержащие выбросы\n",
" df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]\n",
" \n",
" return df\n",
"\n",
"# Удаляем выбросы\n",
"df_cleaned = remove_outliers(df, columns_to_clean)\n",
"\n",
"# Выводим количество удаленных строк\n",
"print(f\"Количество удаленных строк: {len(df) - len(df_cleaned)}\")\n",
"\n",
"# Создаем диаграммы размаха для очищенных данных\n",
"plt.figure(figsize=(15, 6))\n",
"\n",
"# Диаграмма размаха для Age\n",
"plt.subplot(1, 3, 1)\n",
"sns.boxplot(x=df_cleaned['Age'])\n",
"plt.title('Box Plot of Age (Cleaned)')\n",
"plt.xlabel('Age')\n",
"\n",
"# Диаграмма размаха для BloodPressure\n",
"plt.subplot(1, 3, 2)\n",
"sns.boxplot(x=df_cleaned['BloodPressure'])\n",
"plt.title('Box Plot of BloodPressure (Cleaned)')\n",
"plt.xlabel('BloodPressure')\n",
"\n",
"# Диаграмма размаха для BMI\n",
"plt.subplot(1, 3, 3)\n",
"sns.boxplot(x=df_cleaned['BMI'])\n",
"plt.title('Box Plot of BMI (Cleaned)')\n",
@@ -1147,7 +1068,6 @@
"plt.tight_layout()\n",
"plt.show()\n",
"\n",
"# Сохраняем очищенный датасет\n",
"df_cleaned.to_csv(\"..//..//static//csv//diabetes_cleaned.csv\", index=False)\n",
"df = df_cleaned"
]
@@ -1163,7 +1083,7 @@
},
{
"cell_type": "code",
"execution_count": 550,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1196,17 +1116,14 @@
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
@@ -1224,7 +1141,7 @@
},
{
"cell_type": "code",
"execution_count": 551,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1238,18 +1155,13 @@
}
],
"source": [
"# Разделение на признаки (X) и целевую переменную (y)\n",
"# Предположим, что Outcome - это целевая переменная\n",
"X = df.drop('Outcome', axis=1)\n",
"y = df['Outcome']\n",
"\n",
"# Разбиение на обучающую и остальную выборку (контрольную + тестовую)\n",
"X_train, X_rem, y_train, y_rem = train_test_split(X, y, train_size=0.6, random_state=42)\n",
"\n",
"# Разбиение остатка на контрольную и тестовую выборки\n",
"X_val, X_test, y_val, y_test = train_test_split(X_rem, y_rem, test_size=0.5, random_state=42)\n",
"\n",
"# Вывод размеров выборок\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер контрольной выборки:\", X_val.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)"
@@ -1257,7 +1169,7 @@
},
{
"cell_type": "code",
"execution_count": 552,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1285,7 +1197,6 @@
}
],
"source": [
"# Функция для анализа сбалансированности\n",
"def analyze_balance(y_train, y_val, y_test):\n",
" print(\"Распределение классов в обучающей выборке:\")\n",
" print(y_train.value_counts(normalize=True))\n",
@@ -1296,7 +1207,6 @@
" print(\"\\nРаспределение классов в тестовой выборке:\")\n",
" print(y_test.value_counts(normalize=True))\n",
"\n",
"# Анализ сбалансированности\n",
"analyze_balance(y_train, y_val, y_test)"
]
},

View File

@@ -204,7 +204,7 @@
},
{
"cell_type": "code",
"execution_count": 137,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -244,17 +244,14 @@
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
@@ -270,7 +267,7 @@
},
{
"cell_type": "code",
"execution_count": 138,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -296,13 +293,10 @@
}
],
"source": [
"# Замена значений\n",
"df[\"bmi\"] = df[\"bmi\"].fillna(df[\"bmi\"].median())\n",
"\n",
"# Проверка на пропущенные значения после замены\n",
"missing_values_after_drop = df.isnull().sum()\n",
"\n",
"# Вывод результатов после замены\n",
"print(\"\\nКоличество пустых значений в каждом столбце после замены:\")\n",
"print(missing_values_after_drop)"
]
@@ -344,7 +338,7 @@
},
{
"cell_type": "code",
"execution_count": 140,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -358,19 +352,13 @@
}
],
"source": [
"# Разделение данных на признаки (X) и целевую переменную (y)\n",
"# В данном случае мы хотим предсказать 'stroke'\n",
"X = df.drop(columns=['stroke'])\n",
"y = df['stroke']\n",
"\n",
"# Разбиение данных на обучающую и тестовую выборки\n",
"# Сначала разделим на обучающую и тестовую\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)\n",
"\n",
"# Затем разделим обучающую выборку на обучающую и контрольную\n",
"X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.3)\n",
"\n",
"# Проверка размеров выборок\n",
"print(\"Размер обучающей выборки:\", X_train.shape)\n",
"print(\"Размер контрольной выборки:\", X_val.shape)\n",
"print(\"Размер тестовой выборки:\", X_test.shape)"
@@ -385,7 +373,7 @@
},
{
"cell_type": "code",
"execution_count": 141,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -423,9 +411,7 @@
}
],
"source": [
"# Функция для анализа сбалансированности\n",
"def analyze_balance(y_train, y_val, y_test, y_name):\n",
" # Распределение классов\n",
" print(\"Распределение классов в обучающей выборке:\")\n",
" print(y_train.value_counts(normalize=True))\n",
" \n",
@@ -435,22 +421,18 @@
" print(\"\\nРаспределение классов в тестовой выборке:\")\n",
" print(y_test.value_counts(normalize=True))\n",
"\n",
" # Создание фигуры и осей для трех столбчатых диаграмм\n",
" fig, axes = plt.subplots(1, 3, figsize=(18, 5), sharey=True)\n",
" fig.suptitle('Распределение в различных выборках')\n",
"\n",
" # Обучающая выборка\n",
" sns.barplot(x=y_train.value_counts().index, y=y_train.value_counts(normalize=True), ax=axes[0])\n",
" axes[0].set_title('Обучающая выборка')\n",
" axes[0].set_xlabel(y_name)\n",
" axes[0].set_ylabel('Доля')\n",
"\n",
" # Контрольная выборка\n",
" sns.barplot(x=y_val.value_counts().index, y=y_val.value_counts(normalize=True), ax=axes[1])\n",
" axes[1].set_title('Контрольная выборка')\n",
" axes[1].set_xlabel(y_name)\n",
"\n",
" # Тестовая выборка\n",
" sns.barplot(x=y_test.value_counts().index, y=y_test.value_counts(normalize=True), ax=axes[2])\n",
" axes[2].set_title('Тестовая выборка')\n",
" axes[2].set_xlabel(y_name)\n",
@@ -469,7 +451,7 @@
},
{
"cell_type": "code",
"execution_count": 142,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -509,11 +491,9 @@
"source": [
"ros = RandomOverSampler(random_state=42)\n",
"\n",
"# Применение RandomOverSampler для балансировки выборок\n",
"X_train_resampled, y_train_resampled = ros.fit_resample(X_train, y_train)\n",
"X_val_resampled, y_val_resampled = ros.fit_resample(X_val, y_val)\n",
"\n",
"# Проверка сбалансированности после RandomOverSampler\n",
"analyze_balance(y_train_resampled, y_val_resampled, y_test, 'stroke')"
]
},
@@ -530,7 +510,7 @@
},
{
"cell_type": "code",
"execution_count": 143,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -575,16 +555,12 @@
}
],
"source": [
"# Определение категориальных признаков\n",
"categorical_features = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']\n",
"\n",
"# Применение one-hot encoding к обучающей выборке\n",
"X_train_encoded = pd.get_dummies(X_train_resampled, columns=categorical_features, drop_first=True)\n",
"\n",
"# Применение one-hot encoding к контрольной выборке\n",
"X_val_encoded = pd.get_dummies(X_val_resampled, columns=categorical_features, drop_first=True)\n",
"\n",
"# Применение one-hot encoding к тестовой выборке\n",
"X_test_encoded = pd.get_dummies(X_test, columns=categorical_features, drop_first=True)\n",
"\n",
"print(X_train_encoded.head())"
@@ -599,7 +575,7 @@
},
{
"cell_type": "code",
"execution_count": 144,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -644,21 +620,17 @@
}
],
"source": [
"# Определение числовых признаков для дискретизации\n",
"numerical_features = ['age']\n",
"\n",
"# Функция для дискретизации числовых признаков\n",
"def discretize_features(df, features, bins, labels):\n",
" for feature in features:\n",
" df[f'{feature}_bin'] = pd.cut(df[feature], bins=bins, labels=labels)\n",
" df.drop(columns=[feature], inplace=True)\n",
" return df\n",
"\n",
"# Заданные интервалы и метки\n",
"age_bins = [0, 25, 55, 100]\n",
"age_labels = [\"young\", \"middle-aged\", \"old\"]\n",
"\n",
"# Применение дискретизации к обучающей, контрольной и тестовой выборкам\n",
"X_train_encoded = discretize_features(X_train_encoded, numerical_features, bins=age_bins, labels=age_labels)\n",
"X_val_encoded = discretize_features(X_val_encoded, numerical_features, bins=age_bins, labels=age_labels)\n",
"X_test_encoded = discretize_features(X_test_encoded, numerical_features, bins=age_bins, labels=age_labels)\n",
@@ -741,7 +713,7 @@
},
{
"cell_type": "code",
"execution_count": 146,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -786,7 +758,6 @@
}
],
"source": [
"# Пример масштабирования числовых признаков\n",
"numerical_features = ['avg_glucose_level', 'bmi', 'glucose_age_deviation']\n",
"\n",
"scaler = StandardScaler()\n",
@@ -806,7 +777,7 @@
},
{
"cell_type": "code",
"execution_count": 147,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -872,7 +843,7 @@
}
],
"source": [
"data = X_train_encoded.copy() # Используем предобработанные данные\n",
"data = X_train_encoded.copy()\n",
"\n",
"es = ft.EntitySet(id=\"patients\")\n",
"\n",
@@ -918,7 +889,7 @@
},
{
"cell_type": "code",
"execution_count": 148,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -930,23 +901,15 @@
}
],
"source": [
"X_train_encoded = pd.get_dummies(X_train_encoded, drop_first=True)\n",
"X_val_encoded = pd.get_dummies(X_val_encoded, drop_first=True)\n",
"X_test_encoded = pd.get_dummies(X_test_encoded, drop_first=True)\n",
"\n",
"all_columns = X_train_encoded.columns\n",
"X_train_encoded = X_train_encoded.reindex(columns=all_columns, fill_value=0)\n",
"X_val_encoded = X_val_encoded.reindex(columns=all_columns, fill_value=0)\n",
"X_test_encoded = X_test_encoded.reindex(columns=all_columns, fill_value=0)\n",
"\n",
"# Выбор модели\n",
"model = RandomForestClassifier(n_estimators=100, random_state=42)\n",
"\n",
"# Начинаем отсчет времени\n",
"start_time = time.time()\n",
"model.fit(X_train_encoded, y_train_resampled)\n",
"\n",
"# Время обучения модели\n",
"train_time = time.time() - start_time\n",
"\n",
"print(f'Время обучения модели: {train_time:.2f} секунд')"
@@ -954,7 +917,7 @@
},
{
"cell_type": "code",
"execution_count": 149,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -985,11 +948,9 @@
}
],
"source": [
"# Получение важности признаков\n",
"importances = model.feature_importances_\n",
"feature_names = X_train_encoded.columns\n",
"\n",
"# Сортировка признаков по важности\n",
"feature_importance = pd.DataFrame({'feature': feature_names, 'importance': importances})\n",
"feature_importance = feature_importance.sort_values(by='importance', ascending=False)\n",
"\n",
@@ -999,7 +960,7 @@
},
{
"cell_type": "code",
"execution_count": 150,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1037,7 +998,6 @@
}
],
"source": [
"# Предсказание и оценка\n",
"y_pred = model.predict(X_test_encoded)\n",
"\n",
"accuracy = accuracy_score(y_test, y_pred)\n",
@@ -1052,12 +1012,10 @@
"print(f\"F1 Score: {f1}\")\n",
"print(f\"ROC AUC: {roc_auc}\")\n",
"\n",
"# Кросс-валидация\n",
"scores = cross_val_score(model, X_train_encoded, y_train_resampled, cv=5, scoring='accuracy')\n",
"accuracy_cv = scores.mean()\n",
"print(f\"Cross-validated Accuracy: {accuracy_cv}\")\n",
"\n",
"# Анализ важности признаков\n",
"feature_importances = model.feature_importances_\n",
"feature_names = X_train_encoded.columns\n",
"\n",
@@ -1069,7 +1027,6 @@
"plt.title('Feature Importance')\n",
"plt.show()\n",
"\n",
"# Проверка на переобучение\n",
"y_train_pred = model.predict(X_train_encoded)\n",
"\n",
"accuracy_train = accuracy_score(y_train_resampled, y_train_pred)\n",

View File

@@ -1153,7 +1153,7 @@
},
{
"cell_type": "code",
"execution_count": 86,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -1171,13 +1171,10 @@
"import numpy as np\n",
"from sklearn.metrics import precision_score, recall_score, accuracy_score, f1_score\n",
"\n",
"# Получаем уникальные классы для целевого признака из тренировочного набора данных\n",
"unique_classes = np.unique(y_train)\n",
"\n",
"# Генерируем случайные предсказания, выбирая случайное значение из области значений целевого признака\n",
"random_predictions = np.random.choice(unique_classes, size=len(y_test))\n",
"\n",
"# Вычисление метрик для ориентира\n",
"baseline_accuracy = accuracy_score(y_test, random_predictions)\n",
"baseline_precision = precision_score(y_test, random_predictions)\n",
"baseline_recall = recall_score(y_test, random_predictions)\n",
@@ -1624,7 +1621,6 @@
"from sklearn.model_selection import GridSearchCV\n",
"from sklearn import neighbors, ensemble, neural_network\n",
"\n",
"# Словарь с вариантами гиперпараметров для каждой модели\n",
"param_grids = {\n",
" \"knn\": {\n",
" \"n_neighbors\": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], \n",
@@ -1648,22 +1644,17 @@
" }\n",
"}\n",
"\n",
"# Создаем экземпляры моделей\n",
"models = {\n",
" \"knn\": neighbors.KNeighborsClassifier(),\n",
" \"random_forest\": ensemble.RandomForestClassifier(),\n",
" \"mlp\": neural_network.MLPClassifier()\n",
"}\n",
"\n",
"# Словарь для хранения моделей с их лучшими параметрами\n",
"class_models = {}\n",
"\n",
"# Выполнение поиска по сетке для каждой модели\n",
"for model_name, model in models.items():\n",
" # Создаем GridSearchCV для текущей модели\n",
" gs_optimizer = GridSearchCV(estimator=model, param_grid=param_grids[model_name], scoring=\"f1\", n_jobs=-1)\n",
" \n",
" # Обучаем GridSearchCV\n",
" gs_optimizer.fit(preprocessed_df, y_train.values.ravel())\n",
" \n",
" # Получаем лучшие параметры\n",
@@ -1671,7 +1662,7 @@
" print(f\"Лучшие параметры для {model_name}: {best_params}\")\n",
" \n",
" class_models[model_name] = {\n",
" \"model\": model.set_params(**best_params) # Настраиваем модель с лучшими параметрами\n",
" \"model\": model.set_params(**best_params) \n",
" }"
]
},
@@ -2586,7 +2577,7 @@
},
{
"cell_type": "code",
"execution_count": 94,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -2603,10 +2594,8 @@
"import math\n",
"from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score\n",
"\n",
"# Базовое предсказание: среднее значение по y_train\n",
"baseline_predictions = [y_train.mean()] * len(y_test)\n",
"\n",
"# Вычисление метрик качества для ориентира\n",
"baseline_rmse = math.sqrt(\n",
" mean_squared_error(y_test, baseline_predictions)\n",
" )\n",
@@ -3111,7 +3100,6 @@
}
],
"source": [
"# Словарь с вариантами гиперпараметров для каждой модели\n",
"param_grids = {\n",
" \"knn\": {\n",
" \"n_neighbors\": [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30], \n",
@@ -3136,30 +3124,24 @@
" }\n",
"}\n",
"\n",
"# Создаем экземпляры моделей\n",
"models = {\n",
" \"knn\": neighbors.KNeighborsRegressor(),\n",
" \"random_forest\": ensemble.RandomForestRegressor(),\n",
" \"mlp\": neural_network.MLPRegressor()\n",
"}\n",
"\n",
"# Словарь для хранения моделей с их лучшими параметрами\n",
"class_models = {}\n",
"\n",
"# Выполнение поиска по сетке для каждой модели\n",
"for model_name, model in models.items():\n",
" # Создаем GridSearchCV для текущей модели\n",
" gs_optimizer = GridSearchCV(estimator=model, param_grid=param_grids[model_name], scoring='neg_mean_squared_error', n_jobs=-1)\n",
" \n",
" # Обучаем GridSearchCV\n",
" gs_optimizer.fit(preprocessed_df, y_train.values.ravel())\n",
" \n",
" # Получаем лучшие параметры\n",
" best_params = gs_optimizer.best_params_\n",
" print(f\"Лучшие параметры для {model_name}: {best_params}\")\n",
" \n",
" class_models[model_name] = {\n",
" \"model\": model.set_params(**best_params) # Настраиваем модель с лучшими параметрами\n",
" \"model\": model.set_params(**best_params)\n",
" }"
]
},
@@ -3323,7 +3305,7 @@
},
{
"cell_type": "code",
"execution_count": 100,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -3379,7 +3361,6 @@
}
],
"source": [
"# Создаем графики для всех моделей\n",
"for model_name, model_data in class_models.items():\n",
" print(f\"Model: {model_name}\")\n",
" y_pred = model_data[\"preds\"]\n",

View File

@@ -308,7 +308,7 @@
},
{
"cell_type": "code",
"execution_count": 353,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -348,17 +348,14 @@
}
],
"source": [
"# Количество пустых значений признаков\n",
"print(df.isnull().sum())\n",
"\n",
"print()\n",
"\n",
"# Есть ли пустые значения признаков\n",
"print(df.isnull().any())\n",
"\n",
"print()\n",
"\n",
"# Процент пустых значений признаков\n",
"for i in df.columns:\n",
" null_rate = df[i].isnull().sum() / len(df) * 100\n",
" if null_rate > 0:\n",
@@ -367,11 +364,10 @@
},
{
"cell_type": "code",
"execution_count": 354,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Замена значений\n",
"df[\"bmi\"] = df[\"bmi\"].fillna(df[\"bmi\"].median())"
]
},

View File

@@ -114,7 +114,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -122,15 +122,9 @@
"import emoji\n",
"from num2words import num2words\n",
"\n",
"# Функция для преобразования эмоджи в слова\n",
"def emojis_words(text):\n",
" \n",
" # Модуль emoji: преобразование эмоджи в их словесные описания\n",
" text = emoji.demojize(text, delimiters=(\" \", \" \"))\n",
" \n",
" # Редактирование текста путём замены \":\" и\" _\", а так же - путём добавления пробела между отдельными словами\n",
" text = text.replace(\":\", \"\").replace(\"_\", \" \")\n",
" \n",
" return text\n",
"\n",
"def transform_text(text):\n",
@@ -140,22 +134,17 @@
" # Удаление из текста всех URL и ссылок\n",
" text = re.sub(r'http\\S+', '', text)\n",
"\n",
" # Преобразование эмоджи в текст\n",
" text = emojis_words(text)\n",
"\n",
" # Приведение к нижнему регистру\n",
" text = text.lower()\n",
"\n",
" # Удаление лишних пробелов\n",
" text = re.sub(r'\\s+', ' ', text) \n",
" \n",
" # Преобразование \"ё\" в \"е\"\n",
" text = text.replace(\"ё\", \"е\")\n",
"\n",
" # Удаление всех специальных символов\n",
" text = re.sub(r'[^a-zA-Zа-яА-Я0-9\\s]', '', text)\n",
"\n",
" # Преобразование чисел в слова\n",
" words: list[str] = text.split()\n",
" words = [num2words(word, lang=\"ru\") if word.isdigit() else word for word in words]\n",
" text = \" \".join(words)\n",
@@ -177,7 +166,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -197,16 +186,15 @@
" doc = sp(text)\n",
" \n",
" filtered_tokens = [\n",
" f\"{token.lemma_}_{token.pos_}_{token.morph}\" # Формирование строки с нужным форматом\n",
" f\"{token.lemma_}_{token.pos_}_{token.morph}\"\n",
" for token in doc\n",
" if token.text not in stop_words and len(token.text) <= 20 # Фильтрация \n",
" if token.text not in stop_words and len(token.text) <= 20 \n",
" ]\n",
" \n",
" return \" \".join(filtered_tokens)\n",
"\n",
"df[\"preprocessed_text\"] = df[\"preprocessed_text\"].apply(preprocess_text)\n",
"\n",
"# Выведем 10 токенов из первого текста\n",
"first_text_tokens = df[\"preprocessed_text\"].iloc[0].split()[:10]\n",
"print(\" \".join(first_text_tokens))"
]
@@ -220,7 +208,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [
{
@@ -299,10 +287,8 @@
" n_grams: list[tuple] = list(ngrams(tokens, n))\n",
" return n_grams\n",
"\n",
"# Пример для биграмм (N=2)\n",
"df[\"bigrams\"] = df[\"preprocessed_text\"].apply(lambda x: generate_ngrams(x, n=2))\n",
"\n",
"# Пример для триграмм (N=3)\n",
"df[\"trigrams\"] = df[\"preprocessed_text\"].apply(lambda x: generate_ngrams(x, n=3))\n",
"\n",
"print(df.iloc[15:25])"

View File

@@ -98,7 +98,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": null,
"id": "5b915c12",
"metadata": {},
"outputs": [
@@ -117,11 +117,8 @@
"def preprocess_images(images):\n",
" processed_images = []\n",
" for img in images:\n",
" # Изменение размера\n",
" img_resized = cv2.resize(img, (128, 128))\n",
" # Преобразование в оттенки серого\n",
" img_gray = cv2.cvtColor(img_resized, cv2.COLOR_BGR2GRAY)\n",
" # Увеличение контраста с помощью выравнивания гистограммы\n",
" img_eq = cv2.equalizeHist(img_gray)\n",
" processed_images.append(img_eq)\n",
" return np.array(processed_images)\n",
@@ -156,7 +153,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": null,
"id": "7cc2f6b2",
"metadata": {},
"outputs": [
@@ -173,12 +170,9 @@
],
"source": [
"def apply_filters(img):\n",
" # Удаление шумов\n",
" img_blur = cv2.GaussianBlur(img, (5, 5), 0)\n",
" # Повышение резкости\n",
" kernel = np.array([[0, -1, 0], [-1, 5, -1], [0, -1, 0]])\n",
" img_sharp = cv2.filter2D(img_blur, -1, kernel)\n",
" # Определение границ\n",
" img_edges = cv2.Canny(img_sharp, 100, 200)\n",
" return img_edges\n",
"\n",