diff --git a/lab4.ipynb b/lab4.ipynb new file mode 100644 index 0000000..7ce733c --- /dev/null +++ b/lab4.ipynb @@ -0,0 +1,3512 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Лабораторная работа 4" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Бизнес цели:\n", + "1. Оптимизация ценовой стратегии: анализ факторов, влияющих на стоимость недвижимости, чтобы помочь продавцам устанавливать конкурентоспособные цены и увеличивать прибыль.\n", + "2. Улучшение инвестиционных решений: предоставление аналитики для инвесторов, чтобы они могли определить наиболее выгодные районы и типы недвижимости для вложений." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Загрузка набора данных" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Среднее значение поля 'цена': 540088.1417665294\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontviewconditiongrade...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15date_numericabove_average_price
id
7129300520221900.031.00118056501.00037...0195509817847.5112-122.25713405650163560
6414100192538000.032.25257072422.00037...400195119919812547.7210-122.31916907639164130
5631500400180000.021.00770100001.00036...0193309802847.7379-122.23327208062164910
2487200875604000.043.00196050001.00057...910196509813647.5208-122.39313605000164131
1954400510510000.032.00168080801.00038...0198709807447.6168-122.04518007503164840
..................................................................
263000018360000.032.50153011313.00038...0200909810347.6993-122.34615301509162110
6600060120400000.042.50231058132.00038...0201409814647.5107-122.36218307200164890
1523300141402101.020.75102013502.00037...0200909814447.5944-122.29910202007162440
291310100400000.032.50160023882.00038...0200409802747.5345-122.06914101287164510
1523300157325000.020.75102010762.00037...0200809814447.5941-122.29910201357163580
\n", + "

21613 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " price bedrooms bathrooms sqft_living sqft_lot floors \\\n", + "id \n", + "7129300520 221900.0 3 1.00 1180 5650 1.0 \n", + "6414100192 538000.0 3 2.25 2570 7242 2.0 \n", + "5631500400 180000.0 2 1.00 770 10000 1.0 \n", + "2487200875 604000.0 4 3.00 1960 5000 1.0 \n", + "1954400510 510000.0 3 2.00 1680 8080 1.0 \n", + "... ... ... ... ... ... ... \n", + "263000018 360000.0 3 2.50 1530 1131 3.0 \n", + "6600060120 400000.0 4 2.50 2310 5813 2.0 \n", + "1523300141 402101.0 2 0.75 1020 1350 2.0 \n", + "291310100 400000.0 3 2.50 1600 2388 2.0 \n", + "1523300157 325000.0 2 0.75 1020 1076 2.0 \n", + "\n", + " waterfront view condition grade ... sqft_basement yr_built \\\n", + "id ... \n", + "7129300520 0 0 3 7 ... 0 1955 \n", + "6414100192 0 0 3 7 ... 400 1951 \n", + "5631500400 0 0 3 6 ... 0 1933 \n", + "2487200875 0 0 5 7 ... 910 1965 \n", + "1954400510 0 0 3 8 ... 0 1987 \n", + "... ... ... ... ... ... ... ... \n", + "263000018 0 0 3 8 ... 0 2009 \n", + "6600060120 0 0 3 8 ... 0 2014 \n", + "1523300141 0 0 3 7 ... 0 2009 \n", + "291310100 0 0 3 8 ... 0 2004 \n", + "1523300157 0 0 3 7 ... 0 2008 \n", + "\n", + " yr_renovated zipcode lat long sqft_living15 \\\n", + "id \n", + "7129300520 0 98178 47.5112 -122.257 1340 \n", + "6414100192 1991 98125 47.7210 -122.319 1690 \n", + "5631500400 0 98028 47.7379 -122.233 2720 \n", + "2487200875 0 98136 47.5208 -122.393 1360 \n", + "1954400510 0 98074 47.6168 -122.045 1800 \n", + "... ... ... ... ... ... \n", + "263000018 0 98103 47.6993 -122.346 1530 \n", + "6600060120 0 98146 47.5107 -122.362 1830 \n", + "1523300141 0 98144 47.5944 -122.299 1020 \n", + "291310100 0 98027 47.5345 -122.069 1410 \n", + "1523300157 0 98144 47.5941 -122.299 1020 \n", + "\n", + " sqft_lot15 date_numeric above_average_price \n", + "id \n", + "7129300520 5650 16356 0 \n", + "6414100192 7639 16413 0 \n", + "5631500400 8062 16491 0 \n", + "2487200875 5000 16413 1 \n", + "1954400510 7503 16484 0 \n", + "... ... ... ... \n", + "263000018 1509 16211 0 \n", + "6600060120 7200 16489 0 \n", + "1523300141 2007 16244 0 \n", + "291310100 1287 16451 0 \n", + "1523300157 1357 16358 0 \n", + "\n", + "[21613 rows x 21 columns]" + ] + }, + "execution_count": 26, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import pandas as pd\n", + "\n", + "from sklearn import set_config\n", + "\n", + "set_config(transform_output=\"pandas\")\n", + "\n", + "random_state = 42\n", + "\n", + "df = pd.read_csv(\"data/kc_house_data.csv\", index_col=\"id\")\n", + "\n", + "df[\"date\"] = pd.to_datetime(df[\"date\"])\n", + "df[\"date_numeric\"] = (df[\"date\"] - pd.Timestamp(\"1970-01-01\")).dt.days\n", + "df = df.drop(columns=[\"date\"])\n", + "\n", + "average_price = df['price'].mean()\n", + "\n", + "print(f\"Среднее значение поля 'цена': {average_price}\")\n", + "\n", + "average_price = df[\"price\"].mean()\n", + "df['above_average_price'] = (df['price'] > average_price).astype(int)\n", + "\n", + "df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Разделение набора данных на обучающую и тестовые выборки (80/20) для задачи классификации" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'X_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontviewconditiongrade...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15date_numericabove_average_price
id
5205000020360000.042.50261073332.00038...0198809800347.2721-122.29322809033165340
4221270290544900.032.50199049362.00038...0200409807547.5911-122.01822504815163951
3438501327352500.022.50157023992.00037...390200909810647.5488-122.36415902306165590
2726079098560000.032.5028402164932.00039...0199109801447.7020-121.8922820175111163311
5072200040403000.032.001960131001.00258...310195709816647.4419-122.340196010518161920
..................................................................
5104531120775000.052.753750120772.004310...0200509803847.3525-122.00231207255165171
2685600090345000.031.50103069691.00046...0192109810847.5492-122.30014206000163920
9528104985611000.021.00127051001.00037...170190009811547.6771-122.32816703900163781
3450300430317500.041.50173077001.00047...720196309805947.4996-122.16316508066164400
3956900480779000.031.75199056001.00138...660194109819947.6500-122.41526306780163161
\n", + "

17290 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " price bedrooms bathrooms sqft_living sqft_lot floors \\\n", + "id \n", + "5205000020 360000.0 4 2.50 2610 7333 2.0 \n", + "4221270290 544900.0 3 2.50 1990 4936 2.0 \n", + "3438501327 352500.0 2 2.50 1570 2399 2.0 \n", + "2726079098 560000.0 3 2.50 2840 216493 2.0 \n", + "5072200040 403000.0 3 2.00 1960 13100 1.0 \n", + "... ... ... ... ... ... ... \n", + "5104531120 775000.0 5 2.75 3750 12077 2.0 \n", + "2685600090 345000.0 3 1.50 1030 6969 1.0 \n", + "9528104985 611000.0 2 1.00 1270 5100 1.0 \n", + "3450300430 317500.0 4 1.50 1730 7700 1.0 \n", + "3956900480 779000.0 3 1.75 1990 5600 1.0 \n", + "\n", + " waterfront view condition grade ... sqft_basement yr_built \\\n", + "id ... \n", + "5205000020 0 0 3 8 ... 0 1988 \n", + "4221270290 0 0 3 8 ... 0 2004 \n", + "3438501327 0 0 3 7 ... 390 2009 \n", + "2726079098 0 0 3 9 ... 0 1991 \n", + "5072200040 0 2 5 8 ... 310 1957 \n", + "... ... ... ... ... ... ... ... \n", + "5104531120 0 4 3 10 ... 0 2005 \n", + "2685600090 0 0 4 6 ... 0 1921 \n", + "9528104985 0 0 3 7 ... 170 1900 \n", + "3450300430 0 0 4 7 ... 720 1963 \n", + "3956900480 0 1 3 8 ... 660 1941 \n", + "\n", + " yr_renovated zipcode lat long sqft_living15 \\\n", + "id \n", + "5205000020 0 98003 47.2721 -122.293 2280 \n", + "4221270290 0 98075 47.5911 -122.018 2250 \n", + "3438501327 0 98106 47.5488 -122.364 1590 \n", + "2726079098 0 98014 47.7020 -121.892 2820 \n", + "5072200040 0 98166 47.4419 -122.340 1960 \n", + "... ... ... ... ... ... \n", + "5104531120 0 98038 47.3525 -122.002 3120 \n", + "2685600090 0 98108 47.5492 -122.300 1420 \n", + "9528104985 0 98115 47.6771 -122.328 1670 \n", + "3450300430 0 98059 47.4996 -122.163 1650 \n", + "3956900480 0 98199 47.6500 -122.415 2630 \n", + "\n", + " sqft_lot15 date_numeric above_average_price \n", + "id \n", + "5205000020 9033 16534 0 \n", + "4221270290 4815 16395 1 \n", + "3438501327 2306 16559 0 \n", + "2726079098 175111 16331 1 \n", + "5072200040 10518 16192 0 \n", + "... ... ... ... \n", + "5104531120 7255 16517 1 \n", + "2685600090 6000 16392 0 \n", + "9528104985 3900 16378 1 \n", + "3450300430 8066 16440 0 \n", + "3956900480 6780 16316 1 \n", + "\n", + "[17290 rows x 21 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_average_price
id
52050000200
42212702901
34385013270
27260790981
50722000400
......
51045311201
26856000900
95281049851
34503004300
39569004801
\n", + "

17290 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_average_price\n", + "id \n", + "5205000020 0\n", + "4221270290 1\n", + "3438501327 0\n", + "2726079098 1\n", + "5072200040 0\n", + "... ...\n", + "5104531120 1\n", + "2685600090 0\n", + "9528104985 1\n", + "3450300430 0\n", + "3956900480 1\n", + "\n", + "[17290 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'X_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontviewconditiongrade...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15date_numericabove_average_price
id
9421500010442500.042.25197079021.00038...660196009812547.7249-122.29818608021164710
3204800200665000.042.753320105742.00058...1100196009805647.5376-122.18027208330164431
3320000212397500.032.2513509802.00038...300200709814447.5998-122.31213501245163490
9206950100343000.032.50127025092.00038...0200409810647.5357-122.36514202206162380
3121069038355000.032.502620788431.00347...1310196409809247.2584-122.0932330130244165200
..................................................................
7889601165268000.032.50170022502.00037...0201409816847.4914-122.33415204500163080
7278700070660000.032.50240064741.00238...840196409817747.7728-122.386234010856164371
1823059030159000.031.00132065341.00037...0195209805547.4806-122.22321407405163000
3448900420620000.042.50250082822.00039...0201309805647.5127-122.16925008046163351
626059335527000.042.252330194362.00038...0198709801147.7663-122.215191010055163170
\n", + "

4323 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " price bedrooms bathrooms sqft_living sqft_lot floors \\\n", + "id \n", + "9421500010 442500.0 4 2.25 1970 7902 1.0 \n", + "3204800200 665000.0 4 2.75 3320 10574 2.0 \n", + "3320000212 397500.0 3 2.25 1350 980 2.0 \n", + "9206950100 343000.0 3 2.50 1270 2509 2.0 \n", + "3121069038 355000.0 3 2.50 2620 78843 1.0 \n", + "... ... ... ... ... ... ... \n", + "7889601165 268000.0 3 2.50 1700 2250 2.0 \n", + "7278700070 660000.0 3 2.50 2400 6474 1.0 \n", + "1823059030 159000.0 3 1.00 1320 6534 1.0 \n", + "3448900420 620000.0 4 2.50 2500 8282 2.0 \n", + "626059335 527000.0 4 2.25 2330 19436 2.0 \n", + "\n", + " waterfront view condition grade ... sqft_basement yr_built \\\n", + "id ... \n", + "9421500010 0 0 3 8 ... 660 1960 \n", + "3204800200 0 0 5 8 ... 1100 1960 \n", + "3320000212 0 0 3 8 ... 300 2007 \n", + "9206950100 0 0 3 8 ... 0 2004 \n", + "3121069038 0 3 4 7 ... 1310 1964 \n", + "... ... ... ... ... ... ... ... \n", + "7889601165 0 0 3 7 ... 0 2014 \n", + "7278700070 0 2 3 8 ... 840 1964 \n", + "1823059030 0 0 3 7 ... 0 1952 \n", + "3448900420 0 0 3 9 ... 0 2013 \n", + "626059335 0 0 3 8 ... 0 1987 \n", + "\n", + " yr_renovated zipcode lat long sqft_living15 \\\n", + "id \n", + "9421500010 0 98125 47.7249 -122.298 1860 \n", + "3204800200 0 98056 47.5376 -122.180 2720 \n", + "3320000212 0 98144 47.5998 -122.312 1350 \n", + "9206950100 0 98106 47.5357 -122.365 1420 \n", + "3121069038 0 98092 47.2584 -122.093 2330 \n", + "... ... ... ... ... ... \n", + "7889601165 0 98168 47.4914 -122.334 1520 \n", + "7278700070 0 98177 47.7728 -122.386 2340 \n", + "1823059030 0 98055 47.4806 -122.223 2140 \n", + "3448900420 0 98056 47.5127 -122.169 2500 \n", + "626059335 0 98011 47.7663 -122.215 1910 \n", + "\n", + " sqft_lot15 date_numeric above_average_price \n", + "id \n", + "9421500010 8021 16471 0 \n", + "3204800200 8330 16443 1 \n", + "3320000212 1245 16349 0 \n", + "9206950100 2206 16238 0 \n", + "3121069038 130244 16520 0 \n", + "... ... ... ... \n", + "7889601165 4500 16308 0 \n", + "7278700070 10856 16437 1 \n", + "1823059030 7405 16300 0 \n", + "3448900420 8046 16335 1 \n", + "626059335 10055 16317 0 \n", + "\n", + "[4323 rows x 21 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_average_price
id
94215000100
32048002001
33200002120
92069501000
31210690380
......
78896011650
72787000701
18230590300
34489004201
6260593350
\n", + "

4323 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_average_price\n", + "id \n", + "9421500010 0\n", + "3204800200 1\n", + "3320000212 0\n", + "9206950100 0\n", + "3121069038 0\n", + "... ...\n", + "7889601165 0\n", + "7278700070 1\n", + "1823059030 0\n", + "3448900420 1\n", + "626059335 0\n", + "\n", + "[4323 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from typing import Tuple\n", + "import pandas as pd\n", + "from pandas import DataFrame\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "def split_stratified_into_train_val_test(\n", + " df_input,\n", + " stratify_colname=\"y\",\n", + " frac_train=0.6,\n", + " frac_val=0.15,\n", + " frac_test=0.25,\n", + " random_state=None,\n", + ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:\n", + " if frac_train + frac_val + frac_test != 1.0:\n", + " raise ValueError(\n", + " \"fractions %f, %f, %f do not add up to 1.0\"\n", + " % (frac_train, frac_val, frac_test)\n", + " )\n", + " if stratify_colname not in df_input.columns:\n", + " raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n", + " X = df_input \n", + " y = df_input[\n", + " [stratify_colname]\n", + " ] \n", + " \n", + " df_train, df_temp, y_train, y_temp = train_test_split(\n", + " X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n", + " )\n", + " if frac_val <= 0:\n", + " assert len(df_input) == len(df_train) + len(df_temp)\n", + " return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp\n", + " \n", + " relative_frac_test = frac_test / (frac_val + frac_test)\n", + " df_val, df_test, y_val, y_test = train_test_split(\n", + " df_temp,\n", + " y_temp,\n", + " stratify=y_temp,\n", + " test_size=relative_frac_test,\n", + " random_state=random_state,\n", + " )\n", + " assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n", + " return df_train, df_val, df_test, y_train, y_val, y_test\n", + "\n", + "X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n", + " df,\n", + " stratify_colname=\"above_average_price\",\n", + " frac_train=0.80,\n", + " frac_val=0,\n", + " frac_test=0.20,\n", + " random_state=random_state,\n", + ")\n", + "\n", + "display(\"X_train\", X_train)\n", + "display(\"y_train\", y_train)\n", + "\n", + "display(\"X_test\", X_test)\n", + "display(\"y_test\", y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Формирование конвейера для классификации данных\n", + "\n", + "preprocessing_num - конвейер для обработки числовых данных: заполнение пропущенных значений и стандартизация\n", + "\n", + "preprocessing_cat - конвейер для обработки категориальных данных: заполнение пропущенных данных и унитарное кодирование\n", + "\n", + "features_preprocessing - трансформер для предобработки признаков\n", + "\n", + "features_engineering - трансформер для конструирования признаков\n", + "\n", + "drop_columns - трансформер для удаления колонок\n", + "\n", + "pipeline_end - основной конвейер предобработки данных и конструирования признаков" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "\n", + "# Список числовых колонок\n", + "num_columns = [\n", + " \"price\",\n", + " \"bedrooms\",\n", + " \"bathrooms\",\n", + " \"sqft_living\",\n", + " \"sqft_lot\",\n", + " \"floors\",\n", + " \"waterfront\",\n", + " \"view\",\n", + " \"condition\",\n", + " \"grade\",\n", + " \"sqft_above\",\n", + " \"sqft_basement\",\n", + " \"yr_built\",\n", + " \"yr_renovated\",\n", + " \"zipcode\",\n", + " \"lat\",\n", + " \"long\",\n", + " \"sqft_living15\",\n", + " \"sqft_lot15\",\n", + " \"date_numeric\"\n", + "]\n", + "columns_to_drop = [\"date\"]\n", + "\n", + "# Конвейер для числовых данных\n", + "num_imputer = SimpleImputer(strategy=\"median\")\n", + "num_scaler = StandardScaler()\n", + "preprocessing_num = Pipeline(\n", + " [\n", + " (\"imputer\", num_imputer),\n", + " (\"scaler\", num_scaler),\n", + " ]\n", + ")\n", + "\n", + "# Конвейер для удаления колонок\n", + "drop_columns = ColumnTransformer(\n", + " transformers=[\n", + " (\"drop_columns\", \"drop\", columns_to_drop),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "# Предобработка только для числовых данных\n", + "features_preprocessing = ColumnTransformer(\n", + " transformers=[\n", + " (\"preprocessing_num\", preprocessing_num, num_columns),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "# Итоговый конвейер\n", + "pipeline_end = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Демонстрация работы конвейера для предобработки данных при классификации" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
preprocessing_num__pricepreprocessing_num__bedroomspreprocessing_num__bathroomspreprocessing_num__sqft_livingpreprocessing_num__sqft_lotpreprocessing_num__floorspreprocessing_num__waterfrontpreprocessing_num__viewpreprocessing_num__conditionpreprocessing_num__grade...preprocessing_num__sqft_basementpreprocessing_num__yr_builtpreprocessing_num__yr_renovatedpreprocessing_num__zipcodepreprocessing_num__latpreprocessing_num__longpreprocessing_num__sqft_living15preprocessing_num__sqft_lot15preprocessing_num__date_numericremainder__above_average_price
id
5205000020-0.4928970.6975000.4979600.582210-0.1818720.939548-0.087375-0.307461-0.6302650.293371...-0.6608700.576070-0.208897-1.397782-2.073883-0.5614870.427608-0.1303751.4320620
42212702900.014419-0.4060660.497960-0.097029-0.2393180.939548-0.087375-0.307461-0.6302650.293371...-0.6608701.122105-0.208897-0.0546500.2276821.4033760.383811-0.2894640.2035731
3438501327-0.513475-1.5096330.497960-0.557159-0.3001200.939548-0.087375-0.307461-0.630265-0.560854...0.2214521.292741-0.2088970.523643-0.077510-1.068778-0.579724-0.3840961.6530130
27260790980.055850-0.4060660.4979600.8341864.8308310.939548-0.087375-0.307461-0.6302651.147596...-0.6608700.678452-0.208897-1.1925811.0278192.3036411.2159546.133562-0.3620621
5072200040-0.374916-0.406066-0.153502-0.129896-0.043661-0.918592-0.0873752.2869742.4346450.293371...0.040463-0.481872-0.2088971.642920-0.848786-0.897299-0.039561-0.074365-1.5905510
..................................................................
51045311200.6457521.8010660.8236911.831134-0.0681780.939548-0.0873754.881408-0.6302652.001820...-0.6608701.156232-0.208897-0.744871-1.4938021.5176961.653925-0.1974351.2818151
2685600090-0.534053-0.406066-0.804965-1.148755-0.190596-0.918592-0.087375-0.3074610.902190-1.415078...-0.660870-1.710451-0.2088970.560953-0.074624-0.611501-0.827907-0.2447700.1770590
95281049850.195780-1.509633-1.456427-0.885823-0.235388-0.918592-0.087375-0.307461-0.630265-0.560854...-0.276268-2.427121-0.2088970.6915350.848167-0.811560-0.462932-0.3239750.0533261
3450300430-0.6095050.697500-0.804965-0.381872-0.173076-0.918592-0.087375-0.3074610.902190-0.560854...0.968033-0.277109-0.208897-0.353124-0.4324850.367358-0.492130-0.1668470.6012850
39569004800.656727-0.406066-0.479234-0.097029-0.223405-0.918592-0.0873750.989756-0.6302650.293371...0.832291-1.027907-0.2088972.2585230.652642-1.4331710.938573-0.215351-0.4946331
\n", + "

17290 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " preprocessing_num__price preprocessing_num__bedrooms \\\n", + "id \n", + "5205000020 -0.492897 0.697500 \n", + "4221270290 0.014419 -0.406066 \n", + "3438501327 -0.513475 -1.509633 \n", + "2726079098 0.055850 -0.406066 \n", + "5072200040 -0.374916 -0.406066 \n", + "... ... ... \n", + "5104531120 0.645752 1.801066 \n", + "2685600090 -0.534053 -0.406066 \n", + "9528104985 0.195780 -1.509633 \n", + "3450300430 -0.609505 0.697500 \n", + "3956900480 0.656727 -0.406066 \n", + "\n", + " preprocessing_num__bathrooms preprocessing_num__sqft_living \\\n", + "id \n", + "5205000020 0.497960 0.582210 \n", + "4221270290 0.497960 -0.097029 \n", + "3438501327 0.497960 -0.557159 \n", + "2726079098 0.497960 0.834186 \n", + "5072200040 -0.153502 -0.129896 \n", + "... ... ... \n", + "5104531120 0.823691 1.831134 \n", + "2685600090 -0.804965 -1.148755 \n", + "9528104985 -1.456427 -0.885823 \n", + "3450300430 -0.804965 -0.381872 \n", + "3956900480 -0.479234 -0.097029 \n", + "\n", + " preprocessing_num__sqft_lot preprocessing_num__floors \\\n", + "id \n", + "5205000020 -0.181872 0.939548 \n", + "4221270290 -0.239318 0.939548 \n", + "3438501327 -0.300120 0.939548 \n", + "2726079098 4.830831 0.939548 \n", + "5072200040 -0.043661 -0.918592 \n", + "... ... ... \n", + "5104531120 -0.068178 0.939548 \n", + "2685600090 -0.190596 -0.918592 \n", + "9528104985 -0.235388 -0.918592 \n", + "3450300430 -0.173076 -0.918592 \n", + "3956900480 -0.223405 -0.918592 \n", + "\n", + " preprocessing_num__waterfront preprocessing_num__view \\\n", + "id \n", + "5205000020 -0.087375 -0.307461 \n", + "4221270290 -0.087375 -0.307461 \n", + "3438501327 -0.087375 -0.307461 \n", + "2726079098 -0.087375 -0.307461 \n", + "5072200040 -0.087375 2.286974 \n", + "... ... ... \n", + "5104531120 -0.087375 4.881408 \n", + "2685600090 -0.087375 -0.307461 \n", + "9528104985 -0.087375 -0.307461 \n", + "3450300430 -0.087375 -0.307461 \n", + "3956900480 -0.087375 0.989756 \n", + "\n", + " preprocessing_num__condition preprocessing_num__grade ... \\\n", + "id ... \n", + "5205000020 -0.630265 0.293371 ... \n", + "4221270290 -0.630265 0.293371 ... \n", + "3438501327 -0.630265 -0.560854 ... \n", + "2726079098 -0.630265 1.147596 ... \n", + "5072200040 2.434645 0.293371 ... \n", + "... ... ... ... \n", + "5104531120 -0.630265 2.001820 ... \n", + "2685600090 0.902190 -1.415078 ... \n", + "9528104985 -0.630265 -0.560854 ... \n", + "3450300430 0.902190 -0.560854 ... \n", + "3956900480 -0.630265 0.293371 ... \n", + "\n", + " preprocessing_num__sqft_basement preprocessing_num__yr_built \\\n", + "id \n", + "5205000020 -0.660870 0.576070 \n", + "4221270290 -0.660870 1.122105 \n", + "3438501327 0.221452 1.292741 \n", + "2726079098 -0.660870 0.678452 \n", + "5072200040 0.040463 -0.481872 \n", + "... ... ... \n", + "5104531120 -0.660870 1.156232 \n", + "2685600090 -0.660870 -1.710451 \n", + "9528104985 -0.276268 -2.427121 \n", + "3450300430 0.968033 -0.277109 \n", + "3956900480 0.832291 -1.027907 \n", + "\n", + " preprocessing_num__yr_renovated preprocessing_num__zipcode \\\n", + "id \n", + "5205000020 -0.208897 -1.397782 \n", + "4221270290 -0.208897 -0.054650 \n", + "3438501327 -0.208897 0.523643 \n", + "2726079098 -0.208897 -1.192581 \n", + "5072200040 -0.208897 1.642920 \n", + "... ... ... \n", + "5104531120 -0.208897 -0.744871 \n", + "2685600090 -0.208897 0.560953 \n", + "9528104985 -0.208897 0.691535 \n", + "3450300430 -0.208897 -0.353124 \n", + "3956900480 -0.208897 2.258523 \n", + "\n", + " preprocessing_num__lat preprocessing_num__long \\\n", + "id \n", + "5205000020 -2.073883 -0.561487 \n", + "4221270290 0.227682 1.403376 \n", + "3438501327 -0.077510 -1.068778 \n", + "2726079098 1.027819 2.303641 \n", + "5072200040 -0.848786 -0.897299 \n", + "... ... ... \n", + "5104531120 -1.493802 1.517696 \n", + "2685600090 -0.074624 -0.611501 \n", + "9528104985 0.848167 -0.811560 \n", + "3450300430 -0.432485 0.367358 \n", + "3956900480 0.652642 -1.433171 \n", + "\n", + " preprocessing_num__sqft_living15 preprocessing_num__sqft_lot15 \\\n", + "id \n", + "5205000020 0.427608 -0.130375 \n", + "4221270290 0.383811 -0.289464 \n", + "3438501327 -0.579724 -0.384096 \n", + "2726079098 1.215954 6.133562 \n", + "5072200040 -0.039561 -0.074365 \n", + "... ... ... \n", + "5104531120 1.653925 -0.197435 \n", + "2685600090 -0.827907 -0.244770 \n", + "9528104985 -0.462932 -0.323975 \n", + "3450300430 -0.492130 -0.166847 \n", + "3956900480 0.938573 -0.215351 \n", + "\n", + " preprocessing_num__date_numeric remainder__above_average_price \n", + "id \n", + "5205000020 1.432062 0 \n", + "4221270290 0.203573 1 \n", + "3438501327 1.653013 0 \n", + "2726079098 -0.362062 1 \n", + "5072200040 -1.590551 0 \n", + "... ... ... \n", + "5104531120 1.281815 1 \n", + "2685600090 0.177059 0 \n", + "9528104985 0.053326 1 \n", + "3450300430 0.601285 0 \n", + "3956900480 -0.494633 1 \n", + "\n", + "[17290 rows x 21 columns]" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preprocessing_result = pipeline_end.fit_transform(X_train)\n", + "preprocessed_df = pd.DataFrame(\n", + " preprocessing_result,\n", + " columns=pipeline_end.get_feature_names_out(),\n", + ")\n", + "\n", + "preprocessed_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Формирование набора моделей для классификации\n", + "\n", + "logistic -- логистическая регрессия\n", + "\n", + "ridge -- гребневая регрессия\n", + "\n", + "decision_tree -- дерево решений\n", + "\n", + "knn -- k-ближайших соседей\n", + "\n", + "naive_bayes -- наивный Байесовский классификатор\n", + "\n", + "gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n", + "\n", + "random_forest -- метод случайного леса (набор деревьев решений)\n", + "\n", + "mlp -- многослойный персептрон (нейронная сеть)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree\n", + "\n", + "# Сами классификационные модели\n", + "class_models = {\n", + " # от 0 до 1, принадлежит ли объект к классу\n", + " \"logistic\": {\"model\": linear_model.LogisticRegression()},\n", + " # Логическая, но с регуляризацией (модель не так точно запоминает данные)\n", + " \"ridge\": {\n", + " \"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")\n", + " },\n", + " # Деления данных на условия с помощью построения дерева\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=random_state)\n", + " },\n", + " # Определяет ближайших объектов и находит и класс\n", + " \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n", + " # Вероятности для классификации\n", + " \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n", + " # Постепенно улучшает предсказания с помощью слабых моделей\n", + " \"gradient_boosting\": {\n", + " \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n", + " },\n", + " \"random_forest\": {\n", + " \"model\": ensemble.RandomForestClassifier(\n", + " max_depth=11, class_weight=\"balanced\", random_state=random_state\n", + " )\n", + " },\n", + " \"mlp\": {\n", + " \"model\": neural_network.MLPClassifier(\n", + " hidden_layer_sizes=(7,),\n", + " max_iter=500,\n", + " early_stopping=True,\n", + " random_state=random_state,\n", + " )\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Обучение моделей на обучающем наборе данных и оценка на тестовом" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "Model: ridge\n", + "Model: decision_tree\n", + "Model: knn\n", + "Model: naive_bayes\n", + "Model: gradient_boosting\n", + "Model: random_forest\n", + "Model: mlp\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + " model = class_models[model_name][\"model\"]\n", + "\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n", + "\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n", + " y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n", + "\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"probs\"] = y_test_probs\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n", + " y_test, y_test_probs\n", + " )\n", + " class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n", + " class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n", + " class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n", + " y_test, y_test_predict\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Сводная таблица оценок качества для использованных моделей классификации" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Матрица неточностей" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "import matplotlib.pyplot as plt\n", + "\n", + "_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)\n", + "for index, key in enumerate(class_models.keys()):\n", + " c_matrix = class_models[key][\"Confusion_matrix\"]\n", + " disp = ConfusionMatrixDisplay(\n", + " confusion_matrix=c_matrix, display_labels=[\"Less\", \"More\"]\n", + " ).plot(ax=ax.flat[index])\n", + " disp.ax_.set_title(key)\n", + "\n", + "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Точность, полнота, верность (аккуратность), F-мера" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
logistic1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
ridge1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
decision_tree1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
naive_bayes1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
mlp0.9990540.9981060.9998420.9981060.9995950.9986120.9994480.998106
knn0.9820810.9776640.9775850.9671720.9852520.9798750.9798280.972390
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n", + " [\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " \"Accuracy_train\",\n", + " \"Accuracy_test\",\n", + " \"F1_train\",\n", + " \"F1_test\",\n", + " ]\n", + "]\n", + "class_metrics.sort_values(\n", + " by=\"Accuracy_test\", ascending=False\n", + ").style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
logistic1.0000001.0000001.0000001.0000001.000000
ridge1.0000001.0000001.0000001.0000001.000000
decision_tree1.0000001.0000001.0000001.0000001.000000
naive_bayes1.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.000000
mlp0.9986120.9981060.9993680.9970110.997011
knn0.9798750.9723900.9966360.9565580.956592
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 35, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n", + " [\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " \"ROC_AUC_test\",\n", + " \"Cohen_kappa_test\",\n", + " \"MCC_test\",\n", + " ]\n", + "]\n", + "class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False).style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\n", + " \"ROC_AUC_test\",\n", + " \"MCC_test\",\n", + " \"Cohen_kappa_test\",\n", + " ],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'logistic'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "best_model = str(class_metrics.sort_values(by=\"MCC_test\", ascending=False).iloc[0].name)\n", + "\n", + "display(best_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Вывод данных с ошибкой предсказания для оценки" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Error items count: 0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pricePredictedbedroomsbathroomssqft_livingsqft_lotfloorswaterfrontviewcondition...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15date_numericabove_average_price
id
\n", + "

0 rows × 22 columns

\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [price, Predicted, bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view, condition, grade, sqft_above, sqft_basement, yr_built, yr_renovated, zipcode, lat, long, sqft_living15, sqft_lot15, date_numeric, above_average_price]\n", + "Index: []\n", + "\n", + "[0 rows x 22 columns]" + ] + }, + "execution_count": 37, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preprocessing_result = pipeline_end.transform(X_test)\n", + "preprocessed_df = pd.DataFrame(\n", + " preprocessing_result,\n", + " columns=pipeline_end.get_feature_names_out(),\n", + ")\n", + "\n", + "y_pred = class_models[best_model][\"preds\"]\n", + "\n", + "# Cравнение реальных значений (y_test[\"above_average_price\"]) с предсказанными значениями (y_pred)\n", + "# на тестовых данных\n", + "error_index = y_test[y_test[\"above_average_price\"] != y_pred].index.tolist()\n", + "display(f\"Error items count: {len(error_index)}\")\n", + "\n", + "error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n", + "error_df = X_test.loc[error_index].copy()\n", + "error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n", + "error_df.sort_index()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Пример использования обученной модели (конвейера) для предсказания" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontviewconditiongrade...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15date_numericabove_average_price
626059335527000.04.02.252330.019436.02.00.00.03.08.0...0.01987.00.098011.047.7663-122.2151910.010055.016317.00.0
\n", + "

1 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " price bedrooms bathrooms sqft_living sqft_lot floors \\\n", + "626059335 527000.0 4.0 2.25 2330.0 19436.0 2.0 \n", + "\n", + " waterfront view condition grade ... sqft_basement yr_built \\\n", + "626059335 0.0 0.0 3.0 8.0 ... 0.0 1987.0 \n", + "\n", + " yr_renovated zipcode lat long sqft_living15 sqft_lot15 \\\n", + "626059335 0.0 98011.0 47.7663 -122.215 1910.0 10055.0 \n", + "\n", + " date_numeric above_average_price \n", + "626059335 16317.0 0.0 \n", + "\n", + "[1 rows x 21 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
preprocessing_num__pricepreprocessing_num__bedroomspreprocessing_num__bathroomspreprocessing_num__sqft_livingpreprocessing_num__sqft_lotpreprocessing_num__floorspreprocessing_num__waterfrontpreprocessing_num__viewpreprocessing_num__conditionpreprocessing_num__grade...preprocessing_num__sqft_basementpreprocessing_num__yr_builtpreprocessing_num__yr_renovatedpreprocessing_num__zipcodepreprocessing_num__latpreprocessing_num__longpreprocessing_num__sqft_living15preprocessing_num__sqft_lot15preprocessing_num__date_numericremainder__above_average_price
626059335-0.0346930.69750.1722290.2754570.1081870.939548-0.087375-0.307461-0.6302650.293371...-0.660870.541943-0.208897-1.2485451.491739-0.00418-0.112556-0.091828-0.4857950.0
\n", + "

1 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " preprocessing_num__price preprocessing_num__bedrooms \\\n", + "626059335 -0.034693 0.6975 \n", + "\n", + " preprocessing_num__bathrooms preprocessing_num__sqft_living \\\n", + "626059335 0.172229 0.275457 \n", + "\n", + " preprocessing_num__sqft_lot preprocessing_num__floors \\\n", + "626059335 0.108187 0.939548 \n", + "\n", + " preprocessing_num__waterfront preprocessing_num__view \\\n", + "626059335 -0.087375 -0.307461 \n", + "\n", + " preprocessing_num__condition preprocessing_num__grade ... \\\n", + "626059335 -0.630265 0.293371 ... \n", + "\n", + " preprocessing_num__sqft_basement preprocessing_num__yr_built \\\n", + "626059335 -0.66087 0.541943 \n", + "\n", + " preprocessing_num__yr_renovated preprocessing_num__zipcode \\\n", + "626059335 -0.208897 -1.248545 \n", + "\n", + " preprocessing_num__lat preprocessing_num__long \\\n", + "626059335 1.491739 -0.00418 \n", + "\n", + " preprocessing_num__sqft_living15 preprocessing_num__sqft_lot15 \\\n", + "626059335 -0.112556 -0.091828 \n", + "\n", + " preprocessing_num__date_numeric remainder__above_average_price \n", + "626059335 -0.485795 0.0 \n", + "\n", + "[1 rows x 21 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'predicted: 0 (proba: [0.99455988 0.00544012])'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'real: 0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "model = class_models[best_model][\"pipeline\"]\n", + "\n", + "example_id = 626059335\n", + "test = pd.DataFrame(X_test.loc[example_id, :]).T\n", + "test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T\n", + "display(test)\n", + "display(test_preprocessed)\n", + "result_proba = model.predict_proba(test)[0]\n", + "result = model.predict(test)[0]\n", + "real = int(y_test.loc[example_id].values[0])\n", + "display(f\"predicted: {result} (proba: {result_proba})\")\n", + "display(f\"real: {real}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Подбор гиперпараметров методом поиска по сетке" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\TEMP_UNIVERSITY\\mai\\.venv\\Lib\\site-packages\\numpy\\ma\\core.py:2881: RuntimeWarning: invalid value encountered in cast\n", + " _data = np.array(data, dtype=dtype, copy=copy,\n" + ] + }, + { + "data": { + "text/plain": [ + "{'model__criterion': 'gini',\n", + " 'model__max_depth': 2,\n", + " 'model__max_features': 'sqrt',\n", + " 'model__n_estimators': 10}" + ] + }, + "execution_count": 42, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "optimized_model_type = \"random_forest\"\n", + "\n", + "random_forest_model = class_models[optimized_model_type][\"pipeline\"]\n", + "\n", + "param_grid = {\n", + " \"model__n_estimators\": [10, 20, 30, 40, 50, 100, 150, 200, 250, 500],\n", + " \"model__max_features\": [\"sqrt\", \"log2\", 2],\n", + " \"model__max_depth\": [2, 3, 4, 5, 6, 7, 8, 9 ,10],\n", + " \"model__criterion\": [\"gini\", \"entropy\", \"log_loss\"],\n", + "}\n", + "\n", + "gs_optomizer = GridSearchCV(\n", + " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n", + ")\n", + "gs_optomizer.fit(X_train, y_train.values.ravel())\n", + "gs_optomizer.best_params_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обучение модели с новыми гиперпараметрами" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "metadata": {}, + "outputs": [], + "source": [ + "optimized_model = ensemble.RandomForestClassifier(\n", + " random_state=random_state,\n", + " criterion=\"gini\",\n", + " max_depth=7,\n", + " max_features=\"sqrt\",\n", + " n_estimators=30,\n", + ")\n", + "\n", + "result = {}\n", + "\n", + "result[\"pipeline\"] = Pipeline([(\"pipeline\", pipeline_end), (\"model\", optimized_model)]).fit(X_train, y_train.values.ravel())\n", + "result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n", + "result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n", + "result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n", + "\n", + "result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n", + "result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n", + "result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n", + "result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n", + "result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n", + "result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n", + "result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n", + "result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n", + "result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n", + "result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n", + "result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n", + "result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Формирование данных для оценки старой и новой версии модели" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [], + "source": [ + "optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n", + "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n", + " data=class_models[optimized_model_type]\n", + ")\n", + "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n", + " data=result\n", + ")\n", + "optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n", + "optimized_metrics = optimized_metrics.set_index(\"Name\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Оценка параметров старой и новой модели" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
Name        
Old1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
New1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 45, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_metrics[\n", + " [\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " \"Accuracy_train\",\n", + " \"Accuracy_test\",\n", + " \"F1_train\",\n", + " \"F1_test\",\n", + " ]\n", + "].style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
Name     
Old1.0000001.0000001.0000001.0000001.000000
New1.0000001.0000001.0000001.0000001.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 46, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_metrics[\n", + " [\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " \"ROC_AUC_test\",\n", + " \"Cohen_kappa_test\",\n", + " \"MCC_test\",\n", + " ]\n", + "].style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\n", + " \"ROC_AUC_test\",\n", + " \"MCC_test\",\n", + " \"Cohen_kappa_test\",\n", + " ],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA2kAAAGsCAYAAABHMu+IAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABMm0lEQVR4nO3deVyVZf7/8fcBZBE5ICogiuRSKhOuTcVo2kKg2eLYTFNpiZpODrZoLtliWjPROFPZqpW5fUd/1rQ4paWRhaaRpUWaqbmlloJbgqCynfP7wzx1FO0+nhvOfY6v5+NxP75y7puL6/br8O5zX5/7vm1Op9MpAAAAAIAlBPl6AgAAAACAX1CkAQAAAICFUKQBAAAAgIVQpAEAAACAhVCkAQAAAICFUKQBAAAAgIVQpAEAAACAhVCkAQAAAICFhPh6AgCAmh07dkwVFRWmjRcaGqrw8HDTxgMAwBPkmnEUaQBgQceOHVPL5AYq3Ftt2pgJCQnavn17wAYaAMC6yDXPUKQBgAVVVFSocG+1tq9Jlj3K+870ksMOtey6QxUVFQEZZgAAayPXPEORBgAWZo8KMiXMAACwAnLNGIo0ALCwaqdD1U5zxgEAwNfINWMo0gDAwhxyyiHv08yMMQAA8Ba5ZgxrjQAAAABgIaykAYCFOeSQGQ0d5owCAIB3yDVjKNIAwMKqnU5VO71v6TBjDAAAvEWuGUO7IwAAAABYCCtpAGBh3GANAAgk5JoxFGkAYGEOOVVNmAEAAgS5ZgztjgAAAABgIaykAYCF0RYCAAgk5JoxrKQBAAAAgIWwkgYAFsajigEAgYRcM4YiDQAszPHzZsY4AAD4GrlmDO2OAAAAAGAhrKQBgIVVm/SoYjPGAADAW+SaMRRpAGBh1c7jmxnjAADga+SaMbQ7AgAAAICFsJIGABbGDdYAgEBCrhlDkQYAFuaQTdWymTIOAAC+Rq4ZQ7sjAAAAAFgIK2kAYGEO5/HNjHEAAPA1cs0YVtIAAAAAwEJYSQMAC6s2qXffjDEAAPAWuWYMRRoAWBhhBgAIJOSaMbQ7AgAAAICFsJIGABbmcNrkcJrwqGITxgAAwFvkmjEUaQBgYbSFAAACCblmDO2OAIBT5OTk6Pe//72ioqIUFxenvn37atOmTW7HXH755bLZbG7bnXfe6XbMzp071adPH9WvX19xcXEaM2aMqqqq3I7Jy8tTly5dFBYWpjZt2mjWrFm1fXoAgHOMv+UaRRoAWFi1gkzbPLFs2TJlZ2frs88+U25uriorK5WRkaGysjK344YOHao9e/a4tsmTJ/8y9+pq9enTRxUVFfr00081e/ZszZo1SxMmTHAds337dvXp00dXXHGFCgoKdO+99+qOO+7QkiVLvPuLAwBYErlmjM3pdAb4q+AAwP+UlJQoOjpaS9e1UGSU99fTyg47dFXqThUXF8tut3v8/fv27VNcXJyWLVumHj16SDp+xbFTp06aMmVKjd/z/vvv69prr9Xu3bsVHx8vSZo2bZrGjRunffv2KTQ0VOPGjdOiRYv0zTffuL7v5ptv1qFDh7R48WLPTxQAYEnkmme5xkoaAJxDSkpK3Lby8nJD31dcXCxJio2Ndft87ty5aty4sS688EKNHz9eR44cce3Lz89XamqqK8gkKTMzUyUlJVq/fr3rmPT0dLcxMzMzlZ+ff1bnBwA4twRqrvHgEACwMLNvsE5KSnL7/JFHHtHEiRPP+L0Oh0P33nuvunXrpgsvvND1+a233qrk5GQlJiZq7dq1GjdunDZt2qS33npLklRYWOgWZJJcXxcWFp7xmJKSEh09elQRERGenywAwLLINWO5RpEGABZW7QxStdP7pofqnxvbd+3a5dYWEhYW9pvfm52drW+++UYrVqxw+3zYsGGuP6empqpp06a66qqrtHXrVrVu3drrOQMAAg+5ZgztjgBwDrHb7W7bb4XZiBEjtHDhQn388cdq3rz5GY+95JJLJElbtmyRJCUkJKioqMjtmBNfJyQknPEYu93OKhoA4DcFaq5RpAGAhTlkk0NBJmyetZY4nU6NGDFCb7/9tj766CO1bNnyN7+noKBAktS0aVNJUlpamtatW6e9e/e6jsnNzZXdbldKSorrmKVLl7qNk5ubq7S0NI/mCwDwD+SaMRRpAIBTZGdn6z//+Y/mzZunqKgoFRYWqrCwUEePHpUkbd26VY899pjWrFmj77//Xu+8845uv/129ejRQx06dJAkZWRkKCUlRbfddpu+/vprLVmyRA899JCys7NdVzrvvPNObdu2TWPHjtXGjRv14osv6vXXX9fIkSN9du4AgMDjb7nGI/gBwIJOPKr4nbWtFRkV7PV4ZYerdX2HrYYfVWyz1XyFcubMmcrKytKuXbs0YMAAffPNNyorK1NSUpL++Mc/6qGHHnIbf8eOHRo+fLjy8vIUGRmpgQMH6oknnlBIyC+3ROfl5WnkyJH69ttv1bx5cz388MPKysry+pwBANZBrmV5dH4UaQBgQSfC7O2vzzctzP7YcfNZv08GAABvkGueod0RAAAAACyER/ADgIUdv8Ha+/fJmDEGAADeIteMoUgDAAtzKEjVJjQ9OERnOwDA98g1Y2h3BAAAAAALYSUNACys2hmkaqf319OqeUYUAMACyDVjKNIAwMJOvLTT+3ECO8wAAP6BXDOGdkcAAAAAsBBW0gDAwqqdNlU7vX+ClRljAADgLXLNGFbSAAAAAMBCWEkDAAurNulRxdUB3rsPAPAP5JoxFGkAYGEOZ5AcJjwFyxHgT8ECAPgHcs0Y2h0BAAAAwEJYSQMAC6MtBAAQSMg1YyjSAMDCHDLnCVYO76cCAIDXyDVjaHcEAAAAAAthJQ0ALMyhIDlMuJ5mxhgAAHiLXDOGIg0ALKzaGaRqE56CZcYYAAB4i1wzJrDPDgAAAAD8DCtpAGBhDtnkkBk3WHs/BgAA3iLXjKFIAwALoy0EABBIyDVjAvvsAAAAAMDPsJIGABZm3ks/uSYHAPA9cs2YwD47AAAAAPAzrKQZ4HA4tHv3bkVFRclmC+ybFAF4z+l06vDhw0pMTFRQkHfXwhxOmxxOE26wNmEMBA5yDYAnyLW6R5FmwO7du5WUlOTraQDwM7t27VLz5s29GsNhUltIoL/0E54h1wCcDXKt7lCkGRAVFSVJ2vHlebI3COx/EPDcHy9I9fUUYDFVqtQKvef63QFYDbmGMyHXcDJyre5RpBlwohXE3iBI9ijCDO5CbPV8PQVYjfP4/zGjjczhDJLDhMcMmzEGAge5hjMh13AKcq3OUaQBgIVVy6ZqE17YacYYAAB4i1wzJrBLUAAAAADwM6ykAYCF0RYCAAgk5JoxFGkAYGHVMqelo9r7qQAA4DVyzZjALkEBAAAAwM+wkgYAFkZbCAAgkJBrxgT22QEAAACAn2ElDQAsrNoZpGoTrhaaMQYAAN4i14yhSAMAC3PKJocJN1g7A/x9MgAA/0CuGRPYJSgAAAAA+BlW0gDAwmgLAQAEEnLNGIo0ALAwh9Mmh9P7lg4zxgAAwFvkmjGBXYICAAAAgJ9hJQ0ALKxaQao24XqaGWMAAOAtcs0YijQAsDDaQgAAgYRcMyawS1AAAAAA8DOspAGAhTkUJIcJ19PMGAMAAG+Ra8ZQpAGAhVU7bao2oaXDjDEAAPAWuWZMYJegAAAAAOBnWEkDAAvjBmsAQCAh14xhJQ0AAAAALISVNACwMKczSA6n99fTnCaMAQCAt8g1YyjSAMDCqmVTtUy4wdqEMQAA8Ba5Zkxgl6AAAAAA4GdYSQMAC3M4zbk52uE0YTIAAHiJXDOGIg0ALMxhUu++GWMAAOAtcs2YwD47AAAAAPAzrKQBgIU5ZJPDhJujzRgDAABvkWvGUKQBgIVVO22qNqF334wxAADwFrlmDO2OAAAAAGAhrKQBgIVxgzUAIJCQa8YE9tkBAAAAgJ9hJQ0ALMwhmznvkwnwG6wBAP6BXDOGIg0ALMxp0lOwnAEeZgAA/0CuGUO7IwAAAABYCEUaAFiYw2kzbfNETk6Ofv/73ysqKkpxcXHq27evNm3a5HbMsWPHlJ2drUaNGqlBgwa68cYbVVRU5HbMzp071adPH9WvX19xcXEaM2aMqqqq3I7Jy8tTly5dFBYWpjZt2mjWrFln9XcFALA+cs0YijQAsLATT8EyY/PEsmXLlJ2drc8++0y5ubmqrKxURkaGysrKXMeMHDlS7777rv773/9q2bJl2r17t/r16+faX11drT59+qiiokKffvqpZs+erVmzZmnChAmuY7Zv364+ffroiiuuUEFBge69917dcccdWrJkifd/eQAAyyHXjLE5nU6nR99xDiopKVF0dLR++q6V7FHUtXCXmdjJ11OAxVQ5K5Wn/6m4uFh2u/2sxjjxe+ePuYNULzLU6zlVllXo7atnnvWc9u3bp7i4OC1btkw9evRQcXGxmjRponnz5ulPf/qTJGnjxo1q37698vPzdemll+r999/Xtddeq927dys+Pl6SNG3aNI0bN0779u1TaGioxo0bp0WLFumbb75x/aybb75Zhw4d0uLFi70+b9SMXMOZkGs4GblW97nGb2YAsDCz20JKSkrctvLyckPzKC4uliTFxsZKktasWaPKykqlp6e7jmnXrp1atGih/Px8SVJ+fr5SU1NdQSZJmZmZKikp0fr1613H/HqME8ecGAMAEFjINWMo0gDAwhw/PwXLjE2SkpKSFB0d7dpycnJ+ew4Oh+69915169ZNF154oSSpsLBQoaGhiomJcTs2Pj5ehYWFrmN+HWQn9p/Yd6ZjSkpKdPToUc//wgAAlkauGcMj+AHgHLJr1y63tpCwsLDf/J7s7Gx98803WrFiRW1ODQAAjwVqrlGkAYCFnc0TrE43jiTZ7XaPevdHjBihhQsXavny5WrevLnr84SEBFVUVOjQoUNuVx2LioqUkJDgOubzzz93G+/EU7J+fczJT84qKiqS3W5XRESE8RMEAPgFcs0Y2h0BwMJ89ahip9OpESNG6O2339ZHH32kli1buu3v2rWr6tWrp6VLl7o+27Rpk3bu3Km0tDRJUlpamtatW6e9e/e6jsnNzZXdbldKSorrmF+PceKYE2MAAAILuWYMK2kAgFNkZ2dr3rx5+t///qeoqChXr310dLQiIiIUHR2tIUOGaNSoUYqNjZXdbtddd92ltLQ0XXrppZKkjIwMpaSk6LbbbtPkyZNVWFiohx56SNnZ2a52lDvvvFPPP/+8xo4dq8GDB+ujjz7S66+/rkWLFvns3AEAgcffco0iDQAszOy2EKOmTp0qSbr88svdPp85c6aysrIkSU8//bSCgoJ04403qry8XJmZmXrxxRddxwYHB2vhwoUaPny40tLSFBkZqYEDB+rRRx91HdOyZUstWrRII0eO1DPPPKPmzZtr+vTpyszMPLsTBQBYGrlmDO9JM4D3yeBMeJ8MTmbm+2Qy3x9m2vtklvR+2as5IXCQazgTcg0nI9fqHitpAGBhvrriCABAbSDXjKFIAwALc0qud8F4Ow4AAL5GrhlDjwMAAAAAWAgraQBgYbSFAAACCblmDEUaAFgYYQYACCTkmjG0OwIAAACAhbCSBgAWxhVHAEAgIdeMoUgDAAsjzAAAgYRcM4Z2RwAAAACwEFbSAMDCnE6bnCZcLTRjDAAAvEWuGcNKGgAAAABYCCtpAGBhDtnkkAm9+yaMAQCAt8g1YyjSAMDCuMEaABBIyDVjKNLOQfOfi9PK92K0a0uYQsMdSrnoiIY8uFtJbcolSYW7QjXwkpQav/fBl7arx3XFKjkYrCdGJGv7hggd/ilY0Y2qlJZZrEHj9ygyyuE6/p2ZjfXOzMYq+iFUcYkVuvmeIl3955/q5DxRd67L2q8/Dd+r2CZV2vZthF58qJk2FdT39bQAnCPINZiNXIOvWapIy8rK0qFDh7RgwQJfTyWgrc1voOuy9uuCTkdUXSXNeqKpHriltV5ZtlHh9R1qklih/1fwjdv3vPefRnpjapx+f+VhSZItSErLLFbWuD2KblSl3dvD9PwDzXX4UIjGv7hDkvTu7EaamdNU9/xrl9p2OqJNX9XXlDFJioqu1qUZJXV+3qgdPa//ScMe2a3n7m+ujV/W1x+H7tM/5m3TkMvaqvhAPV9Pz+9xg7V/I9fqBrkGM5FrtYtcM8ZSRRrqxuPztrl9fd+UnfpLaqo2r41Q6qVlCg6WYuOq3I759P1o9bjukCIij19NjIqp1nUDD7j2xzev1HUD9+u/U+Ncny19I1bXDDigy284JElqmlyhTV/X1+svxBFmAaTfsP1aPC9WH7wWK0l6dlxzXXxViTJvOajXn4/38ez8H20hwG8j12Amcq12kWvG+M3THb/55hv17t1bDRo0UHx8vG677Tbt37/ftf+NN95QamqqIiIi1KhRI6Wnp6usrEySlJeXp4svvliRkZGKiYlRt27dtGPHDl+diuWUlQRLOh5QNdm8NkJb19dX5i0HatwvSQcKQ7Ty/Rh1SCt1fVZZYVNouMPtuLBwhzYV1FdVpQkTh8+F1HPo/A5H9OUnUa7PnE6bvvokSildj/hwZoD1kWu1h1zD2SLXYBV+UaQdOnRIV155pTp37qzVq1dr8eLFKioq0k033SRJ2rNnj2655RYNHjxYGzZsUF5envr16yen06mqqir17dtXPXv21Nq1a5Wfn69hw4bJZjt99V1eXq6SkhK3LVA5HNK0R5rpd78v1XntjtV4zOL/10gtzj+m3/3+1F9OOcOTdX2rDrq1y4Wq36BaI/+9y7Wv6+WHtXheI21eGyGnU/ru6wgtntdIVZVBKj7IIm4gsMdWKzhEOrTP/f+fP+0PUcMmVaf5LnjiRFuIGRusg1yrPeQavEGu1T5yzRi/+I3y/PPPq3Pnznr88cddn82YMUNJSUn67rvvVFpaqqqqKvXr10/JycmSpNTUVEnSwYMHVVxcrGuvvVatW7eWJLVv3/6MPy8nJ0eTJk2qpbOxlucfaK4dGyP05ILNNe4vP2rTx2831K33Fta4/6+TflT/UYX6cVuYZuQ01UuTmumunB8kSf3vLdRPe0N0z7UXyOmUGjapVPqfD+q/L8YryC8uDwC+5zSpLSTQw8zfkGu1h1wDrI1cM8YvfqV8/fXX+vjjj9WgQQPX1q5dO0nS1q1b1bFjR1111VVKTU3Vn//8Z73yyiv66afjT1qKjY1VVlaWMjMzdd111+mZZ57Rnj17zvjzxo8fr+LiYte2a9euMx7vr55/oJlW5do1+Y0tapJYc5/GJ4tiVH7UpvQ/H6xxf2xclVqcX660zBLd888ftHB2Yx0oOl77h0U4dd/Tu/TO1q81Z9W3+r8vvlV8UoXqN6hWdCOuRgWCkoPBqq6SYk66utiwcZV+2ucX14AAnyDXage5Bm+Ra7AKvyjSSktLdd1116mgoMBt27x5s3r06KHg4GDl5ubq/fffV0pKip577jm1bdtW27dvlyTNnDlT+fn5+sMf/qDXXntNF1xwgT777LPT/rywsDDZ7Xa3LZA4nceD7NPF0Zr83y1KaFFx2mOX/L9GujSjRDGNau7rP3lcSaqscP9nFVJPapJYqeBgadn/Guri9BKuOAaIqsogbV5bX527H3Z9ZrM51al7qb5dw6OKzeDU8f9teb35+kTghlwzF7kGs5BrtY9cM8YvLgl06dJFb775ps477zyFhNQ8ZZvNpm7duqlbt26aMGGCkpOT9fbbb2vUqFGSpM6dO6tz584aP3680tLSNG/ePF166aV1eRqW8fwDzfXx2w01ceY2RTRw6ODe43+nkVHVCov45Z/8j9tDte6zSD32n22njPH50ij9tK+e2nY6ovBIh3ZsCtf0xxL1u9+XKiHpeDj+sDVMmwrqq13nMh0uDtFbLzXR95vCNfqZnXVzoqgTb73cWKOn7NJ3X9fXpq+OP6o4vL5DH8yP9fXUAoJDNtlkwlOwTBgD5iHXzEWuwUzkWu0i14yxXJFWXFysgoICt8+GDRumV155RbfccovGjh2r2NhYbdmyRfPnz9f06dO1evVqLV26VBkZGYqLi9OqVau0b98+tW/fXtu3b9fLL7+s66+/XomJidq0aZM2b96s22+/3TcnaAELZzeWJI258Xy3z+97eqcy/vJL+8eS+Y3UuGmluvY8rJOFhjv1/txGemliM1VW2NQksULdehfrLyP2uo5xOKQ3pzXRD1uTFFzPqY5/KNXT/9vsCjsEhmXvNFR0o2rdPqZQDZtUadv6CD3Yv6UO7eddMoBErtUFcg1mItdgBZYr0vLy8tS5c2e3z4YMGaKVK1dq3LhxysjIUHl5uZKTk9WrVy8FBQXJbrdr+fLlmjJlikpKSpScnKwnn3xSvXv3VlFRkTZu3KjZs2frwIEDatq0qbKzs/XXv/7VR2foe0t2Fxg6bvD4PRo8vub7HDp1K9WUd2u+KfuEFueX68Xc7zydHvzQOzMb652ZjX09jYDESz/9H7lW+8g1mI1cqz3kmjE2p9MZ6C2dXispKVF0dLR++q6V7FE0ncNdZmInX08BFlPlrFSe/qfi4uKzvvfnxO+dDv8dreD6YV7PqfpIudb++d9ezQmBg1zDmZBrOBm5Vvcst5IGAPiFw2mTzYSrhWY87hgAAG+Ra8ZQpAGAhZ14ipUZ4wAA4GvkmjH0OAAAAACAhbCSBgAWxg3WAIBAQq4ZQ5EGABZGmAEAAgm5ZgztjgAAAABgIaykAYCF8RQsAEAgIdeMoUgDAAvjKVgAgEBCrhlDuyMAAAAAWAgraQBgYcevOJpxg7UJkwEAwEvkmjGspAEAAACAhbCSBgAWxqOKAQCBhFwzhiINACzM+fNmxjgAAPgauWYM7Y4AAAAAYCGspAGAhdEWAgAIJOSaMRRpAGBl9IUAAAIJuWYI7Y4AAAAAYCGspAGAlZnUFqIAbwsBAPgJcs0QijQAsLDjL/00ZxwAAHyNXDOGdkcAAAAAsBBW0gDAwngKFgAgkJBrxlCkAYCVOW3m9N0HeJgBAPwEuWYI7Y4AAAAAYCGspAGAhXGDNQAgkJBrxrCSBgAAAAAWwkoaAFiZ8+fNjHEAAPA1cs0QijQAsDCeggUACCTkmjG0OwIAAACAhbCSBgBWF+AtHQCAcwy59psMFWnvvPOO4QGvv/76s54MAMAdbSG1g1wDAN8g14wxVKT17dvX0GA2m03V1dXezAcAgFpHrgEArMxQkeZwOGp7HgCAmvAUrFpBrgGAj5Brhnj14JBjx46ZNQ8AQI1sJm74LeQaANQ2cs0Ij4u06upqPfbYY2rWrJkaNGigbdu2SZIefvhhvfrqq6ZPEACA2kSuAQCsxuMi7R//+IdmzZqlyZMnKzQ01PX5hRdeqOnTp5s6OQA45zlN3FAjcg0A6hC5ZojHRdqcOXP08ssvq3///goODnZ93rFjR23cuNHUyQEAUNvINQCA1Xj8nrQff/xRbdq0OeVzh8OhyspKUyYFAPgZN1jXOnINAOoQuWaIxytpKSkp+uSTT075/I033lDnzp1NmRQA4GdOm3mbB5YvX67rrrtOiYmJstlsWrBggdv+rKws2Ww2t61Xr15uxxw8eFD9+/eX3W5XTEyMhgwZotLSUrdj1q5dq8suu0zh4eFKSkrS5MmTz+qvyRvkGgDUIR/lmuRf2ebxStqECRM0cOBA/fjjj3I4HHrrrbe0adMmzZkzRwsXLvR4AgAA6ykrK1PHjh01ePBg9evXr8ZjevXqpZkzZ7q+DgsLc9vfv39/7dmzR7m5uaqsrNSgQYM0bNgwzZs3T5JUUlKijIwMpaena9q0aVq3bp0GDx6smJgYDRs2rPZO7iTkGgCcG/wp2zwu0m644Qa9++67evTRRxUZGakJEyaoS5cuevfdd3X11Vd7OhwA4AyczuObGeN4onfv3urdu/cZjwkLC1NCQkKN+zZs2KDFixfriy++0EUXXSRJeu6553TNNdfo3//+txITEzV37lxVVFRoxowZCg0N1e9+9zsVFBToqaeeqtMijVwDgLrjq1yT/Cvbzuo9aZdddplyc3O1d+9eHTlyRCtWrFBGRsbZDAUAOBOTn4JVUlLitpWXl5/11PLy8hQXF6e2bdtq+PDhOnDggGtffn6+YmJiXCEmSenp6QoKCtKqVatcx/To0cPtiYqZmZnatGmTfvrpp7Oe19kg1wCgjlg41yTrZJvHK2knrF69Whs2bJB0vJ+/a9euZzsUAKCOJCUluX39yCOPaOLEiR6P06tXL/Xr108tW7bU1q1b9cADD6h3797Kz89XcHCwCgsLFRcX5/Y9ISEhio2NVWFhoSSpsLBQLVu2dDsmPj7eta9hw4Yez8sb5BoA+B+zck2yVrZ5XKT98MMPuuWWW7Ry5UrFxMRIkg4dOqQ//OEPmj9/vpo3b+7pkACA0znLm6NrHEfSrl27ZLfbXR+f3Gtv1M033+z6c2pqqjp06KDWrVsrLy9PV111lXdzrWPkGgDUIYvmmmStbPO43fGOO+5QZWWlNmzYoIMHD+rgwYPasGGDHA6H7rjjjtqYIwCcs2xO8zZJstvtbps3YfZrrVq1UuPGjbVlyxZJUkJCgvbu3et2TFVVlQ4ePOjq9U9ISFBRUZHbMSe+Pt39ALWBXAOAuuMvuSb5Nts8LtKWLVumqVOnqm3btq7P2rZtq+eee07Lly/3dDgAQAD44YcfdODAATVt2lSSlJaWpkOHDmnNmjWuYz766CM5HA5dcsklrmOWL1/u9i6y3NxctW3btk5bHck1AEBNfJltHhdpSUlJNb7cs7q6WomJiZ4OBwA4E5NvsDaqtLRUBQUFKigokCRt375dBQUF2rlzp0pLSzVmzBh99tln+v7777V06VLdcMMNatOmjTIzMyVJ7du3V69evTR06FB9/vnnWrlypUaMGKGbb77ZlRW33nqrQkNDNWTIEK1fv16vvfaannnmGY0aNers/77OArkGAHXIR7km+Ve2eVyk/etf/9Jdd92l1atXuz5bvXq17rnnHv373//2dDgAwJn46KWfq1evVufOnV0vcx41apQ6d+6sCRMmKDg4WGvXrtX111+vCy64QEOGDFHXrl31ySefuLWZzJ07V+3atdNVV12la665Rt27d9fLL7/s2h8dHa0PPvhA27dvV9euXXXfffdpwoQJdfr4fYlcA4A65cOXWftTttmczt9+y0DDhg1ls/3yF1FWVqaqqiqFhBx/7siJP0dGRurgwYMeTcAflJSUKDo6Wj9910r2qLN6awECWGZiJ19PARZT5axUnv6n4uJit5uZPXHi907S048pKCLc6zk5jh7TrpEPezWnQEKukWs4PXINJyPX6p6hpztOmTKllqcBAKjRWbZ01DgOXMg1APARcs0QQ0XawIEDa3seAADUGXINAGBlZ/0ya0k6duyYKioq3D4LxOVGAPAZrjjWKXINAGoZuWaIx43oZWVlGjFihOLi4hQZGamGDRu6bQAAE/nwKVjnCnINAOoQuWaIx0Xa2LFj9dFHH2nq1KkKCwvT9OnTNWnSJCUmJmrOnDm1MUcAAGoNuQYAsBqP2x3fffddzZkzR5dffrkGDRqkyy67TG3atFFycrLmzp2r/v3718Y8AeDcdJaPGa5xHNSIXAOAOkSuGeLxStrBgwfVqlUrScf79E88mrh79+5avny5ubMDgHOczWnehpqRawBQd8g1Yzwu0lq1aqXt27dLktq1a6fXX39d0vErkTExMaZODgCA2kauAQCsxuMibdCgQfr6668lSffff79eeOEFhYeHa+TIkRozZozpEwSAcxo3WNc6cg0A6hC5ZojH96SNHDnS9ef09HRt3LhRa9asUZs2bdShQwdTJwcAQG0j1wAAVuPVe9IkKTk5WcnJyWbMBQAAnyPXAAC+ZqhIe/bZZw0PePfdd5/1ZAAA7mwy5+bowH4GlufINQDwDXLNGENF2tNPP21oMJvNFtBh9scLUhViq+fracBiNs/p4uspwGIcR49Jw/7n62ngDMi148g11IRcw8nItbpnqEg78dQrAEAd430ytYJcAwAfIdcM8fqeNABALTLrCVYB/hQsAICfINcM8fgR/AAAAACA2sNKGgBYGVccAQCBhFwzhCINACzM5jTpKVgBHmYAAP9ArhlDuyMAAAAAWMhZFWmffPKJBgwYoLS0NP3444+SpP/7v//TihUrTJ0cAJzznCZuOC1yDQDqCLlmiMdF2ptvvqnMzExFREToq6++Unl5uSSpuLhYjz/+uOkTBIBzGmFW68g1AKhD5JohHhdpf//73zVt2jS98sorqlfvlxdgduvWTV9++aWpkwMAoLaRawAAq/H4wSGbNm1Sjx49Tvk8Ojpahw4dMmNOAICfcYN17SPXAKDukGvGeLySlpCQoC1btpzy+YoVK9SqVStTJgUA+JnTZt6GGpFrAFCHyDVDPC7Shg4dqnvuuUerVq2SzWbT7t27NXfuXI0ePVrDhw+vjTkCAFBryDUAgNV43O54//33y+Fw6KqrrtKRI0fUo0cPhYWFafTo0brrrrtqY44AcO7ipZ+1jlwDgDpErhnicZFms9n04IMPasyYMdqyZYtKS0uVkpKiBg0a1Mb8AACoVeQaAMBqPC7STggNDVVKSoqZcwEAnIQbrOsOuQYAtY9cM8bjIu2KK66QzXb6G/U++ugjryYEAPgV2kJqHbkGAHWIXDPE4yKtU6dObl9XVlaqoKBA33zzjQYOHGjWvAAAqBPkGgDAajwu0p5++ukaP584caJKS0u9nhAA4FdMagsJ9CuO3iDXAKAOkWuGePwI/tMZMGCAZsyYYdZwAADpl7YQMzZ4hFwDgFpArhliWpGWn5+v8PBws4YDAMCnyDUAgK943O7Yr18/t6+dTqf27Nmj1atX6+GHHzZtYgAAcYN1HSDXAKAOkWuGeFykRUdHu30dFBSktm3b6tFHH1VGRoZpEwMA8KjiukCuAUDdIdeM8ahIq66u1qBBg5SamqqGDRvW1pwAAKgT5BoAwIo8uictODhYGRkZOnToUC1NBwCAukOuAQCsyOMHh1x44YXatm1bbcwFAIA6R64BAKzG4yLt73//u0aPHq2FCxdqz549KikpcdsAACbiUcW1jlwDgDpErhli+J60Rx99VPfdd5+uueYaSdL1118vm83m2u90OmWz2VRdXW3+LAHgHMUN1rWHXAOAukeuGWO4SJs0aZLuvPNOffzxx7U5HwAA6gS5BgCwKsNFmtN5vFzt2bNnrU0GAFCDAL9a6CvkGgD4CLn2mzx6BP+v20AAAHWAl37WKnINAOoYuWaIR0XaBRdc8JuBdvDgQa8mBABAXSHXAABW5FGRNmnSJEVHR9fWXAAAJ+EG69pFrgFA3SLXjPGoSLv55psVFxdXW3MBAJyMtpBaRa4BQB0j1wwx/J40+vYBAIGEXAMAWJXHT3cEANQd2kJqD7kGAHWPXDPGcJHmcDhqcx4AgJrQFlJryDUA8AFyzRDD7Y4AAAAAgNrn0YNDAAB1jCuOAIBAQq4ZwkoaAAAAAFgIK2kAYGHcYA0ACCTkmjEUaQBgZbSFAAACCblmCO2OAAAAAGAhrKQBgJVxxREAEEjINUMo0gDAwujdBwAEEnLNGNodAQAAAMBCWEkDACujLQQAEEjINUMo0gDAwmgLAQAEEnLNGNodAQAAAMBCWEkDACujLQQAEEjINUNYSQMAnGL58uW67rrrlJiYKJvNpgULFrjtdzqdmjBhgpo2baqIiAilp6dr8+bNbsccPHhQ/fv3l91uV0xMjIYMGaLS0lK3Y9auXavLLrtM4eHhSkpK0uTJk2v71AAA5yh/yjaKNACwMqeJmwfKysrUsWNHvfDCCzXunzx5sp599llNmzZNq1atUmRkpDIzM3Xs2DHXMf3799f69euVm5urhQsXavny5Ro2bJhrf0lJiTIyMpScnKw1a9boX//6lyZOnKiXX37Zs8kCAPyHj3JN8q9so90RACzM9vNmxjie6N27t3r37l3jPqfTqSlTpuihhx7SDTfcIEmaM2eO4uPjtWDBAt18883asGGDFi9erC+++EIXXXSRJOm5557TNddco3//+99KTEzU3LlzVVFRoRkzZig0NFS/+93vVFBQoKeeesot8AAAgcNXuSb5V7axkgYA55CSkhK3rby83OMxtm/frsLCQqWnp7s+i46O1iWXXKL8/HxJUn5+vmJiYlwhJknp6ekKCgrSqlWrXMf06NFDoaGhrmMyMzO1adMm/fTTT2d7igCAc4gZuSZZL9so0gDAykxuC0lKSlJ0dLRry8nJ8XhKhYWFkqT4+Hi3z+Pj4137CgsLFRcX57Y/JCREsbGxbsfUNMavfwYAIMBYMNck62Ub7Y4AYGFmv09m165dstvtrs/DwsK8HxwAAIPINWNYSQOAc4jdbnfbzibMEhISJElFRUVunxcVFbn2JSQkaO/evW77q6qqdPDgQbdjahrj1z8DAIAzMSPXJOtlG0UaAFiZD5+CdTotW7ZUQkKCli5d6vqspKREq1atUlpamiQpLS1Nhw4d0po1a1zHfPTRR3I4HLrkkktcxyxfvlyVlZWuY3Jzc9W2bVs1bNjQvAkDAKzDgrkmWS/bKNIAwOp8EGSlpaUqKChQQUGBpOM3VBcUFGjnzp2y2Wy699579fe//13vvPOO1q1bp9tvv12JiYnq27evJKl9+/bq1auXhg4dqs8//1wrV67UiBEjdPPNNysxMVGSdOuttyo0NFRDhgzR+vXr9dprr+mZZ57RqFGjzvIvCgDgF3xUoPlTtnFPGgDgFKtXr9YVV1zh+vpEuAwcOFCzZs3S2LFjVVZWpmHDhunQoUPq3r27Fi9erPDwcNf3zJ07VyNGjNBVV12loKAg3XjjjXr22Wdd+6Ojo/XBBx8oOztbXbt2VePGjTVhwgQevw8AqBX+lG02p9Np8mJh4CkpKVF0dLQu1w0KsdXz9XRgMZvndPH1FGAxjqPHtGvYoyouLna7mdkTJ37vXDjscQWHhv/2N/yG6opj+ublB7yaEwIHuYYzIddwMnKt7rGSBgBWZlbfPZfjAABWQK4ZQpEGj1yXtV9/Gr5XsU2qtO3bCL34UDNtKqjv62nBJOEbD6vhe0UK//6oQg5Vavc9rVTWNca1P/7l72VfcdDte8pS7do9po3r63p7jqnx/B8VsblUqnKqIilCB25M1NGUqFN+XtDhKrV4aIPq/VSprVM7yBHJryQAdYtcC2zkGvwV/3JgWM/rf9KwR3brufuba+OX9fXHofv0j3nbNOSytio+QLtMIAgqd6iiRX2V9GisxGe31XhMWQe7iu5Idn3trGdz25/41FZVJoTph/vPlzM0SDFL9irxqa36/t+/U3WM+7+T+Fd3qCIpQvV+qhRqZvb7ZAD8glwLfOSa9ZBrxvj06Y5ZWVmy2Wy68847T9mXnZ0tm82mrKysup8YatRv2H4tnherD16L1c7N4Xp2XHOVH7Up85aDv/3N8AtHOkbrwJ8SVXZRzGmPcYbYVB1Tz7X9+iph0OEqhRaV6+C1CapoUV+VCeHaf1MzBVU4FPrDUbdxopfuU9CRav10TXxtnQ5Q58g1/0KuBT5yDf7K54/gT0pK0vz583X06C//0I8dO6Z58+apRYsWZz2u0+lUVVWVGVOEpJB6Dp3f4Yi+/OSXpX2n06avPolSStcjPpwZ6lrExlK1zF6r5LHr1WTWTgUd/uV/Z44GwapoGib7igOylVdL1U5Ff7xfVfYQlbf8pX0o9Mejil2wR0XDzpNsNfwQ/MKi75PB6ZFr/oFcwwnkWh0j1wzxeZHWpUsXJSUl6a233nJ99tZbb6lFixbq3Lmz67Py8nLdfffdiouLU3h4uLp3764vvvjCtT8vL082m03vv/++unbtqrCwMK1YsUIOh0M5OTlq2bKlIiIi1LFjR73xxht1eo6BwB5breAQ6dA+9w7Zn/aHqGET/qPhXFHWwa6iYcn68f7ztf8vzRSxsVTNntwiOX7+TWmz6cdx5ytsx1G1Hva12gz5Sg0X79Xu0W1cVyZtlQ4lvPi99t/cTFWNQ314Nv7hRFuIGRvqBrnmH8g1SOSaL5Brxvi8SJOkwYMHa+bMma6vZ8yYoUGDBrkdM3bsWL355puaPXu2vvzyS7Vp00aZmZk6eNC9JeH+++/XE088oQ0bNqhDhw7KycnRnDlzNG3aNK1fv14jR47UgAEDtGzZstPOp7y8XCUlJW4bAKn00liVdYlRRVKEyrrGaPeo1grfdkQRGw4fP8DpVJPZu1RtD9EPD16gXRPbqbRLtJo+vVXBh4735zd6fbcqEsN1uFsjH54JULvINcA/kGuwKksUaQMGDNCKFSu0Y8cO7dixQytXrtSAAQNc+8vKyjR16lT961//Uu/evZWSkqJXXnlFERERevXVV93GevTRR3X11VerdevWioyM1OOPP64ZM2YoMzNTrVq1UlZWlgYMGKCXXnrptPPJyclRdHS0a0tKSqq1c/cXJQeDVV0lxZx0dbFh4yr9tI/nz5yrquLCVBUVonpF5ZKkiG8PK7KgWIXZLXXsggYqP6++9mW1kDM0SPZPDkiS6m84rAaf/6Q2WV+qTdaXavbEZklSq+y1in1rt8/OxbJoC/FL5Jr1kWuoCblWB8g1QyzxW6hJkybq06ePZs2aJafTqT59+qhx48au/Vu3blVlZaW6devm+qxevXq6+OKLtWHDBrexLrroIteft2zZoiNHjujqq692O6aiosKt5eRk48ePd72BXDr+8r1zPdCqKoO0eW19de5+WPmLoyVJNptTnbqX6p1ZXDk6V4UcrFBwaZXr6VZBFQ5JkvPkfnybXL9M99zVSrZKh2tX+LYjip++Qz88eIEq48PqYNZ+xqwgCvAwsxpyzfrINdSEXKsD5JohlijSpOOtISNGjJAkvfDCC2c9TmRkpOvPpaWlkqRFixapWbNmbseFhZ3+fzRhYWFn3H+ueuvlxho9ZZe++7q+Nn11/FHF4fUd+mB+rK+nBpPYjlW7rh5KUr195QrdcUSOyBBVNwhWo7f3qPT3DVUVHaJ6e8vV+LUfVRkXpiOpdknS0TYN5IgMVsLLO3Sgb4Kc9YIUnbdf9fZVqKzT8WNODqzgn2/QrkgM530yCCjkmvWRa4GPXIO/ssy/nF69eqmiokI2m02ZmZlu+1q3bq3Q0FCtXLlSycnJkqTKykp98cUXuvfee087ZkpKisLCwrRz50717NmzNqd/Tlj2TkNFN6rW7WMK1bBJlbatj9CD/Vvq0H7eJRMowrcfUfOcza6vm8z7UZJU0j1We7NaKHTXUTVdcVDBR6pV1bCejlwYpQM3JspZ73jntCMqRD+ObqNGb+w+Pk61UxXNIrT73laqaMHLYc8G75PxX+Sa9ZFrgY9csx5yzRjLFGnBwcGuFo/g4GC3fZGRkRo+fLjGjBmj2NhYtWjRQpMnT9aRI0c0ZMiQ044ZFRWl0aNHa+TIkXI4HOrevbuKi4u1cuVK2e12DRw4sFbPKRC9M7Ox3pnZ+LcPhF862j5Km+d0Oe3+3WPP/80xyltFGjrO6M8859EW4rfINf9ArgU2cs2CyDVDLFOkSZLdbj/tvieeeEIOh0O33XabDh8+rIsuukhLlixRw4YNzzjmY489piZNmignJ0fbtm1TTEyMunTpogceeMDs6QMA4IZcAwCcDZvT6QzwOtR7JSUlio6O1uW6QSE2WiDgjqtlOJnj6DHtGvaoiouLz/gf6Wdy4vdOp9v+oeDQcK/nVF1xTAX/96BXc0LgINdwJuQaTkau1T1LPIIfAAAAAHCcpdodAQAnoXcfABBIyDVDKNIAwMJ4ChYAIJCQa8bQ7ggAAAAAFsJKGgBYGW0hAIBAQq4ZQpEGABZGWwgAIJCQa8bQ7ggAAAAAFsJKGgBYGW0hAIBAQq4ZQpEGABZGWwgAIJCQa8bQ7ggAAAAAFsJKGgBYGW0hAIBAQq4ZQpEGABYX6C0dAIBzC7n222h3BAAAAAALYSUNAKzM6Ty+mTEOAAC+Rq4ZwkoaAAAAAFgIK2kAYGE8qhgAEEjINWMo0gDAyngKFgAgkJBrhtDuCAAAAAAWwkoaAFiYzXF8M2McAAB8jVwzhiINAKyMthAAQCAh1wyh3REAAAAALISVNACwMJ6CBQAIJOSaMRRpAGBlvPQTABBIyDVDaHcEAAAAAAthJQ0ALIy2EABAICHXjGElDQAAAAAshJU0ALAyHlUMAAgk5JohFGkAYGG0hQAAAgm5ZgztjgAAAABgIaykAYCV8ahiAEAgIdcMoUgDAAujLQQAEEjINWNodwQAAAAAC2ElDQCsjKdgAQACCblmCEUaAFgYbSEAgEBCrhlDuyMAAAAAWAgraQBgZQ7n8c2McQAA8DVyzRCKNACwMnr3AQCBhFwzhHZHAAAAALAQVtIAwMJsMukGa++HAADAa+SaMaykAQAAAICFsJIGAFbmdB7fzBgHAABfI9cMoUgDAAvjfTIAgEBCrhlDuyMAAAAAWAgraQBgZTyqGAAQSMg1QyjSAMDCbE6nbCb03ZsxBgAA3iLXjKHdEQAAAAAshJU0ALAyx8+bGeMAAOBr5JohFGkAYGG0hQAAAgm5ZgztjgCAU0ycOFE2m81ta9eunWv/sWPHlJ2drUaNGqlBgwa68cYbVVRU5DbGzp071adPH9WvX19xcXEaM2aMqqqq6vpUAADwu1xjJQ0ArMyHT8H63e9+pw8//ND1dUjIL5ExcuRILVq0SP/9738VHR2tESNGqF+/flq5cqUkqbq6Wn369FFCQoI+/fRT7dmzR7fffrvq1aunxx9/3OvTAQD4KXLNEIo0AECNQkJClJCQcMrnxcXFevXVVzVv3jxdeeWVkqSZM2eqffv2+uyzz3TppZfqgw8+0LfffqsPP/xQ8fHx6tSpkx577DGNGzdOEydOVGhoaF2fDgDgHOdPuUa7IwBYmdNp3iappKTEbSsvLz/tj968ebMSExPVqlUr9e/fXzt37pQkrVmzRpWVlUpPT3cd265dO7Vo0UL5+fmSpPz8fKWmpio+Pt51TGZmpkpKSrR+/fra+JsCAPgDcs0QijQAsDCb07xNkpKSkhQdHe3acnJyavy5l1xyiWbNmqXFixdr6tSp2r59uy677DIdPnxYhYWFCg0NVUxMjNv3xMfHq7CwUJJUWFjoFmQn9p/YBwA4N5FrxtDuCADnkF27dslut7u+DgsLq/G43r17u/7coUMHXXLJJUpOTtbrr7+uiIiIWp8nAABGBGqusZIGAFZmcluI3W53204XZieLiYnRBRdcoC1btighIUEVFRU6dOiQ2zFFRUWuXv+EhIRTnop14uua7gcAAJwjyDVDKNIAwMJsDvM2b5SWlmrr1q1q2rSpunbtqnr16mnp0qWu/Zs2bdLOnTuVlpYmSUpLS9O6deu0d+9e1zG5ubmy2+1KSUnxbjIAAL9FrhlDuyMA4BSjR4/Wddddp+TkZO3evVuPPPKIgoODdcsttyg6OlpDhgzRqFGjFBsbK7vdrrvuuktpaWm69NJLJUkZGRlKSUnRbbfdpsmTJ6uwsFAPPfSQsrOzDV/lBADALP6WaxRpAGBlv2rp8HocD/zwww+65ZZbdODAATVp0kTdu3fXZ599piZNmkiSnn76aQUFBenGG29UeXm5MjMz9eKLL7q+Pzg4WAsXLtTw4cOVlpamyMhIDRw4UI8++qj35wIA8F/kmiEUaQBgZT566ef8+fPPuD88PFwvvPCCXnjhhdMek5ycrPfee8+zHwwACGzkmiEUaQY4f67Uq1Rpzj8qBBTH0WO+ngIsxnH0+DtanGZcKQRqAbmGMyHXcDJyre5RpBlw+PBhSdIKcUUYNRj2P1/PABZ1+PBhRUdHezWGzemUzYRQNGMMBA5yDWdEruE0yLW6Q5FmQGJionbt2qWoqCjZbDZfT8enSkpKlJSUdMo7KQD+bfzC6XTq8OHDSkxMNGMwn/TuI7CRa7/gdxdOh38bvyDX6h5FmgFBQUFq3ry5r6dhKSfeRQGcjH8bx3l7pRGoTeTaqfjdhdPh38Zx5FrdokgDACtzSvLyXTCucQAA8DVyzRBeZg0AAAAAFsJKGjwSFhamRx55hJfR4hT826gd3GAN1C5+d+F0+LdRO8g1Y2xOnqUJAJZTUlKi6OhoXdnpfoUEe/8fCFXV5fqo4AkVFxdzbwUAoM6Ra56h3REAAAAALIR2RwCwMh5VDAAIJOSaIRRpAGBlDklmvMbKjCdpAQDgLXLNENodAQAAAMBCWEkDAAvjKVgAgEBCrhnDSto5LCsrS3379vX1NGAhWVlZstlsuvPOO0/Zl52dLZvNpqysrLqf2LnsRO++GRsQ4Mg1nIxcsyByzRCKNABukpKSNH/+fB09etT12bFjxzRv3jy1aNHirMd1Op2qqqoyY4oAABhGrsEfUaShRt9884169+6tBg0aKD4+Xrfddpv279/v2v/GG28oNTVVERERatSokdLT01VWViZJysvL08UXX6zIyEjFxMSoW7du2rFjh69OBR7q0qWLkpKS9NZbb7k+e+utt9SiRQt17tzZ9Vl5ebnuvvtuxcXFKTw8XN27d9cXX3zh2p+Xlyebzab3339fXbt2VVhYmFasWCGHw6GcnBy1bNlSERER6tixo9544406PUe/whVHwBTk2rmLXLMYcs0QijSc4tChQ7ryyivVuXNnrV69WosXL1ZRUZFuuukmSdKePXt0yy23aPDgwdqwYYPy8vLUr18/1xWlvn37qmfPnlq7dq3y8/M1bNgw2WxmPMYHdWXw4MGaOXOm6+sZM2Zo0KBBbseMHTtWb775pmbPnq0vv/xSbdq0UWZmpg4ePOh23P33368nnnhCGzZsUIcOHZSTk6M5c+Zo2rRpWr9+vUaOHKkBAwZo2bJldXJuAM495BrINfgbHhyCUzz//PPq3LmzHn/8cddnM2bMUFJSkr777juVlpaqqqpK/fr1U3JysiQpNTVVknTw4EEVFxfr2muvVevWrSVJ7du3r/uTgFcGDBig8ePHu64Ur1y5UvPnz1deXp4kqaysTFOnTtWsWbPUu3dvSdIrr7yi3NxcvfrqqxozZoxrrEcffVRXX321pONXKR9//HF9+OGHSktLkyS1atVKK1as0EsvvaSePXvW4Vn6Cd4nA3iNXAO5ZiHkmiEUaTjF119/rY8//lgNGjQ4Zd/WrVuVkZGhq666SqmpqcrMzFRGRob+9Kc/qWHDhoqNjVVWVpYyMzN19dVXKz09XTfddJOaNm3qgzPB2WrSpIn69OmjWbNmyel0qk+fPmrcuLFr/9atW1VZWalu3bq5PqtXr54uvvhibdiwwW2siy66yPXnLVu26MiRI65wO6GiosKt5QS/wvtkAK+RayDXLIRcM4QiDacoLS3Vddddp3/+85+n7GvatKmCg4OVm5urTz/9VB988IGee+45Pfjgg1q1apVatmypmTNn6u6779bixYv12muv6aGHHlJubq4uvfRSH5wNztbgwYM1YsQISdILL7xw1uNERka6/lxaWipJWrRokZo1a+Z2XFhY2Fn/DAA4E3INErkG/8I9aThFly5dtH79ep133nlq06aN23biF5PNZlO3bt00adIkffXVVwoNDdXbb7/tGqNz584aP368Pv30U1144YWaN2+er04HZ6lXr16qqKhQZWWlMjMz3fa1bt1aoaGhWrlypeuzyspKffHFF0pJSTntmCkpKQoLC9POnTtP+beVlJRUa+fiz068T8aMDThXkWuQyDWrINeMYSXtHFdcXKyCggK3z4YNG6ZXXnlFt9xyi8aOHavY2Fht2bJF8+fP1/Tp07V69WotXbpUGRkZiouL06pVq7Rv3z61b99e27dv18svv6zrr79eiYmJ2rRpkzZv3qzbb7/dNyeIsxYcHOxq8QgODnbbFxkZqeHDh2vMmDGKjY1VixYtNHnyZB05ckRDhgw57ZhRUVEaPXq0Ro4cKYfDoe7du6u4uFgrV66U3W7XwIEDa/Wc/BK9+4BHyDWcDrlmEeSaIRRp57i8vLxTeqaHDBmilStXaty4ccrIyFB5ebmSk5PVq1cvBQUFyW63a/ny5ZoyZYpKSkqUnJysJ598Ur1791ZRUZE2btyo2bNn68CBA2ratKmys7P117/+1UdnCG/Y7fbT7nviiSfkcDh022236fDhw7rooou0ZMkSNWzY8IxjPvbYY2rSpIlycnK0bds2xcTEqEuXLnrggQfMnj6AcxC5hjMh1+AvbE5ngJehAOCHSkpKFB0drfTW9yok2Pv7Gqqqy/Xh1ikqLi4+43+kAABQG8g1z7CSBgBWRlsIACCQkGuG8OAQAAAAALAQVtIAwNJMuuKowL7iCADwF+SaERRpAGBltIUAAAIJuWYI7Y4AAAAAYCGspAGAlTmcMqWlwxHYVxwBAH6CXDOElTQAAAAAsBCKNAS8rKws9e3b1/X15ZdfrnvvvbfO55GXlyebzaZDhw6d9hibzaYFCxYYHnPixInq1KmTV/P6/vvvZbPZVFBQ4NU4qCVOh3kbgIBArp0ZuWZx5JohFGnwiaysLNlsNtlsNoWGhqpNmzZ69NFHVVVVVes/+6233tJjjz1m6FgjAQTUqhM3WJuxAag15BpgELlmCPekwWd69eqlmTNnqry8XO+9956ys7NVr149jR8//pRjKyoqFBoaasrPjY2NNWUcAAB+jVwDYBZW0uAzYWFhSkhIUHJysoYPH6709HS98847kn5p5fjHP/6hxMREtW3bVpK0a9cu3XTTTYqJiVFsbKxuuOEGff/9964xq6urNWrUKMXExKhRo0YaO3asnCddaTm5LaS8vFzjxo1TUlKSwsLC1KZNG7366qv6/vvvdcUVV0iSGjZsKJvNpqysLEmSw+FQTk6OWrZsqYiICHXs2FFvvPGG28957733dMEFFygiIkJXXHGF2zyNGjdunC644ALVr19frVq10sMPP6zKyspTjnvppZeUlJSk+vXr66abblJxcbHb/unTp6t9+/YKDw9Xu3bt9OKLL3o8F/iIw2neBqBWkWu/jVwDuWYMK2mwjIiICB04cMD19dKlS2W325WbmytJqqysVGZmptLS0vTJJ58oJCREf//739WrVy+tXbtWoaGhevLJJzVr1izNmDFD7du315NPPqm3335bV1555Wl/7u233678/Hw9++yz6tixo7Zv3679+/crKSlJb775pm688UZt2rRJdrtdERERkqScnBz95z//0bRp03T++edr+fLlGjBggJo0aaKePXtq165d6tevn7KzszVs2DCtXr1a9913n8d/J1FRUZo1a5YSExO1bt06DR06VFFRURo7dqzrmC1btuj111/Xu+++q5KSEg0ZMkR/+9vfNHfuXEnS3LlzNWHCBD3//PPq3LmzvvrqKw0dOlSRkZEaOHCgx3NCHeN9MoDfItdORa6BXDOGIg0+53Q6tXTpUi1ZskR33XWX6/PIyEhNnz7d1Q7yn//8Rw6HQ9OnT5fNZpMkzZw5UzExMcrLy1NGRoamTJmi8ePHq1+/fpKkadOmacmSJaf92d99951ef/115ebmKj09XZLUqlUr1/4TLSRxcXGKiYmRdPwK5eOPP64PP/xQaWlpru9ZsWKFXnrpJfXs2VNTp05V69at9eSTT0qS2rZtq3Xr1umf//ynR383Dz30kOvP5513nkaPHq358+e7hdmxY8c0Z84cNWvWTJL03HPPqU+fPnryySeVkJCgRx55RE8++aTr76Rly5b69ttv9dJLLxFmAFALyLXTI9cAYyjS4DMLFy5UgwYNVFlZKYfDoVtvvVUTJ0507U9NTXXr1//666+1ZcsWRUVFuY1z7Ngxbd26VcXFxdqzZ48uueQS176QkBBddNFFp7SGnFBQUKDg4GD17NnT8Ly3bNmiI0eO6Oqrr3b7vKKiQp07d5YkbdiwwW0eklzB54nXXntNzz77rLZu3arS0lJVVVXJbre7HdOiRQtXkJ34OQ6HQ5s2bVJUVJS2bt2qIUOGaOjQoa5jqqqqFB0d7fF84ANOmXTF0fshAJwZufbbyDWQa8ZQpMFnrrjiCk2dOlWhoaFKTExUSIj7P8fIyEi3r0tLS9W1a1dXu8OvNWnS5KzmcKLNwxOlpaWSpEWLFrmFiHT8fgSz5Ofnq3///po0aZIyMzMVHR2t+fPnu65iejLXV1555ZRwDQ4ONm2uqEW0hQB+g1w7M3INksg1gyjS4DORkZFq06aN4eO7dOmi1157TXFxcadcdTuhadOmWrVqlXr06CHp+JW1NWvWqEuXLjUen5qaKofDoWXLlrnaQn7txBXP6upq12cpKSkKCwvTzp07T3ulsn379q6bxU/47LPPfvskf+XTTz9VcnKyHnzwQddnO3bsOOW4nTt3avfu3UpMTHT9nKCgILVt21bx8fFKTEzUtm3b1L9/f49+PgDAM+TamZFrgHE83RF+o3///mrcuLFuuOEGffLJJ9q+fbvy8vJ0991364cffpAk3XPPPXriiSe0YMECbdy4UX/729/O+C6Y8847TwMHDtTgwYO1YMEC15ivv/66JCk5OVk2m00LFy7Uvn37VFpaqqioKI0ePVojR47U7NmztXXrVn355Zd67rnnNHv2bEnSnXfeqc2bN2vMmDHatGmT5s2bp1mzZnl0vueff7527typ+fPna+vWrXr22Wf19ttvn3JceHi4Bg4cqK+//lqffPKJ7r77bt10001KSEiQJE2aNEk5OTl69tln9d1332ndunWaOXOmnnrqKY/mAx9xOMzbAFgKuUaunZPINUMo0uA36tevr+XLl6tFixbq16+f2rdvryFDhujYsWOuK5D33XefbrvtNg0cOFBpaWmKiorSH//4xzOOO3XqVP3pT3/S3/72N7Vr105Dhw5VWVmZJKlZs2aaNGmS7r//fsXHx2vEiBGSpMcee0wPP/ywcnJy1L59e/Xq1UuLFi1Sy5YtJR3vp3/zzTe1YMECdezYUdOmTdPjjz/u0flef/31GjlypEaMGKFOnTrp008/1cMPP3zKcW3atFG/fv10zTXXKCMjQx06dHB7FPEdd9yh6dOna+bMmUpNTVXPnj01a9Ys11wBAL5BrpFrwOnYnKe78xQA4DMlJSWKjo5WepMhCgny/oW3VY4KfbjvVRUXF5+2rQoAgNpCrnmGe9IAwMq4wRoAEEjINUNodwQAAAAAC2ElDQCszOGUKS+DcQT2FUcAgJ8g1wyhSAMAC3M6HXI6vX+ClRljAADgLXLNGNodAQAAAMBCWEkDACtzOs1p6QjwG6wBAH6CXDOEIg0ArMxpUu9+gIcZAMBPkGuG0O4IAAAAABbCShoAWJnDIdlMuDk6wG+wBgD4CXLNEIo0ALAy2kIAAIGEXDOEdkcAAAAAsBBW0gDAwpwOh5wmtIUE+vtkAAD+gVwzhpU0AAAAALAQVtIAwMro3QcABBJyzRCKNACwModTshFmAIAAQa4ZQrsjAAAAAFgIK2kAYGVOpyQz3icT2FccAQB+glwzhCINACzM6XDKaUJbiDPAwwwA4B/INWNodwQAAAAAC6FIAwArczrM287CCy+8oPPOO0/h4eG65JJL9Pnnn5t8ggCAcwq5ZghFGgBYmNPhNG3z1GuvvaZRo0bpkUce0ZdffqmOHTsqMzNTe/furYUzBQCcC8g1YyjSAAA1euqppzR06FANGjRIKSkpmjZtmurXr68ZM2b4emoAAHjMn3KNIg0ArMxHbSEVFRVas2aN0tPTXZ8FBQUpPT1d+fn5Zp8lAOBcQa4ZwtMdAcDCqlQpmfAAqypVSpJKSkrcPg8LC1NYWNgpx+/fv1/V1dWKj493+zw+Pl4bN270fkIAgHMSuWYMRRoAWFBoaKgSEhK0ovA908Zs0KCBkpKS3D575JFHNHHiRNN+BgAANSHXPEORBgAWFB4eru3bt6uiosK0MZ1Op2w2m9tnNV1tlKTGjRsrODhYRUVFbp8XFRUpISHBtDkBAM4N5JpnKNIAwKLCw8MVHh7uk58dGhqqrl27aunSperbt68kyeFwaOnSpRoxYoRP5gQA8G/kmnEUaQCAGo0aNUoDBw7URRddpIsvvlhTpkxRWVmZBg0a5OupAQDgMX/KNYo0AECN/vKXv2jfvn2aMGGCCgsL1alTJy1evPiUm64BAPAH/pRrNqfTacLzVQAAAAAAZuA9aQAAAABgIRRpAAAAAGAhFGkAAAAAYCEUaQAAAABgIRRpAAAAAGAhFGkAAAAAYCEUaQAAAABgIRRpAAAAAGAhFGkAAAAAYCEUaQAAAABgIRRpAAAAAGAhFGkAAAAAYCH/H0wUCv7bhJcWAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False\n", + ")\n", + "\n", + "for index in range(0, len(optimized_metrics)):\n", + " c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n", + " disp = ConfusionMatrixDisplay(\n", + " confusion_matrix=c_matrix, display_labels=[\"Less\", \"More\"]\n", + " ).plot(ax=ax.flat[index])\n", + "\n", + "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n", + "plt.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.5" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}