diff --git a/laboratory_4/lab4.ipynb b/laboratory_4/lab4.ipynb new file mode 100644 index 0000000..5584a37 --- /dev/null +++ b/laboratory_4/lab4.ipynb @@ -0,0 +1,5712 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Начинаем работу... \n", + "\n", + "Датасет: Продажи домов в округе Кинг " + ] + }, + { + "cell_type": "code", + "execution_count": 144, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n", + " 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n", + " 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n", + " 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n", + " dtype='object')\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn import set_config\n", + "\n", + "# Установим параметры для вывода\n", + "set_config(transform_output=\"pandas\")\n", + "\n", + "random_state = 42\n", + "\n", + "# Подключим датафрейм и выгрузим данные\n", + "df = pd.read_csv(\".//static//csv//kc_house_data.csv\")\n", + "print(df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 145, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontview...gradesqft_abovesqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15
0712930052020141013T000000221900.031.00118056501.000...711800195509817847.5112-122.25713405650
1641410019220141209T000000538000.032.25257072422.000...72170400195119919812547.7210-122.31916907639
2563150040020150225T000000180000.021.00770100001.000...67700193309802847.7379-122.23327208062
3248720087520141209T000000604000.043.00196050001.000...71050910196509813647.5208-122.39313605000
4195440051020150218T000000510000.032.00168080801.000...816800198709807447.6168-122.04518007503
\n", + "

5 rows × 21 columns

\n", + "
" + ], + "text/plain": [ + " id date price bedrooms bathrooms sqft_living \\\n", + "0 7129300520 20141013T000000 221900.0 3 1.00 1180 \n", + "1 6414100192 20141209T000000 538000.0 3 2.25 2570 \n", + "2 5631500400 20150225T000000 180000.0 2 1.00 770 \n", + "3 2487200875 20141209T000000 604000.0 4 3.00 1960 \n", + "4 1954400510 20150218T000000 510000.0 3 2.00 1680 \n", + "\n", + " sqft_lot floors waterfront view ... grade sqft_above sqft_basement \\\n", + "0 5650 1.0 0 0 ... 7 1180 0 \n", + "1 7242 2.0 0 0 ... 7 2170 400 \n", + "2 10000 1.0 0 0 ... 6 770 0 \n", + "3 5000 1.0 0 0 ... 7 1050 910 \n", + "4 8080 1.0 0 0 ... 8 1680 0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "0 1955 0 98178 47.5112 -122.257 1340 \n", + "1 1951 1991 98125 47.7210 -122.319 1690 \n", + "2 1933 0 98028 47.7379 -122.233 2720 \n", + "3 1965 0 98136 47.5208 -122.393 1360 \n", + "4 1987 0 98074 47.6168 -122.045 1800 \n", + "\n", + " sqft_lot15 \n", + "0 5650 \n", + "1 7639 \n", + "2 8062 \n", + "3 5000 \n", + "4 7503 \n", + "\n", + "[5 rows x 21 columns]" + ] + }, + "execution_count": 145, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.head()" + ] + }, + { + "cell_type": "code", + "execution_count": 146, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idpricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontviewconditiongradesqft_abovesqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15
count2.161300e+042.161300e+0421613.00000021613.00000021613.0000002.161300e+0421613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.00000021613.000000
mean4.580302e+095.400881e+053.3708422.1147572079.8997361.510697e+041.4943090.0075420.2343033.4094307.6568731788.390691291.5090451971.00513684.40225898077.93980547.560053-122.2138961986.55249212768.455652
std2.876566e+093.671272e+050.9300620.770163918.4408974.142051e+040.5399890.0865170.7663180.6507431.175459828.090978442.57504329.373411401.67924053.5050260.1385640.140828685.39130427304.179631
min1.000102e+067.500000e+040.0000000.000000290.0000005.200000e+021.0000000.0000000.0000001.0000001.000000290.0000000.0000001900.0000000.00000098001.00000047.155900-122.519000399.000000651.000000
25%2.123049e+093.219500e+053.0000001.7500001427.0000005.040000e+031.0000000.0000000.0000003.0000007.0000001190.0000000.0000001951.0000000.00000098033.00000047.471000-122.3280001490.0000005100.000000
50%3.904930e+094.500000e+053.0000002.2500001910.0000007.618000e+031.5000000.0000000.0000003.0000007.0000001560.0000000.0000001975.0000000.00000098065.00000047.571800-122.2300001840.0000007620.000000
75%7.308900e+096.450000e+054.0000002.5000002550.0000001.068800e+042.0000000.0000000.0000004.0000008.0000002210.000000560.0000001997.0000000.00000098118.00000047.678000-122.1250002360.00000010083.000000
max9.900000e+097.700000e+0633.0000008.00000013540.0000001.651359e+063.5000001.0000004.0000005.00000013.0000009410.0000004820.0000002015.0000002015.00000098199.00000047.777600-121.3150006210.000000871200.000000
\n", + "
" + ], + "text/plain": [ + " id price bedrooms bathrooms sqft_living \\\n", + "count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n", + "mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n", + "std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n", + "min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n", + "25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n", + "50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n", + "75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n", + "max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n", + "\n", + " sqft_lot floors waterfront view condition \\\n", + "count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n", + "mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n", + "std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n", + "min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n", + "25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n", + "50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n", + "75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n", + "max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n", + "\n", + " grade sqft_above sqft_basement yr_built yr_renovated \\\n", + "count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n", + "mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n", + "std 1.175459 828.090978 442.575043 29.373411 401.679240 \n", + "min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n", + "25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n", + "50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n", + "75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n", + "max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n", + "\n", + " zipcode lat long sqft_living15 sqft_lot15 \n", + "count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n", + "mean 98077.939805 47.560053 -122.213896 1986.552492 
12768.455652 \n", + "std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n", + "min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n", + "25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n", + "50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n", + "75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n", + "max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 " + ] + }, + "execution_count": 146, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df.describe()" + ] + }, + { + "cell_type": "code", + "execution_count": 147, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id 0\n", + "date 0\n", + "price 0\n", + "bedrooms 0\n", + "bathrooms 0\n", + "sqft_living 0\n", + "sqft_lot 0\n", + "floors 0\n", + "waterfront 0\n", + "view 0\n", + "condition 0\n", + "grade 0\n", + "sqft_above 0\n", + "sqft_basement 0\n", + "yr_built 0\n", + "yr_renovated 0\n", + "zipcode 0\n", + "lat 0\n", + "long 0\n", + "sqft_living15 0\n", + "sqft_lot15 0\n", + "dtype: int64\n", + "id False\n", + "date False\n", + "price False\n", + "bedrooms False\n", + "bathrooms False\n", + "sqft_living False\n", + "sqft_lot False\n", + "floors False\n", + "waterfront False\n", + "view False\n", + "condition False\n", + "grade False\n", + "sqft_above False\n", + "sqft_basement False\n", + "yr_built False\n", + "yr_renovated False\n", + "zipcode False\n", + "lat False\n", + "long False\n", + "sqft_living15 False\n", + "sqft_lot15 False\n", + "dtype: bool\n" + ] + } + ], + "source": [ + "# Процент пропущенных значений признаков\n", + "for i in df.columns:\n", + " null_rate = df[i].isnull().sum() / len(df) * 100\n", + " if null_rate > 0:\n", + " print(f'{i} Процент пустых значений: %{null_rate:.2f}')\n", + "\n", + "print(df.isnull().sum())\n", + "\n", + "print(df.isnull().any())" + ] + }, + { + "cell_type": "code", + "execution_count": 148, + "metadata": {}, + 
"outputs": [ + { + "data": { + "text/plain": [ + "id int64\n", + "date object\n", + "price float64\n", + "bedrooms int64\n", + "bathrooms float64\n", + "sqft_living int64\n", + "sqft_lot int64\n", + "floors float64\n", + "waterfront int64\n", + "view int64\n", + "condition int64\n", + "grade int64\n", + "sqft_above int64\n", + "sqft_basement int64\n", + "yr_built int64\n", + "yr_renovated int64\n", + "zipcode int64\n", + "lat float64\n", + "long float64\n", + "sqft_living15 int64\n", + "sqft_lot15 int64\n", + "dtype: object" + ] + }, + "execution_count": 148, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "# Проверка типов столбцов\n", + "df.dtypes" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Выбор бизнес-целей \n", + "Для датасета недвижимости предлагаются две бизнес-цели:\n", + "\n", + "*Задача регрессии* – предсказание цены дома (price). Это может помочь риэлторам и аналитикам определить справедливую рыночную стоимость недвижимости. \n", + "\n", + "*Задача классификации* – определение вероятности того, что цена дома будет выше/ниже медианы рынка. Классифицировать дома по ценовым категориям (например, низкая, средняя, высокая цена). Это может помочь определить, какие дома популярны у покупателей.\n", + "\n", + "## Определение достижимого уровня качества модели \n", + "Для регрессии и классификации мы выберем метрики: \n", + "\n", + "Для регрессии будем использовать метрики MAE (средняя абсолютная ошибка) и R^2 (коэффициент детерминации), стремясь к MAE ниже 10% от средней цены. А классификация будет ориентироваться на метрики accuracy и F1-score при целевом значении accuracy около 80%.\n", + "\n", + "## Ориентир для каждой задачи\n", + "Для регрессии ориентиром будет медианная цена (price.median()), так как это стабильное значение. 
Для классификации ориентируемся на среднюю вероятность предсказания класса выше медианы.\n", + "\n", + "## Анализ алгоритмов машинного обучения \n", + "Рассмотрим для задачи регрессии:\n", + "\n", + "*Линейная регрессия:* подходит для простых линейных зависимостей. \n", + "*Дерево решений:* учитывает нелинейные зависимости, может учесть сложные закономерности. \n", + "*Случайный лес:* ансамблевый метод, обобщающий данные и эффективно обрабатывающий выбросы. \n", + "\n", + "Для задачи классификации: \n", + "\n", + "*Логистическая регрессия:* простая модель, подходящая для бинарной классификации. \n", + "*Метод опорных векторов (SVM):* работает хорошо на данных с четкими разделениями. \n", + "*Градиентный бустинг:* подходит для сложных и высокоразмерных данных, обеспечивает высокую точность. \n", + "\n", + "## Выбор моделей \n", + "Выбираем по три модели для каждой задачи:\n", + "\n", + "*Регрессия:* Линейная регрессия, Дерево решений, Случайный лес. \n", + "*Классификация:* Логистическая регрессия, Метод опорных векторов (SVM), Градиентный бустинг. \n", + "\n", + "\n", + "## Построение конвейера и визуализации \n", + "Теперь напишем код для загрузки данных, анализа и подготовки моделей с визуализацией результатов.\n", + "\n", + "\n", + "# Начнём с задачи классификации\n", + "\n", + "Целевой признак --> above_median_price\n", + "\n", + "Формируем выборки. Разделяем набор данных на обучающую и тестовую выборки (80/20) для задачи классификации" + ] + }, + { + "cell_type": "code", + "execution_count": 149, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'X_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15above_median_priceprice_category
20962127800021020150311T000000110000.021.0082845241.000...0196820079800147.2655-122.244828540200
12284219330039020140923T000000624000.043.252810112501.000...1130198009805247.6920-122.09921101125011
7343428990000520141230T0000001535000.043.25285041002.003...1030190820039812247.6147-122.2852130420012
1424731600014520150325T000000235000.041.00136071321.500...0194109816847.5054-122.3011280717500
1667062940048020140619T000000775000.042.753010159922.000...0199609807547.5895-121.99433301233312
..................................................................
88133270027020140519T000000215000.022.25161020402.000...0197909805647.5180-122.1941950202500
15031712930307020140820T000000735000.042.75304024152.014...0196609811847.5188-122.2562620243312
5234243200013020150414T000000675000.031.75166095491.000...0195609803347.6503-122.1982090954911
1998077410047520140627T000000415000.032.752600646261.500...0200909801447.7185-121.40517406462601
3671884740011520140723T000000590000.032.0024202086521.500...0200509801047.3666-121.978318021213711
\n", + "

17290 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " id date price bedrooms bathrooms \\\n", + "20962 1278000210 20150311T000000 110000.0 2 1.00 \n", + "12284 2193300390 20140923T000000 624000.0 4 3.25 \n", + "7343 4289900005 20141230T000000 1535000.0 4 3.25 \n", + "14247 316000145 20150325T000000 235000.0 4 1.00 \n", + "16670 629400480 20140619T000000 775000.0 4 2.75 \n", + "... ... ... ... ... ... \n", + "88 1332700270 20140519T000000 215000.0 2 2.25 \n", + "15031 7129303070 20140820T000000 735000.0 4 2.75 \n", + "5234 2432000130 20150414T000000 675000.0 3 1.75 \n", + "19980 774100475 20140627T000000 415000.0 3 2.75 \n", + "3671 8847400115 20140723T000000 590000.0 3 2.00 \n", + "\n", + " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n", + "20962 828 4524 1.0 0 0 ... 0 \n", + "12284 2810 11250 1.0 0 0 ... 1130 \n", + "7343 2850 4100 2.0 0 3 ... 1030 \n", + "14247 1360 7132 1.5 0 0 ... 0 \n", + "16670 3010 15992 2.0 0 0 ... 0 \n", + "... ... ... ... ... ... ... ... \n", + "88 1610 2040 2.0 0 0 ... 0 \n", + "15031 3040 2415 2.0 1 4 ... 0 \n", + "5234 1660 9549 1.0 0 0 ... 0 \n", + "19980 2600 64626 1.5 0 0 ... 0 \n", + "3671 2420 208652 1.5 0 0 ... 0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "20962 1968 2007 98001 47.2655 -122.244 828 \n", + "12284 1980 0 98052 47.6920 -122.099 2110 \n", + "7343 1908 2003 98122 47.6147 -122.285 2130 \n", + "14247 1941 0 98168 47.5054 -122.301 1280 \n", + "16670 1996 0 98075 47.5895 -121.994 3330 \n", + "... ... ... ... ... ... ... \n", + "88 1979 0 98056 47.5180 -122.194 1950 \n", + "15031 1966 0 98118 47.5188 -122.256 2620 \n", + "5234 1956 0 98033 47.6503 -122.198 2090 \n", + "19980 2009 0 98014 47.7185 -121.405 1740 \n", + "3671 2005 0 98010 47.3666 -121.978 3180 \n", + "\n", + " sqft_lot15 above_median_price price_category \n", + "20962 5402 0 0 \n", + "12284 11250 1 1 \n", + "7343 4200 1 2 \n", + "14247 7175 0 0 \n", + "16670 12333 1 2 \n", + "... ... ... ... 
\n", + "88 2025 0 0 \n", + "15031 2433 1 2 \n", + "5234 9549 1 1 \n", + "19980 64626 0 1 \n", + "3671 212137 1 1 \n", + "\n", + "[17290 rows x 23 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_median_price
209620
122841
73431
142470
166701
......
880
150311
52341
199800
36711
\n", + "

17290 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_median_price\n", + "20962 0\n", + "12284 1\n", + "7343 1\n", + "14247 0\n", + "16670 1\n", + "... ...\n", + "88 0\n", + "15031 1\n", + "5234 1\n", + "19980 0\n", + "3671 1\n", + "\n", + "[17290 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'X_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15above_median_priceprice_category
11592202870100020140529T000000635200.041.75164042401.000...720192109811747.6766-122.3681300424011
8984940650053020140912T000000249000.022.00109013572.000...0199009802847.7526-122.2441078131800
8280809700033020140721T000000359950.032.75254086042.000...0199109809247.3209-122.1852260743801
792808102037020140709T0000001355000.043.503550110001.002...1290199909800647.5506-122.13441001001212
10371751850758020150502T000000581000.021.00117040801.000...0190909811747.6784-122.3861560458611
..................................................................
16733721265095020140708T000000336000.042.50253081692.000...0199309800347.2634-122.3122220801301
13151436520062020150312T000000394000.031.00145079301.000...300192309812647.5212-122.3711040774001
11667408330435520150318T000000675000.041.75153036151.500...0191309810347.6529-122.3341650420011
3683289110082020140825T000000213500.031.00122060001.000...0196809800247.3245-122.2091420600000
1205995200064020141027T000000715000.031.50167050602.002...0192509812647.5671-122.3791670511812
\n", + "

4323 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " id date price bedrooms bathrooms \\\n", + "11592 2028701000 20140529T000000 635200.0 4 1.75 \n", + "8984 9406500530 20140912T000000 249000.0 2 2.00 \n", + "8280 8097000330 20140721T000000 359950.0 3 2.75 \n", + "792 8081020370 20140709T000000 1355000.0 4 3.50 \n", + "10371 7518507580 20150502T000000 581000.0 2 1.00 \n", + "... ... ... ... ... ... \n", + "16733 7212650950 20140708T000000 336000.0 4 2.50 \n", + "13151 4365200620 20150312T000000 394000.0 3 1.00 \n", + "11667 4083304355 20150318T000000 675000.0 4 1.75 \n", + "3683 2891100820 20140825T000000 213500.0 3 1.00 \n", + "12059 952000640 20141027T000000 715000.0 3 1.50 \n", + "\n", + " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n", + "11592 1640 4240 1.0 0 0 ... 720 \n", + "8984 1090 1357 2.0 0 0 ... 0 \n", + "8280 2540 8604 2.0 0 0 ... 0 \n", + "792 3550 11000 1.0 0 2 ... 1290 \n", + "10371 1170 4080 1.0 0 0 ... 0 \n", + "... ... ... ... ... ... ... ... \n", + "16733 2530 8169 2.0 0 0 ... 0 \n", + "13151 1450 7930 1.0 0 0 ... 300 \n", + "11667 1530 3615 1.5 0 0 ... 0 \n", + "3683 1220 6000 1.0 0 0 ... 0 \n", + "12059 1670 5060 2.0 0 2 ... 0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "11592 1921 0 98117 47.6766 -122.368 1300 \n", + "8984 1990 0 98028 47.7526 -122.244 1078 \n", + "8280 1991 0 98092 47.3209 -122.185 2260 \n", + "792 1999 0 98006 47.5506 -122.134 4100 \n", + "10371 1909 0 98117 47.6784 -122.386 1560 \n", + "... ... ... ... ... ... ... \n", + "16733 1993 0 98003 47.2634 -122.312 2220 \n", + "13151 1923 0 98126 47.5212 -122.371 1040 \n", + "11667 1913 0 98103 47.6529 -122.334 1650 \n", + "3683 1968 0 98002 47.3245 -122.209 1420 \n", + "12059 1925 0 98126 47.5671 -122.379 1670 \n", + "\n", + " sqft_lot15 above_median_price price_category \n", + "11592 4240 1 1 \n", + "8984 1318 0 0 \n", + "8280 7438 0 1 \n", + "792 10012 1 2 \n", + "10371 4586 1 1 \n", + "... ... ... ... 
\n", + "16733 8013 0 1 \n", + "13151 7740 0 1 \n", + "11667 4200 1 1 \n", + "3683 6000 0 0 \n", + "12059 5118 1 2 \n", + "\n", + "[4323 rows x 23 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_median_price
115921
89840
82800
7921
103711
......
167330
131510
116671
36830
120591
\n", + "

4323 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_median_price\n", + "11592 1\n", + "8984 0\n", + "8280 0\n", + "792 1\n", + "10371 1\n", + "... ...\n", + "16733 0\n", + "13151 0\n", + "11667 1\n", + "3683 0\n", + "12059 1\n", + "\n", + "[4323 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "id int64\n", + "date object\n", + "price float64\n", + "bedrooms int64\n", + "bathrooms float64\n", + "sqft_living int64\n", + "sqft_lot int64\n", + "floors float64\n", + "waterfront int64\n", + "view int64\n", + "condition int64\n", + "grade int64\n", + "sqft_above int64\n", + "sqft_basement int64\n", + "yr_built int64\n", + "yr_renovated int64\n", + "zipcode int64\n", + "lat float64\n", + "long float64\n", + "sqft_living15 int64\n", + "sqft_lot15 int64\n", + "above_median_price int64\n", + "price_category category\n", + "dtype: object\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAIjCAYAAADFthA8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACGn0lEQVR4nO3de1yTdf8/8NcYZ0UOngamxjwWukQrQwqtTPOUnTy1UtOyG6gsu++7zOGiILO0M1BqYQmlWZnZbZn6TdcBz9IU8zzTFDyEHAQ5uF2/P/xtbTJg4OBi1/V6Ph48ZNf13va+2MDrvc/nen8UgiAIICIiIiIikgkvsRMgIiIiIiJqTiyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCKSsQ8++ADDhw9Hx44d4ePjA5VKhcGDB+PTTz+FxWIROz0iIiKiJqEQBEEQOwkiEkdMTAzCw8Nxxx13oE2bNigqKsKWLVuwfPlyTJgwAZ9//rnYKRIRERG5HYsgIhmrrq6Gj49Pje1PPfUU3n//fZhMJlx77bXNnxgRERFRE+J0OCIZc1YAAbAVPl5e//yJWL16NUaNGoWIiAj4+fmhW7dueOWVV2A2mx3uO2TIECgUCttXu3btMGrUKOzdu9chTqFQ4KWXXnLY9sYbb0ChUGDIkCEO2ysqKvDSSy+hZ8+e8Pf3R3h4OO6//34cOXIEAHDs2DEoFAosXbrU4X6JiYlQKBSYOnWqbdvSpUuhUCjg6+uLs2fPOsTn5OTY8t6xY4fDvpUrV2LAgAEICAhAu3bt8PDDD+PkyZM1fnb79+/H+PHj0b59ewQEBKBXr16YM2cOAOCll15y+Nk4+9q0aZPt59inT58aj++K2u67YMECKBQKHDt2zGF7UVERnnnmGXTu3Bl+fn7o3r075s+f7zAl0vozXrBgQY3H7dOnj8NrtmnTJigUCnz55Ze15jh16lSXC+z09HRERUXBz88PERERSExMRFFRkcPx1vdzrcuQIUNqvOdSU1Ph5eWFzz77zGG7q+8DALXmYv/zd/X3wPreudK1117r8P4GXHs9AcBiseCdd95B37594e/vj/bt2+Puu++2vffr+5la87O+3tYvPz8/9OzZE/PmzYP956x//vknEhIS0KtXLwQEBKBt27YYN25cjfdjberLt76cr3y/LViwAIMGDULbtm0REBCAAQMG1Pqetf7dqO1n0JCffUN+l4io6XiLnQARia+oqAiXLl1CaWkpdu7ciQULFmDixIno0qWLLWbp0qVo3bo1Zs2ahdatW+P//u//MHfuXJSUlOCNN95weLzevXtjzpw5EAQBR44cwZtvvomRI0fi+PHjdeYwb968GtvNZjNGjx6NjRs3YuLEiZg5cyZKS0uxfv167N27F926dXP6eIcPH8bixYtrfT6lUomsrCw8++yztm2ZmZnw9/dHRUWFQ+zSpUvx6KOP4qabbsK8efNw+vRpvPPOO/j111+xe/duhISEAACMRiNuu+02+Pj4YMaMGbj22mtx5MgRrFmzBqmpqbj//vvRvXt32+M+++yzuO666zBjxgzbtuuuu67WnJtCeXk5Bg8ejJMnT+KJJ55Aly5d8Ntvv2H27NnIz8/H22+/3az5XOmll15CcnIyhg4divj4eBw4cAAZGRnYvn07fv31V/j4+GDOnDl47LHHAADnzp3Ds88+ixkzZuC2225r1HNmZmZCp9Nh4cKFeOihh2zbXX0f2Lvvvvtw//33AwB+/vlnLFq0qM7nru33wFUNeT2nT5+OpUuXYsSIEXjsscdw6dIl/Pzzz9iyZQtuvP
FGLFu2zBZrzf2tt95Cu3btAAAdO3Z0eO4XX3wR1113HS5evIgVK1bgxRdfRIcOHTB9+nQAwPbt2/Hbb79h4sSJuOaaa3Ds2DFkZGRgyJAh2LdvHwIDA+s8tvrytbrrrrswefJkh/suXLgQ58+fd9j2zjvv4J577oFWq0VVVRWWL1+OcePG4bvvvsOoUaOc5mB//KmpqY3+2RNRCyAQkez16tVLAGD7mjx5slBdXe0QU15eXuN+TzzxhBAYGChUVFTYtg0ePFgYPHiwQ9yLL74oABDOnDlj2wZA0Ov1ttv//e9/hQ4dOggDBgxwuP/HH38sABDefPPNGs9vsVgEQRAEk8kkABAyMzNt+8aPHy/06dNH6Ny5szBlyhTb9szMTAGAMGnSJKFv37627WVlZUKbNm2Ehx56SAAgbN++XRAEQaiqqhI6dOgg9OnTR7h48aIt/rvvvhMACHPnzrVti4uLE4KCgoQ///zTaZ5X6tq1q0Nu9gYPHixERUU53Vef2u77xhtvCAAEk8lk2/bKK68IrVq1Eg4ePOgQ+8ILLwhKpVI4fvy4IAj//IzfeOONGo8bFRXl8Jr99NNPAgBh5cqVteY4ZcoUoWvXrnUex5kzZwRfX19h2LBhgtlstm1///33BQDCxx9/XOM+zt4L9bF/z/7vf/8TvL29heeee84hpiHvA0EQhOrqagGAkJycbNtmfe/Z//xd/T1ITk4WANR4L135HnL19fy///s/AYDw9NNP1/h5OHu/Osvdyvp6//TTT7ZtFRUVgpeXl5CQkGDb5uxvSE5OjgBA+PTTT2vss+dqvgCExMTEGjGjRo2q8X67Mp+qqiqhT58+wh133FHj/osXLxYAOPxuX/m3ril+l4io6UhmOpzBYMCYMWMQEREBhUKBb775psGPIQgCFixYgJ49e8LPzw+dOnWq8UkPkRRlZmZi/fr1yM7OxvTp05Gdne0wOgEAAQEBtu9LS0tx7tw53HbbbSgvL8f+/fsdYqurq3Hu3DmcPXsWOTk5WLVqFTQaje0T1CudPHkS7733HpKSktC6dWuHfV999RXatWuHp556qsb9apvmtHPnTqxcuRLz5s1zmNJn75FHHsH+/fttU2m++uorBAcH484773SI27FjB86cOYOEhAT4+/vbto8aNQq9e/fG//73PwDA2bNnYTAYMG3aNIcRtLryrI/ZbMa5c+dw7tw5VFVVNeox6rNy5UrcdtttCA0NtT3XuXPnMHToUJjNZhgMBof48vJyh7hz587VmBJpZX2f2E9da4gNGzagqqoKzzzzjMPr+Pjjj6NNmza2n727bNu2DePHj8cDDzxQY3TT1feBlfX18vPzc/n56/o96NChAwDgr7/+qvMxXH09v/rqKygUCuj1+hqP0dj3a3FxMc6dO4fjx4/j9ddfh8ViwR133GHbb/83pLq6Gn///Te6d++OkJAQ7Nq1q87Hbop87fM5f/48iouLcdtttznNxZXXsyl/l4jI/SQzHa6srAw33HADpk2bZpt60FAzZ87Ejz/+iAULFqBv374oLCxEYWGhmzMlanliYmJs3z/00ENQq9WYM2cOpk+fjtjYWABAXl4edDod/u///g8lJSUO9y8uLna4/dtvv6F9+/a22z169MA333xT68mKXq9HREQEnnjiiRpz8o8cOYJevXrB29v1P1cvvPACbrvtNowePRpPPvmk05j27dtj1KhR+Pjjj3HjjTfi448/xpQpU2oUTX/++ScAoFevXjUeo3fv3vjll18AAEePHgWARl/H48z+/fttP0cvLy90794der3eYYrW1Tp06BCMRqPD62XvzJkzDrf1er3TE9Erp0YBwLRp02zft27dGmPGjMFbb73lNNaZ2n72vr6+UKvVtv3ucPLkSYwaNQplZWX4+++/a7xXXX0fWFkLvyuLmbrU9XsQExMDhUKB2bNnIyUlxfa4V17n4+rreeTIEURERCAsLMzl/Opz77332r738vKCTqfDAw
88YNt28eJFzJs3D5mZmTh58qTD9UJX/g25UlPk+9133yElJQW5ubmorKy0bXf2d8qV17Mpf5eIyP0kUwSNGDECI0aMqHV/ZWUl5syZg88//xxFRUXo06cP5s+fb7sA8Y8//kBGRgb27t1r+08uMjKyOVInanEefPBBzJkzB1u3bkVsbCyKioowePBgtGnTBi+//DK6desGf39/7Nq1C88//3yNEzGNRoOFCxcCuDxC8u6772LIkCHYtWsXVCqVQ+wff/yBpUuXIisrq9ZGDQ3x448/YsOGDcjJyak3dtq0aZg8eTKeeuopGAwGLFmyBD///PNV5+Au1157re26pr///hvvvvsuHnnkEajVatxyyy1ueQ6LxYK77roL//3vf53u79mzp8PtGTNmYNy4cQ7bHn/8caf3nTt3Lm677TZUV1dj586dePnll1FUVIS1a9e6JXd3Onz4MPr374+33noLjzzyCD755BNMmTKl0Y9XUFAAADXe77Wp7/fghhtugF6vR3JyMrKzs2t9nIa+nu60YMEC3HDDDaiursb27duRkpICb29v24n+U089hczMTDzzzDOIiYlBcHAwFAoFJk6c2Ozrkv3888+45557EBcXh/T0dISHh8PHxweZmZk1mmEAl1/P1q1bo1WrVrU+ZlP+LhGR+0mmCKrPk08+iX379mH58uWIiIjAqlWrcPfdd2PPnj3o0aMH1qxZA7Vaje+++w533303BEHA0KFD8frrr7v1kyciT3Dx4kUAl5sHAJe7P/3999/4+uuvERcXZ4szmUxO7x8aGoqhQ4fabg8ZMgQRERHIzMzE7NmzHWJnz56Nfv36YcKECU4fq1u3bti6dWut7bztCYKAF154Affdd59LRcKIESPg7++PiRMn4tZbb0W3bt1qFEFdu3YFABw4cMBhao91m3W/Wq0GgBpd8K5Gq1atHH6Ot912Gzp16oQff/zRbUVQt27dcOHCBYfnqUuPHj1qxNZ2Yti3b19b7IgRI3D8+HF88sknuHTpkkvPZf+zt/58gctTk0wmk8s5uyI8PBxr165Fx44dsXr1ajz33HMYOXKk7VN9V98HVvv27QPgeqOL+n4PgMsjBzNmzMD+/ftt06YefvhhhxhXX89u3bph3bp1KCwsdNv/cQMGDLB9sDhixAicPHkS8+fPR1JSEry8vPDll19iypQptg9IgMudH12ZLunufL/66iv4+/tj3bp1DlPcMjMzncbv27ev3teyKX+XiMj9JHNNUF2OHz+OzMxM23zdbt264d///jduvfVW2x+8o0eP4s8//8TKlSvx6aefYunSpdi5cycefPBBkbMnajq1fSK/ePFiKBQK28metRiyn75SVVWF9PR0l57HWlTZTzkBLrekXr16NV577bVap8o98MADOHfuHN5///0a+4Qrljlbvnw5jEajy921vL29MXnyZBiNRoepW/ZuvPFGdOjQAR988IFD/t9//z3++OMPWxep9u3bIy4uDh9//HGNLnhX5tlY1k/Lra+HO4wfPx45OTlYt25djX3WroHuYrFY4OXl5fI1HEOHDoWvry/effddh5/hRx99hOLi4lo7eDVGz549bdOQ3nvvPVgsFsycOdO239X3gdWKFSsQHh7uUhHkyu+BVXh4OG6//XYMHToUQ4cOdbg+CXD99XzggQcgCAKSk5NrxLnr/Xrx4kVcunTJ9pxKpbLGY7/33nsuXQfj7nyVSiUUCoXDcx87dszp9cQnTpzAr7/+WqP4vVJz/i4R0dWTxUjQnj17YDabawxFV1ZWom3btgAu/+dcWVmJTz/91Bb30UcfYcCAAThw4IDTeeBEnu6hhx5C7969cd9996Fjx444e/Ysvv/+e/z000+YM2cO+vbtCwAYNGgQQkNDMWXKFDz99NNQKBRYtmxZrScfp0+fRlZWFoDLLYs//PBDeHt7Y/To0Q5xP/74I+666646PzmdPHkyPv30U8yaNQvbtm3DbbfdhrKyMmzYsA
EJCQkYO3asw+M9/vjjDfp9feWVV/Cf//wHoaGhTvf7+Phg/vz5ePTRRzF48GBMmjTJ1hr52muvdWix/e677+LWW29F//79MWPGDERGRuLYsWP43//+h9zcXJdzsrpw4QJ++OEHAEBhYSHeffdd+Pj4uHTyb39fqwMHDgAANm/eDB8fH3Tq1An/+c9/8O2332L06NGYOnUqBgwYgLKyMuzZswdffvkljh07VmtDi/rk5uaidevWuHTpEnbu3IlPP/0UY8eOdbmIa9++PWbPno3k5GTcfffduOeee3DgwAGkp6fjpptuqjEK4i4qlQpvvPEGHnvsMTz88MMYOXKky++DHTt2ICkpCT/88AM++OADlwo+V34PXOXq63n77bfjkUcewbvvvotDhw7h7rvvhsViwc8//4zbb7+91mvp6rJ+/Xr89ddftulw2dnZuOeee+Dr6wsAGD16NJYtW4bg4GBcf/31yMnJwYYNG2z/D9fF3fmOGjUKb775Ju6++2489NBDOHPmDNLS0tC9e3cYjUZbXEZGBubNm4fAwEA8/fTTdT5mU/4uEVETEKMlXVMDIKxatcp2e/ny5YJSqRT2798vHDp0yOErPz9fEARBmDt3ruDt7e3wOOXl5QIA4ccff2zO9ImaTUZGhjBy5EghIiJC8Pb2FkJCQoThw4cLa9eurRH766+/CrfccosQEBAgRERECP/973+FdevW1WiNO3jwYId22yEhIUJsbGyNxwQgKBQKYefOnQ7bnbXYLi8vF+bMmSNERkYKPj4+gkqlEh588EHhyJEjgiD803I2ICBAOHnypMN9r2whbG31a22BfaXa9q9YsUKIjo4W/Pz8hLCwMEGr1Qp//fVXjfvv3btXuO+++4SQkBDB399f6NWrl5CUlOT0ueprke3s5/j99987ja/rvs6+7FtIl5aWCrNnzxa6d+8u+Pr6Cu3atRMGDRokLFiwQKiqqhIEoXEtsq1f3t7eQteuXYWnn35aOH/+vCAIrrXItnr//feF3r17Cz4+PkLHjh2F+Ph42+Nc6WpbZNu74447hC5dugilpaW2bfW9D+bPny/cdNNNQnZ2do3Hq61Ftqu/B844ew+58noKgiBcunRJeOONN4TevXsLvr6+Qvv27YURI0bUyKW23K1ceb0FQRDOnz8vPProo0K7du2E1q1bC8OHDxf2799f5++BPVfyRQNaZH/00UdCjx49BD8/P6F3795CZmamoNfrBftTo5tvvlkYN26csH///hqP6ew1cvfvEhE1HYUguGncuwVRKBRYtWqVrVPNwYMH0atXLxgMhloXz/vxxx8xfPhwHD582Lb44u+//45+/frhwIEDTXoxKRFRc7r22mvx0ksvYerUqWKnQkREJArJTIe7cOECDh8+bLttMpmQm5uLsLAw9OzZE1qtFpMnT8bChQsRHR2Ns2fPYuPGjdBoNBg1ahSGDh2K/v37Y9q0aXj77bdhsViQmJiIu+66iwUQEREREZGESKYxwo4dOxAdHY3o6GgAwKxZsxAdHY25c+cCuNzxZfLkyXjuuefQq1cv3Hvvvdi+fbttUUMvLy+sWbMG7dq1Q1xcHEaNGoXrrrsOy5cvF+2YiIiawuDBg9GpUyex0yAiIhKNJKfDERERERER1UYyI0FERERERESuYBFERERERESy4tGNESwWC06dOoWgoCCXF98jIiIiIiLpEQQBpaWliIiIgJdX3WM9ohZBZrMZL730ErKyslBQUICIiAhMnToVOp3OpaLm1KlT6Ny5czNkSkREREREnuDEiRO45ppr6owRtQiaP38+MjIy8MknnyAqKgo7duzAo48+iuDg4HpXZgaAoKAgAJcPtE2bNk2dLhERERERtVAlJSXo3LmzrUaoi6hF0G+//YaxY8di1KhRAC4v4Pf5559j27ZtLt3fOlrUpk0bFkFEREREROTSjDJRGyMMGjQIGzduxMGDBwEAv//+O3755ReMGDHCaXxlZSVKSkocvo
iIiIiIiBpC1JGgF154ASUlJejduzeUSiXMZjNSU1Oh1Wqdxs+bNw/JycnNnCUREREREUmJqCNBX3zxBbKzs/HZZ59h165d+OSTT7BgwQJ88sknTuNnz56N4uJi29eJEyeaOWMiIiIiIvJ0CkEQBLGevHPnznjhhReQmJho25aSkoKsrCzs37+/3vuXlJQgODgYxcXFvCaIiIiIiEjGGlIbiDoSVF5eXqOHt1KphMViESkjIiIiIiKSOlGvCRozZgxSU1PRpUsXREVFYffu3XjzzTcxbdo0MdMiIiIiIiIJE3U6XGlpKZKSkrBq1SqcOXMGERERmDRpEubOnQtfX99678/pcEREREREBDSsNhC1CLpaLIKIiIiIiAjwoGuCiIiIiIiImhuLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkayIuk4QERHR1TKbzTAajSgsLERYWBg0Gg2USqXYaRERUQvGIoiIiDyWwWBAeno6CgoKbNtUKhUSEhIQFxcnYmZERNSScTocERF5JIPBAL1eD7VajbS0NKxduxZpaWlQq9XQ6/UwGAxip0hERC0UF0slIiKPYzabodVqoVarkZKSAi+vfz7Ts1gs0Ol0MJlMyMrK4tQ4IiKZ4GKpREQkaUajEQUFBdBqtQ4FEAB4eXlBq9UiPz8fRqNRpAyJiKglYxFEREQep7CwEAAQGRnpdL91uzWOiIjIHosgIiLyOGFhYQAAk8nkdL91uzWOiIjIHosgIiLyOBqNBiqVCtnZ2bBYLA77LBYLsrOzER4eDo1GI1KGRETUkrEIIiIij6NUKpGQkICcnBzodDrk5eWhvLwceXl50Ol0yMnJQXx8PJsiEBGRU+wOR0REHsvZOkHh4eGIj4/nOkFERDLTkNqARRAREXk0s9kMo9GIwsJChIWFQaPRcASIiEiGGlIbeDdTTkRERE1CqVQiOjpa7DSIiMiD8JogIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCIiIiIiIllhEURERERERLLCIoiIiIiIiGSFRRAREREREckKiyAiIiIiIpIVFkFERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkxVvsBIiIyL3MZjOMRiMKCwsRFhYGjUYDpVIpdlpEREQtBosgIiIJMRgMSE9PR0FBgW2bSqVCQkIC4uLiRMyMiIio5eB0OCIiiTAYDNDr9VCr1UhLS8PatWuRlpYGtVoNvV4Pg8EgdopEREQtgkIQBEHsJBqrpKQEwcHBKC4uRps2bcROh4hINGazGVqtFmq1GikpKfDy+uczLovFAp1OB5PJhKysLE6NIyIiSWpIbSDqSNC1114LhUJR4ysxMVHMtIiIPI7RaERBQQG0Wq1DAQQAXl5e0Gq1yM/Ph9FoFClDIiKilkPUa4K2b98Os9lsu713717cddddGDdunIhZERF5nsLCQgBAZGSk0/3W7dY4IiIiORN1JKh9+/ZQqVS2r++++w7dunXD4MGDxUyLiMjjhIWFAQBMJpPT/dbt1jgiIiI5azGNEaqqqpCVlYVp06ZBoVA4jamsrERJSYnDFxERARqNBiqVCtnZ2bBYLA77LBYLsrOzER4eDo1GI1KGRERELUeLKYK++eYbFBUVYerUqbXGzJs3D8HBwbavzp07N1+CREQtmFKpREJCAnJycqDT6ZCXl4fy8nLk5eVBp9MhJycH8fHxbIpARESEFtQdbvjw4fD19cWaNWtqjamsrERlZaXtdklJCTp37szucERE/5+zdYLCw8MRHx/PdYKIiEjSGtIdrkUslvrnn39iw4YN+Prrr+uM8/Pzg5+fXzNlRUTkeeLi4hAbGwuj0Y
jCwkKEhYVBo9FIegTIbDbL6niJiOjqtYgiKDMzEx06dMCoUaPEToWIyOMplUpER0eLnUazcDbypVKpkJCQwJEvIiKqlejXBFksFmRmZmLKlCnw9m4RNRkREXkAg8EAvV4PtVqNtLQ0rF27FmlpaVCr1dDr9TAYDGKnSERELZTo1wT9+OOPGD58OA4cOICePXs26L4NmfdHRETSYTabodVqoVarkZKS4rBArMVigU6ng8lkQlZWFqfGERHJRENqA9FHgoYNGwZBEBpcABERkXwZjUYUFBRAq9U6FEAA4OXlBa1Wi/z8fBiNRpEyJCKilkz0IoiIiKihCgsLAQCRkZFO91u3W+OIiIjssQgiIiKPExYWBgAwmUxO91u3W+OIiIjssQgiIiKPo9FooFKpkJ2dDYvF4rDPYrEgOzsb4eHh0Gg0ImVIREQtGYsgIiLyOEqlEgkJCcjJyYFOp0NeXh7Ky8uRl5cHnU6HnJwcxMfHsykCERE5JXp3uKvB7nBERPLmbJ2g8PBwxMfHc50gIiKZaUhtwCKIiIg8mtlshtFoRGFhIcLCwqDRaDgCREQkQw2pDbg6KREReTSlUono6Gix0yAiIg/Ca4KIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCIiIiIiIllhEURERERERLLCIoiIiIiIiGSFRRAREREREckKiyAiIiIiIpIVFkFERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZEX0IujkyZN4+OGH0bZtWwQEBKBv377YsWOH2GkREREREZFEeYv55OfPn0dsbCxuv/12fP/992jfvj0OHTqE0NBQMdMiIiIiIiIJE7UImj9/Pjp37ozMzEzbtsjISBEzIiIiIiIiqRN1Oty3336LG2+8EePGjUOHDh0QHR2NxYsX1xpfWVmJkpIShy8iIiIiIqKGELUIOnr0KDIyMtCjRw+sW7cO8fHxePrpp/HJJ584jZ83bx6Cg4NtX507d27mjImIiIiIyNMpBEEQxHpyX19f3Hjjjfjtt99s255++mls374dOTk5NeIrKytRWVlpu11SUoLOnTujuLgYbdq0aZaciYiIiIio5SkpKUFwcLBLtYGoI0Hh4eG4/vrrHbZdd911OH78uNN4Pz8/tGnTxuGLiIiIiIioIUQtgmJjY3HgwAGHbQcPHkTXrl1FyoiIiIiIiKRO1CLo2WefxZYtW/Dqq6/i8OHD+Oyzz7Bo0SIkJiaKmRYREREREUmYqEXQTTfdhFWrVuHzzz9Hnz598Morr+Dtt9+GVqsVMy0iIiIiIpIwURsjXK2GXPxERERERETS5TGNEYiIiIiIiJobiyAiIiIiIpIVFkFERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVrzFToCIiNzLbDbDaDSisLAQYWFh0Gg0UCqVYqdFRETUYrAIIiKSEIPBgPT0dBQUFNi2qVQqJCQkIC4uTsTMiIiIWg5OhyMikgiDwQC9Xg+1Wo20tDSsXbsWaWlpUKvV0Ov1MBgMYqdIRETUIigEQRDETqKxSkpKEBwcjOLiYrRp00bsdIiIRGM2m6HVaqFWq5GSkgIvr38+47JYLNDpdDCZTM
jKyuLUOCIikqSG1AYcCSIikgCj0YiCggJotVqHAggAvLy8oNVqkZ+fD6PRKFKGRERELQeLICIiCSgsLAQAREZGOt1v3W6NIyIikjMWQUREEhAWFgYAMJlMTvdbt1vjiIiI5IxFEBGRBGg0GqhUKmRnZ8NisTjss1gsyM7ORnh4ODQajUgZEhERtRwsgoiIJECpVCIhIQE5OTnQ6XTIy8tDeXk58vLyoNPpkJOTg/j4eDZFICIiArvDERFJirN1gsLDwxEfH891goiISNIaUhuwCCIikhiz2Qyj0YjCwkKEhYVBo9FwBIiIiCSvIbWBdzPlREREzUSpVCI6OlrsNIiIiFosFkFERBLDkSAiIqK6sQgiIpIQZ9cEqVQqJCQk8JogIiKi/4/d4YiIJMJgMECv10OtViMtLQ1r165FWloa1Go19Ho9DAaD2CkSERG1CGyMQEQkAWazGVqtFmq1GikpKfDy+uczLovFAp1OB5PJhKysLMlNjeP0PyIiAtgYgYhIdoxGIwoKCpCUlORQAAGAl5cXtFotEhMTYTQaJdU0gdP/iIioMTgdjohIAgoLCwEAkZGRTvdbt1vjpIDT/4iIqLFYBBERSUBYWBgAwGQyOd1v3W6N83Rmsxnp6emIiYlBSkoKoqKiEBgYiKioKKSkpCAmJgYZGRkwm81ip0pERC0QiyAiIgnQaDRQqVTIzs6GxWJx2GexWJCdnY3w8HBoNBqRMnQv6/Q/rVZb6/S//Px8GI1GkTIkIqKWjEUQEZEEKJVKJCQkICcnBzqdDnl5eSgvL0deXh50Oh1ycnIQHx8vmYYBcpz+R0RE7sPGCEREEhEXF4fk5GSkp6cjMTHRtj08PBzJycmSahRgP/0vKiqqxn6pTf8jIiL3YhFERCQhcXFxiI2NlXzLaPvpf85agktt+h8REbkXp8MREUmMUqlEdHQ07rzzTkRHR0uuAALkN/2PiIjci4ulEhGRx3K2TlB4eDji4+MlNf2PiIjq15DagEUQEZHEmM1myU+Hsye34yUiIucaUhvwmiAiIglxNjKiUqmQkJAg2ZER6/Q/IiIiV/GaICIiiTAYDNDr9VCr1UhLS8PatWuRlpYGtVoNvV4Pg8EgdopEREQtAqfDERFJgNlshlarhVqtdtotTafTwWQyISsri1PFiIhIkhpSG3AkiIhIAoxGIwoKCqDVah0KIADw8vKCVqtFfn4+jEajSBkSERG1HCyCiIgkoLCwEAAQGRnpdL91uzWOiIhIzlgEERFJQFhYGADAZDI53W/dbo0jIiKSMxZBREQSoNFooFKpkJ2dDYvF4rDPYrEgOzsb4eHh0Gg0ImVIRETUcohaBL300ktQKBQOX7179xYzJSIij6RUKpGQkICcnBzodDrk5eWhvLwceXl50Ol0yMnJQXx8PJsiEBERoQWsExQVFYUNGzbYbnt7i54SEZFHiouLQ3JyMtLT05GYmGjbHh4ejuTkZMmuE0RERNRQolcc3t7eUKlULsVWVlaisrLSdrukpKSp0iIi8khxcXGIjY2F0WhEYWEhwsLCoNFoOAJERERkR/Qi6NChQ4iIiIC/vz9iYmIwb948dOnSxWnsvHnzkJyc3MwZEhF5FqVSiejoaLHTICIiarFEXSz1+++/x4ULF9CrVy/k5+cjOTkZJ0+exN69exEUFFQj3tlIUOfOnblYKhERERGRzDVksVRRi6ArFRUVoWvXrnjzzTcxffr0euMbcqBERERERCRdDakNWlSL7JCQEPTs2ROHDx8WOxUiIiIiIpKoFlUEXbhwAUeOHEF4eLjYqRARERERkUSJWgT9+9//xubNm3Hs2DH89ttvuO+++6BUKjFp0iQx0yIiIiIiIgkTtTvcX3/9hUmTJuHvv/9G+/btceutt2LLli1o3769mGkREREREZGEiVoELV++XMynJyIiIiIiGWpR1wQRERERERE1NRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIi
IiWWERREREREREsiJqi2wiInI/s9kMo9GIwsJChIWFQaPRQKlUip0WERFRi8EiiIhIQgwGA9LT01FQUGDbplKpkJCQgLi4OBEzIyIiajk4HY6ISCIMBgP0ej3UajXS0tKwdu1apKWlQa1WQ6/Xw2AwiJ0iERFRi6AQBEEQO4nGKikpQXBwMIqLi9GmTRux0yEiEo3ZbIZWq4VarUZKSgq8vP75jMtisUCn08FkMiErK4tT44iISJIaUhtwJIiISAKMRiMKCgqg1WodCiAA8PLyglarRX5+PoxGo0gZEhERtRwsgoiIJKCwsBAAEBkZ6XS/dbs1joiISM5YBBERSUBYWBgAwGQyOd1v3W6NIyIikjMWQUREEqDRaKBSqZCdnQ2LxeKwz2KxIDs7G+Hh4dBoNCJlSERE1HKwCCIikgClUomEhATk5ORAp9MhLy8P5eXlyMvLg06nQ05ODuLj49kUgYiICOwOR0QkKc7WCQoPD0d8fDzXCSIiIklrSG3AxVKJSPLMZjOMRiMKCwsRFhYGjUYj6RGRKz/bunJ6HBERkdyxCCIiSXM2MqJSqZCQkCC5kRHrYqkxMTGYO3cuIiMjYTKZkJ2dDb1ej+TkZMkdMxERUWNwOhwRSZZ9UaDVah2KgpycHEkVBVwslYiI5I6LpRKR7JnNZqSnpyMmJgYpKSmIiopCYGAgoqKikJKSgpiYGGRkZMBsNoudqltwsVQiIiLXsQgiIkmSW1HAxVKJiIhcxyKIiCRJbkUBF0slIiJyHYsgIpIkuRUFXCyViIjIdSyCiEiS5FYUcLFUIiIi17E7HBFJlpy6w1lxsVQiIpKrhtQGLIKISNLkWBTIbXFYIiIigEUQEZEDFgVERETS15DawLuZciIiEo1SqUR0dLTYaRAREVELwcYIREREREQkKyyCiIiIiIhIVlgEERERERGRrPCaICIiiWEjCCIiorqxCCIikhBnLcFVKhUSEhIk2xKciIiooTgdjohIIqyLw6rVaqSlpWHt2rVIS0uDWq2GXq+HwWAQO0UiIqIWgesEERFJgNlshlarhVqtRkpKCry8/vmMy2KxQKfTwWQyISsri1PjiIhIkhpSG3AkiIhIAoxGIwoKCqDVah0KIADw8vKCVqtFfn4+jEajSBkSERG1HCyCiIgkoLCwEAAQGRnpdL91uzWOiIhIzlgEERFJQFhYGADAZDI53W/dbo0jIiKSMxZBREQSoNFooFKpkJ2dDYvF4rDPYrEgOzsb4eHh0Gg0ImVIRETUcrAIIiKSAKVSiYSEBOTk5ECn0yEvLw/l5eXIy8uDTqdDTk4O4uPj2RSBiIgIV9kdbseOHfjiiy9w/PhxVFVVOez7+uuvrzq5+rA7HBGRI2frBIWHhyM+Pp7rBBERkaQ1pDZo9GKpy5cvx+TJkzF8+HD8+OOPGDZsGA4ePIjTp0/jvvvua+zDEhHRVYiLi0NsbCyMRiMKCwsRFhYGjUbDESAiIiI7jS6CXn31Vbz11ltITExEUFAQ3nnnHURGRuKJJ55AeHi4O3MkIqIGUCqViI6OFjsNIiKiFqvR1wQdOXIEo0aNAgD4+vqirKwMCoUCzz77LBYtWuS2BImIrpbZbMbu3buxceNG7N69G2azWeyUiIiISESNLoJCQ0NRWloKAOjUqRP27t0LACgqKkJ5eXmDH++1116DQqHAM88809iUiIhqMBgM0Gq1ePbZZ/HKK6/g2WefhVarhcFgEDs1IiIiEkmji6C4uDisX78eADBu3DjMnDkTjz/+OCZNmoQ777yzQY+1fft2fPjhh2zdSkRuZTAYoNfroVarkZaWhrVr1yItLQ1qtRp6vZ6FEBERkUw1ujtcYWEhKioqEBERAYvFgtdffx2//fYbevToAZ1Oh9DQUJce58KFC+jfvz/S09ORkpKCfv364e2333bpvuwOR0S1MZvN0Gq1UKvVSElJgZfXP5/5WCwW6HQ6mEwmZGVlsWkAERGRBDSkNmj0SFBYWB
giIiIuP4iXF1544QV8++23WLhwocsFEAAkJiZi1KhRGDp0aL2xlZWVKCkpcfgiInLGaDSioKAAWq3WoQACLv/N0mq1yM/Ph9FoFClDIiIiEkuju8PVV4C4MjKzfPly7Nq1C9u3b3fpOefNm4fk5GSXYolI3goLCwEAkZGRTvdbt1vjiIiISD4aXQSFhIRAoVDU2C4IAhQKRb3dl06cOIGZM2di/fr18Pf3d+k5Z8+ejVmzZtlul5SUoHPnzg1LnIhkISwsDABgMpkQFRVVY7/JZHKIIyIiIvlodBH0008/Abhc9IwcORJLlixBp06dXL7/zp07cebMGfTv39+2zWw2w2Aw4P3330dlZWWNefp+fn7w8/NrbMpEJCMajQYqlQrZ2dlOrwnKzs5GeHg4G7IQERHJUKMbI9gLCgrC77//DrVa7fJ9SktL8eeffzpse/TRR9G7d288//zz6NOnT72PwcYIRFQXa3e4W265BTfffDP8/PxQWVmJbdu2YcuWLUhOTkZcXJzYaRIREZEbNKQ2aPRI0NUKCgqqUei0atUKbdu2dakAIiKqT1xcHCZMmICVK1ciJyfHtl2pVGLChAksgIiIiGTKbUWQs+uDiIjEZDAYsGLFCqcjQStWrMD111/PQoiIiEiGGj0dLjo62lb4GI1G9O7dG76+vrb9u3btck+GdeB0OKLGM5vNMBqNKCwsRFhYGDQajaTWy5HzOkFSf22JiIicaZbpcPfee6/t+7Fjxzb2YYhIBAaDAenp6SgoKLBtU6lUSEhIkMzIiHWdoKSkpFrXCUpMTITRaER0dLRIWbqfHF5bIiKiq9XoIkiv17szDyJqJtZmATExMUhKSkJkZCRMJhOys7Oh1+sl0yxAjusEyeW1JSIiulpe9YfUrqioCEuWLMHs2bNtJxK7du3CyZMn3ZIcEbmX2WxGeno6YmJikJKSgqioKAQGBiIqKgopKSmIiYlBRkZGvet8eQL7dYKckdo6QXJ6bYmIiK5Wo4sgo9GInj17Yv78+ViwYAGKiooAAF9//TVmz57trvyIyI2sU8S0Wm2tU8Ty8/NhNBpFytB97NcJqq6uxu7du7Fx40bs3r0b1dXVklsnSE6vLRER0dVq9HS4WbNmYerUqXj99dcRFBRk2z5y5Eg89NBDbkmOiNxLTlPElEolEhISMHfuXIwePRqVlZW2fdYucS+//LJkGgbI6bUlIiK6Wo0eCdq+fTueeOKJGts7derkcEEuEbUccpsiBtTevl9qbf3l+NoSERE1VqOLID8/P5SUlNTYfvDgQbRv3/6qkiKipmE/RcxisTjss1gskpoiZn+NzHfffYe33noLSUlJeOutt/Ddd99J7hoZOb22REREV6vRRdA999yDl19+GdXV1QAuf6p6/PhxPP/883jggQfcliARuY91ilhOTg50Oh3y8vJQXl6OvLw86HQ65OTkID4+XhJTxOR2jYz9aztnzhysWrUKa9euxapVqzBnzhxJvbZXMpvNDtd8SaWwJSKiptPoa4IWLlyIBx98EB06dMDFixcxePBgFBQUICYmBqmpqe7MkYjcKC4uDsnJyUhPT0diYqJte3h4uKRaKFuvfTl16hReeeWVGuvmTJ8+3SFOCuLi4jBhwgR88cUXyMnJsW338vLChAkTJPPa2uO6SERE1BgKQRCEq3mAX375BUajERcuXED//v1xyy23YNeuXQCA1q1bo3///m5J1JmGrApLRI7MZjOMRiMKCwsRFhYGjUYjqVGC3bt349lnnwUADBo0CFqt1mHdnN9++w0A8NZbb0lmsVSDwYC5c+faGj9Y2TeCkFJhYL8u0pWvb05OjqSKeiIiql9DaoMGF0HOrgOyt2fPHsTFxaFLly6IiorCd99915CHbxAWQUSNJ/UiqKqqCiNGjECbNm2wcuVKeHv/M/B96dIljBs3DiUlJfj+++/h6+srYqbuYTab8cADD6CoqAgxMTF4+OGHbUVBVlYWcnJyEBISgq
+++koSr7PZbIZWq4VarUZKSorDlEeLxQKdTmc7dikcLxER1a8htUGDp8OFhITU2VVJEAQoFIpaOxQRkfjkMIUoLy8PZrMZRUVFmDt3bo2RgqKiIgiCgLy8PEmMBOXm5qKoqAh9+/bFyy+/jL179yInJwdhYWF4+eWXMWvWLOzZswe5ubkYMGCA2OleNes1X0lJSbVe85WYmAij0SiJ15eIiNyrwUXQTz/9VOf+Q4cOOW2dTUQtg/0UoqSkJIfCQK/XS2YKkfVanxdffBEfffRRjeufXnzxRaSmpkrmmqDc3FwAwIABA/DII4/UKHCHDx8uqSKI6yIREdHVaHARNHjw4Dr3h4SENDYXImpi9m2j7acQRUVFISUlBTqdDhkZGYiNjfX4KUTW9XAiIiKQnZ1dY+rf/v37HeKk4pNPPnFa4H766adip+ZW9usiRUVF1djPdZGIiKgujW6RTUSeR05to+3XzVEoFIiOjsadd96J6OhoKBQKya2bYz2O1q1b4+WXX0ZUVBQCAwMRFRWFl19+Ga1bt3aI83RcF4mIiK4GiyAiGZHTFCI5rYkEwFbUlpaWIikpyeF4k5KSUFpa6hDn6eT2+hIRkXs1ep0gIvI8cptCJJc1kQCgqKjI9v2uXbsc1gny8/NzGufp5PT6EhGRezW4CLr//vvr3C+l/2CJpMZ+CpGztsJSnEIUFxeH2NhYSbcDB/4pXB9//HGsWbPGoTFCWFgYRo0ahSVLlkimwLWSy+tLRETu1eAiKDg4uN79kydPbnRCRNR0rFOI9Ho9dDpdrQtMSu0EUqlUSr5NsrXAzcvLw7Jly7B3715bUdCnTx/o9XrJFbhWcnh9iYjIvRq8WGpLwsVSiRrH2TpB4eHhiI+P5xQiD2bf/ry2ApevLxERSVVDagMWQUQyZTabOYVIgljgEhGRXDWkNmBjBCKSvKqqKqxevRqnTp1CREQExo4dC19fX7HTajJXfrZ1ZQtpIiIiueNIEJEMORstUKlUSEhIkNxowQcffICVK1fCbDbbtimVSowbNw7/+te/RMzM/TgdjoiI5IzT4YioVnI6Uf7ggw+wfPlyhIaGYvr06YiJiUFOTg4++ugjnD9/HhMnTpRMIWQ2m6HVaqFWq512/tPpdDCZTMjKyuK0RyIikqSG1AbSWDWPiFxiNpuRnp6OmJgYpKSkICoqCoGBgYiKikJKSgpiYmKQkZHhMGriqaqqqrBy5UqEhoZi5cqVGD16NNq2bYvRo0c7bK+qqhI7VbcwGo0oKCiAVquFIAjYvXs3Nm7ciN27d0MQBGi1WuTn58NoNIqdKhERkehYBBHJiP2Jsv1IAQB4eXlJ6kR59erVMJvNmD59Ory9HS9/9Pb2xrRp02A2m7F69WqRMnSvwsJCAMCpU6eg1Wrx7LPP4pVXXsGzzz4LrVaLU6dOOcQRERHJGRsjEMmI9QQ4MjLSaXe4yMhIhzhPZj3pj4mJcbrfut0a5+msi6C++uqrGDhwIGJjY1FVVQVfX1+cPHkSr776qkMcERGRnLEIIvr/5NAy2noCvGrVKqxZs6ZGY4QxY8Y4xHmyiIgIAEBOTg5Gjx5dY39OTo5DnKeLioqCUqmEt7c3tm3b5tARzsvLC76+vrh06RKioqJEzJKIiKhlYBFEBPl0S9NoNAgJCcHixYsRExODpKQkW2OErKwsLF68GCEhIdBoNGKnetXGjh2LDz74AB999BHuvvtuhylxly5dwscffwylUomxY8eKmKX75OXlwWw2w2w2w8fHBxMnTsTIkSOxdu1arFy5EpWVlba46OhokbMlIiISF68JItmzdktTq9VIS0vD2rVrkZaWBrVaDb1eD4PBIHaKzUqhUIidglv4+vpi3LhxOH/+PMaNG4c1a9bg3LlzWLNmjcN2qawXdObMGQBAYGAgQkND8dlnn+Hhhx/GZ599hrCwMAQGBjrEERERyRlHgkjWruyWZm0WYO2WptPpkJGRgdjYWElMjTMajSgqKs
Ljjz+ONWvWIDEx0bYvPDwcjz32GJYsWQKj0SiJ0QJr++uVK1di4cKFtu1KpVJS7bEB4I8//gAA3HvvvZg+fXqNqZ1LlizB559/jj/++APDhw8XOVuSs4qKChw/frzJn6dLly7w9/dv8uchIs/EIohkzdotLSkpqdZuaYmJiZIpCqwND+677z5MnDixxolyZWUllixZIonGCFb/+te/MGXKFHz44Yf466+/cM011+CJJ55AQECA2Kk1iYMHD0KhUDi8Xy0WCw4dOiRiVkT/OH78OGbMmNHkz7No0SL07NmzyZ+HiDwTiyCSNftuac5IqVsa8E/DA5PJhN69e9fYbzKZHOKk4MrrvXbs2IEtW7ZI7nqvTp06Abh8fDqdrsZCuDt27HCIkxI5NDWxksKxdunSBYsWLXI5/s8//0RqairmzJmDrl27Nuh5iIhqwyKIZM2+KHDWNUtqRYFGo4FKpcK7776LoqIinD592ravY8eOCAkJQXh4uCQaIwD/XO915XU/58+fh16vR3JysmQKIWsjCH9/fxw5csRhqqNKpUKrVq1QUVEhmUYQVnJpagJI51j9/f0bNULTtWtXjuwQkduwMQLJmrUoyM7OdmgpDFyeQpSdnS2pokCpVGLIkCE4cOAAqqqq8Nxzz+HLL7/Ec889h6qqKhw4cACDBw/2uE+WnTGbzXjzzTchCAL69+/v0PSif//+EAQBb731Fsxms9ipuoW1EURZWVmN5genT59GWVmZpBpBAPJqaiKnYyUiag4sgkjWlEolEhISkJOTA51Oh7y8PJSXlyMvLw86nQ45OTmIj4+XRFEAXC4MNm3ahF69esHPzw8LFy7Egw8+iIULF8Lf3x+9evXC5s2bJVEY5ObmoqioCH379kVqaiqioqIQGBiIqKgopKamom/fvjh//jxyc3PFTtVtrr/+egCAIAgO2623rful4MqmJvavb0pKCmJiYpCRkSGJ97KcjpWIqLmwCCLZi4uLQ3JyMo4ePYrExESMHDkSiYmJMJlMkpouBfzTCOLpp5/Gp59+isTERNx3331ITEzEJ598gqeffhr5+fkwGo1ip3rVrMXN1KlTnTa9mDp1qkOcp7OeKA8aNAg//PCDw2v7ww8/YNCgQZI6Uba+l7Vaba1NTaTyXpbTsRIRNRdeE0SEy4VQbGysx19wXB9rg4dTp07hlVdecbi24KuvvsL06dMd4qRAKuse1ce+06GPjw+6d++OsLAwhIWFwcfHR7KdDiMjI502C5BSUxO5NXAhImoOLIKI/j+lUimJk8O6WBs8pKamws/Pz2Hf+fPnkZqa6hDnyfr164dly5YhMzMTffv2xd69e20nyX369MHSpUttcVJQV4GrUqkkV+Ba36OrVq3CmjVrahzvmDFjHOI8mdwauBARNQcWQUQyEhUVBS8vL1gsFvTr1w+33HIL/P39UVFRgS1btmDr1q3w8vJyeqLlafr164eQkBDs2bMHo0aNQlVVlW2fr68vqqqqEBISIpkiyHoC/Oqrr+KWW27BhAkTbK/ttm3b8OqrrzrEeTqNRoOQkBAsXrwYMTExSEpKsrUEz8rKwuLFixESEiKJpib2DVzsF3UGpNnAhYioObAIIpKRPXv22Lrg7d69G1u3brXts3YNs1gs2LNnDwYMGCBKju6iVCpx9913Y/ny5bh06ZLDPuvtu+++WzJTHqOioqBUKuHv74+jR48iJyfHtq9jx44IDAxERUWFJApcV0llKqS1gYter3e6BlROTg6Sk5Ml814mImoObIxAJCP2TQCuPEG0vy2FZgH2nfDat2/vsK9Dhw6S6oQHAHl5eTCbzSgrK0NVVRX+/e9/46uvvsK///1vVFVVoaysDGazGXl5eWKn6hZGoxFFRUV4/PHHYTKZHJqaHDt2DI899hjOnz8vmWYBcmrgQkTUHDgSRCQj9q2S33nnnRrXycycORP79u2r0WLZE9k3Cujdu3eNC+f3798vqUYB586dAwD06NEDJS
UlWLBggW2fSqVCjx49cOjQIVucp7Ne23Tfffdh4sSJNV7fyspKLFmyRDLXQAHyaeBCRNQcWAQRyUhQUBAAoLKy0un+iooKhzhPdmX3sMOHD+PUqVOIiIhAVFSU5DpqFRUVAQDGjh2LIUOGYN68ebbjnT17Nn766ScsXLjQFufprmwWcGUhK9VmAXJo4EJE1BxELYIyMjKQkZGBY8eOAbg8p33u3LkYMWKEmGkRSZb1hPDIkSMYPXq0QzHk5+dnuy2FE0frMbz55pv46aefHKa9ffDBB7j99tsd4jxdSEgIAGDJkiUOo0AmkwmjR4+27bf+6+nYLICIiK6GqNcEXXPNNXjttdewc+dO7NixA3fccQfGjh0rmTnrRC1Nu3btbN9XV1c77LO/bR/nqTQaDQIDA7FhwwYEBQWhX79+uOGGG9CvXz8EBQVhw4YNCAwMlMxJsvU1q22kx7pdCq8t8E+zgJycHOh0OuTl5aG8vBx5eXnQ6XTIyclBfHw8p4oREZFToo4EWddxsEpNTUVGRga2bNkiqw5GRM3FvoNYq1atcObMGdu+9u3b48KFC5LpIGY2m23T+4qKipw2e6ioqIDZbJbEiXKPHj3cGucJrM0C0tPTkZiYaNseHh7OZgFERFSnFnNNkNlsxsqVK1FWVoaYmBinMZWVlQ7Td0pKSporPSJJsO8gptFoMGnSJNs0uG3bttnaKufl5Xn8dQerV6+2tQOvjcViwerVqzFu3LhmyqrpLFq0yPa9dS0oZ7cXLVqEWbNmNXt+TYXNAoiIqDFEL4L27NmDmJgYVFRUoHXr1li1ahWuv/56p7Hz5s1DcnJyM2dIJB3WJgBz5szBRx995LCWTHh4OObMmYPU1FRJNAs4fvw4gMsFQNu2bXH27Fnbvvbt2+Pvv/+GxWKxxXm6/fv327739vZ2WBzW/rZ9nFSwWQARETWU6EVQr169kJubi+LiYnz55ZeYMmUKNm/e7LQQmj17tsMnmCUlJejcuXNzpkvk0axNACIiIpCdne20bbR9nCezdgezWCwOBRAAh9vWOE9nbWvu7+/vUAABlxeH9ff3R0VFhSTan8uZ2WzmqBcRkRuIXgT5+vqie/fuAIABAwZg+/bteOedd/Dhhx/WiPXz84Ofn19zp0gkGVd21LL/9FxqHbV8fHzcGtfSde7cGYcOHUJFRUWNhXAFQbBdH8UPjjyXwWBAeno6CgoKbNtUKhUSEhJ4/RMRUQOJ2h3OGYvFUusaJkR0deTUUcvf39/2vUKhwIABA/DYY49hwIABDkWCfZwn69Chg+17QRBw1113YdGiRbjrrrscRn/s48hzGAwG6PV6qNVqpKWlYe3atUhLS4NarYZer4fBYBA7RSIijyLqSNDs2bMxYsQIdOnSBaWlpfjss8+wadMmrFu3Tsy0iCRNLh217EeNBUHAzp07sXPnzjrjPFlZWZnD7fXr12P9+vX1xlHLZzabkZ6ejpiYGIc1kaKiopCSkgKdToeMjAzExsZK4gMMIqLmIGoRdObMGUyePBn5+fkIDg6GRqPBunXrcNddd4mZFpHkyaGjlnURZquePXuiU6dOOHnyJA4ePFhrnKeyb2bh7e2NS5cuOb0thaYXcmM0GlFQUICkpCSHRWGBy40/tFotEhMTYTQa2SCCiMhFohZBH330kZhPTyRrUu+o1apVKwD/tIc+ePCgQ/Fj3W6N83SBgYG2752dKDuLI89gLVwjIyOd7rduZ4FLROS6FndNEBGRO6jVagCXrzO8+eabERcXh+joaMTFxeHmm2+2rZtjjfN0w4YNA3C52Yz9KBBwuTucr6+vQxx5Dmu3xto6GVq3S6GrIxFRcxG9OxwRUVPo06cP1qxZAwDYvn27Q3MA+8YIffr0afbcmkL//v1tC996e3vjhhtuQNu2bfH3339jz549qKqqgp+fH/r37y92qtRAV3Z1tB/Zk1pXRyKi5sKRICKSpCu7pdmTare0gIAAAJdHfnbv3o0NGz
Zg9+7dtpEh637yLHLq6khE1Fw4EkREkqTRaBAYGIjy8vJaYwIDAyXz6bnRaERRURGAy1Pi7BdMtd4uKirixfMeSi5dHYmImguLICKSJLPZbFsgtDYVFRUwm82S+AT93LlzAICBAwdizpw5mD9/Pk6dOoWIiAg8//zzSE1NxdatW21x5Hnk0NWRiKi5sAgiIklavXq1rflBbSwWC1avXo1x48Y1U1ZNxzoKdO7cOdxzzz227SaTCffccw+6devmEEeeSepdHYmImguLICKSpL/++sv2fUhICIYNG2ZbJ+jHH3+0FQP2cZ4sJCQEAHDkyBGn+63brXFERERyxiKIiCTp7NmzAC5fD7NixQrs27cPhYWFiImJwfTp0zFmzBhUVVXZ4jxdmzZtbN8HBwdj+PDhiIiIwKlTp7Bu3ToUFxfXiCMiIpIrFkFEJEnWk35BEPDII4/gzJkztn0dOnSwdYizxnm6X375BcDl6VKlpaX44osvbPuUSiWUSiXMZjN++eUXDBw4UKw0m4TZbOZ1MkRE1CAsgohIkqxrAVVXV+P8+fOYNGkSRo4cibVr1+LLL79EdXW1Q5ynO3jwIIDLBUFoaCjuuusu20jQ+vXrcf78eYc4qTAYDEhPT0dBQYFtm0qlQkJCAjumERFRrVgEEcmU1D89HzRoEPbu3Qvg8ro5n3/+OT7//HMAcFhsctCgQaLk526tWrUCAISGhsLX19dhJEilUiE0NBTnz5+3xUmBwWCAXq9HTEwMkpKSEBkZCZPJhOzsbOj1eraOJiKiWrEIIpIhOXx63qNHD9v3Vy6Wat81zj7Ok914443YtWsXSkpKsHr1anz//fe2FtkjRozA2LFjbXFSYDabkZ6ejpiYGKSkpNgK26ioKKSkpECn0yEjIwOxsbGSKu6JiMg9vOoPISIpsX56rlarkZaWhrVr1yItLQ1qtRp6vR4Gg0HsFN2ipKTErXEtXfv27QFcLg5Gjx6NtLQ0rFq1CmlpaRg9ejTMZrNDnKczGo0oKCiAVqt1GNkDLo/0abVa5Ofnw2g0ipQhERG1ZCyCiGTkyk/Po6KiEBgYaPv0PCYmBhkZGbYTZk/maitoqbSMbteunVvjWrrCwkIAQGRkpNP91u3WOCIiInssgohkRE6fnldVVbk1rqWLioqCUqmEj4+P0/0+Pj5QKpWIiopq5syaRlhYGIDLi8E6Y91ujSMiIrLHIohIRuw/PTebzdi9ezc2btyI3bt3w2w2S+rT85UrV7o1rqXLy8uD2WxGdXU1vL29ER0djaFDhyI6Ohre3t6orq6G2WxGXl6e2Km6hUajgUqlQnZ2tsM1XsDla76ys7MRHh4OjUYjUoZERNSSsTECkYxYPxVftWoV1qxZU6MxwpgxYxziPJn9sbkjrqWzroPk5+eH6upq7N6927bPy8sLfn5+qKysdFgvyZMplUokJCRAr9dDp9NBq9U6dIfLyclBcnIymyIQEZFTLIKIZESj0SAkJASLFy/GLbfcggkTJthOjrdu3YrFixcjJCREEp+eX7hwwa1xLd0ff/wBAKisrISPj4/D6IhSqURlZaUtbvjw4aLk6G5xcXFITk5Geno6EhMTbdvDw8PZHpuIiOrEIohIpnbv3o0tW7bYbvv5+QGQzuKhV7bFvtq4ls6+6LnyNbS/feXUMU8XFxeH2NhYSa95ZU/q63sRETUXFkFEMmI0GlFUVFRnzPnz52E0GhEdHd08STWRS5cuuTXOk1RXV9d5W2qUSqXHv19dIYf1vYiImguLICIZOXfuHABg4MCBSElJwd69e22fKPfp0wc6nQ5bt261xXmyK7vfXW1cSxcYGGj7XqlUOhR39rft48hzWNf3iomJQVJSksP1T3q9ntP/iIgaSBr/+xORS6yjQLfddpvTFtm33nqrQ5wn8/X1dWtcS2dfuF45umV/WwoF7pWcdTqUEjmt70VE1Fw4EkQkI9aFQVevXo1PP/3UoVNYhw4dEBwc7BDnyQICAn
D+/HmX4shzyWGKmHV9r6SkpFrX90pMTJTENFYioubCIohIRtq1awcAOHToUI2TqXPnztmKImucJ2vdurVb41o6V18zKby2VtYpYld2Oty2bZukpojZr+/ljJTW9yIiai4sgohkJCoqCl5eXrBYLPD29kZVVZVtn/W2l5cXoqKiRMzSPbp27YqDBw+6FCcFZWVlbo1r6axTxHr27AmTyYScnBzbPpVKhZ49eyIjIwOxsbEe3z3Num6XyWRy+rtpMpkc4oiIqH4sgohkZM+ePbYWyf3798fAgQMd1gnasmULLBYL9uzZgwEDBoic7dXJzc11a1xL9/fff9u+9/HxcegI5+vrayt47eM8mXWK2OnTp502C8jJyYEgCJKYIqbRaKBSqZCdnY2UlBSHUVyLxYLs7GyEh4dLYn0vIqLmwiKISEasJ/xTp07FDz/84LBOUHh4OKZMmYJPPvkEubm5Hl8EuXI9UEPiWjrrYqhAzZbY9iN+9nGezNrg4eabb3YoDKzNAmbPni2ZTodKpRIJCQnQ6/WYM2cObr75Zoepf1u2bEFycrLHj3gRETUnFkFEMtS3b1888sgjNRZdlMqoCCC/Ftk9e/bEzp07ATiO/Fx5u2fPnqLk526udDrcunWrJDodApcXhZ0wYQJWrlzpMPVPqVRiwoQJkrj2iYioOUnjf38ickm/fv0AAEuXLrVNi7OyWCxYunSpQ5wnu/baa90a19L179/f9n1gYCDGjx+PmTNnYvz48Q5rA9nHeTJrB8Off/7Z6Xv5l19+cYjzdAaDAStWrKgx2qNUKrFixQoYDAaRMiMi8kwcCSKSkX79+iEkJAR79uzB8OHDHU4erQ0TQkNDJVEE2Y+EuCOupbMfDSkqKsIXX3xRb5wns3a527p1K3Q6HbRarcM1QVu3bnWI82RmsxlvvvkmBEHAgAED8PDDD9uONSsrCzk5OXjrrbck0QSCiKi5sAgikhGlUom7774by5cvd/rpOQAMHz5cEidSJ0+edGtcS+fqtC+pTA+zNgsIDg7GkSNHkJiYaNunUqnQq1cvlJSUSKJZQG5uLoqKitC3b1+kpqY6XP+UmpqKmTNnYs+ePZK4lo+IqLlI4yNBInKJ2WzGl19+WWfMl19+KYmV5109BikcK+B6e2SptFG2Ngs4ePAg1Go1Zs6cif/+97+YOXMmIiMjcfDgQcTHx0uioLdvaOLs+qepU6c6xBERUf04EkQkI9u2bcOlS5fqjLl06RK2bduGmJiYZsqqafj6+qKiosKlOCno3bu37fsBAwbg5MmTuHDhAlq3bo1OnTrZmibYx3m6uLg4JCcnIz093aFZQHh4uGQWSrWnUCjEToGISDJYBBHJyJIlS1yO8/QiSKVS4dixYy7FScG3335r+95a8ADAhQsXUFBQ4BA3YcKEZs2tKcXFxeGWW27B6tWrcerUKURERGDs2LGSKW6By9fyLVu2DJmZmejXr1+NdYKk1NCEiKi5sAgikpHjx4/bvg8ODkZ0dDQCAgJw8eJF7N69G8XFxTXiPFV9I14NjWvp9uzZ43KclIogg8GA9PR0h0Lvq6++QkJCgmRGguwbmsyZM6dGY4Q9e/YgJCSERRARUQOwCCKSEfvpNKWlpdi0aZPttv2ny1KYdtOuXTv89ddfLsVJgbf3P3/O16xZg8OHD9vWgOrevTvGjBlTI87TGQwG6PV63HLLLZgwYQL8/f1RUVGBbdu2Qa/XS2ZKnFKpxKxZszB37lzs2rXLYeqfn58fAGDWrFmSuP6JiKi5SOd/QyKqV+vWrVFYWAgAtXaHs8Z5usrKSrfGtXQmkwnA5RPmgIAAREdH2/ZdunQJSqUSZrPZFufpzGYz0tPT0bNnTxw9etShMOjYsSN69uyJjIwMybSNjouLw8svv4y0tDScPn3atj00NFRSo15ERM2FRRCRjPTo0cO2fkp9cZ7u77//dmtcS1dWVgbgcnHw4IMPYtiwYQgPD0d+fj5+/PFHWxc8a5
ynMxqNKCgoQEFBAQYNGoS5c+c6rBP022+/2eLsC0JPFhcXh9jYWBiNRtson0ajkUSRR0TU3FgEEcmIqydLUjipunKk62rjWjqVSoVz584BqHuxVKk0grAe68CBA5GSkuKwdk5KSgpmz56NrVu32uKkQqlUSqaoIyISE9cJIpIRQRDcGteSBQYG2r738fFx2Gd/2z7Ok6Wmptq+v7Izmv1t+zhPZl309bbbbnO6ds6tt97qEEdERGSPRRCRjMjpOhn75g7V1dUO++xvS6EJBHD5Oi5rMVBVVYX27dujV69eaN++PaqqqgBcLg6kcL0XAISEhAAAfv75Z6fXt/3yyy8OcURERPZYBBHJSGhoKIDLHcKuPPlXKBS2zmHWOE8WFhbm1riWzmg0wmKxICAgAABw9uxZHDhwAGfPngUABAQEwGKxwGg0ipmm21i7+m3btg06nQ55eXkoLy9HXl4edDodtm3b5hBHRERkj9cEEcmI9XqQS5cu2dYVsbYVzs3NtU0dksJ1Iz169MDu3btdipMCa9e/ixcvIigoCEFBQaioqIC/vz9KS0tRWlrqEOfpNBoNVCoVgoODcfToUSQmJtr2qVQq9OzZEyUlJdBoNCJmSURELRVHgoj+P7PZjN27d2Pjxo3YvXu3rZuWlPTv39/2fVFRETZt2oQffvgBmzZtcrh2wj7OU5WXl7s1rqVr06YNACAoKAjLli1DZGQkgoODERkZiWXLliEoKMghztMplUokJCTgwIEDNQq7wsJCHDhwAPHx8ZJo8kFERO4n6kjQvHnz8PXXX2P//v0ICAjAoEGDMH/+fPTq1UvMtEiGnK06r1KpJLf+hnXl+bouFg8NDZXEyvOurocjlXVzjh49CuDy9Vz33nuvbbvJZMKvv/5qa45w9OhR3HTTTWKk2CQUCoXTqZ1SudaLiIiahqgjQZs3b0ZiYiK2bNmC9evXo7q6GsOGDZPMOhbkGayrzqvVaqSlpWHt2rVIS0uDWq2GXq+HwWAQO0W3USqVaNWqVZ0xgYGBkvj03NW/I1L5e2Mt4K1NEDp37oxbb70VnTt3dthuX+h7MvvFUoODgx32BQcH2xZLldqIrhxGrImImoOoI0E//PCDw+2lS5eiQ4cO2Llzp6Q+faeWy3oiFRMT43StEZ1OJ6lV5y9cuICTJ0/WGXPy5ElcuHDB47uIde3aFceOHXMpTgqubPBw4sQJnDhxot44T3XlYql6vV7yi6XKZcSaiKg5tKhrgoqLiwHU/p90ZWUlSkpKHL6Irob1REqr1Tpda0Sr1SI/P18yHbXmzZtn+z40NBRDhgzBiBEjMGTIEIeOcPZxnurMmTNujWvp9u3b59a4lu7KxVKjoqIQGBho+wBj4MCBDnGeTk4j1kREzaHFdIezWCx45plnEBsbiz59+jiNmTdvHpKTk5s5M5Iy6wXVkZGRMJvNMBqNKCwsRFhYGDQaDSIjIx3iPJ11FCggIABKpRKbNm2y7WvXrh0CAgJw8eLFekeLPIGrxyCFYwWA/Px8t8a1dPaLpQqCgN27dzv87t56663YunWrJBZLlduINRFRc2gxRVBiYiL27t1rW+DOmdmzZ2PWrFm22yUlJbb57kSNYR11XLVqFdasWVNjmsno0aMd4qTi4sWLuHjxosM2qXxibuXqSLFURpQFQXBrXEtnXQR19erVWLZsGU6fPm3b17FjR1sXPCkslmodsU5KSqp1xDoxMVFSU/+IiJpaiyiCnnzySXz33XcwGAy45pprao3z8/ODn59fM2ZGUqfRaBASEoLFixcjJiYGSUlJtusKsrKysGTJEoSGhkpmrZHrr7/epetkrr/++qZPhtzKukiqu+JaOusiqIcOHUJoaCjGjx+PiIgInDp1CuvXr8ehQ4cc4jyZ/Yi1M1IbsSYiag6iFkGCIOCpp57CqlWrsGnTplr/wBOJSSqfnAOo8Sny1cZRy3H+/Hm3xrV0UVFRUCqV8Pb2RnFxMb744gvbPqVSCT
8/P1y6dAlRUVEiZuke1pFok8nk9Hisbd6lNmJNRNSURD3TSUxMRFZWFj777DMEBQXZOv1cOU2HqKkYjUYUFRXh8ccfh8lkQmJiIkaOHInExEQcO3YMjz/+OIqKiiTTGEFu18nIiat/N6Xy9zUvLw9msxmVlZVo06YNxo8fj2eeeQbjx49HUFAQKisrYTabkZeXJ3aqV02j0UClUiE7OxsWi8Vhn8ViQXZ2NsLDwyUzYk1E1BxEHQnKyMgAAAwZMsRhe2ZmJqZOndr8CZHsWKeP3HfffZg4cWKNxgiVlZVYvHixZKaZnD171q1x1HJUV1c73Pb394eXlxcsFgsqKipqjfNU1mvYevTogZKSEoeRIJVKhR49euDQoUOSuNZNqVQiISEBer0eOp0OWq3WoR14Tk4OkpOT2RSBiKgBRJ8ORySmK6eZXHlRsdSmmQQGBro1jloOhULhcNu+8KkrzlNZu76NHTsWI0aMqPEBxtq1a7Fw4UJJdIcDgLi4OCQnJyM9PR2JiYm27eHh4UhOTuY6QUREDdQiGiMQicV+mol961lAmtNMNBoNDh486FIceRa5Xe9l7fr2888/Y+TIkQ4fYFgsFlunUSl0h7OKi4tDbGxsjYKPI0BERA3HIohkTW7TTGobHWhsHLUcQUFBuHDhgktxUmDt+rZt2za8+OKL6NSpEyorK+Hn54eTJ09i27ZtDnFSoVQq2QabiMgNWASR7MlpmsnRo0fdGkctR6tWrdwa19JZR3ErKyuxZcuWGvtDQ0Ph7+/PUU0iInKKRRAR5DPNxNWLxKVwMblCoXDpukOpXCPz119/uTWupVMqlejWrRt+/fVXeHt7Y/DgwejVqxcOHDiAzZs34/z584iNjZXc7zAREbkHiyAiGamqqrJ9r1QqYTabnd62j/NUrjZekUqDFle7vkmlO1xVVRW2bNmCVq1aoVWrVti4cSM2btwIAOjYsSMuXLiALVu2oKqqCr6+viJnS0RELQ2LICIABoMB6enpKCgosG1TqVRISEiQ1HQ4+6LH/vv69lHLFxQU5FInNKlcE7R69WqYzWbEx8dj2LBhWL16NU6dOoWIiAiMHTsW69atw8KFC7F69WqMGzdO7HSJiKiFYRFEsmcwGKDX6xETE4OkpCSHxgh6vV5S1wUFBgaitLTUpTjyLNdffz1+++03l+Kk4NSpUwAuT2ecPHmywwcYX331FR5++GGHOCIiInvS6JVK1Ehmsxnp6emIiYlBSkoKoqKiEBgYiKioKKSkpCAmJgYZGRmSGRnp3r27W+Oo5Th+/Lhb41q6iIgIAMAbb7xRYzHjwsJCLFiwwCGOiIjIHosgkjWj0YiCggJotdoa66d4eXlBq9UiPz8fRqNRpAzd68CBA26No5ZDbo0RRo8ebfveYrE47LO/bR9HRERkxSKIZM36CXJkZKTT/dbtV37S7KnOnz/v1jgisezbt8/2vUKhwKRJk7Bs2TJMmjTJoeOffRwREZEViyCStbCwMACAyWRyut+63Rrn6a4c7braOCKx7Nq1CwDQtm1bWCwWfP7553jkkUfw+eefw2KxoG3btg5xRERE9tgYgWTNuuBidnY2UlJSHE7+LRYLsrOzER4eLpkFF9u2betwAXldceRZWrVqhbKyMpfipODMmTMAgIEDB2LHjh2228Dl9++AAQPw/fffO2ynpnP69GkUFxc3yWP/+eefDv82heDgYHTs2LHJHp+IWh4WQSRrSqUSCQkJ0Ov1mDNnDm6++Wb4+fmhsrIS27Ztw5YtW5CcnCyZBRddbfAglUYQctKmTRuXiqA2bdo0QzZNr0OHDgCAtWvXIiYmBnq93tbZMSsrC99//71DHDWd06dP4+FHJqO6qrJJnyc1NbXJHtvH1w9Zyz5lIUQkIyyCSPbi4uIwYcIErFy5Ejk5ObbtSqUSEyZMkEx7bAAoLy93axw1n4qKijo7u3l7u/bn3NvbGwcPHqx1f5cuXeDv79/g/Jpbv379kJ2d7V
IcNa3i4mJUV1XionowLP7BYqfTYF4VxcDRzSguLmYRRCQjLIJI9gwGA1asWIFbbrmlxkjQihUrcP3110umEGIR5LmOHz+OGTNmXPXjnDhxos7HWbRoEXr27HnVz9PU7Keu7tq1y+EDDD8/P6dx1LQs/sGwtGondhpERC5hEUSyZr9OUHJyMvbu3YvCwkKEh4dj9OjR0Ov1yMjIQGxsrCSmxAmC4NY4aj5dunTBokWLat1fVVWFJ598st7Hef/99+Hr61vn83iCoqIi2/dXvl/tb9vHERERWbEIIlmzrhM0ZswYPPLIIw5NA1QqFcaMGYPffvsNRqMR0dHRImZKcufv71/vCE1sbCx+/fXXOvf36dPH3amJwtqx8fHHH8eaNWscfnfbtm2L0aNHY/HixZLp7EhERO7FIohkzbr+z5IlSxATE4OkpCTbxdXZ2dlYsmSJQ5ynUyqVLjU9kMKolxylpqZizpw5Tguh2NjYJr2wvLlZOzvm5eVh2bJltlHcsLAw9OnTB3q9XlKdHYmIyL1YBJGshYSEAAD69OmDuXPnYs2aNdiwYQMiIiIwd+5c/Oc//8GePXtscZ4uKCjIpelBQUFBTZ8MNYnU1FRcvHgR8+fPx6ZNmzBkyBA8//zzCAgIEDu1BqmvEQQA3Hvvvfjwww/x3HPPYcSIEejUqRNOnjyJzMxM7NmzB0888QSOHDlS52N4SiMIIiJyLxZBRADy8/MxYsQIh2sJ0tPTJbdeTlVVlVvjqGUKCAjAQw89hE2bNuGhhx7yuAIIaFgjCKPRCKPRWGP7Bx98UO99PaURBBERuReLIJI166jIuXPnauwTBMG2XSoXV1+8eNGtcURNpb5GEPYsFgt++eUXZGVl4eGHH8att97qclc4T2kEQURE7sUiiGTN1YUjpbLAJLvDkadwpRGEPS8vL2RlZSEuLo4jO0REVC8WQSRrhw8ftn0fEhKCfv36ISAgABcvXkRubq5tBOjw4cO46aabRMqSiIiIiNyJRRDJ2p49e2zfl5WVYdOmTbbbPj4+DnGTJk1qztSIiIiIqIlwKW2SNftrgaqrqx322d92ds0QEREREXkmjgSRrNl3f2vTpg3uvvtuRERE4NSpU/jhhx9QUlJSI86TeXl5wWKxuBRHRERXz5V271eLrd6JGo5FUAOZzWYYjUbbonwajYYLS3ow++JGoVAA+KcpgPX2lXGezJUCqCFxRERUt4a0e28stnonajgWQQ1gMBiQnp6OgoIC2zaVSoWEhATExcWJmBk1ln0r6OLiYnzxxRf1xhEREbmqIe3e//zzT6SmpmLOnDno2rVrg56DiBqGRZCLDAYD9Ho9YmJikJSUhMjISJhMJmRnZ0Ov1yM5OZmFkAeqrKx0axw1H3dOMTl48GCt+zjNhIiuRkPbvQNA165dObJD1MRYBLnAbDYjPT0dMTExSElJsV0vERUVhZSUFOh0OmRkZCA2NlZyU+OkPv0vKioKv/zyi0tx1LK4c4pJXY/DaSZERETSwyLIBUajEQUFBUhKSqpxwbiXlxe0Wi0SExNhNBoRHR0tUpbuJ4fpf2q12q1x1Hzqm2Ly+eef46effqr3cW6//fY6259zmgkREZH0sAhyQWFhIQAgMjLS6X7rdmucFMhl+t/vv//uctzAgQObOBtqiPqmmMyePdulImj27Nnw9fV1Z2pERETUwrEPrgvCwsIAACaTyel+63ZrnKe7cvpfVFQUAgMDbdP/YmJikJGRAbPZLHaqV23//v1ujaOWw9fXFxMnTqwzZuLEiSyAiIiIZIhFkAs0Gg1UKhWys7NrtA62WCzIzs5GeHg4NBqNSBm6l3X6n1arrXX6X35+PoxGo0gZuo+14YFSqXR6rNbrn9gYwTP961//qrUQmjhxIv71r381c0ZERETUEnA6nAuUSiUSEhKg1+uh0+mg1Wodpofl5OQgOTlZMg0D5DT9z8/PDwCcjmrZF7zWOPI8//rXvzBt2jQsWbIEX3zxBc
aPH4/HHnuMI0DUYjTHYpoAOx0SEdljEeSiuLg4JCcnIz09HYmJibbt4eHhkrk+xsp++p+zrmhSmv7XvXt37Nq1y6U48ly+vr4YOnQovvjiCwwdOpQFELUozbGYJsBOh0RE9lgENUBcXBxiY2Ml3TIacJz+Z98SHJDe9D9XF0HlYqlE1FQaspgmwAU1W4LTp0+juLjY7Y/7559/OvzbFIKDg9GxY8cme3wiT8EiqIGUSqWk2mA7I6fpf65O6ZPC1D8iapkas5gmwAU1xXL69Gk8/MhkVFc13bWiqampTfbYPr5+yFr2KQshkj0WQQ0k9cVDreQy/S8gIMCtcUREJG3FxcWorqrERfVgWPyDxU6nQbwqioGjm1FcXMwiiGSPRVADyGHxUHtymP7XvXt3bNiwwaU4IiIiK4t/MCyt2omdRovCJh/kSVgEuUgui4deSerT/0JDQ90aR0REJFds8kGehEWQC65cPNTaKMC6eKhOp0NGRgZiY2MlNUoiB0VFRW6NIyIikis2+SBPwiLIBdbFQ5OSkmpdPDQxMRFGo1HSoyZSxMYIRERE7sEmH+RJvOoPITktHio3O3bssH1/5Sie/W37OCIiIiLybKIWQQaDAWPGjEFERAQUCgW++eYbMdOplf3ioc5IafFQuTl79qzte7PZ7LDP/rZ9HBERERF5NlGLoLKyMtxwww1IS0sTM4162S8earFYHPZJbfFQufH19XVrHBERERG1fKIWQSNGjEBKSgruu+8+MdOol3Xx0JycHOh0OuTl5aG8vBx5eXnQ6XTIyclBfHw8myJ4oNqmODY2joiIiIhaPo9qjFBZWYnKyn9WaC4pKWm255bL4qFyc+7cObfGEREREVHL51FF0Lx585CcnCza88th8VC5YRFEREREJD8eVQTNnj0bs2bNst0uKSlB586dmzUHqS8eKjdXtjy/2jixuXO17oMHD9a6j6t1ExERkSfzqCLIz88Pfn5+YqdBEiK1Isidq3XX9ThcrZuobqdPn0ZxcXGTPPaff/7p8G9TCA4ORseOHZvs8YmIxOZRRRCRu1VVVbk1Tmz1rdadmJiI6urqeh/Hx8enzq6NXK2bqHanT5/Gw49MRnVVZf3BVyE1NbXJHtvH1w9Zyz5lIUREkiVqEXThwgUcPnzYdttkMiE3NxdhYWE8yaJmUVFR4dY4sdW3WveyZcswceLEeh9n2bJlUKlU7kyNnJDbaIEnH29DjrW4uBjVVZW4qB4Mi39wk+TTlLwqioGjm1FcXMwiiDz69xbgqCbVTtQiaMeOHbj99tttt63X+0yZMgVLly4VKSuSkysXSL3auJZOpVLB39+/zqLO39+fBVAzkNtogacfb2NGRiz+wbC0atck+RA1B0//vQU4qkm1E7UIGjJkCARBEDMFItn54YcfcPfddzsthPz9/fHDDz+IkJX8yG20wJOPlyMjJFee/HsL8HeX6sZrgohk6IcffkBBQQFmzJiBkpIStGnTBosWLeIIkAjkNlogt+MlkgL+3pIUeUbLKyJyO5VKhQULFgAAFixYwAKIiIiIZIMjQSRpXDeHiIiagtfFIrFTaDBPzJmoqbAIIknjujlERNQUAkwGsVMgoqvAIojqZDabYTQaUVhYiLCwMGg0GiiVSrHTcll96+Zs2bIFH3/8cb2PM23aNNxyyy11Pg8REcnHxcg4WAJCxE6jQbwuFrF4I/r/WARRrQwGA9LT01FQUGDbplKpkJCQgLi4OBEzc1196+Z069bNpSJIq9V6VPHnyZpqTQquR0FE7mQJCGGzACIPxiKInDIYDNDr9fD19XXYfv78eej1eiQnJ3tMIVQXpVKJl19+GXPnzq015uWXX2YB1EyaY00KrkdBRERELIKoBrPZjDfffBOCIOCGG26Av78/SktLERQUhIqKCmzbtg1vvfUWYmNjJVEcxMXF4eWXX8a7776Lc+fO2ba3a9cOTz/9tCSKPU/hyWtScD0KupKnXoTuqXkTETUEiy
CqITc3F0VFRQgKCsK2bdtq7A8KCsL58+eRm5uLAQMGiJCh+8XFxSE2NhZr167FwoUL8dxzz2HkyJGSKPI8EdekICmQ27UXnlo8eWreRHR1WARRDbm5uQCA0tJS+Pj4YNy4cRg5ciTWrl2LlStXorS01BYnlSIIuDw1rlevXgCAXr16sQAioqviiRfOA42/eF5uRR8ReTYWQVRDdXU1gMtFwf/+9z/bdUEzZszA1KlTMWLECJjNZlscERHVJLcL5+VW9MmJp46WeWre1DxYBFENJ06cAACEhYXB29vxLeLt7Y3Q0FCcO3fOFkdE5CpPPCnxxJzFILeiT05YJJIUsQiSoYqKChw/frzW/YWFhQCAs2fP4plnnsGIESPQqVMnnDx5Et9//72teUBhYSEOHjzo9DG6dOkCf39/9ydPRB6NJ1MkFV4V7m/l39QamnNwcDC8fXxxqbqqiTJqet4+vggO9qxGO83l8OHDMJlMLsWWl5fjyJEjTZzR5aVLAgMDXYqNjIxE9+7dG/1cLIJk6Pjx45gxY4ZLsUajEUaj0em+P/74o9bHWbRoUZ3r8xDRZZ46ytDYvD1xyhSnS5G94OBg+Pj6AUc3i51Ko/j4+rlcFHTs2BHZWctcXrutsrLSYW3BpqJSqeDn5+dSLNdvq917772H33//Xew0Gu2GG27AO++80+j7swiSoS5dumDRokW17q+qqsKTTz4JLy8vBAUFOfzxCwkJQUlJCSwWC95///0a6wjZPwcR1U9uJ9ecMkWermPHjsha9mmTLeqcmpqKOXPmoGvXrm5/fKDhRUHHjh0bFN+3b9/GpEUieOqppzx+JOhqsAiSIX9//3pHaWJjY/Hrr7+ivLwcN910E7Zv346bbroJubm5sFgsiI2NRZ8+fZopYyLp8sSREYCjIyRvDS0MGqpr166cTUFNrnv37lc1nczTsQgip6yfRP3666/Yvn07ANj+jY2NRWpqqpjpycrp06eb5BNH4PKnjvb/NgVORagbR0aIiIiaH4sgqlVqaiouXryI+fPnY9OmTRgyZAief/55BAQEiJ2abJw+fRoPPzIZ1VWVTfo8TVnU+vj6IWvZpw0qhDzxOhlPzJmalideOA94bt5ERA3BIojqFBAQgIceegibNm3CQw89xAKomRUXF6O6qhIX1YNh8fe87jZeFcXA0c0oLi5uUBHEaVbkyTz9wnmgYRfPE0mJ3DumyQmLICIPYPEPltWUKU+8TobXyJBVU144D7TMi+eJpELuHdPkhEUQeRxeIyN9vE6GPF1TXzgPtLyL5z11Gp2n5k1NQ+4d0+SERRB5FLleI0NE1FJx+h9Jidw7pskJiyDyKHK9RoaIqKXi9L+6VVRU4Pjx4y7FNnY2QpcuXeDv79/g3IjkjEUQeSS5XSND0uWpU3Eam7cnHq8n5tzc5Dj9z1XHjx/HjBkzGnSfhs5GWLRokUf+bIjExCKIyAN4avtlT827OchtCpGnHy+nS1FjdenSBYsWLWry5yCihmERJBGe3CyAjQLqx65j0iO3KUSefrz8O0WN5e/vz1EaohaIRRAaNl/3ajTVnF1PbxYgl8U0gcbn7Ykto4HGt432xOlHjclZblOI5Ha8RETUckm2CGrIyIj1E8Sm1pBPKBvyqaMnNwvgYpqukUvLaE6ZIiIiouYgySKouUZGGqohhVZjRkfk1CxAbiMjctGUU6Za2vQwIrlq6OwLdkwjoqYgySLIk0dGALZRdoVcRkbkqKmnTHG6FJG4GtMtDWDHNCJyL0kWQVZyGhkBPPM6GU/MmYiIGq85uqVZn4eIqDaSLoI89QS7sXlzmpV0eWKjAMBz8yZqTnKbHsZuaUTUEki6CJJbUeCJ18nIqXsY0PC8Pb1RAMBmAUT14fQwIqLmJ+kiyBOLAqDxhYEcrpORW1Hg6WurAGwWQFQfTg8jImp+ki6CoFCInUHjeGrezUCORQHXViGSNk4PIyJqfpIsguQ2WmDliVPEuMAkERERET
U3SRZBchst8PSij9eMEBEREVFzkmQRBMhrtMDTiz5eM0JEV5JbxzQiImpeki2C5EZORR8RSR87phERUVNiEQR+4khE1NKwYxoRETUlFkHgJ45E1PLJ7cMadkwjIqKmxCII/MSRyBPJrSjghzVERETuwyII/MSRpENOhYHcigJ+WENEROQ+LIJkqDlOlFvCSTIgr6IAkFdhILeigB/WEBERuY9CEARB7CTS0tLwxhtvoKCgADfccAPee+893HzzzfXer6SkBMHBwSguLkabNm2aIVNpOHjwYKNOlBuiJZwkA81zrEDLOd6GFn2N1VKKPiIiIiKrhtQGohdBK1aswOTJk/HBBx9g4MCBePvtt7Fy5UocOHAAHTp0qPO+LIIapzlOlFvKSTKLAiIiIiJ58KgiaODAgbjpppvw/vvvAwAsFgs6d+6Mp556Ci+88EKd92URREREREREQMNqA69mysmpqqoq7Ny5E0OHDrVt8/LywtChQ5GTk1MjvrKyEiUlJQ5fREREREREDSFqEXTu3DmYzWZ07NjRYXvHjh1RUFBQI37evHkIDg62fXXu3Lm5UiUiIiIiIokQtQhqqNmzZ6O4uNj2deLECbFTIiIiIiIiDyNqi+x27dpBqVTi9OnTDttPnz4NlUpVI97Pzw9+fn7NlR4REREREUmQqCNBvr6+GDBgADZu3GjbZrFYsHHjRsTExIiYGRERERERSZXoi6XOmjULU6ZMwY033oibb74Zb7/9NsrKyvDoo4+KnRoREREREUmQ6EXQhAkTcPbsWcydOxcFBQXo168ffvjhhxrNEoiIiIiIiNxB9HWCrgbXCSIiIiIiIsCD1gkiIiIiIiJqbiyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCveYidwNQRBAHB5dVgiIiIiIpIva01grRHq4tFFUGlpKQCgc+fOImdCREREREQtQWlpKYKDg+uMUQiulEotlMViwalTpxAUFASFQtFsz1tSUoLOnTvjxIkTaNOmTbM9r1jkdLxyOlZAXscrp2MFeLxSJqdjBeR1vHI6VoDHK2ViHasgCCgtLUVERAS8vOq+6sejR4K8vLxwzTXXiPb8bdq0kfyb2J6cjldOxwrI63jldKwAj1fK5HSsgLyOV07HCvB4pUyMY61vBMiKjRGIiIiIiEhWWAQREREREZGssAhqBD8/P+j1evj5+YmdSrOQ0/HK6VgBeR2vnI4V4PFKmZyOFZDX8crpWAEer5R5wrF6dGMEIiIiIiKihuJIEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBDVQWloarr32Wvj7+2PgwIHYtm2b2Ck1GYPBgDFjxiAiIgIKhQLffPON2Ck1mXnz5uGmm25CUFAQOnTogHvvvRcHDhwQO60mkZGRAY1GY1vALCYmBt9//73YaTWb1157DQqFAs8884zYqTSJl156CQqFwuGrd+/eYqfVZE6ePImHH34Ybdu2RUBAAPr27YsdO3aInVaTuPbaa2u8tgqFAomJiWKn5nZmsxlJSUmIjIxEQEAAunXrhldeeQVS7uVUWlqKZ555Bl27dkVAQAAGDRqE7du3i52WW9R3PiEIAubOnYvw8HAEBARg6NChOHTokDjJXqX6jvXrr7/GsGHD0LZtWygUCuTm5oqSp7vUd07xxBNPoFu3bggICED79u0xduxY7N+/X8SM/8EiqAFWrFiBWbNmQa/XY9euXbjhhhswfPhwnDlzRuzUmkRZWRluuOEGpKWliZ1Kk9u8eTMSExOxZcsWrF+/HtXV1Rg2bBjKysrETs3trrnmGrz22mvYuXMnduzYgTvuuANjx45FXl6e2Kk1ue3bt+PDDz+ERqMRO5UmFRUVhfz8fNvXL7/8InZKTeL8+fOIjY2Fj48Pvv/+e+zbtw8LFy5EaGio2Kk1ie
3btzu8ruvXrwcAjBs3TuTM3G/+/PnIyMjA+++/jz/++APz58/H66+/jvfee0/s1JrMY489hvXr12PZsmXYs2cPhg0bhqFDh+LkyZNip3bV6jufeP311/Huu+/igw8+wNatW9GqVSsMHz4cFRUVzZzp1avvWMvKynDrrbdi/vz5zZxZ06jvnGLAgAHIzMzEH3/8gXXr1kEQBAwbNgxms1nkzAEI5LKbb75ZSExMtN02m81CRESEMG/ePBGzah4AhFWrVomdRrM5c+aMAEDYvHmz2Kk0i9DQUGHJkiVip9GkSktLhR49egjr168XBg8eLMycOVPslJqEXq8XbrjhBrHTaBbPP/+8cOutt4qdhmhmzpwpdOvWTbBYLGKn4najRo0Spk2b5rDt/vvvF7RarUgZNa3y8nJBqVQK3333ncP2/v37C3PmzBEpq6Zx5fmExWIRVCqV8MYbb9i2FRUVCX5+fsLnn38uQobuU9e5k8lkEgAIu3fvbtacmkNd5xS///67AEA4fPhwM2dVE0eCXFRVVYWdO3di6NChtm1eXl4YOnQocnJyRMyMmkJxcTEAICwsTORMmpbZbMby5ctRVlaGmJgYsdNpUomJiRg1apTD77BUHTp0CBEREVCr1dBqtTh+/LjYKTWJb7/9FjfeeCPGjRuHDh06IDo6GosXLxY7rWZRVVWFrKwsTJs2DQqFQux03G7QoEHYuHEjDh48CAD4/fff8csvv2DEiBEiZ9Y0Ll26BLPZDH9/f4ftAQEBkh3JtTKZTCgoKHD42xwcHIyBAwfy/MrD1HdOUVZWhszMTERGRqJz584iZOjIW+wEPMW5c+dgNpvRsWNHh+0dO3ZsMXMbyT0sFgueeeYZxMbGok+fPmKn0yT27NmDmJgYVFRUoHXr1li1ahWuv/56sdNqMsuXL8euXbskM7++LgMHDsTSpUvRq1cv5OfnIzk5Gbfddhv27t2LoKAgsdNzq6NHjyIjIwOzZs3Ciy++iO3bt+Ppp5+Gr68vpkyZInZ6Teqbb75BUVERpk6dKnYqTeKFF15ASUkJevfuDaVSCbPZjNTUVGi1WrFTaxJBQUGIiYnBK6+8guuuuw4dO3bE559/jpycHHTv3l3s9JpUQUEBADg9v7Luo5atvnOK9PR0/Pe//0VZWRl69eqF9evXw9fXV8SML+NIENEVEhMTsXfvXixfvlzsVJpMr169kJubi61btyI+Ph5TpkzBvn37xE6rSZw4cQIzZ85EdnZ2jU9ZpWjEiBEYN24cNBoNhg8fjrVr16KoqAhffPGF2Km5ncViQf/+/fHqq68iOjoaM2bMwOOPP44PPvhA7NSa3EcffYQRI0YgIiJC7FSaxBdffIHs7Gx89tln2LVrFz755BMsWLAAn3zyidipNZlly5ZBEAR06tQJfn5+ePfddzFp0iR4efFUjVq2+s4ptFotdu/ejc2bN6Nnz54YP358i7jei79ZLmrXrh2USiVOnz7tsP306dNQqVQiZUXu9uSTT+K7777DTz/9hGuuuUbsdJqMr68vunfvjgEDBmDevHm44YYb8M4774idVpPYuXMnzpw5g/79+8Pb2xve3t7YvHkz3n33XXh7e7eMizObUEhICHr27InDhw+LnYrbhYeH1xjBvO666yQ7/c/qzz//xIYNG/DYY4+JnUqT+c9//oMXXngBEydORN++ffHII4/g2Wefxbx588ROrcl069YNmzdvxoULF3DixAls27YN1dXVUKvVYqfWpKznUDy/8lz1nVMEBwejR48eiIuLw5dffon9+/dj1apVImZ8GYsgF/n6+mLAgAHYuHGjbZvFYsHGjRslfy2FHAiCgCeffBKrVq3C//3f/yEyMlLslJqVxWJBZWWl2Gk0iTvvvBN79uxBbm6u7evGG2+EVqtFbm4ulEql2Ck2qQsXLuDIkSMIDw8XOxW3i42NrdHK/uDBg+jatatIGTWPzMxMdOjQAaNGjRI7lSZTXl5eYwREqVTCYr
GIlFHzadWqFcLDw3H+/HmsW7cOY8eOFTulJhUZGQmVSuVwflVSUoKtW7fy/MpD1XVOIQgCBEFoEeccvCaoAWbNmoUpU6bgxhtvxM0334y3334bZWVlePTRR8VOrUlcuHDB4dNjk8mE3NxchIWFoUuXLiJm5n6JiYn47LPPsHr1agQFBdnmIQcHByMgIEDk7Nxr9uzZGDFiBLp06YLS0lJ89tln2LRpE9atWyd2ak0iKCioxrVdrVq1Qtu2bSV5zde///1vjBkzBl27dsWpU6eg1+uhVCoxadIksVNzu2effRaDBg3Cq6++ivHjx2Pbtm1YtGgRFi1aJHZqTcZisSAzMxNTpkyBt7d0/wsfM2YMUlNT0aVLF0RFRWH37t148803MW3aNLFTazLW9sG9evXC4cOH8Z///Ae9e/eWxDlGfecTzzzzDFJSUtCjRw9ERkYiKSkJERERuPfee8VLupHqO9bCwkIcP34cp06dAgDbBzkqlcojR77qOqc4evQoVqxYgWHDhqF9+/b466+/8NprryEgIAAjR44UO3W2yG6o9957T+jSpYvg6+sr3HzzzcKWLVvETqnJ/PTTTwKAGl9TpkwROzW3c3acAITMzEyxU3O7adOmCV27dhV8fX2F9u3bC3feeafw448/ip1Ws5Jyi+wJEyYI4eHhgq+vr9CpUydhwoQJLaIVaVNZs2aN0KdPH8HPz0/o3bu3sGjRIrFTalLr1q0TAAgHDhwQO5UmVVJSIsycOVPo0qWL4O/vL6jVamHOnDlCZWWl2Kk1mRUrVghqtVrw9fUVVCqVkJiYKBQVFYmdllvUdz5hsViEpKQkoWPHjoKfn59w5513eux7vL5jzczMdLpfr9eLmndj1XVOcfLkSWHEiBFChw4dBB8fH+Gaa64RHnroIWH//v0iZ32ZQhAkvPwyERERERHRFXhNEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCIiIiIiIllhEURERERERLLCIoiIiIiIiGSFRRAREREREckKiyAiIg8zdepU3HvvvQ7bzp49iz59+mDgwIEoLi4WJzEiIiIPwSKIiMjDnT17FnfccQcCAgLw448/Ijg4WOyUiIiIWjQWQUREHuzcuXO488474efnh/Xr1zsUQMePH8fYsWPRunVrtGnTBuPHj8fp06cd7n/s2DEoFIoaX0VFRQCAl156Cf369bPFV1VVoXv37g4xzkamFAoFvvnmG9vtEydOYPz48QgJCUFYWBjGjh2LY8eOOdzn448/RlRUFPz8/BAeHo4nn3wSAHDttdc6zVGhUGDp0qW257N+tWnTBnfddReOHDlie+zz589j8uTJCA0NRWBgIEaMGIFDhw7V+bMtKirCE088gY4dO8Lf3x99+vTBd999BwBYunRprTnl5uYCAMxmM6ZPn47IyEgEBASgV69eeOedd2o8z6ZNm2o8RkhIiEPMkiVLcN1118Hf3x+9e/dGenq6bZ/1NbQ+r9W1116Lt99+u85jJCKSKxZBREQe6u+//8bQoUPh7e2N9evXO5w4WywWjB07FoWFhdi8eTPWr1+Po0ePYsKECQ6PIQgCAGDDhg3Iz8/HV199Vedzvv/++zUKqfpUV1dj+PDhCAoKws8//4xff/0VrVu3xt13342qqioAQEZGBhITEzFjxgzs2bMH3377Lbp37w4A2L59O/Lz85Gfn49rrrkGb7/9tu22/fFkZmYiPz8fBoMBZ86cwYsvvmjbN3XqVOzYsQPffvstcnJyIAgCRo4cierqaqc5WywWjBgxAr/++iuysrKwb98+vPbaa1AqlbaYNm3a2PLIz8/Htm3bajzGNddcg5UrV2Lfvn2YO3cuXnzxRXzxxRcOcdbX4MCBA8jPz69RuGRnZ2Pu3LlITU3FH3/8gVdffRVJSUn45JNPGvQ6EBHRP7zFToCIiBru/PnzGDp0KPbt24cBAwagTZs2Dvs3btyIPX
v2wGQyoXPnzgCATz/9FFFRUdi+fTtuuukmALAVASqVCiqVCmFhYbU+Z2FhIVJSUvD8888jKSnJtj0gIAD5+fm13m/FihWwWCxYsmQJFAoFgMsFS0hICDZt2oRhw4YhJSUFzz33HGbOnGm7nzXH9u3b27YplUoEBwdDpVLVeJ6QkBCoVCoEBAQgKCjINip26NAhfPvtt/j1118xaNAgAJcLi86dO+Obb77BuHHjajzWhg0bsG3bNvzxxx/o2bMnAECtVjvEKBQKhzwqKioc9vv4+CA5Odl2OzIyEjk5Ofjiiy8wfvx423bra9CpUye0atWqxnRGvV6PhQsX4v7777c9zr59+/Dhhx9iypQpNXInIqL6cSSIiMgDGQwGWCwW5Obm4vDhw3j99dcd9v/xxx/o3LmzrQACgOuvvx4hISH4448/bNtKSkoAAK1atar3OV9++WXcfvvtuPXWWx229+nTB1u2bIHJZHJ6v99//x2HDx9GUFAQWrdujdatWyMsLAwVFRU4cuQIzpw5g1OnTuHOO+90+fidmTRpElq3bo3Q0FCUlpZi3rx5AC7/LLy9vTFw4EBbbNu2bdGrVy+Hn4W93NxcXHPNNbYCqLHS0tIwYMAAtG/fHq1bt8aiRYtw/Phxh5iSkhJ4eXkhICCgxv3Lyspw5MgRTJ8+3faza926NVJSUhym+wHAoEGDHGKufB4iIvoHR4KIiDyQWq3Gxo0b0a5dO6Snp+Phhx/GqFGjoNFoGvQ4p06dgpeXl9ORFXuHDh3CkiVLkJubi7/++sth37Rp07Bq1Sqo1WqnxdSFCxcwYMAAZGdn19jXvn17eHm55/O4t956C0OHDkVRURHmzJmDqVOnYs2aNY16LGcFSUMtX74c//73v7Fw4ULExMQgKCgIb7zxBrZu3eoQd+rUKXTs2NHpz+HChQsAgMWLFzsUcQAcpuYBl0fcrrvuOtvtIUOGXPUxEBFJFYsgIiIP1LdvX7Rr1w4AMG7cOHz99deYPHkytm3bBl9fX1x33XU4ceIETpw4YRsN2rdvH4qKinD99dfbHmf79u3o3bs3/P3963y+559/Ho899hi6d+9eowgKCAjAhg0bcPr0aZSWlgIAevToYdvfv39/rFixAh06dKgxbc/q2muvxcaNG3H77bc3/Ifx/6lUKtt1RE899RTuueceVFdX47rrrsOlS5ewdetW23S4v//+GwcOHHD4WdjTaDT466+/cPDgwUaPBlmn3yUkJNi2XTl6A1x+DaKjo50+RseOHREREYGjR49Cq9XW+XydO3e2HT8AeHvzv3giotpwOhwRkQSkpaXhzJkztmtQhg4dir59+0Kr1WLXrl3Ytm0bJk+ejMGDB+PGG29EVVUVli1bhjfffBOPPvponY99+PBhbNq0CXPnzq0zrmPHjujevbvDiTgAaLVatGvXDmPHjsXPP/8Mk8mETZs24emnn7YVVC+99BIWLlyId999F4cOHcKuXbvw3nvvNehnUFRUhIKCAhw4cAAfffQR1Go1fHx80KNHD4wdOxaPP/44fvnlF/z+++94+OGH0alTJ4wdO9bpYw0ePBhxcXF44IEHsH79ephMJnz//ff44YcfXM6nR48e2LFjB9atW4eDBw8iKSkJ27dvt+2/cOEC3n77bXz22Wd1vgbJycmYN28e3n33XRw8eBB79uxBZmYm3nzzTdd/OERE5IBFEBGRBISFhWHx4sWYP38+tm7dCoVCgdWrVyM0NBRxcXEYOnQo1Go1VqxYAQDYs2cPXnrpJSQlJWHWrFl1PnZZWRnmzJlTZ9OEugQGBsJgMKBLly64//77cd1112H69OmoqKiwjQxNmTIFb7/9NtLT0xEVFYXRo0fX28L6So8++ijCw8Nx00034fz58/jyyy9t+zIzMzFgwACMHj0aMTExEAQBa9euhY+PT62P99VXX+Gmm27CpEmTcP311+O///0vzGazy/k88cQTuP/++zFhwgQMHDgQf//9t8Oo0P
r167F48WJ8+OGHePDBB2t9nMceewxLlixBZmYm+vbti8GDB2Pp0qWIjIx0ORciInKkEKy9OYmIiIiIiGSAI0FERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrPw/cdMjxzujrDQAAAAASUVORK5CYII=", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "import seaborn as sns\n", + "\n", + "from typing import Tuple\n", + "import pandas as pd\n", + "from pandas import DataFrame\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "\n", + "# Создание целевого признака\n", + "median_price = df['price'].median()\n", + "df['above_median_price'] = np.where(df['price'] > median_price, 1, 0)\n", + "\n", + "# Разделение на признаки и целевую переменную\n", + "X = df.drop(columns=['id', 'date', 'price', 'above_median_price'])\n", + "y = df['above_median_price']\n", + "\n", + "# Примерная категоризация\n", + "df['price_category'] = pd.cut(df['price'], bins=[0, 300000, 700000, np.inf], labels=[0, 1, 2])\n", + "\n", + "# Выбор признаков и целевых переменных\n", + "X = df.drop(columns=['id', 'date', 'price', 'price_category'])\n", + "\n", + "\n", + "def split_stratified_into_train_val_test(\n", + " df_input,\n", + " stratify_colname=\"y\",\n", + " frac_train=0.6,\n", + " frac_val=0.15,\n", + " frac_test=0.25,\n", + " random_state=None,\n", + ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:\n", + " \n", + " if frac_train + frac_val + frac_test != 1.0:\n", + " raise ValueError(\n", + " \"fractions %f, %f, %f do not add up to 1.0\"\n", + " % (frac_train, frac_val, frac_test)\n", + " )\n", + " \n", + " if stratify_colname not in df_input.columns:\n", + " raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n", + " X = df_input # Contains all columns.\n", + " y = df_input[\n", + " [stratify_colname]\n", + " ] # Dataframe of just the column on which to stratify.\n", + " \n", + " # Split original dataframe into train and temp dataframes.\n", + " df_train, df_temp, y_train, y_temp = train_test_split(\n", + " X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n", + " )\n", + "\n", + " if 
frac_val <= 0:\n", + " assert len(df_input) == len(df_train) + len(df_temp)\n", + " return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp\n", + " # Split the temp dataframe into val and test dataframes.\n", + " relative_frac_test = frac_test / (frac_val + frac_test)\n", + "\n", + " df_val, df_test, y_val, y_test = train_test_split(\n", + " df_temp,\n", + " y_temp,\n", + " stratify=y_temp,\n", + " test_size=relative_frac_test,\n", + " random_state=random_state,\n", + " )\n", + "\n", + " assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n", + " return df_train, df_val, df_test, y_train, y_val, y_test\n", + "\n", + "X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n", + " df, stratify_colname=\"above_median_price\", frac_train=0.80, frac_val=0, frac_test=0.20, random_state=42\n", + ")\n", + "\n", + "display(\"X_train\", X_train)\n", + "display(\"y_train\", y_train)\n", + "\n", + "display(\"X_test\", X_test)\n", + "display(\"y_test\", y_test)\n", + "\n", + "\n", + "# Проверка преобразования\n", + "print(df.dtypes)\n", + "\n", + "# Визуализация распределения цен\n", + "plt.figure(figsize=(10, 6))\n", + "sns.histplot(df['price'], bins=50, kde=True)\n", + "plt.title('Распределение цен на недвижимость')\n", + "plt.xlabel('Цена')\n", + "plt.ylabel('Частота')\n", + "plt.show()\n", + "\n", + "# Визуализация зависимости между ценой и количеством спален\n", + "plt.figure(figsize=(10, 6))\n", + "sns.boxplot(x='bedrooms', y='price', data=df)\n", + "plt.title('Зависимость цены от количества спален')\n", + "plt.xlabel('Количество спален')\n", + "plt.ylabel('Цена')\n", + "plt.show()\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Построение конвейеров предобработки \n", + "Создадим пайплайн для числовых и категориальных данных. 
\n", + "\n", + "preprocessing_num -- конвейер для обработки числовых данных: заполнение пропущенных значений и стандартизация\n", + "\n", + "preprocessing_cat -- конвейер для обработки категориальных данных: заполнение пропущенных данных и унитарное кодирование\n", + "\n", + "features_preprocessing -- трансформер для предобработки признаков\n", + "\n", + "features_engineering -- трансформер для конструирования признаков\n", + "\n", + "drop_columns -- трансформер для удаления колонок\n", + "\n", + "pipeline_end -- основной конвейер предобработки данных и конструирования признаков" + ] + }, + { + "cell_type": "code", + "execution_count": 150, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.base import BaseEstimator, TransformerMixin\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.discriminant_analysis import StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "pipeline_end = StandardScaler()\n", + "\n", + "\n", + "# Построение конвейеров предобработки\n", + "\n", + "class HouseFeatures(BaseEstimator, TransformerMixin):\n", + " def __init__(self):\n", + " pass\n", + " def fit(self, X, y=None):\n", + " return self\n", + " def transform(self, X, y=None):\n", + " # Создание новых признаков\n", + " X = X.copy()\n", + " X[\"Living_area_to_Lot_ratio\"] = X[\"sqft_living\"] / X[\"sqft_lot\"]\n", + " return X\n", + " def get_feature_names_out(self, features_in):\n", + " # Добавление имен новых признаков\n", + " new_features = [\"Living_area_to_Lot_ratio\"]\n", + " return np.append(features_in, new_features, axis=0)\n", + "\n", + "\n", + "# Обработка числовых данных. 
Числовой конвейер: заполнение пропущенных значений медианой и стандартизация\n", + "preprocessing_num_class = Pipeline(steps=[\n", + " ('imputer', SimpleImputer(strategy='median')),\n", + " ('scaler', StandardScaler())\n", + "])\n", + "\n", + "preprocessing_cat_class = Pipeline(steps=[\n", + " ('imputer', SimpleImputer(strategy='most_frequent')),\n", + " ('onehot', OneHotEncoder(handle_unknown='ignore'))\n", + "])\n", + "\n", + "columns_to_drop = [\"date\"]\n", + "numeric_columns = [\"sqft_living\", \"sqft_lot\", \"above_median_price\"]\n", + "cat_columns = []\n", + "\n", + "features_preprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"prepocessing_num\", preprocessing_num_class, numeric_columns),\n", + " (\"prepocessing_cat\", preprocessing_cat_class, cat_columns),\n", + " ],\n", + " remainder=\"passthrough\"\n", + ")\n", + "\n", + "drop_columns = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"drop_columns\", \"drop\", columns_to_drop),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "features_postprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " ('preprocessing_cat', preprocessing_cat_class, [\"price_category\"]),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "pipeline_end = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " (\"custom_features\", HouseFeatures()),\n", + " (\"drop_columns\", drop_columns),\n", + " ]\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Демонстрация работы конвейера для предобработки данных при классификации**" + ] + }, + { + "cell_type": "code", + "execution_count": 151, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqft_livingsqft_lotabove_median_priceidpricebedroomsbathroomsfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15price_categoryLiving_area_to_Lot_ratio
20962-1.360742-0.262132-0.9946931278000210110000.021.001.000...0196820079800147.2655-122.244828540205.191063
122840.794390-0.0941211.0053352193300390624000.043.251.000...1130198009805247.6920-122.0992110112501-8.440052
73430.837884-0.2727231.00533542899000051535000.043.252.003...1030190820039812247.6147-122.285213042002-3.072292
14247-0.782270-0.196986-0.994693316000145235000.041.001.500...0194109816847.5054-122.3011280717503.971201
166701.0118600.0243301.005335629400480775000.042.752.000...0199609807547.5895-121.994333012333241.589045
..................................................................
88-0.510432-0.324180-0.9946931332700270215000.022.252.000...0197909805647.5180-122.1941950202501.574534
150311.044481-0.3148131.0053357129303070735000.042.752.014...0196609811847.5188-122.256262024332-3.317784
5234-0.456065-0.1366111.0053352432000130675000.031.751.000...0195609803347.6503-122.1982090954913.338418
199800.5660461.239169-0.994693774100475415000.032.751.500...0200909801447.7185-121.40517406462610.456795
36710.3703234.8368251.0053358847400115590000.032.001.500...0200509801047.3666-121.978318021213710.076563
\n", + "

17290 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " sqft_living sqft_lot above_median_price id price \\\n", + "20962 -1.360742 -0.262132 -0.994693 1278000210 110000.0 \n", + "12284 0.794390 -0.094121 1.005335 2193300390 624000.0 \n", + "7343 0.837884 -0.272723 1.005335 4289900005 1535000.0 \n", + "14247 -0.782270 -0.196986 -0.994693 316000145 235000.0 \n", + "16670 1.011860 0.024330 1.005335 629400480 775000.0 \n", + "... ... ... ... ... ... \n", + "88 -0.510432 -0.324180 -0.994693 1332700270 215000.0 \n", + "15031 1.044481 -0.314813 1.005335 7129303070 735000.0 \n", + "5234 -0.456065 -0.136611 1.005335 2432000130 675000.0 \n", + "19980 0.566046 1.239169 -0.994693 774100475 415000.0 \n", + "3671 0.370323 4.836825 1.005335 8847400115 590000.0 \n", + "\n", + " bedrooms bathrooms floors waterfront view ... sqft_basement \\\n", + "20962 2 1.00 1.0 0 0 ... 0 \n", + "12284 4 3.25 1.0 0 0 ... 1130 \n", + "7343 4 3.25 2.0 0 3 ... 1030 \n", + "14247 4 1.00 1.5 0 0 ... 0 \n", + "16670 4 2.75 2.0 0 0 ... 0 \n", + "... ... ... ... ... ... ... ... \n", + "88 2 2.25 2.0 0 0 ... 0 \n", + "15031 4 2.75 2.0 1 4 ... 0 \n", + "5234 3 1.75 1.0 0 0 ... 0 \n", + "19980 3 2.75 1.5 0 0 ... 0 \n", + "3671 3 2.00 1.5 0 0 ... 0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "20962 1968 2007 98001 47.2655 -122.244 828 \n", + "12284 1980 0 98052 47.6920 -122.099 2110 \n", + "7343 1908 2003 98122 47.6147 -122.285 2130 \n", + "14247 1941 0 98168 47.5054 -122.301 1280 \n", + "16670 1996 0 98075 47.5895 -121.994 3330 \n", + "... ... ... ... ... ... ... 
\n", + "88 1979 0 98056 47.5180 -122.194 1950 \n", + "15031 1966 0 98118 47.5188 -122.256 2620 \n", + "5234 1956 0 98033 47.6503 -122.198 2090 \n", + "19980 2009 0 98014 47.7185 -121.405 1740 \n", + "3671 2005 0 98010 47.3666 -121.978 3180 \n", + "\n", + " sqft_lot15 price_category Living_area_to_Lot_ratio \n", + "20962 5402 0 5.191063 \n", + "12284 11250 1 -8.440052 \n", + "7343 4200 2 -3.072292 \n", + "14247 7175 0 3.971201 \n", + "16670 12333 2 41.589045 \n", + "... ... ... ... \n", + "88 2025 0 1.574534 \n", + "15031 2433 2 -3.317784 \n", + "5234 9549 1 3.338418 \n", + "19980 64626 1 0.456795 \n", + "3671 212137 1 0.076563 \n", + "\n", + "[17290 rows x 23 columns]" + ] + }, + "execution_count": 151, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preprocessing_result = pipeline_end.fit_transform(X_train)\n", + "preprocessed_df = pd.DataFrame(\n", + " preprocessing_result,\n", + " columns=pipeline_end.get_feature_names_out(),\n", + ")\n", + "\n", + "preprocessed_df" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Формирование набора моделей для классификации\n", + "\n", + "logistic -- логистическая регрессия\n", + "\n", + "ridge -- гребневая регрессия\n", + "\n", + "decision_tree -- дерево решений\n", + "\n", + "knn -- k-ближайших соседей\n", + "\n", + "naive_bayes -- наивный Байесовский классификатор\n", + "\n", + "gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n", + "\n", + "random_forest -- метод случайного леса (набор деревьев решений)\n", + "\n", + "mlp -- многослойный персептрон (нейронная сеть)" + ] + }, + { + "cell_type": "code", + "execution_count": 152, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree, svm\n", + "\n", + "class_models = {\n", + " \"logistic\": {\"model\": linear_model.LogisticRegression(max_iter=150)},\n", + " \"ridge\": {\"model\": 
linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n", + " \"ridge\": {\"model\": linear_model.LogisticRegression(max_iter=150, solver='lbfgs', penalty=\"l2\", class_weight=\"balanced\")},\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeClassifier(max_depth=5, min_samples_split=10, random_state=random_state)\n", + " },\n", + "\n", + " \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n", + " \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n", + " \"gradient_boosting\": {\n", + " \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n", + " },\n", + "\n", + " \"random_forest\": {\n", + " \"model\": ensemble.RandomForestClassifier(\n", + " max_depth=5, class_weight=\"balanced\", random_state=random_state\n", + " )\n", + " },\n", + "\n", + " \"mlp\": {\n", + " \"model\": neural_network.MLPClassifier(\n", + " hidden_layer_sizes=(7,),\n", + " max_iter=200,\n", + " early_stopping=True,\n", + " random_state=random_state,\n", + " )\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Обучение моделей на обучающем наборе данных и оценка на тестовом**" + ] + }, + { + "cell_type": "code", + "execution_count": 153, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "Model: ridge\n", + "Model: decision_tree\n", + "Model: knn\n", + "Model: naive_bayes\n", + "Model: gradient_boosting\n", + "Model: random_forest\n", + "Model: mlp\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + " model = class_models[model_name][\"model\"]\n", + "\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n", + "\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " 
y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n", + " y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n", + "\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"probs\"] = y_test_probs\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n", + " y_train, y_train_predict, zero_division=1\n", + " )\n", + " class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n", + " y_test, y_test_predict, zero_division=1\n", + " )\n", + " class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n", + " y_test, y_test_probs\n", + " )\n", + " class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n", + " class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n", + " class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n", + " y_test, y_test_predict\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Сводная таблица оценок качества для использованных моделей классификации\n", + "Матрица неточностей**" + ] + }, + { + "cell_type": "code", + "execution_count": 154, + "metadata": {}, +
"outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "import matplotlib.pyplot as plt\n", + "\n", + "_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)\n", + "for index, key in enumerate(class_models.keys()):\n", + " c_matrix = class_models[key][\"Confusion_matrix\"]\n", + " disp = ConfusionMatrixDisplay(\n", + " confusion_matrix=c_matrix, display_labels=[\"Less\", \"More\"]\n", + " ).plot(ax=ax.flat[index])\n", + " disp.ax_.set_title(key)\n", + "\n", + "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Значение 2173 в желтом квадрате представляет собой количество объектов, относимых к классу \"Less\", которые модель правильно классифицировала. Это свидетельствует о высоком уровне точности в идентификации этого класса. Значение 2150 в жёлтом нижнем правом квадрате указывает на количество правильно классифицированных объектов класса \"More\". 
Хотя это также является положительным результатом, мы можем заметить, что он местами ниже, чем для класса \"Less\", а местами и выше.\n", + "\n", + "Точность, полнота, верность (аккуратность), F-мера" + ] + }, + { + "cell_type": "code", + "execution_count": 155, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
logistic1.0000001.0000000.9997671.0000000.9998841.0000000.9998841.000000
ridge1.0000001.0000000.9996511.0000000.9998261.0000000.9998261.000000
decision_tree1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
naive_bayes1.0000001.0000000.7867190.7939530.8939270.8975250.8806300.885144
knn0.8724860.8274730.8577740.8209300.8669170.8258150.8650680.824189
mlp0.6875000.6153850.0025580.0037210.5033550.5033540.0050980.007397
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 155, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n", + " [\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " \"Accuracy_train\",\n", + " \"Accuracy_test\",\n", + " \"F1_train\",\n", + " \"F1_test\",\n", + " ]\n", + "]\n", + "class_metrics.sort_values(\n", + " by=\"Accuracy_test\", ascending=False\n", + ").style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Действительно, если модели, включая логистическую регрессию (есть исключения), ридж-регрессию (есть исключения), дерево решений, случайный лес и градиентный бустинг, показывают 100% точность на обучающей выборке, это может свидетельствовать о переобучении. Переобучение (overfitting) происходит, когда модель слишком хорошо подстраивается под обучающие данные, включая шум и случайные вариации, и начинает плохо работать на новых данных (например, на тестовой выборке). Однако в таблице выше эти модели показывают 1.0 и на тестовой выборке, что для переобучения нехарактерно; более вероятная причина — утечка целевой переменной: столбцы price, above_median_price и price_category остаются среди входных признаков. 
\n", + "\n", + "ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса" + ] + }, + { + "cell_type": "code", + "execution_count": 156, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
logistic1.0000001.0000001.0000001.0000001.000000
ridge1.0000001.0000001.0000001.0000001.000000
decision_tree1.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.000000
naive_bayes0.8975250.8851440.9995660.7948200.812098
knn0.8258150.8241890.9108230.6516060.651627
mlp0.5033540.0073970.4970710.0014270.012966
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 156, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n", + " [\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " \"ROC_AUC_test\",\n", + " \"Cohen_kappa_test\",\n", + " \"MCC_test\",\n", + " ]\n", + "]\n", + "class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False).style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\n", + " \"ROC_AUC_test\",\n", + " \"MCC_test\",\n", + " \"Cohen_kappa_test\",\n", + " ],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 157, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'logistic'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "best_model = str(class_metrics.sort_values(by=\"MCC_test\", ascending=False).iloc[0].name)\n", + "\n", + "display(best_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Вывод данных с ошибкой предсказания для оценки**" + ] + }, + { + "cell_type": "code", + "execution_count": 158, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Error items count: 0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
idPredicteddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfront...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15above_median_priceprice_category
\n", + "

0 rows × 24 columns

\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [id, Predicted, date, price, bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view, condition, grade, sqft_above, sqft_basement, yr_built, yr_renovated, zipcode, lat, long, sqft_living15, sqft_lot15, above_median_price, price_category]\n", + "Index: []\n", + "\n", + "[0 rows x 24 columns]" + ] + }, + "execution_count": 158, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "preprocessing_result = pipeline_end.transform(X_test)\n", + "preprocessed_df = pd.DataFrame(\n", + " preprocessing_result,\n", + " columns=pipeline_end.get_feature_names_out(),\n", + ")\n", + "\n", + "y_pred = class_models[best_model][\"preds\"]\n", + "\n", + "error_index = y_test[y_test[\"above_median_price\"] != y_pred].index.tolist()\n", + "display(f\"Error items count: {len(error_index)}\")\n", + "\n", + "error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n", + "error_df = X_test.loc[error_index].copy()\n", + "error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n", + "error_df.sort_index()" + ] + }, + { + "cell_type": "code", + "execution_count": 159, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15above_median_priceprice_category
6863112400005020140729T000000461000.041.0126085051.500...0195109817747.7181-122.3711480810011
\n", + "

1 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " id date price bedrooms bathrooms sqft_living \\\n", + "6863 1124000050 20140729T000000 461000.0 4 1.0 1260 \n", + "\n", + " sqft_lot floors waterfront view ... sqft_basement yr_built yr_renovated \\\n", + "6863 8505 1.5 0 0 ... 0 1951 0 \n", + "\n", + " zipcode lat long sqft_living15 sqft_lot15 above_median_price \\\n", + "6863 98177 47.7181 -122.371 1480 8100 1 \n", + "\n", + " price_category \n", + "6863 1 \n", + "\n", + "[1 rows x 23 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
sqft_livingsqft_lotabove_median_priceidpricebedroomsbathroomsfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15price_categoryLiving_area_to_Lot_ratio
6863-0.891006-0.1626891.0053351.124000e+09461000.04.01.01.50.00.0...0.01951.00.098177.047.7181-122.3711480.08100.01.05.476729
\n", + "

1 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " sqft_living sqft_lot above_median_price id price \\\n", + "6863 -0.891006 -0.162689 1.005335 1.124000e+09 461000.0 \n", + "\n", + " bedrooms bathrooms floors waterfront view ... sqft_basement \\\n", + "6863 4.0 1.0 1.5 0.0 0.0 ... 0.0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "6863 1951.0 0.0 98177.0 47.7181 -122.371 1480.0 \n", + "\n", + " sqft_lot15 price_category Living_area_to_Lot_ratio \n", + "6863 8100.0 1.0 5.476729 \n", + "\n", + "[1 rows x 23 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'predicted: 1 (proba: [0. 1.])'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'real: 1'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "model = class_models[best_model][\"pipeline\"]\n", + "\n", + "example_id = 6863\n", + "test = pd.DataFrame(X_test.loc[example_id, :]).T\n", + "test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T\n", + "display(test)\n", + "display(test_preprocessed)\n", + "result_proba = model.predict_proba(test)[0]\n", + "result = model.predict(test)[0]\n", + "real = int(y_test.loc[example_id].values[0])\n", + "display(f\"predicted: {result} (proba: {result_proba})\")\n", + "display(f\"real: {real}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Подбор гиперпараметров методом поиска по сетке**" + ] + }, + { + "cell_type": "code", + "execution_count": 160, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "e:\\MII\\laboratory\\mai\\Lib\\site-packages\\numpy\\ma\\core.py:2881: RuntimeWarning: invalid value encountered in cast\n", + " _data = np.array(data, dtype=dtype, copy=copy,\n" + ] + }, + { + "data": { + "text/plain": [ + "{'model__criterion': 'gini',\n", + " 'model__max_depth': 5,\n", + " 'model__max_features': 'sqrt',\n", + " 
'model__n_estimators': 10}" + ] + }, + "execution_count": 160, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "optimized_model_type = \"random_forest\"\n", + "\n", + "random_forest_model = class_models[optimized_model_type][\"pipeline\"]\n", + "\n", + "param_grid = {\n", + " \"model__n_estimators\": [10, 50, 100],\n", + " \"model__max_features\": [\"sqrt\", \"log2\"],\n", + " \"model__max_depth\": [5, 7, 10],\n", + " \"model__criterion\": [\"gini\", \"entropy\"],\n", + "}\n", + "\n", + "gs_optomizer = GridSearchCV(\n", + " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n", + ")\n", + "gs_optomizer.fit(X_train, y_train.values.ravel())\n", + "gs_optomizer.best_params_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Обучение модели с новыми гиперпараметрами" + ] + }, + { + "cell_type": "code", + "execution_count": 161, + "metadata": {}, + "outputs": [], + "source": [ + "optimized_model = ensemble.RandomForestClassifier(\n", + " random_state=random_state,\n", + " criterion=\"gini\",\n", + " max_depth=5,\n", + " max_features=\"log2\",\n", + " n_estimators=10,\n", + ")\n", + "\n", + "result = {}\n", + "\n", + "result[\"pipeline\"] = Pipeline([(\"pipeline\", pipeline_end), (\"model\", optimized_model)]).fit(X_train, y_train.values.ravel())\n", + "result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n", + "result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n", + "result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n", + "\n", + "result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n", + "result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n", + "result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n", + "result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n", + 
"result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n", + "result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n", + "result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n", + "result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n", + "result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n", + "result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n", + "result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n", + "result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Формирование данных для оценки старой и новой версии модели**" + ] + }, + { + "cell_type": "code", + "execution_count": 162, + "metadata": {}, + "outputs": [], + "source": [ + "optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n", + "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n", + " data=class_models[optimized_model_type]\n", + ")\n", + "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n", + " data=result\n", + ")\n", + "optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n", + "optimized_metrics = optimized_metrics.set_index(\"Name\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Оценка параметров старой и новой модели**" + ] + }, + { + "cell_type": "code", + "execution_count": 163, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + 
" \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
Name        
Old1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
New1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 163, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_metrics[\n", + " [\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " \"Accuracy_train\",\n", + " \"Accuracy_test\",\n", + " \"F1_train\",\n", + " \"F1_test\",\n", + " ]\n", + "].style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Как для обучающей (Precision_train), так и для тестовой (Precision_test) выборки обе модели достигли идеальных значений 1.000000. Это указывает на то, что модели очень точно классифицируют положительные образцы, не пропуская их." + ] + }, + { + "cell_type": "code", + "execution_count": 164, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
Name     
Old1.0000001.0000001.0000001.0000001.000000
New1.0000001.0000001.0000001.0000001.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 164, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_metrics[\n", + " [\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " \"ROC_AUC_test\",\n", + " \"Cohen_kappa_test\",\n", + " \"MCC_test\",\n", + " ]\n", + "].style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\n", + " \"ROC_AUC_test\",\n", + " \"MCC_test\",\n", + " \"Cohen_kappa_test\",\n", + " ],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Оба варианта модели продемонстрировали безупречную точность классификации, достигнув значения 1.000000. Это свидетельствует о том, что модели точно классифицировали все тестовые примеры, не допустив никаких ошибок в предсказаниях." + ] + }, + { + "cell_type": "code", + "execution_count": 165, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False\n", + ")\n", + "\n", + "for index in range(0, len(optimized_metrics)):\n", + " c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n", + " disp = ConfusionMatrixDisplay(\n", + " confusion_matrix=c_matrix, display_labels=[\"Less\", \"More\"]\n", + " ).plot(ax=ax.flat[index])\n", + "\n", + "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "В желтом квадрате мы видим значение 2173, что обозначает количество правильно классифицированных объектов, отнесенных к классу \"Less\". Это свидетельствует о том, что модель успешно идентифицирует объекты этого класса, минимизируя количество ложных положительных срабатываний.\n", + "\n", + "В правом нижнем жёлтом квадрате значение 2150 указывает на количество правильно классифицированных объектов, отнесенных к классу \"More\". Это также является показателем высокой точности модели в определении объектов данного класса." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Задача регресии: предсказание цены дома (price).\n", + "\n", + "Описание: Оценить, какая будет цена дома (price) на основе исторических данных о характеристиках домов, таких как площадь. Целевая переменная: Цена дома (price). 
(среднее значение)" + ] + }, + { + "cell_type": "code", + "execution_count": 166, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Среднее значение поля: 2079.8997362698374\n", + " id date price bedrooms bathrooms sqft_living \\\n", + "0 7129300520 20141013T000000 221900.0 3 1.00 1180 \n", + "1 6414100192 20141209T000000 538000.0 3 2.25 2570 \n", + "2 5631500400 20150225T000000 180000.0 2 1.00 770 \n", + "3 2487200875 20141209T000000 604000.0 4 3.00 1960 \n", + "4 1954400510 20150218T000000 510000.0 3 2.00 1680 \n", + "\n", + " sqft_lot floors waterfront view ... yr_built yr_renovated zipcode \\\n", + "0 5650 1.0 0 0 ... 1955 0 98178 \n", + "1 7242 2.0 0 0 ... 1951 1991 98125 \n", + "2 10000 1.0 0 0 ... 1933 0 98028 \n", + "3 5000 1.0 0 0 ... 1965 0 98136 \n", + "4 8080 1.0 0 0 ... 1987 0 98074 \n", + "\n", + " lat long sqft_living15 sqft_lot15 above_median_price \\\n", + "0 47.5112 -122.257 1340 5650 0 \n", + "1 47.7210 -122.319 1690 7639 1 \n", + "2 47.7379 -122.233 2720 8062 0 \n", + "3 47.5208 -122.393 1360 5000 1 \n", + "4 47.6168 -122.045 1800 7503 1 \n", + "\n", + " price_category average_price \n", + "0 0 0 \n", + "1 1 1 \n", + "2 0 0 \n", + "3 1 0 \n", + "4 1 0 \n", + "\n", + "[5 rows x 24 columns]\n", + "Статистическое описание DataFrame:\n", + " id price bedrooms bathrooms sqft_living \\\n", + "count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n", + "mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n", + "std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n", + "min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n", + "25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n", + "50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n", + "75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n", + "max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n", + "\n", + " sqft_lot floors waterfront view condition \\\n", + "count 
2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n", + "mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n", + "std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n", + "min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n", + "25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n", + "50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n", + "75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n", + "max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n", + "\n", + " ... sqft_basement yr_built yr_renovated zipcode \\\n", + "count ... 21613.000000 21613.000000 21613.000000 21613.000000 \n", + "mean ... 291.509045 1971.005136 84.402258 98077.939805 \n", + "std ... 442.575043 29.373411 401.679240 53.505026 \n", + "min ... 0.000000 1900.000000 0.000000 98001.000000 \n", + "25% ... 0.000000 1951.000000 0.000000 98033.000000 \n", + "50% ... 0.000000 1975.000000 0.000000 98065.000000 \n", + "75% ... 560.000000 1997.000000 0.000000 98118.000000 \n", + "max ... 
4820.000000 2015.000000 2015.000000 98199.000000 \n", + "\n", + " lat long sqft_living15 sqft_lot15 \\\n", + "count 21613.000000 21613.000000 21613.000000 21613.000000 \n", + "mean 47.560053 -122.213896 1986.552492 12768.455652 \n", + "std 0.138564 0.140828 685.391304 27304.179631 \n", + "min 47.155900 -122.519000 399.000000 651.000000 \n", + "25% 47.471000 -122.328000 1490.000000 5100.000000 \n", + "50% 47.571800 -122.230000 1840.000000 7620.000000 \n", + "75% 47.678000 -122.125000 2360.000000 10083.000000 \n", + "max 47.777600 -121.315000 6210.000000 871200.000000 \n", + "\n", + " above_median_price average_price \n", + "count 21613.000000 21613.00000 \n", + "mean 0.497340 0.42752 \n", + "std 0.500004 0.49473 \n", + "min 0.000000 0.00000 \n", + "25% 0.000000 0.00000 \n", + "50% 0.000000 0.00000 \n", + "75% 1.000000 1.00000 \n", + "max 1.000000 1.00000 \n", + "\n", + "[8 rows x 22 columns]\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn import set_config\n", + "\n", + "set_config(transform_output=\"pandas\")\n", + "\n", + "# Опция для настройки генерации случайных чисел (если это нужно для других частей кода)\n", + "random_state = 42\n", + "\n", + "# Вычисление среднего значения поля \"sqft_living\"\n", + "average_price = df['sqft_living'].mean()\n", + "print(f\"Среднее значение поля: {average_price}\")\n", + "\n", + "# Создание новой колонки, указывающей, выше ли жилая площадь (sqft_living) среднего значения\n", + "df['average_price'] = (df['sqft_living'] > average_price).astype(int)\n", + "\n", + "# Удаление строк с пропущенными значениями\n", + "df.dropna(inplace=True)\n", + "\n", + "# Вывод DataFrame с новой колонкой\n", + "print(df.head())\n", + "\n", + "# Примерный анализ данных\n", + "print(\"Статистическое описание DataFrame:\")\n", + "print(df.describe())" + ] + }, + { + "cell_type": "code", + "execution_count": 167, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'X_train'" + ] + }, + "metadata": {}, 
+ "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15above_median_priceprice_category
6325546791019020140527T000000325000.031.751780130951.000...0198309804247.3670-122.15227501309501
13473933180058020150310T000000257000.021.00100037001.000...200192909811847.5520-122.2901270500000
17614240700040520150226T000000228500.031.00108074861.500...90194209814647.4838-122.3351170780000
16970546670029020150108T000000288000.032.25209075001.000...810197709803147.3951-122.1721800735000
20868302605936120150417T000000479000.022.50174114392.000...295200709803447.7043-122.20920901045411
..................................................................
11964527220004520141113T000000378000.031.50100069141.000...0194709812547.7144-122.3191000694701
21575957850079020141111T000000399950.032.50308750022.000...0201409802347.2974-122.3492927518301
5390720235048020140930T000000575000.032.50212047802.000...0200409805347.6810-122.0321690265011
860172304903320140620T000000245000.010.75380150001.000...0196309816847.4810-122.32311701500000
15795614765028020150325T000000315000.042.50313059992.000...0200609804247.3837-122.0993020599701
\n", + "

17290 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " id date price bedrooms bathrooms \\\n", + "6325 5467910190 20140527T000000 325000.0 3 1.75 \n", + "13473 9331800580 20150310T000000 257000.0 2 1.00 \n", + "17614 2407000405 20150226T000000 228500.0 3 1.00 \n", + "16970 5466700290 20150108T000000 288000.0 3 2.25 \n", + "20868 3026059361 20150417T000000 479000.0 2 2.50 \n", + "... ... ... ... ... ... \n", + "11964 5272200045 20141113T000000 378000.0 3 1.50 \n", + "21575 9578500790 20141111T000000 399950.0 3 2.50 \n", + "5390 7202350480 20140930T000000 575000.0 3 2.50 \n", + "860 1723049033 20140620T000000 245000.0 1 0.75 \n", + "15795 6147650280 20150325T000000 315000.0 4 2.50 \n", + "\n", + " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n", + "6325 1780 13095 1.0 0 0 ... 0 \n", + "13473 1000 3700 1.0 0 0 ... 200 \n", + "17614 1080 7486 1.5 0 0 ... 90 \n", + "16970 2090 7500 1.0 0 0 ... 810 \n", + "20868 1741 1439 2.0 0 0 ... 295 \n", + "... ... ... ... ... ... ... ... \n", + "11964 1000 6914 1.0 0 0 ... 0 \n", + "21575 3087 5002 2.0 0 0 ... 0 \n", + "5390 2120 4780 2.0 0 0 ... 0 \n", + "860 380 15000 1.0 0 0 ... 0 \n", + "15795 3130 5999 2.0 0 0 ... 0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "6325 1983 0 98042 47.3670 -122.152 2750 \n", + "13473 1929 0 98118 47.5520 -122.290 1270 \n", + "17614 1942 0 98146 47.4838 -122.335 1170 \n", + "16970 1977 0 98031 47.3951 -122.172 1800 \n", + "20868 2007 0 98034 47.7043 -122.209 2090 \n", + "... ... ... ... ... ... ... \n", + "11964 1947 0 98125 47.7144 -122.319 1000 \n", + "21575 2014 0 98023 47.2974 -122.349 2927 \n", + "5390 2004 0 98053 47.6810 -122.032 1690 \n", + "860 1963 0 98168 47.4810 -122.323 1170 \n", + "15795 2006 0 98042 47.3837 -122.099 3020 \n", + "\n", + " sqft_lot15 above_median_price price_category \n", + "6325 13095 0 1 \n", + "13473 5000 0 0 \n", + "17614 7800 0 0 \n", + "16970 7350 0 0 \n", + "20868 10454 1 1 \n", + "... ... ... ... 
\n", + "11964 6947 0 1 \n", + "21575 5183 0 1 \n", + "5390 2650 1 1 \n", + "860 15000 0 0 \n", + "15795 5997 0 1 \n", + "\n", + "[17290 rows x 23 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
average_price
63250
134730
176140
169701
208680
......
119640
215751
53901
8600
157951
\n", + "

17290 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " average_price\n", + "6325 0\n", + "13473 0\n", + "17614 0\n", + "16970 1\n", + "20868 0\n", + "... ...\n", + "11964 0\n", + "21575 1\n", + "5390 1\n", + "860 0\n", + "15795 1\n", + "\n", + "[17290 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'X_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " 
\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
iddatepricebedroomsbathroomssqft_livingsqft_lotfloorswaterfrontview...sqft_basementyr_builtyr_renovatedzipcodelatlongsqft_living15sqft_lot15above_median_priceprice_category
735259182031020141006T000000365000.042.25207088932.000...0198609805847.4388-122.1622390770001
2830797420082020140821T000000865000.053.00290067301.000...1070197709811547.6784-122.2852370628312
4106770145011020140815T0000001038000.042.503770108932.002...0199709800647.5646-122.1293710968512
16218952230001020150331T0000001490000.033.504560146082.002...0199009803447.6995-122.22840501422612
19964951086114020140714T000000711000.032.50255053762.000...0200409805247.6647-122.0832250405012
..................................................................
13674616390033320141110T000000338000.031.75125077101.000...0194709815547.7623-122.3171340771001
20377352896002020140708T000000673000.032.75283034962.000...0201209802947.5606-122.0112160350111
8805168700022020141016T000000285000.042.50243444002.000...0200709800147.2874-122.2832434440000
10168414140003020141201T000000605000.041.752250101081.000...0196709800847.5922-122.1182050975011
2522182250016020141212T000000356500.042.502570114732.000...0200809800347.2809-122.2962430599701
\n", + "

4323 rows × 23 columns

\n", + "
" + ], + "text/plain": [ + " id date price bedrooms bathrooms \\\n", + "735 2591820310 20141006T000000 365000.0 4 2.25 \n", + "2830 7974200820 20140821T000000 865000.0 5 3.00 \n", + "4106 7701450110 20140815T000000 1038000.0 4 2.50 \n", + "16218 9522300010 20150331T000000 1490000.0 3 3.50 \n", + "19964 9510861140 20140714T000000 711000.0 3 2.50 \n", + "... ... ... ... ... ... \n", + "13674 6163900333 20141110T000000 338000.0 3 1.75 \n", + "20377 3528960020 20140708T000000 673000.0 3 2.75 \n", + "8805 1687000220 20141016T000000 285000.0 4 2.50 \n", + "10168 4141400030 20141201T000000 605000.0 4 1.75 \n", + "2522 1822500160 20141212T000000 356500.0 4 2.50 \n", + "\n", + " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n", + "735 2070 8893 2.0 0 0 ... 0 \n", + "2830 2900 6730 1.0 0 0 ... 1070 \n", + "4106 3770 10893 2.0 0 2 ... 0 \n", + "16218 4560 14608 2.0 0 2 ... 0 \n", + "19964 2550 5376 2.0 0 0 ... 0 \n", + "... ... ... ... ... ... ... ... \n", + "13674 1250 7710 1.0 0 0 ... 0 \n", + "20377 2830 3496 2.0 0 0 ... 0 \n", + "8805 2434 4400 2.0 0 0 ... 0 \n", + "10168 2250 10108 1.0 0 0 ... 0 \n", + "2522 2570 11473 2.0 0 0 ... 0 \n", + "\n", + " yr_built yr_renovated zipcode lat long sqft_living15 \\\n", + "735 1986 0 98058 47.4388 -122.162 2390 \n", + "2830 1977 0 98115 47.6784 -122.285 2370 \n", + "4106 1997 0 98006 47.5646 -122.129 3710 \n", + "16218 1990 0 98034 47.6995 -122.228 4050 \n", + "19964 2004 0 98052 47.6647 -122.083 2250 \n", + "... ... ... ... ... ... ... \n", + "13674 1947 0 98155 47.7623 -122.317 1340 \n", + "20377 2012 0 98029 47.5606 -122.011 2160 \n", + "8805 2007 0 98001 47.2874 -122.283 2434 \n", + "10168 1967 0 98008 47.5922 -122.118 2050 \n", + "2522 2008 0 98003 47.2809 -122.296 2430 \n", + "\n", + " sqft_lot15 above_median_price price_category \n", + "735 7700 0 1 \n", + "2830 6283 1 2 \n", + "4106 9685 1 2 \n", + "16218 14226 1 2 \n", + "19964 4050 1 2 \n", + "... ... ... ... 
\n", + "13674 7710 0 1 \n", + "20377 3501 1 1 \n", + "8805 4400 0 0 \n", + "10168 9750 1 1 \n", + "2522 5997 0 1 \n", + "\n", + "[4323 rows x 23 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
average_price
7350
28301
41061
162181
199641
......
136740
203771
88051
101681
25221
\n", + "

4323 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " average_price\n", + "735 0\n", + "2830 1\n", + "4106 1\n", + "16218 1\n", + "19964 1\n", + "... ...\n", + "13674 0\n", + "20377 1\n", + "8805 1\n", + "10168 1\n", + "2522 1\n", + "\n", + "[4323 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from typing import Tuple\n", + "from pandas import DataFrame\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "def split_into_train_test(\n", + " df_input: DataFrame,\n", + " target_colname: str = \"average_price\",\n", + " frac_train: float = 0.8,\n", + " random_state: int = None,\n", + ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame]:\n", + " \n", + " if not (0 < frac_train < 1):\n", + " raise ValueError(\"Fraction must be between 0 and 1.\")\n", + " \n", + " # Проверка наличия целевого признака\n", + " if target_colname not in df_input.columns:\n", + " raise ValueError(f\"{target_colname} is not a column in the DataFrame.\")\n", + " \n", + " # Разделяем данные на признаки и целевую переменную\n", + " X = df_input.drop(columns=[target_colname]) # Признаки\n", + " y = df_input[[target_colname]] # Целевая переменная\n", + "\n", + " # Разделяем данные на обучающую и тестовую выборки\n", + " X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y,\n", + " test_size=(1.0 - frac_train),\n", + " random_state=random_state\n", + " )\n", + " \n", + " return X_train, X_test, y_train, y_test\n", + "\n", + "# Применение функции для разделения данных\n", + "X_train, X_test, y_train, y_test = split_into_train_test(\n", + " df, \n", + " target_colname=\"average_price\", \n", + " frac_train=0.8, \n", + " random_state=42 # Убедитесь, что вы задали нужное значение random_state\n", + ")\n", + "\n", + "# Для отображения результатов\n", + "display(\"X_train\", X_train)\n", + "display(\"y_train\", y_train)\n", + "\n", + "display(\"X_test\", X_test)\n", + "display(\"y_test\", y_test)" + ] + }, + { + "cell_type": "markdown", 
+ "metadata": {}, + "source": [ + "# Формирование конвейера для решения задачи регрессии" + ] + }, + { + "cell_type": "code", + "execution_count": 168, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "from sklearn.base import BaseEstimator, TransformerMixin\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.ensemble import RandomForestRegressor # Пример регрессионной модели\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import make_pipeline\n", + "\n", + "class HouseFeatures(BaseEstimator, TransformerMixin):\n", + " def __init__(self):\n", + " pass\n", + " def fit(self, X, y=None):\n", + " return self\n", + " def transform(self, X, y=None):\n", + " # Создание новых признаков\n", + " X = X.copy()\n", + " X[\"Square\"] = X[\"sqft_living\"] / X[\"sqft_lot\"]\n", + " return X\n", + " def get_feature_names_out(self, features_in):\n", + " # Добавление имен новых признаков\n", + " new_features = [\"Square\"]\n", + " return np.append(features_in, new_features, axis=0)\n", + "\n", + "# Указываем столбцы, которые нужно удалить и обрабатывать\n", + "columns_to_drop = [\"date\"]\n", + "num_columns = [\"bathrooms\", \"floors\", \"waterfront\", \"view\"]\n", + "cat_columns = [] \n", + "\n", + "# Определяем предобработку для численных данных\n", + "num_imputer = SimpleImputer(strategy=\"median\")\n", + "num_scaler = StandardScaler()\n", + "preprocessing_num = Pipeline(\n", + " [\n", + " (\"imputer\", num_imputer),\n", + " (\"scaler\", num_scaler),\n", + " ]\n", + ")\n", + "\n", + "# Определяем предобработку для категориальных данных\n", + "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n", + "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", 
sparse_output=False, drop=\"first\")\n", + "preprocessing_cat = Pipeline(\n", + " [\n", + " (\"imputer\", cat_imputer),\n", + " (\"encoder\", cat_encoder),\n", + " ]\n", + ")\n", + "\n", + "# Подготовка признаков с использованием ColumnTransformer\n", + "features_preprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"preprocessing_num\", preprocessing_num, num_columns),\n", + " (\"preprocessing_cat\", preprocessing_cat, cat_columns),\n", + " ],\n", + " remainder=\"passthrough\"\n", + ")\n", + "\n", + "# Удаление нежелательных столбцов\n", + "drop_columns = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"drop_columns\", \"drop\", columns_to_drop),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "# Постобработка признаков\n", + "features_postprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"preprocessing_cat\", preprocessing_cat, [\"price_category\"]), \n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "# Создание окончательного конвейера\n", + "pipeline = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " (\"drop_columns\", drop_columns),\n", + " (\"custom_features\", HouseFeatures()),\n", + " (\"model\", RandomForestRegressor()) # Выбор модели для обучения\n", + " ]\n", + ")\n", + "\n", + "# Использование конвейера\n", + "def train_pipeline(X, y):\n", + " pipeline.fit(X, y)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Формирование набора моделей для регрессии \n", + "Определение перечня алгоритмов решения задачи аппроксимации (регрессии)" + ] + }, + { + "cell_type": "code", + "execution_count": 169, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn import linear_model, tree, 
neighbors, ensemble, neural_network\n", + "\n", + "random_state = 9\n", + "\n", + "models = {\n", + " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", + " \"linear_poly\": {\n", + " \"model\": make_pipeline(\n", + " PolynomialFeatures(degree=2),\n", + " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", + " )\n", + " },\n", + " \"linear_interact\": {\n", + " \"model\": make_pipeline(\n", + " PolynomialFeatures(interaction_only=True),\n", + " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", + " )\n", + " },\n", + " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n", + " },\n", + " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", + " \"random_forest\": {\n", + " \"model\": ensemble.RandomForestRegressor(\n", + " max_depth=7, random_state=random_state, n_jobs=-1\n", + " )\n", + " },\n", + " \"mlp\": {\n", + " \"model\": neural_network.MLPRegressor(\n", + " activation=\"tanh\",\n", + " hidden_layer_sizes=(3,),\n", + " max_iter=500,\n", + " early_stopping=True,\n", + " random_state=random_state,\n", + " )\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Формирование набора моделей для регрессии" + ] + }, + { + "cell_type": "code", + "execution_count": 170, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random Forest: Mean Score = 1.0, Standard Deviation = 0.0\n", + "Linear Regression: Mean Score = 0.6396438910587428, Standard Deviation = 0.006348300027629372\n", + "Gradient Boosting: Mean Score = 0.9999999992943781, Standard Deviation = 6.609300428326041e-14\n", + "Support Vector Regression: Mean Score = -0.4335265257004087, Standard Deviation = 0.012071668862264313\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from 
sklearn.ensemble import GradientBoostingRegressor\n", + "from sklearn.svm import SVR\n", + "from sklearn.model_selection import cross_val_score\n", + "\n", + "def train_multiple_models(X, y, models):\n", + " results = {}\n", + "\n", + " # Преобразуем y в одномерный массив numpy только при необходимости\n", + " if hasattr(y, 'values'):\n", + " y = y.values.ravel() # Если y - DataFrame, преобразуем в numpy array\n", + " else:\n", + " y = y.ravel() # Если y - numpy array, просто используем ravel()\n", + "\n", + " for model_name, model in models.items():\n", + " # Создаем конвейер для каждой модели\n", + " model_pipeline = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " (\"drop_columns\", drop_columns),\n", + " (\"model\", model) # Используем текущую модель\n", + " ]\n", + " )\n", + " \n", + " # Обучаем модель и вычисляем кросс-валидацию\n", + " scores = cross_val_score(model_pipeline, X, y, cv=5, error_score='raise') # 5-кратная кросс-валидация\n", + " results[model_name] = {\n", + " \"mean_score\": scores.mean(),\n", + " \"std_dev\": scores.std()\n", + " }\n", + " \n", + " return results\n", + "\n", + "models = {\n", + " \"Random Forest\": RandomForestRegressor(),\n", + " \"Linear Regression\": LinearRegression(),\n", + " \"Gradient Boosting\": GradientBoostingRegressor(),\n", + " \"Support Vector Regression\": SVR()\n", + "}\n", + "\n", + "results = train_multiple_models(X_train, y_train, models)\n", + "\n", + "# Вывод результатов\n", + "for model_name, scores in results.items():\n", + " print(f\"{model_name}: Mean Score = {scores['mean_score']}, Standard Deviation = {scores['std_dev']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 171, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "MSE (train): 0.24060150375939848\n", + "MSE (test): 0.23455933379597502\n", + "MAE (train): 0.24060150375939848\n", + "MAE (test): 0.23455933379597502\n", + 
"R2 (train): 0.015780807725750634\n", + "R2 (test): 0.045807954005714024\n", + "STD (train): 0.48387852043102103\n", + "STD (test): 0.4780359236045559\n", + "----------------------------------------\n", + "Model: ridge\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "e:\\MII\\laboratory\\mai\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE (train): 0.11596298438403702\n", + "MSE (test): 0.11265325005783021\n", + "MAE (train): 0.11596298438403702\n", + "MAE (test): 0.11265325005783021\n", + "R2 (train): 0.5256347402620505\n", + "R2 (test): 0.541724332939628\n", + "STD (train): 0.3405113334365492\n", + "STD (test): 0.3356321137822519\n", + "----------------------------------------\n", + "Model: decision_tree\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: knn\n", + "MSE (train): 0.1949681897050318\n", + "MSE (test): 0.27989821882951654\n", + "MAE (train): 0.1949681897050318\n", + "MAE (test): 0.27989821882951654\n", + "R2 (train): 0.20245122664507342\n", + "R2 (test): -0.13863153417464114\n", + "STD (train): 0.43948973967967464\n", + "STD (test): 0.5264647910268833\n", + "----------------------------------------\n", + "Model: naive_bayes\n", + "MSE (train): 0.26928860613071137\n", + 
"MSE (test): 0.2690261392551469\n", + "MAE (train): 0.26928860613071137\n", + "MAE (test): 0.2690261392551469\n", + "R2 (train): -0.10156840366079445\n", + "R2 (test): -0.09440369772322943\n", + "STD (train): 0.47316941542228536\n", + "STD (test): 0.47206502931490235\n", + "----------------------------------------\n", + "Model: gradient_boosting\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: random_forest\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: mlp\n", + "MSE (train): 0.4253903990746096\n", + "MSE (test): 0.4353458246588018\n", + "MAE (train): 0.4253903990746096\n", + "MAE (test): 0.4353458246588018\n", + "R2 (train): -0.7401279228791116\n", + "R2 (test): -0.7709954936501442\n", + "STD (train): 0.4959884986820156\n", + "STD (test): 0.49782384226978177\n", + "----------------------------------------\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "# Проверка наличия необходимых переменных\n", + "if 'class_models' not in locals():\n", + " raise ValueError(\"class_models is not defined\")\n", + "if 'X_train' not in locals() or 'X_test' not in locals() or 'y_train' not in locals() or 'y_test' not in locals():\n", + " raise ValueError(\"Train/test data is not defined\")\n", + "\n", + "\n", + "y_train = np.ravel(y_train) \n", + "y_test = np.ravel(y_test) \n", + "\n", + "# Инициализация списка для хранения результатов\n", + "results = []\n", + "\n", + "# Проход по моделям и оценка их качества\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: 
{model_name}\")\n", + " \n", + " # Извлечение модели из словаря\n", + " model = class_models[model_name][\"model\"]\n", + " \n", + " # Создание пайплайна\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " \n", + " # Обучение модели\n", + " model_pipeline.fit(X_train, y_train)\n", + "\n", + " # Предсказание для обучающей и тестовой выборки\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_predict = model_pipeline.predict(X_test)\n", + "\n", + " # Сохранение пайплайна и предсказаний\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " # Вычисление метрик для регрессии\n", + " class_models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_predict)\n", + " class_models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_predict)\n", + " class_models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_predict)\n", + " class_models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_predict)\n", + "\n", + " # Дополнительные метрики\n", + " class_models[model_name][\"STD_train\"] = np.std(y_train - y_train_predict)\n", + " class_models[model_name][\"STD_test\"] = np.std(y_test - y_test_predict)\n", + "\n", + " # Вывод результатов для текущей модели\n", + " print(f\"MSE (train): {class_models[model_name]['MSE_train']}\")\n", + " print(f\"MSE (test): {class_models[model_name]['MSE_test']}\")\n", + " print(f\"MAE (train): {class_models[model_name]['MAE_train']}\")\n", + " print(f\"MAE (test): {class_models[model_name]['MAE_test']}\")\n", + " print(f\"R2 (train): {class_models[model_name]['R2_train']}\")\n", + " print(f\"R2 (test): 
{class_models[model_name]['R2_test']}\")\n", + " print(f\"STD (train): {class_models[model_name]['STD_train']}\")\n", + " print(f\"STD (test): {class_models[model_name]['STD_test']}\")\n", + " print(\"-\" * 40) # Разделитель для разных моделей" + ] + }, + { + "cell_type": "code", + "execution_count": 172, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import PolynomialFeatures\n", + "from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n", + "\n", + "random_state = 9\n", + "\n", + "models = {\n", + " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", + " \"linear_poly\": {\n", + " \"model\": make_pipeline(\n", + " PolynomialFeatures(degree=2),\n", + " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", + " )\n", + " },\n", + " \"linear_interact\": {\n", + " \"model\": make_pipeline(\n", + " PolynomialFeatures(interaction_only=True),\n", + " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n", + " )\n", + " },\n", + " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n", + " },\n", + " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", + " \"random_forest\": {\n", + " \"model\": ensemble.RandomForestRegressor(\n", + " max_depth=7, random_state=random_state, n_jobs=-1\n", + " )\n", + " },\n", + " \"mlp\": {\n", + " \"model\": neural_network.MLPRegressor(\n", + " activation=\"tanh\",\n", + " hidden_layer_sizes=(3,),\n", + " max_iter=500,\n", + " early_stopping=True,\n", + " random_state=random_state,\n", + " )\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Обучение и оценка моделей с помощью различных алгоритмов" + ] + }, + { + "cell_type": "code", + "execution_count": 173, + "metadata": {}, + "outputs": [ + { + "name": 
"stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "MSE (train): 0.24060150375939848\n", + "MSE (test): 0.23455933379597502\n", + "MAE (train): 0.24060150375939848\n", + "MAE (test): 0.23455933379597502\n", + "R2 (train): 0.015780807725750634\n", + "R2 (test): 0.045807954005714024\n", + "STD (train): 0.48387852043102103\n", + "STD (test): 0.4780359236045559\n", + "----------------------------------------\n", + "Model: ridge\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "e:\\MII\\laboratory\\mai\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n", + "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n", + "\n", + "Increase the number of iterations (max_iter) or scale the data as shown in:\n", + " https://scikit-learn.org/stable/modules/preprocessing.html\n", + "Please also refer to the documentation for alternative solver options:\n", + " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n", + " n_iter_i = _check_optimize_result(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "MSE (train): 0.11596298438403702\n", + "MSE (test): 0.11265325005783021\n", + "MAE (train): 0.11596298438403702\n", + "MAE (test): 0.11265325005783021\n", + "R2 (train): 0.5256347402620505\n", + "R2 (test): 0.541724332939628\n", + "STD (train): 0.3405113334365492\n", + "STD (test): 0.3356321137822519\n", + "----------------------------------------\n", + "Model: decision_tree\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: knn\n", + "MSE (train): 0.1949681897050318\n", + "MSE (test): 0.27989821882951654\n", + "MAE (train): 0.1949681897050318\n", + "MAE (test): 0.27989821882951654\n", + "R2 (train): 0.20245122664507342\n", + 
"R2 (test): -0.13863153417464114\n", + "STD (train): 0.43948973967967464\n", + "STD (test): 0.5264647910268833\n", + "----------------------------------------\n", + "Model: naive_bayes\n", + "MSE (train): 0.26928860613071137\n", + "MSE (test): 0.2690261392551469\n", + "MAE (train): 0.26928860613071137\n", + "MAE (test): 0.2690261392551469\n", + "R2 (train): -0.10156840366079445\n", + "R2 (test): -0.09440369772322943\n", + "STD (train): 0.47316941542228536\n", + "STD (test): 0.47206502931490235\n", + "----------------------------------------\n", + "Model: gradient_boosting\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: random_forest\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: mlp\n", + "MSE (train): 0.4253903990746096\n", + "MSE (test): 0.4353458246588018\n", + "MAE (train): 0.4253903990746096\n", + "MAE (test): 0.4353458246588018\n", + "R2 (train): -0.7401279228791116\n", + "R2 (test): -0.7709954936501442\n", + "STD (train): 0.4959884986820156\n", + "STD (test): 0.49782384226978177\n", + "----------------------------------------\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "# Проверка наличия необходимых переменных\n", + "if 'class_models' not in locals():\n", + " raise ValueError(\"class_models is not defined\")\n", + "if 'X_train' not in locals() or 'X_test' not in locals() or 'y_train' not in locals() or 'y_test' not in locals():\n", + " raise ValueError(\"Train/test data is not defined\")\n", + "\n", + "\n", + "y_train = np.ravel(y_train) \n", + "y_test = 
np.ravel(y_test) \n", + "\n", + "# Инициализация списка для хранения результатов\n", + "results = []\n", + "\n", + "# Проход по моделям и оценка их качества\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + " \n", + " # Извлечение модели из словаря\n", + " model = class_models[model_name][\"model\"]\n", + " \n", + " # Создание пайплайна\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " \n", + " # Обучение модели\n", + " model_pipeline.fit(X_train, y_train)\n", + "\n", + " # Предсказание для обучающей и тестовой выборки\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_predict = model_pipeline.predict(X_test)\n", + "\n", + " # Сохранение пайплайна и предсказаний\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " # Вычисление метрик для регрессии\n", + " class_models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_predict)\n", + " class_models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_predict)\n", + " class_models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_predict)\n", + " class_models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_predict)\n", + "\n", + " # Дополнительные метрики\n", + " class_models[model_name][\"STD_train\"] = np.std(y_train - y_train_predict)\n", + " class_models[model_name][\"STD_test\"] = np.std(y_test - y_test_predict)\n", + "\n", + " # Вывод результатов для текущей модели\n", + " print(f\"MSE (train): {class_models[model_name]['MSE_train']}\")\n", + " print(f\"MSE (test): {class_models[model_name]['MSE_test']}\")\n", + " print(f\"MAE (train): 
{class_models[model_name]['MAE_train']}\")\n", + " print(f\"MAE (test): {class_models[model_name]['MAE_test']}\")\n", + " print(f\"R2 (train): {class_models[model_name]['R2_train']}\")\n", + " print(f\"R2 (test): {class_models[model_name]['R2_test']}\")\n", + " print(f\"STD (train): {class_models[model_name]['STD_train']}\")\n", + " print(f\"STD (test): {class_models[model_name]['STD_test']}\")\n", + " print(\"-\" * 40) # Разделитель для разных моделей" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Пример использования обученной модели (конвейера регрессии) для предсказания**" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Подбор гиперпараметров методом поиска по сетке**" + ] + }, + { + "cell_type": "code", + "execution_count": 174, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n", + "Best parameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}\n", + "Best MSE: 0.14752641202600872\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn.model_selection import train_test_split, GridSearchCV\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.preprocessing import StandardScaler\n", + "\n", + "# Convert the date column to a datetime object and extract numeric features\n", + "df['date'] = pd.to_datetime(df['date'], errors='coerce') # Coerce invalid dates to NaT\n", + "df.dropna(subset=['date'], inplace=True) # Drop rows with invalid dates\n", + "df['year'] = df['date'].dt.year\n", + "df['month'] = df['date'].dt.month\n", + "df['day'] = df['date'].dt.day\n", + "\n", + "# Prepare predictors and target\n", + "X = df[['yr_built', 'year', 'month', 'day', 'price', 'price_category']]\n", + "y = df['average_price']\n", + "\n", + "# Split data into training and testing sets\n", + "X_train, X_test, y_train, y_test = 
train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + "# Define model and parameter grid\n", + "model = RandomForestRegressor()\n", + "param_grid = {\n", + " 'n_estimators': [50, 100, 200],\n", + " 'max_depth': [None, 10, 20, 30],\n", + " 'min_samples_split': [2, 5, 10]\n", + "}\n", + "\n", + "# Hyperparameter tuning with GridSearchCV\n", + "grid_search = GridSearchCV(estimator=model, param_grid=param_grid,\n", + " scoring='neg_mean_squared_error', cv=5, n_jobs=-1, verbose=2)\n", + "\n", + "# Fit the model\n", + "grid_search.fit(X_train, y_train)\n", + "\n", + "# Output the best parameters and score\n", + "print(\"Best parameters:\", grid_search.best_params_)\n", + "print(\"Best MSE:\", -grid_search.best_score_)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "**Обучение модели с новыми гиперпараметрами и сравнение новых и старых данных**" + ] + }, + { + "cell_type": "code", + "execution_count": 175, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n", + "Старые параметры: {'max_depth': 10, 'min_samples_split': 15, 'n_estimators': 200}\n", + "Лучший результат (MSE) на старых параметрах: 0.14727400921908354\n", + "\n", + "Новые параметры: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}\n", + "Лучший результат (MSE) на новых параметрах: 0.148833681322309\n", + "Среднеквадратическая ошибка (MSE) на тестовых данных: 0.14451630134635543\n", + "Корень среднеквадратичной ошибки (RMSE) на тестовых данных: 0.3801529972870863\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.model_selection import train_test_split, GridSearchCV\n", + "import matplotlib.pyplot as plt\n", + "\n", + "\n", + "# 1. Настройка параметров для старых значений\n", + "old_param_grid = {\n", + " 'n_estimators': [50, 100, 200], # Количество деревьев\n", + " 'max_depth': [None, 10, 20, 30], # Максимальная глубина дерева\n", + " 'min_samples_split': [2, 10, 15] # Минимальное количество образцов для разбиения узла\n", + "}\n", + "\n", + "# Подбор гиперпараметров с помощью Grid Search для старых параметров\n", + "old_grid_search = GridSearchCV(estimator=RandomForestRegressor(), \n", + " param_grid=old_param_grid, scoring='neg_mean_squared_error', cv=5, n_jobs=-1, verbose=2)\n", + "\n", + "# Обучение модели на тренировочных данных\n", + "old_grid_search.fit(X_train, y_train)\n", + "\n", + "# 2. Результаты подбора для старых параметров\n", + "old_best_params = old_grid_search.best_params_\n", + "old_best_mse = -old_grid_search.best_score_ # Меняем знак, так как берем отрицательное значение MSE\n", + "\n", + "# 3. Настройка параметров для новых значений\n", + "new_param_grid = {\n", + " 'n_estimators': [200],\n", + " 'max_depth': [10],\n", + " 'min_samples_split': [10]\n", + "}\n", + "\n", + "# Подбор гиперпараметров с помощью Grid Search для новых параметров\n", + "new_grid_search = GridSearchCV(estimator=RandomForestRegressor(), \n", + " param_grid=new_param_grid, scoring='neg_mean_squared_error', cv=2)\n", + "\n", + "# Обучение модели на тренировочных данных\n", + "new_grid_search.fit(X_train, y_train)\n", + "\n", + "# 4. 
Результаты подбора для новых параметров\n", + "new_best_params = new_grid_search.best_params_\n", + "new_best_mse = -new_grid_search.best_score_ # Меняем знак, так как берем отрицательное значение MSE\n", + "\n", + "# 5. Обучение модели с лучшими параметрами для новых значений\n", + "model_best = RandomForestRegressor(**new_best_params)\n", + "model_best.fit(X_train, y_train)\n", + "\n", + "# Прогнозирование на тестовой выборке\n", + "y_pred = model_best.predict(X_test)\n", + "\n", + "# Оценка производительности модели\n", + "mse = metrics.mean_squared_error(y_test, y_pred)\n", + "rmse = np.sqrt(mse)\n", + "\n", + "# Вывод результатов\n", + "print(\"Старые параметры:\", old_best_params)\n", + "print(\"Лучший результат (MSE) на старых параметрах:\", old_best_mse)\n", + "print(\"\\nНовые параметры:\", new_best_params)\n", + "print(\"Лучший результат (MSE) на новых параметрах:\", new_best_mse)\n", + "print(\"Среднеквадратическая ошибка (MSE) на тестовых данных:\", mse)\n", + "print(\"Корень среднеквадратичной ошибки (RMSE) на тестовых данных:\", rmse)\n", + "\n", + "# Визуализация ошибок\n", + "plt.figure(figsize=(10, 5))\n", + "plt.bar(['Старые параметры', 'Новые параметры'], [old_best_mse, new_best_mse], color=['blue', 'orange'])\n", + "plt.xlabel('Подбор параметров')\n", + "plt.ylabel('Среднеквадратическая ошибка (MSE)')\n", + "plt.title('Сравнение MSE для старых и новых параметров')\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Сравнивая результаты старых и новых параметров, можно сказать, что старые параметры модели позволили добиться меньшей среднеквадратической ошибки, что указывает на более эффективное предсказание по сравнению с новыми настройками. Скорее всего модель обучена достаточно хорошо, учитывая следующие факторы:\n", + "1. Показатели MSE на кросс-валидации по обучающим данным (≈0.147–0.149) и на тестовых данных (≈0.145) очень близки. 
Это говорит о том, что модель не переобучена и не недообучена — она хорошо обобщает на тестовой выборке, что является желаемым результатом. \n", + "2. Старые параметры дали наилучший результат на кросс-валидации (MSE ≈ 0.1473 против ≈ 0.1488 у новых), так что модель способна выдать высокую точность при настройке гиперпараметров. Попытка с новыми параметрами позволила оценить, как модель реагирует на изменения параметров: при том же max_depth = 10 снижение min_samples_split с 15 до 10 не улучшило результат. Учитывая, что оценки получены при разном числе фолдов (cv=5 и cv=2), разница между вариантами несущественна. Этот процесс настройки параметров — часть процесса улучшения модели. \n", + "3. На тестовой выборке модель с новыми параметрами показала MSE ≈ 0.1445 (RMSE ≈ 0.380), что согласуется с оценками, полученными при кросс-валидации. \n", + "\n", + "Таким образом, можно сказать, что модель обучена хорошо, но возможны дальнейшие небольшие улучшения за счет оптимизации гиперпараметров." + ] + }, + { + "cell_type": "code", + "execution_count": 176, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "plt.figure(figsize=(10, 5))\n", + "plt.scatter(range(len(y_test)), y_test, label=\"Актуалочка\", color=\"black\", alpha=0.5)\n", + "plt.scatter(range(len(y_test)), y_pred, label=\"Предсказанные(новые параметры)\", color=\"blue\", alpha=0.5)\n", + "plt.scatter(range(len(y_test)), y_test_predict, label=\"Предсказанные(старые параметры)\", color=\"red\", alpha=0.5)\n", + "plt.xlabel(\"Выборка\")\n", + "plt.ylabel(\"Значения\")\n", + "plt.legend()\n", + "plt.title(\"Актуалочка vs Предсказанных значений (Новые and Старые Параметры)\")\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Ураааа! Усёёёё, вроде бы всё ^_^" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "mai", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.6" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/laboratory_4/requirements.txt b/laboratory_4/requirements.txt new file mode 100644 index 0000000..5f04788 --- /dev/null +++ b/laboratory_4/requirements.txt @@ -0,0 +1,40 @@ +asttokens==2.4.1 +colorama==0.4.6 +comm==0.2.2 +contourpy==1.3.0 +cycler==0.12.1 +debugpy==1.8.5 +decorator==5.1.1 +executing==2.1.0 +fonttools==4.53.1 +ipykernel==6.29.5 +ipython==8.27.0 +jedi==0.19.1 +jupyter_client==8.6.3 +jupyter_core==5.7.2 +kiwisolver==1.4.7 +matplotlib==3.9.2 +matplotlib-inline==0.1.7 +nest-asyncio==1.6.0 +numpy==2.1.1 +packaging==24.1 +pandas==2.2.2 +parso==0.8.4 +pillow==10.4.0 +platformdirs==4.3.6 +prompt_toolkit==3.0.47 +psutil==6.0.0 +pure_eval==0.2.3 +Pygments==2.18.0 +pyparsing==3.1.4 +python-dateutil==2.9.0.post0 +pytz==2024.2 +pywin32==306 +pyzmq==26.2.0 +seaborn==0.13.2 +six==1.16.0 
+stack-data==0.6.3 +tornado==6.4.1 +traitlets==5.14.3 +tzdata==2024.1 +wcwidth==0.2.13