diff --git a/laboratory_4/lab4.ipynb b/laboratory_4/lab4.ipynb
new file mode 100644
index 0000000..5584a37
--- /dev/null
+++ b/laboratory_4/lab4.ipynb
@@ -0,0 +1,5712 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Начинаем работу... \n",
+ "\n",
+ "Датасет: Продажи домов в округе Кинг "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 144,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Index(['id', 'date', 'price', 'bedrooms', 'bathrooms', 'sqft_living',\n",
+ " 'sqft_lot', 'floors', 'waterfront', 'view', 'condition', 'grade',\n",
+ " 'sqft_above', 'sqft_basement', 'yr_built', 'yr_renovated', 'zipcode',\n",
+ " 'lat', 'long', 'sqft_living15', 'sqft_lot15'],\n",
+ " dtype='object')\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from sklearn import set_config\n",
+ "\n",
+ "# Установим параметры для вывода\n",
+ "set_config(transform_output=\"pandas\")\n",
+ "\n",
+ "random_state = 42\n",
+ "\n",
+ "# Подключим датафрейм и выгрузим данные\n",
+ "df = pd.read_csv(\".//static//csv//kc_house_data.csv\")\n",
+ "print(df.columns)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 145,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 0 | \n",
+ " 7129300520 | \n",
+ " 20141013T000000 | \n",
+ " 221900.0 | \n",
+ " 3 | \n",
+ " 1.00 | \n",
+ " 1180 | \n",
+ " 5650 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " 1180 | \n",
+ " 0 | \n",
+ " 1955 | \n",
+ " 0 | \n",
+ " 98178 | \n",
+ " 47.5112 | \n",
+ " -122.257 | \n",
+ " 1340 | \n",
+ " 5650 | \n",
+ "
\n",
+ " \n",
+ " 1 | \n",
+ " 6414100192 | \n",
+ " 20141209T000000 | \n",
+ " 538000.0 | \n",
+ " 3 | \n",
+ " 2.25 | \n",
+ " 2570 | \n",
+ " 7242 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " 2170 | \n",
+ " 400 | \n",
+ " 1951 | \n",
+ " 1991 | \n",
+ " 98125 | \n",
+ " 47.7210 | \n",
+ " -122.319 | \n",
+ " 1690 | \n",
+ " 7639 | \n",
+ "
\n",
+ " \n",
+ " 2 | \n",
+ " 5631500400 | \n",
+ " 20150225T000000 | \n",
+ " 180000.0 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 770 | \n",
+ " 10000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 6 | \n",
+ " 770 | \n",
+ " 0 | \n",
+ " 1933 | \n",
+ " 0 | \n",
+ " 98028 | \n",
+ " 47.7379 | \n",
+ " -122.233 | \n",
+ " 2720 | \n",
+ " 8062 | \n",
+ "
\n",
+ " \n",
+ " 3 | \n",
+ " 2487200875 | \n",
+ " 20141209T000000 | \n",
+ " 604000.0 | \n",
+ " 4 | \n",
+ " 3.00 | \n",
+ " 1960 | \n",
+ " 5000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 7 | \n",
+ " 1050 | \n",
+ " 910 | \n",
+ " 1965 | \n",
+ " 0 | \n",
+ " 98136 | \n",
+ " 47.5208 | \n",
+ " -122.393 | \n",
+ " 1360 | \n",
+ " 5000 | \n",
+ "
\n",
+ " \n",
+ " 4 | \n",
+ " 1954400510 | \n",
+ " 20150218T000000 | \n",
+ " 510000.0 | \n",
+ " 3 | \n",
+ " 2.00 | \n",
+ " 1680 | \n",
+ " 8080 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 8 | \n",
+ " 1680 | \n",
+ " 0 | \n",
+ " 1987 | \n",
+ " 0 | \n",
+ " 98074 | \n",
+ " 47.6168 | \n",
+ " -122.045 | \n",
+ " 1800 | \n",
+ " 7503 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
5 rows × 21 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms sqft_living \\\n",
+ "0 7129300520 20141013T000000 221900.0 3 1.00 1180 \n",
+ "1 6414100192 20141209T000000 538000.0 3 2.25 2570 \n",
+ "2 5631500400 20150225T000000 180000.0 2 1.00 770 \n",
+ "3 2487200875 20141209T000000 604000.0 4 3.00 1960 \n",
+ "4 1954400510 20150218T000000 510000.0 3 2.00 1680 \n",
+ "\n",
+ " sqft_lot floors waterfront view ... grade sqft_above sqft_basement \\\n",
+ "0 5650 1.0 0 0 ... 7 1180 0 \n",
+ "1 7242 2.0 0 0 ... 7 2170 400 \n",
+ "2 10000 1.0 0 0 ... 6 770 0 \n",
+ "3 5000 1.0 0 0 ... 7 1050 910 \n",
+ "4 8080 1.0 0 0 ... 8 1680 0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "0 1955 0 98178 47.5112 -122.257 1340 \n",
+ "1 1951 1991 98125 47.7210 -122.319 1690 \n",
+ "2 1933 0 98028 47.7379 -122.233 2720 \n",
+ "3 1965 0 98136 47.5208 -122.393 1360 \n",
+ "4 1987 0 98074 47.6168 -122.045 1800 \n",
+ "\n",
+ " sqft_lot15 \n",
+ "0 5650 \n",
+ "1 7639 \n",
+ "2 8062 \n",
+ "3 5000 \n",
+ "4 7503 \n",
+ "\n",
+ "[5 rows x 21 columns]"
+ ]
+ },
+ "execution_count": 145,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.head()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 146,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " condition | \n",
+ " grade | \n",
+ " sqft_above | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " count | \n",
+ " 2.161300e+04 | \n",
+ " 2.161300e+04 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 2.161300e+04 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ " 21613.000000 | \n",
+ "
\n",
+ " \n",
+ " mean | \n",
+ " 4.580302e+09 | \n",
+ " 5.400881e+05 | \n",
+ " 3.370842 | \n",
+ " 2.114757 | \n",
+ " 2079.899736 | \n",
+ " 1.510697e+04 | \n",
+ " 1.494309 | \n",
+ " 0.007542 | \n",
+ " 0.234303 | \n",
+ " 3.409430 | \n",
+ " 7.656873 | \n",
+ " 1788.390691 | \n",
+ " 291.509045 | \n",
+ " 1971.005136 | \n",
+ " 84.402258 | \n",
+ " 98077.939805 | \n",
+ " 47.560053 | \n",
+ " -122.213896 | \n",
+ " 1986.552492 | \n",
+ " 12768.455652 | \n",
+ "
\n",
+ " \n",
+ " std | \n",
+ " 2.876566e+09 | \n",
+ " 3.671272e+05 | \n",
+ " 0.930062 | \n",
+ " 0.770163 | \n",
+ " 918.440897 | \n",
+ " 4.142051e+04 | \n",
+ " 0.539989 | \n",
+ " 0.086517 | \n",
+ " 0.766318 | \n",
+ " 0.650743 | \n",
+ " 1.175459 | \n",
+ " 828.090978 | \n",
+ " 442.575043 | \n",
+ " 29.373411 | \n",
+ " 401.679240 | \n",
+ " 53.505026 | \n",
+ " 0.138564 | \n",
+ " 0.140828 | \n",
+ " 685.391304 | \n",
+ " 27304.179631 | \n",
+ "
\n",
+ " \n",
+ " min | \n",
+ " 1.000102e+06 | \n",
+ " 7.500000e+04 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 290.000000 | \n",
+ " 5.200000e+02 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 290.000000 | \n",
+ " 0.000000 | \n",
+ " 1900.000000 | \n",
+ " 0.000000 | \n",
+ " 98001.000000 | \n",
+ " 47.155900 | \n",
+ " -122.519000 | \n",
+ " 399.000000 | \n",
+ " 651.000000 | \n",
+ "
\n",
+ " \n",
+ " 25% | \n",
+ " 2.123049e+09 | \n",
+ " 3.219500e+05 | \n",
+ " 3.000000 | \n",
+ " 1.750000 | \n",
+ " 1427.000000 | \n",
+ " 5.040000e+03 | \n",
+ " 1.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 3.000000 | \n",
+ " 7.000000 | \n",
+ " 1190.000000 | \n",
+ " 0.000000 | \n",
+ " 1951.000000 | \n",
+ " 0.000000 | \n",
+ " 98033.000000 | \n",
+ " 47.471000 | \n",
+ " -122.328000 | \n",
+ " 1490.000000 | \n",
+ " 5100.000000 | \n",
+ "
\n",
+ " \n",
+ " 50% | \n",
+ " 3.904930e+09 | \n",
+ " 4.500000e+05 | \n",
+ " 3.000000 | \n",
+ " 2.250000 | \n",
+ " 1910.000000 | \n",
+ " 7.618000e+03 | \n",
+ " 1.500000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 3.000000 | \n",
+ " 7.000000 | \n",
+ " 1560.000000 | \n",
+ " 0.000000 | \n",
+ " 1975.000000 | \n",
+ " 0.000000 | \n",
+ " 98065.000000 | \n",
+ " 47.571800 | \n",
+ " -122.230000 | \n",
+ " 1840.000000 | \n",
+ " 7620.000000 | \n",
+ "
\n",
+ " \n",
+ " 75% | \n",
+ " 7.308900e+09 | \n",
+ " 6.450000e+05 | \n",
+ " 4.000000 | \n",
+ " 2.500000 | \n",
+ " 2550.000000 | \n",
+ " 1.068800e+04 | \n",
+ " 2.000000 | \n",
+ " 0.000000 | \n",
+ " 0.000000 | \n",
+ " 4.000000 | \n",
+ " 8.000000 | \n",
+ " 2210.000000 | \n",
+ " 560.000000 | \n",
+ " 1997.000000 | \n",
+ " 0.000000 | \n",
+ " 98118.000000 | \n",
+ " 47.678000 | \n",
+ " -122.125000 | \n",
+ " 2360.000000 | \n",
+ " 10083.000000 | \n",
+ "
\n",
+ " \n",
+ " max | \n",
+ " 9.900000e+09 | \n",
+ " 7.700000e+06 | \n",
+ " 33.000000 | \n",
+ " 8.000000 | \n",
+ " 13540.000000 | \n",
+ " 1.651359e+06 | \n",
+ " 3.500000 | \n",
+ " 1.000000 | \n",
+ " 4.000000 | \n",
+ " 5.000000 | \n",
+ " 13.000000 | \n",
+ " 9410.000000 | \n",
+ " 4820.000000 | \n",
+ " 2015.000000 | \n",
+ " 2015.000000 | \n",
+ " 98199.000000 | \n",
+ " 47.777600 | \n",
+ " -121.315000 | \n",
+ " 6210.000000 | \n",
+ " 871200.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id price bedrooms bathrooms sqft_living \\\n",
+ "count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
+ "mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
+ "std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
+ "min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
+ "25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
+ "50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
+ "75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
+ "max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
+ "\n",
+ " sqft_lot floors waterfront view condition \\\n",
+ "count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
+ "mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
+ "std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
+ "min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
+ "25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
+ "50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
+ "75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
+ "max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
+ "\n",
+ " grade sqft_above sqft_basement yr_built yr_renovated \\\n",
+ "count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
+ "mean 7.656873 1788.390691 291.509045 1971.005136 84.402258 \n",
+ "std 1.175459 828.090978 442.575043 29.373411 401.679240 \n",
+ "min 1.000000 290.000000 0.000000 1900.000000 0.000000 \n",
+ "25% 7.000000 1190.000000 0.000000 1951.000000 0.000000 \n",
+ "50% 7.000000 1560.000000 0.000000 1975.000000 0.000000 \n",
+ "75% 8.000000 2210.000000 560.000000 1997.000000 0.000000 \n",
+ "max 13.000000 9410.000000 4820.000000 2015.000000 2015.000000 \n",
+ "\n",
+ " zipcode lat long sqft_living15 sqft_lot15 \n",
+ "count 21613.000000 21613.000000 21613.000000 21613.000000 21613.000000 \n",
+ "mean 98077.939805 47.560053 -122.213896 1986.552492 12768.455652 \n",
+ "std 53.505026 0.138564 0.140828 685.391304 27304.179631 \n",
+ "min 98001.000000 47.155900 -122.519000 399.000000 651.000000 \n",
+ "25% 98033.000000 47.471000 -122.328000 1490.000000 5100.000000 \n",
+ "50% 98065.000000 47.571800 -122.230000 1840.000000 7620.000000 \n",
+ "75% 98118.000000 47.678000 -122.125000 2360.000000 10083.000000 \n",
+ "max 98199.000000 47.777600 -121.315000 6210.000000 871200.000000 "
+ ]
+ },
+ "execution_count": 146,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "df.describe()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 147,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "id 0\n",
+ "date 0\n",
+ "price 0\n",
+ "bedrooms 0\n",
+ "bathrooms 0\n",
+ "sqft_living 0\n",
+ "sqft_lot 0\n",
+ "floors 0\n",
+ "waterfront 0\n",
+ "view 0\n",
+ "condition 0\n",
+ "grade 0\n",
+ "sqft_above 0\n",
+ "sqft_basement 0\n",
+ "yr_built 0\n",
+ "yr_renovated 0\n",
+ "zipcode 0\n",
+ "lat 0\n",
+ "long 0\n",
+ "sqft_living15 0\n",
+ "sqft_lot15 0\n",
+ "dtype: int64\n",
+ "id False\n",
+ "date False\n",
+ "price False\n",
+ "bedrooms False\n",
+ "bathrooms False\n",
+ "sqft_living False\n",
+ "sqft_lot False\n",
+ "floors False\n",
+ "waterfront False\n",
+ "view False\n",
+ "condition False\n",
+ "grade False\n",
+ "sqft_above False\n",
+ "sqft_basement False\n",
+ "yr_built False\n",
+ "yr_renovated False\n",
+ "zipcode False\n",
+ "lat False\n",
+ "long False\n",
+ "sqft_living15 False\n",
+ "sqft_lot15 False\n",
+ "dtype: bool\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Процент пропущенных значений признаков\n",
+ "for i in df.columns:\n",
+ " null_rate = df[i].isnull().sum() / len(df) * 100\n",
+ " if null_rate > 0:\n",
+ " print(f'{i} Процент пустых значений: %{null_rate:.2f}')\n",
+ "\n",
+ "print(df.isnull().sum())\n",
+ "\n",
+ "print(df.isnull().any())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 148,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "id int64\n",
+ "date object\n",
+ "price float64\n",
+ "bedrooms int64\n",
+ "bathrooms float64\n",
+ "sqft_living int64\n",
+ "sqft_lot int64\n",
+ "floors float64\n",
+ "waterfront int64\n",
+ "view int64\n",
+ "condition int64\n",
+ "grade int64\n",
+ "sqft_above int64\n",
+ "sqft_basement int64\n",
+ "yr_built int64\n",
+ "yr_renovated int64\n",
+ "zipcode int64\n",
+ "lat float64\n",
+ "long float64\n",
+ "sqft_living15 int64\n",
+ "sqft_lot15 int64\n",
+ "dtype: object"
+ ]
+ },
+ "execution_count": 148,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Проверка типов столбцов\n",
+ "df.dtypes"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Выбор бизнес-целей \n",
+ "Для датасета недвижимости предлагаются две бизнес-цели:\n",
+ "\n",
+ "*Задача регрессии* – предсказание цены дома (price). Это может помочь риэлторам и аналитикам определить справедливую рыночную стоимость недвижимости. \n",
+ "\n",
+ "*Задача классификации* – определение вероятности того, что цена дома будет выше/ниже медианы рынка. Классифицировать дома по ценовым категориям (например, низкая, средняя, высокая цена). Это может помочь определить, какие дома популярны у покупателей.\n",
+ "\n",
+ "## Определение достижимого уровня качества модели \n",
+ "Для регрессии и классификации мы выберем метрики: \n",
+ "\n",
+ "Для регрессии будем использовать метрики MAE (средняя абсолютная ошибка) и R^2 (коэффициент детерминации), стремясь к MAE ниже 10% от средней цены. А классификация будте ориентироваться на метрики accuracy и F1-score при целевом значении accuracy около 80%.\n",
+ "\n",
+ "## Ориентир для каждой задачи\n",
+ "Для регрессии ориентиром будет медианная цена (price.median()), так как это стабильное значение. Для классификации ориентируемся на среднюю вероятность предсказания класса выше медианы.\n",
+ "\n",
+ "## Анализ алгоритмов машинного обучения \n",
+ "Рассмотрим для задачи регрессии:\n",
+ "\n",
+ "*Линейная регрессия:* подходит для простых линейных зависимостей. \n",
+ "*Дерево решений:* учитывает нелинейные зависимости, может учесть сложные закономерности. \n",
+ "*Случайный лес:* ансамблевый метод, обобщающий данные и эффективно обрабатывающий выбросы. \n",
+ "\n",
+ "Для задачи классификации: \n",
+ "\n",
+ "*Логистическая регрессия:* простая модель, подходящая для бинарной классификации. \n",
+ "*Метод опорных векторов (SVM):* работает хорошо на данных с четкими разделениями. \n",
+ "*Градиентный бустинг:* подходит для сложных и высокоразмерных данных, обеспечивает высокую точность. \n",
+ "\n",
+ "## Выбор моделей \n",
+ "Выбираем по три модели для каждой задачи:\n",
+ "\n",
+ "*Регрессия:* Линейная регрессия, Дерево решений, Случайный лес. \n",
+ "*Классификация:* Логистическая регрессия, Метод опорных векторов (SVM), Градиентный бустинг. \n",
+ "\n",
+ "\n",
+ "## Построение конвейера и визуализации \n",
+ "Теперь напишем код для загрузки данных, анализа и подготовки моделей с визуализацией результатов.\n",
+ "\n",
+ "\n",
+ "# Начнём с задачи классификации\n",
+ "\n",
+ "Целевой признак --> above_median_price\n",
+ "\n",
+ "Формируем выборки. Разделяем набор данных на обучающую и тестовые выборки (80/20) для задачи классификации"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 149,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'X_train'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " above_median_price | \n",
+ " price_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 20962 | \n",
+ " 1278000210 | \n",
+ " 20150311T000000 | \n",
+ " 110000.0 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 828 | \n",
+ " 4524 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1968 | \n",
+ " 2007 | \n",
+ " 98001 | \n",
+ " 47.2655 | \n",
+ " -122.244 | \n",
+ " 828 | \n",
+ " 5402 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12284 | \n",
+ " 2193300390 | \n",
+ " 20140923T000000 | \n",
+ " 624000.0 | \n",
+ " 4 | \n",
+ " 3.25 | \n",
+ " 2810 | \n",
+ " 11250 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1130 | \n",
+ " 1980 | \n",
+ " 0 | \n",
+ " 98052 | \n",
+ " 47.6920 | \n",
+ " -122.099 | \n",
+ " 2110 | \n",
+ " 11250 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 7343 | \n",
+ " 4289900005 | \n",
+ " 20141230T000000 | \n",
+ " 1535000.0 | \n",
+ " 4 | \n",
+ " 3.25 | \n",
+ " 2850 | \n",
+ " 4100 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 1030 | \n",
+ " 1908 | \n",
+ " 2003 | \n",
+ " 98122 | \n",
+ " 47.6147 | \n",
+ " -122.285 | \n",
+ " 2130 | \n",
+ " 4200 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 14247 | \n",
+ " 316000145 | \n",
+ " 20150325T000000 | \n",
+ " 235000.0 | \n",
+ " 4 | \n",
+ " 1.00 | \n",
+ " 1360 | \n",
+ " 7132 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1941 | \n",
+ " 0 | \n",
+ " 98168 | \n",
+ " 47.5054 | \n",
+ " -122.301 | \n",
+ " 1280 | \n",
+ " 7175 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 16670 | \n",
+ " 629400480 | \n",
+ " 20140619T000000 | \n",
+ " 775000.0 | \n",
+ " 4 | \n",
+ " 2.75 | \n",
+ " 3010 | \n",
+ " 15992 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1996 | \n",
+ " 0 | \n",
+ " 98075 | \n",
+ " 47.5895 | \n",
+ " -121.994 | \n",
+ " 3330 | \n",
+ " 12333 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " 1332700270 | \n",
+ " 20140519T000000 | \n",
+ " 215000.0 | \n",
+ " 2 | \n",
+ " 2.25 | \n",
+ " 1610 | \n",
+ " 2040 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1979 | \n",
+ " 0 | \n",
+ " 98056 | \n",
+ " 47.5180 | \n",
+ " -122.194 | \n",
+ " 1950 | \n",
+ " 2025 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15031 | \n",
+ " 7129303070 | \n",
+ " 20140820T000000 | \n",
+ " 735000.0 | \n",
+ " 4 | \n",
+ " 2.75 | \n",
+ " 3040 | \n",
+ " 2415 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1966 | \n",
+ " 0 | \n",
+ " 98118 | \n",
+ " 47.5188 | \n",
+ " -122.256 | \n",
+ " 2620 | \n",
+ " 2433 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 5234 | \n",
+ " 2432000130 | \n",
+ " 20150414T000000 | \n",
+ " 675000.0 | \n",
+ " 3 | \n",
+ " 1.75 | \n",
+ " 1660 | \n",
+ " 9549 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1956 | \n",
+ " 0 | \n",
+ " 98033 | \n",
+ " 47.6503 | \n",
+ " -122.198 | \n",
+ " 2090 | \n",
+ " 9549 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 19980 | \n",
+ " 774100475 | \n",
+ " 20140627T000000 | \n",
+ " 415000.0 | \n",
+ " 3 | \n",
+ " 2.75 | \n",
+ " 2600 | \n",
+ " 64626 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2009 | \n",
+ " 0 | \n",
+ " 98014 | \n",
+ " 47.7185 | \n",
+ " -121.405 | \n",
+ " 1740 | \n",
+ " 64626 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3671 | \n",
+ " 8847400115 | \n",
+ " 20140723T000000 | \n",
+ " 590000.0 | \n",
+ " 3 | \n",
+ " 2.00 | \n",
+ " 2420 | \n",
+ " 208652 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2005 | \n",
+ " 0 | \n",
+ " 98010 | \n",
+ " 47.3666 | \n",
+ " -121.978 | \n",
+ " 3180 | \n",
+ " 212137 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17290 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms \\\n",
+ "20962 1278000210 20150311T000000 110000.0 2 1.00 \n",
+ "12284 2193300390 20140923T000000 624000.0 4 3.25 \n",
+ "7343 4289900005 20141230T000000 1535000.0 4 3.25 \n",
+ "14247 316000145 20150325T000000 235000.0 4 1.00 \n",
+ "16670 629400480 20140619T000000 775000.0 4 2.75 \n",
+ "... ... ... ... ... ... \n",
+ "88 1332700270 20140519T000000 215000.0 2 2.25 \n",
+ "15031 7129303070 20140820T000000 735000.0 4 2.75 \n",
+ "5234 2432000130 20150414T000000 675000.0 3 1.75 \n",
+ "19980 774100475 20140627T000000 415000.0 3 2.75 \n",
+ "3671 8847400115 20140723T000000 590000.0 3 2.00 \n",
+ "\n",
+ " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n",
+ "20962 828 4524 1.0 0 0 ... 0 \n",
+ "12284 2810 11250 1.0 0 0 ... 1130 \n",
+ "7343 2850 4100 2.0 0 3 ... 1030 \n",
+ "14247 1360 7132 1.5 0 0 ... 0 \n",
+ "16670 3010 15992 2.0 0 0 ... 0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "88 1610 2040 2.0 0 0 ... 0 \n",
+ "15031 3040 2415 2.0 1 4 ... 0 \n",
+ "5234 1660 9549 1.0 0 0 ... 0 \n",
+ "19980 2600 64626 1.5 0 0 ... 0 \n",
+ "3671 2420 208652 1.5 0 0 ... 0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "20962 1968 2007 98001 47.2655 -122.244 828 \n",
+ "12284 1980 0 98052 47.6920 -122.099 2110 \n",
+ "7343 1908 2003 98122 47.6147 -122.285 2130 \n",
+ "14247 1941 0 98168 47.5054 -122.301 1280 \n",
+ "16670 1996 0 98075 47.5895 -121.994 3330 \n",
+ "... ... ... ... ... ... ... \n",
+ "88 1979 0 98056 47.5180 -122.194 1950 \n",
+ "15031 1966 0 98118 47.5188 -122.256 2620 \n",
+ "5234 1956 0 98033 47.6503 -122.198 2090 \n",
+ "19980 2009 0 98014 47.7185 -121.405 1740 \n",
+ "3671 2005 0 98010 47.3666 -121.978 3180 \n",
+ "\n",
+ " sqft_lot15 above_median_price price_category \n",
+ "20962 5402 0 0 \n",
+ "12284 11250 1 1 \n",
+ "7343 4200 1 2 \n",
+ "14247 7175 0 0 \n",
+ "16670 12333 1 2 \n",
+ "... ... ... ... \n",
+ "88 2025 0 0 \n",
+ "15031 2433 1 2 \n",
+ "5234 9549 1 1 \n",
+ "19980 64626 0 1 \n",
+ "3671 212137 1 1 \n",
+ "\n",
+ "[17290 rows x 23 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'y_train'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " above_median_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 20962 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12284 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 7343 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 14247 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 16670 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15031 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 5234 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 19980 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 3671 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17290 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " above_median_price\n",
+ "20962 0\n",
+ "12284 1\n",
+ "7343 1\n",
+ "14247 0\n",
+ "16670 1\n",
+ "... ...\n",
+ "88 0\n",
+ "15031 1\n",
+ "5234 1\n",
+ "19980 0\n",
+ "3671 1\n",
+ "\n",
+ "[17290 rows x 1 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'X_test'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " above_median_price | \n",
+ " price_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 11592 | \n",
+ " 2028701000 | \n",
+ " 20140529T000000 | \n",
+ " 635200.0 | \n",
+ " 4 | \n",
+ " 1.75 | \n",
+ " 1640 | \n",
+ " 4240 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 720 | \n",
+ " 1921 | \n",
+ " 0 | \n",
+ " 98117 | \n",
+ " 47.6766 | \n",
+ " -122.368 | \n",
+ " 1300 | \n",
+ " 4240 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8984 | \n",
+ " 9406500530 | \n",
+ " 20140912T000000 | \n",
+ " 249000.0 | \n",
+ " 2 | \n",
+ " 2.00 | \n",
+ " 1090 | \n",
+ " 1357 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1990 | \n",
+ " 0 | \n",
+ " 98028 | \n",
+ " 47.7526 | \n",
+ " -122.244 | \n",
+ " 1078 | \n",
+ " 1318 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8280 | \n",
+ " 8097000330 | \n",
+ " 20140721T000000 | \n",
+ " 359950.0 | \n",
+ " 3 | \n",
+ " 2.75 | \n",
+ " 2540 | \n",
+ " 8604 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1991 | \n",
+ " 0 | \n",
+ " 98092 | \n",
+ " 47.3209 | \n",
+ " -122.185 | \n",
+ " 2260 | \n",
+ " 7438 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 792 | \n",
+ " 8081020370 | \n",
+ " 20140709T000000 | \n",
+ " 1355000.0 | \n",
+ " 4 | \n",
+ " 3.50 | \n",
+ " 3550 | \n",
+ " 11000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 1290 | \n",
+ " 1999 | \n",
+ " 0 | \n",
+ " 98006 | \n",
+ " 47.5506 | \n",
+ " -122.134 | \n",
+ " 4100 | \n",
+ " 10012 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 10371 | \n",
+ " 7518507580 | \n",
+ " 20150502T000000 | \n",
+ " 581000.0 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 1170 | \n",
+ " 4080 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1909 | \n",
+ " 0 | \n",
+ " 98117 | \n",
+ " 47.6784 | \n",
+ " -122.386 | \n",
+ " 1560 | \n",
+ " 4586 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 16733 | \n",
+ " 7212650950 | \n",
+ " 20140708T000000 | \n",
+ " 336000.0 | \n",
+ " 4 | \n",
+ " 2.50 | \n",
+ " 2530 | \n",
+ " 8169 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1993 | \n",
+ " 0 | \n",
+ " 98003 | \n",
+ " 47.2634 | \n",
+ " -122.312 | \n",
+ " 2220 | \n",
+ " 8013 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 13151 | \n",
+ " 4365200620 | \n",
+ " 20150312T000000 | \n",
+ " 394000.0 | \n",
+ " 3 | \n",
+ " 1.00 | \n",
+ " 1450 | \n",
+ " 7930 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 300 | \n",
+ " 1923 | \n",
+ " 0 | \n",
+ " 98126 | \n",
+ " 47.5212 | \n",
+ " -122.371 | \n",
+ " 1040 | \n",
+ " 7740 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 11667 | \n",
+ " 4083304355 | \n",
+ " 20150318T000000 | \n",
+ " 675000.0 | \n",
+ " 4 | \n",
+ " 1.75 | \n",
+ " 1530 | \n",
+ " 3615 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1913 | \n",
+ " 0 | \n",
+ " 98103 | \n",
+ " 47.6529 | \n",
+ " -122.334 | \n",
+ " 1650 | \n",
+ " 4200 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3683 | \n",
+ " 2891100820 | \n",
+ " 20140825T000000 | \n",
+ " 213500.0 | \n",
+ " 3 | \n",
+ " 1.00 | \n",
+ " 1220 | \n",
+ " 6000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1968 | \n",
+ " 0 | \n",
+ " 98002 | \n",
+ " 47.3245 | \n",
+ " -122.209 | \n",
+ " 1420 | \n",
+ " 6000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12059 | \n",
+ " 952000640 | \n",
+ " 20141027T000000 | \n",
+ " 715000.0 | \n",
+ " 3 | \n",
+ " 1.50 | \n",
+ " 1670 | \n",
+ " 5060 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1925 | \n",
+ " 0 | \n",
+ " 98126 | \n",
+ " 47.5671 | \n",
+ " -122.379 | \n",
+ " 1670 | \n",
+ " 5118 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4323 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms \\\n",
+ "11592 2028701000 20140529T000000 635200.0 4 1.75 \n",
+ "8984 9406500530 20140912T000000 249000.0 2 2.00 \n",
+ "8280 8097000330 20140721T000000 359950.0 3 2.75 \n",
+ "792 8081020370 20140709T000000 1355000.0 4 3.50 \n",
+ "10371 7518507580 20150502T000000 581000.0 2 1.00 \n",
+ "... ... ... ... ... ... \n",
+ "16733 7212650950 20140708T000000 336000.0 4 2.50 \n",
+ "13151 4365200620 20150312T000000 394000.0 3 1.00 \n",
+ "11667 4083304355 20150318T000000 675000.0 4 1.75 \n",
+ "3683 2891100820 20140825T000000 213500.0 3 1.00 \n",
+ "12059 952000640 20141027T000000 715000.0 3 1.50 \n",
+ "\n",
+ " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n",
+ "11592 1640 4240 1.0 0 0 ... 720 \n",
+ "8984 1090 1357 2.0 0 0 ... 0 \n",
+ "8280 2540 8604 2.0 0 0 ... 0 \n",
+ "792 3550 11000 1.0 0 2 ... 1290 \n",
+ "10371 1170 4080 1.0 0 0 ... 0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "16733 2530 8169 2.0 0 0 ... 0 \n",
+ "13151 1450 7930 1.0 0 0 ... 300 \n",
+ "11667 1530 3615 1.5 0 0 ... 0 \n",
+ "3683 1220 6000 1.0 0 0 ... 0 \n",
+ "12059 1670 5060 2.0 0 2 ... 0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "11592 1921 0 98117 47.6766 -122.368 1300 \n",
+ "8984 1990 0 98028 47.7526 -122.244 1078 \n",
+ "8280 1991 0 98092 47.3209 -122.185 2260 \n",
+ "792 1999 0 98006 47.5506 -122.134 4100 \n",
+ "10371 1909 0 98117 47.6784 -122.386 1560 \n",
+ "... ... ... ... ... ... ... \n",
+ "16733 1993 0 98003 47.2634 -122.312 2220 \n",
+ "13151 1923 0 98126 47.5212 -122.371 1040 \n",
+ "11667 1913 0 98103 47.6529 -122.334 1650 \n",
+ "3683 1968 0 98002 47.3245 -122.209 1420 \n",
+ "12059 1925 0 98126 47.5671 -122.379 1670 \n",
+ "\n",
+ " sqft_lot15 above_median_price price_category \n",
+ "11592 4240 1 1 \n",
+ "8984 1318 0 0 \n",
+ "8280 7438 0 1 \n",
+ "792 10012 1 2 \n",
+ "10371 4586 1 1 \n",
+ "... ... ... ... \n",
+ "16733 8013 0 1 \n",
+ "13151 7740 0 1 \n",
+ "11667 4200 1 1 \n",
+ "3683 6000 0 0 \n",
+ "12059 5118 1 2 \n",
+ "\n",
+ "[4323 rows x 23 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'y_test'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " above_median_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 11592 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8984 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 8280 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 792 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10371 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 16733 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 13151 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 11667 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 3683 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 12059 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4323 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " above_median_price\n",
+ "11592 1\n",
+ "8984 0\n",
+ "8280 0\n",
+ "792 1\n",
+ "10371 1\n",
+ "... ...\n",
+ "16733 0\n",
+ "13151 0\n",
+ "11667 1\n",
+ "3683 0\n",
+ "12059 1\n",
+ "\n",
+ "[4323 rows x 1 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "id int64\n",
+ "date object\n",
+ "price float64\n",
+ "bedrooms int64\n",
+ "bathrooms float64\n",
+ "sqft_living int64\n",
+ "sqft_lot int64\n",
+ "floors float64\n",
+ "waterfront int64\n",
+ "view int64\n",
+ "condition int64\n",
+ "grade int64\n",
+ "sqft_above int64\n",
+ "sqft_basement int64\n",
+ "yr_built int64\n",
+ "yr_renovated int64\n",
+ "zipcode int64\n",
+ "lat float64\n",
+ "long float64\n",
+ "sqft_living15 int64\n",
+ "sqft_lot15 int64\n",
+ "above_median_price int64\n",
+ "price_category category\n",
+ "dtype: object\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAA0EAAAIjCAYAAADFthA8AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACGn0lEQVR4nO3de1yTdf8/8NcYZ0UOngamxjwWukQrQwqtTPOUnTy1UtOyG6gsu++7zOGiILO0M1BqYQmlWZnZbZn6TdcBz9IU8zzTFDyEHAQ5uF2/P/xtbTJg4OBi1/V6Ph48ZNf13va+2MDrvc/nen8UgiAIICIiIiIikgkvsRMgIiIiIiJqTiyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCKSsQ8++ADDhw9Hx44d4ePjA5VKhcGDB+PTTz+FxWIROz0iIiKiJqEQBEEQOwkiEkdMTAzCw8Nxxx13oE2bNigqKsKWLVuwfPlyTJgwAZ9//rnYKRIRERG5HYsgIhmrrq6Gj49Pje1PPfUU3n//fZhMJlx77bXNnxgRERFRE+J0OCIZc1YAAbAVPl5e//yJWL16NUaNGoWIiAj4+fmhW7dueOWVV2A2mx3uO2TIECgUCttXu3btMGrUKOzdu9chTqFQ4KWXXnLY9sYbb0ChUGDIkCEO2ysqKvDSSy+hZ8+e8Pf3R3h4OO6//34cOXIEAHDs2DEoFAosXbrU4X6JiYlQKBSYOnWqbdvSpUuhUCjg6+uLs2fPOsTn5OTY8t6xY4fDvpUrV2LAgAEICAhAu3bt8PDDD+PkyZM1fnb79+/H+PHj0b59ewQEBKBXr16YM2cOAOCll15y+Nk4+9q0aZPt59inT58aj++K2u67YMECKBQKHDt2zGF7UVERnnnmGXTu3Bl+fn7o3r075s+f7zAl0vozXrBgQY3H7dOnj8NrtmnTJigUCnz55Ze15jh16lSXC+z09HRERUXBz88PERERSExMRFFRkcPx1vdzrcuQIUNqvOdSU1Ph5eWFzz77zGG7q+8DALXmYv/zd/X3wPreudK1117r8P4GXHs9AcBiseCdd95B37594e/vj/bt2+Puu++2vffr+5la87O+3tYvPz8/9OzZE/PmzYP956x//vknEhIS0KtXLwQEBKBt27YYN25cjfdjberLt76cr3y/LViwAIMGDULbtm0REBCAAQMG1Pqetf7dqO1n0JCffUN+l4io6XiLnQARia+oqAiXLl1CaWkpdu7ciQULFmDixIno0qWLLWbp0qVo3bo1Zs2ahdatW+P//u//MHfuXJSUlOCNN95weLzevXtjzpw5EAQBR44cwZtvvomRI0fi+PHjdeYwb968GtvNZjNGjx6NjRs3YuLEiZg5cyZKS0uxfv167N27F926dXP6eIcPH8bixYtrfT6lUomsrCw8++yztm2ZmZnw9/dHRUWFQ+zSpUvx6KOP4qabbsK8efNw+vRpvPPOO/j111+xe/duhISEAACMRiNuu+02+Pj4YMaMGbj22mtx5MgRrFmzBqmpqbj//vvRvXt32+M+++yzuO666zBjxgzbtuuuu67WnJtCeXk5Bg8ejJMnT+KJJ55Aly5d8Ntvv2H27NnIz8/H22+/3az5XOmll15CcnIyhg4divj4eBw4cAAZGRnYvn07fv31V/j4+GDOnDl47LHHAADnzp3Ds88+ixkzZuC2225r1HNmZmZCp9Nh4cKFeOihh2zbXX0f2Lvvvvtw//33AwB+/vlnLFq0qM7nru33wFUNeT2nT5+OpUuXYsSIEXjsscdw6dIl/Pzzz9iyZQtuvPFGLFu2zBZrzf2tt95Cu3btAAAdO3Z0eO4XX3wR1113HS5evIgVK1bgxRdfRIcOHTB9+nQAwPbt2/Hbb79h4sSJuOaaa3Ds2DFkZGRgyJAh2LdvHwIDA+s8tvrytbrrrrswefJkh/suXLgQ58+fd9j2zjvv4J577oFWq0VVVRWWL1+OcePG4bvvvsOoUaOc5mB//KmpqY3+2RNRCyAQkez16tVLAGD7mjx5slBdXe0QU15eXuN+TzzxhBAYGChUVFTYtg0ePFgYPHiwQ9yLL74oABDOnDlj2wZA0Ov1ttv//e9/hQ4dOggDBgxwuP/HH38sABDefPPNGs9vsVgEQRAEk8kkABAyMzNt+8aPHy/06dNH6Ny5szBlyhTb9szMTAGAMGnSJKFv37627WVlZUKbNm2Ehx56SAAgbN++XRAEQaiqqhI6dOgg9OnTR7h48aIt/rvvvhMACHPnzrVti4uLE4KCgoQ///zTaZ5X6tq1q0Nu9gYPHixERUU53Vef2u77xhtvCAAEk8lk2/bKK68IrVq1Eg4ePOgQ+8ILLwhKpVI4fvy4IAj//IzfeOONGo8bFRXl8Jr99NNPAgBh5cqVteY4ZcoUoWvXrnUex5kzZwRfX19h2LBhgtlstm1///33BQDCxx9/XOM+zt4L9bF/z/7vf/8TvL29heeee84hpiHvA0EQhOrqagGAkJycbNtmfe/Z//xd/T1ITk4WANR4L135HnL19fy///s/AYDw9NNP1/h5OHu/Osvdyvp6//TTT7ZtFRUVgpeXl5CQkGDb5uxvSE5OjgBA+PTTT2vss+dqvgCExMTEGjGjRo2q8X67Mp+qqiqhT58+wh133FHj/osXLxYAOPxuX/m3ril+l4io6UhmOpzBYMCYMWMQEREBhUKBb775psGPIQgCFixYgJ49e8LPzw+dOnWq8UkPkRRlZmZi/fr1yM7OxvTp05Gdne0wOgEAAQEBtu9LS0tx7tw53HbbbSgvL8f+/fsdYqurq3Hu3DmcPXsWOTk5WLVqFTQaje0T1CudPHkS7733HpKSktC6dWuHfV999RXatWuHp556qsb9apvmtHPnTqxcuRLz5s1zmNJn75FHHsH+/fttU2m++uorBAcH484773SI27FjB86cOYOEhAT4+/vbto8aNQq9e/fG//73PwDA2bNnYTAYMG3aNIcRtLryrI/ZbMa5c+dw7tw5VFVVNeox6rNy5UrcdtttCA0NtT3XuXPnMHToUJjNZhgMBof48vJyh7hz587VmBJpZX2f2E9da4gNGzagqqoKzzzzjMPr+Pjjj6NNmza2n727bNu2DePHj8cDDzxQY3TT1feBlfX18vPzc/n56/o96NChAwDgr7/+qvMxXH09v/rqKygUCuj1+hqP0dj3a3FxMc6dO4fjx4/j9ddfh8ViwR133GHbb/83pLq6Gn///Te6d++OkJAQ7Nq1q87Hbop87fM5f/48iouLcdtttznNxZXXsyl/l4jI/SQzHa6srAw33HADpk2bZpt60FAzZ87Ejz/+iAULFqBv374oLCxEYWGhmzMlanliYmJs3z/00ENQq9WYM2cOpk+fjtjYWABAXl4edDod/u///g8lJSUO9y8uLna4/dtvv6F9+/a22z169MA333xT68mKXq9HREQEnnjiiRpz8o8cOYJevXrB29v1P1cvvPACbrvtNowePRpPPvmk05j27dtj1KhR+Pjjj3HjjTfi448/xpQpU2oUTX/++ScAoFevXjUeo3fv3vjll18AAEePHgWARl/H48z+/fttP0cvLy90794der3eYYrW1Tp06BCMRqPD62XvzJkzDrf1er3TE9Erp0YBwLRp02zft27dGmPGjMFbb73lNNaZ2n72vr6+UKvVtv3ucPLkSYwaNQplZWX4+++/a7xXXX0fWFkLvyuLmbrU9XsQExMDhUKB2bNnIyUlxfa4V17n4+rreeTIEURERCAsLMzl/Opz77332r738vKCTqfDAw88YNt28eJFzJs3D5mZmTh58qTD9UJX/g25UlPk+9133yElJQW5ubmorKy0bXf2d8qV17Mpf5eIyP0kUwSNGDECI0aMqHV/ZWUl5syZg88//xxFRUXo06cP5s+fb7sA8Y8//kBGRgb27t1r+08uMjKyOVInanEefPBBzJkzB1u3bkVsbCyKioowePBgtGnTBi+//DK6desGf39/7Nq1C88//3yNEzGNRoOFCxcCuDxC8u6772LIkCHYtWsXVCqVQ+wff/yBpUuXIisrq9ZGDQ3x448/YsOGDcjJyak3dtq0aZg8eTKeeuopGAwGLFmyBD///PNV5+Au1157re26pr///hvvvvsuHnnkEajVatxyyy1ueQ6LxYK77roL//3vf53u79mzp8PtGTNmYNy4cQ7bHn/8caf3nTt3Lm677TZUV1dj586dePnll1FUVIS1a9e6JXd3Onz4MPr374+33noLjzzyCD755BNMmTKl0Y9XUFAAADXe77Wp7/fghhtugF6vR3JyMrKzs2t9nIa+nu60YMEC3HDDDaiursb27duRkpICb29v24n+U089hczMTDzzzDOIiYlBcHAwFAoFJk6c2Ozrkv3888+45557EBcXh/T0dISHh8PHxweZmZk1mmEAl1/P1q1bo1WrVrU+ZlP+LhGR+0mmCKrPk08+iX379mH58uWIiIjAqlWrcPfdd2PPnj3o0aMH1qxZA7Vaje+++w533303BEHA0KFD8frrr7v1kyciT3Dx4kUAl5sHAJe7P/3999/4+uuvERcXZ4szmUxO7x8aGoqhQ4fabg8ZMgQRERHIzMzE7NmzHWJnz56Nfv36YcKECU4fq1u3bti6dWut7bztCYKAF154Affdd59LRcKIESPg7++PiRMn4tZbb0W3bt1qFEFdu3YFABw4cMBhao91m3W/Wq0GgBpd8K5Gq1atHH6Ot912Gzp16oQff/zRbUVQt27dcOHCBYfnqUuPHj1qxNZ2Yti3b19b7IgRI3D8+HF88sknuHTpkkvPZf+zt/58gctTk0wmk8s5uyI8PBxr165Fx44dsXr1ajz33HMYOXKk7VN9V98HVvv27QPgeqOL+n4PgMsjBzNmzMD+/ftt06YefvhhhxhXX89u3bph3bp1KCwsdNv/cQMGDLB9sDhixAicPHkS8+fPR1JSEry8vPDll19iypQptg9IgMudH12ZLunufL/66iv4+/tj3bp1DlPcMjMzncbv27ev3teyKX+XiMj9JHNNUF2OHz+OzMxM23zdbt264d///jduvfVW2x+8o0eP4s8//8TKlSvx6aefYunSpdi5cycefPBBkbMnajq1fSK/ePFiKBQK28metRiyn75SVVWF9PR0l57HWlTZTzkBLrekXr16NV577bVap8o98MADOHfuHN5///0a+4Qrljlbvnw5jEajy921vL29MXnyZBiNRoepW/ZuvPFGdOjQAR988IFD/t9//z3++OMPWxep9u3bIy4uDh9//HGNLnhX5tlY1k/Lra+HO4wfPx45OTlYt25djX3WroHuYrFY4OXl5fI1HEOHDoWvry/effddh5/hRx99hOLi4lo7eDVGz549bdOQ3nvvPVgsFsycOdO239X3gdWKFSsQHh7uUhHkyu+BVXh4OG6//XYMHToUQ4cOdbg+CXD99XzggQcgCAKSk5NrxLnr/Xrx4kVcunTJ9pxKpbLGY7/33nsuXQfj7nyVSiUUCoXDcx87dszp9cQnTpzAr7/+WqP4vVJz/i4R0dWTxUjQnj17YDabawxFV1ZWom3btgAu/+dcWVmJTz/91Bb30UcfYcCAAThw4IDTeeBEnu6hhx5C7969cd9996Fjx444e/Ysvv/+e/z000+YM2cO+vbtCwAYNGgQQkNDMWXKFDz99NNQKBRYtmxZrScfp0+fRlZWFoDLLYs//PBDeHt7Y/To0Q5xP/74I+666646PzmdPHkyPv30U8yaNQvbtm3DbbfdhrKyMmzYsAEJCQkYO3asw+M9/vjjDfp9feWVV/Cf//wHoaGhTvf7+Phg/vz5ePTRRzF48GBMmjTJ1hr52muvdWix/e677+LWW29F//79MWPGDERGRuLYsWP43//+h9zcXJdzsrpw4QJ++OEHAEBhYSHeffdd+Pj4uHTyb39fqwMHDgAANm/eDB8fH3Tq1An/+c9/8O2332L06NGYOnUqBgwYgLKyMuzZswdffvkljh07VmtDi/rk5uaidevWuHTpEnbu3IlPP/0UY8eOdbmIa9++PWbPno3k5GTcfffduOeee3DgwAGkp6fjpptuqjEK4i4qlQpvvPEGHnvsMTz88MMYOXKky++DHTt2ICkpCT/88AM++OADlwo+V34PXOXq63n77bfjkUcewbvvvotDhw7h7rvvhsViwc8//4zbb7+91mvp6rJ+/Xr89ddftulw2dnZuOeee+Dr6wsAGD16NJYtW4bg4GBcf/31yMnJwYYNG2z/D9fF3fmOGjUKb775Ju6++2489NBDOHPmDNLS0tC9e3cYjUZbXEZGBubNm4fAwEA8/fTTdT5mU/4uEVETEKMlXVMDIKxatcp2e/ny5YJSqRT2798vHDp0yOErPz9fEARBmDt3ruDt7e3wOOXl5QIA4ccff2zO9ImaTUZGhjBy5EghIiJC8Pb2FkJCQoThw4cLa9eurRH766+/CrfccosQEBAgRERECP/973+FdevW1WiNO3jwYId22yEhIUJsbGyNxwQgKBQKYefOnQ7bnbXYLi8vF+bMmSNERkYKPj4+gkqlEh588EHhyJEjgiD803I2ICBAOHnypMN9r2whbG31a22BfaXa9q9YsUKIjo4W/Pz8hLCwMEGr1Qp//fVXjfvv3btXuO+++4SQkBDB399f6NWrl5CUlOT0ueprke3s5/j99987ja/rvs6+7FtIl5aWCrNnzxa6d+8u+Pr6Cu3atRMGDRokLFiwQKiqqhIEoXEtsq1f3t7eQteuXYWnn35aOH/+vCAIrrXItnr//feF3r17Cz4+PkLHjh2F+Ph42+Nc6WpbZNu74447hC5dugilpaW2bfW9D+bPny/cdNNNQnZ2do3Hq61Ftqu/B844ew+58noKgiBcunRJeOONN4TevXsLvr6+Qvv27YURI0bUyKW23K1ceb0FQRDOnz8vPProo0K7du2E1q1bC8OHDxf2799f5++BPVfyRQNaZH/00UdCjx49BD8/P6F3795CZmamoNfrBftTo5tvvlkYN26csH///hqP6ew1cvfvEhE1HYUguGncuwVRKBRYtWqVrVPNwYMH0atXLxgMhloXz/vxxx8xfPhwHD582Lb44u+//45+/frhwIEDTXoxKRFRc7r22mvx0ksvYerUqWKnQkREJArJTIe7cOECDh8+bLttMpmQm5uLsLAw9OzZE1qtFpMnT8bChQsRHR2Ns2fPYuPGjdBoNBg1ahSGDh2K/v37Y9q0aXj77bdhsViQmJiIu+66iwUQEREREZGESKYxwo4dOxAdHY3o6GgAwKxZsxAdHY25c+cCuNzxZfLkyXjuuefQq1cv3Hvvvdi+fbttUUMvLy+sWbMG7dq1Q1xcHEaNGoXrrrsOy5cvF+2YiIiawuDBg9GpUyex0yAiIhKNJKfDERERERER1UYyI0FERERERESuYBFERERERESy4tGNESwWC06dOoWgoCCXF98jIiIiIiLpEQQBpaWliIiIgJdX3WM9ohZBZrMZL730ErKyslBQUICIiAhMnToVOp3OpaLm1KlT6Ny5czNkSkREREREnuDEiRO45ppr6owRtQiaP38+MjIy8MknnyAqKgo7duzAo48+iuDg4HpXZgaAoKAgAJcPtE2bNk2dLhERERERtVAlJSXo3LmzrUaoi6hF0G+//YaxY8di1KhRAC4v4Pf5559j27ZtLt3fOlrUpk0bFkFEREREROTSjDJRGyMMGjQIGzduxMGDBwEAv//+O3755ReMGDHCaXxlZSVKSkocvoiIiIiIiBpC1JGgF154ASUlJejduzeUSiXMZjNSU1Oh1Wqdxs+bNw/JycnNnCUREREREUmJqCNBX3zxBbKzs/HZZ59h165d+OSTT7BgwQJ88sknTuNnz56N4uJi29eJEyeaOWMiIiIiIvJ0CkEQBLGevHPnznjhhReQmJho25aSkoKsrCzs37+/3vuXlJQgODgYxcXFvCaIiIiIiEjGGlIbiDoSVF5eXqOHt1KphMViESkjIiIiIiKSOlGvCRozZgxSU1PRpUsXREVFYffu3XjzzTcxbdo0MdMiIiIiIiIJE3U6XGlpKZKSkrBq1SqcOXMGERERmDRpEubOnQtfX99678/pcEREREREBDSsNhC1CLpaLIKIiIiIiAjwoGuCiIiIiIiImhuLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkayIuk4QERHR1TKbzTAajSgsLERYWBg0Gg2USqXYaRERUQvGIoiIiDyWwWBAeno6CgoKbNtUKhUSEhIQFxcnYmZERNSScTocERF5JIPBAL1eD7VajbS0NKxduxZpaWlQq9XQ6/UwGAxip0hERC0UF0slIiKPYzabodVqoVarkZKSAi+vfz7Ts1gs0Ol0MJlMyMrK4tQ4IiKZ4GKpREQkaUajEQUFBdBqtQ4FEAB4eXlBq9UiPz8fRqNRpAyJiKglYxFEREQep7CwEAAQGRnpdL91uzWOiIjIHosgIiLyOGFhYQAAk8nkdL91uzWOiIjIHosgIiLyOBqNBiqVCtnZ2bBYLA77LBYLsrOzER4eDo1GI1KGRETUkrEIIiIij6NUKpGQkICcnBzodDrk5eWhvLwceXl50Ol0yMnJQXx8PJsiEBGRU+wOR0REHsvZOkHh4eGIj4/nOkFERDLTkNqARRAREXk0s9kMo9GIwsJChIWFQaPRcASIiEiGGlIbeDdTTkRERE1CqVQiOjpa7DSIiMiD8JogIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCIiIiIiIllhEURERERERLLCIoiIiIiIiGSFRRAREREREckKiyAiIiIiIpIVFkFERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkxVvsBIiIyL3MZjOMRiMKCwsRFhYGjUYDpVIpdlpEREQtBosgIiIJMRgMSE9PR0FBgW2bSqVCQkIC4uLiRMyMiIio5eB0OCIiiTAYDNDr9VCr1UhLS8PatWuRlpYGtVoNvV4Pg8EgdopEREQtgkIQBEHsJBqrpKQEwcHBKC4uRps2bcROh4hINGazGVqtFmq1GikpKfDy+uczLovFAp1OB5PJhKysLE6NIyIiSWpIbSDqSNC1114LhUJR4ysxMVHMtIiIPI7RaERBQQG0Wq1DAQQAXl5e0Gq1yM/Ph9FoFClDIiKilkPUa4K2b98Os9lsu713717cddddGDdunIhZERF5nsLCQgBAZGSk0/3W7dY4IiIiORN1JKh9+/ZQqVS2r++++w7dunXD4MGDxUyLiMjjhIWFAQBMJpPT/dbt1jgiIiI5azGNEaqqqpCVlYVp06ZBoVA4jamsrERJSYnDFxERARqNBiqVCtnZ2bBYLA77LBYLsrOzER4eDo1GI1KGRERELUeLKYK++eYbFBUVYerUqbXGzJs3D8HBwbavzp07N1+CREQtmFKpREJCAnJycqDT6ZCXl4fy8nLk5eVBp9MhJycH8fHxbIpARESEFtQdbvjw4fD19cWaNWtqjamsrERlZaXtdklJCTp37szucERE/5+zdYLCw8MRHx/PdYKIiEjSGtIdrkUslvrnn39iw4YN+Prrr+uM8/Pzg5+fXzNlRUTkeeLi4hAbGwuj0YjCwkKEhYVBo9FIegTIbDbL6niJiOjqtYgiKDMzEx06dMCoUaPEToWIyOMplUpER0eLnUazcDbypVKpkJCQwJEvIiKqlejXBFksFmRmZmLKlCnw9m4RNRkREXkAg8EAvV4PtVqNtLQ0rF27FmlpaVCr1dDr9TAYDGKnSERELZTo1wT9+OOPGD58OA4cOICePXs26L4NmfdHRETSYTabodVqoVarkZKS4rBArMVigU6ng8lkQlZWFqfGERHJRENqA9FHgoYNGwZBEBpcABERkXwZjUYUFBRAq9U6FEAA4OXlBa1Wi/z8fBiNRpEyJCKilkz0IoiIiKihCgsLAQCRkZFO91u3W+OIiIjssQgiIiKPExYWBgAwmUxO91u3W+OIiIjssQgiIiKPo9FooFKpkJ2dDYvF4rDPYrEgOzsb4eHh0Gg0ImVIREQtGYsgIiLyOEqlEgkJCcjJyYFOp0NeXh7Ky8uRl5cHnU6HnJwcxMfHsykCERE5JXp3uKvB7nBERPLmbJ2g8PBwxMfHc50gIiKZaUhtwCKIiIg8mtlshtFoRGFhIcLCwqDRaDgCREQkQw2pDbg6KREReTSlUono6Gix0yAiIg/Ca4KIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCIiIiIiIllhEURERERERLLCIoiIiIiIiGSFRRAREREREckKiyAiIiIiIpIVFkFERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZEX0IujkyZN4+OGH0bZtWwQEBKBv377YsWOH2GkREREREZFEeYv55OfPn0dsbCxuv/12fP/992jfvj0OHTqE0NBQMdMiIiIiIiIJE7UImj9/Pjp37ozMzEzbtsjISBEzIiIiIiIiqRN1Oty3336LG2+8EePGjUOHDh0QHR2NxYsX1xpfWVmJkpIShy8iIiIiIqKGELUIOnr0KDIyMtCjRw+sW7cO8fHxePrpp/HJJ584jZ83bx6Cg4NtX507d27mjImIiIiIyNMpBEEQxHpyX19f3Hjjjfjtt99s255++mls374dOTk5NeIrKytRWVlpu11SUoLOnTujuLgYbdq0aZaciYiIiIio5SkpKUFwcLBLtYGoI0Hh4eG4/vrrHbZdd911OH78uNN4Pz8/tGnTxuGLiIiIiIioIUQtgmJjY3HgwAGHbQcPHkTXrl1FyoiIiIiIiKRO1CLo2WefxZYtW/Dqq6/i8OHD+Oyzz7Bo0SIkJiaKmRYREREREUmYqEXQTTfdhFWrVuHzzz9Hnz598Morr+Dtt9+GVqsVMy0iIiIiIpIwURsjXK2GXPxERERERETS5TGNEYiIiIiIiJobiyAiIiIiIpIVFkFERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVrzFToCIiNzLbDbDaDSisLAQYWFh0Gg0UCqVYqdFRETUYrAIIiKSEIPBgPT0dBQUFNi2qVQqJCQkIC4uTsTMiIiIWg5OhyMikgiDwQC9Xg+1Wo20tDSsXbsWaWlpUKvV0Ov1MBgMYqdIRETUIigEQRDETqKxSkpKEBwcjOLiYrRp00bsdIiIRGM2m6HVaqFWq5GSkgIvr38+47JYLNDpdDCZTMjKyuLUOCIikqSG1AYcCSIikgCj0YiCggJotVqHAggAvLy8oNVqkZ+fD6PRKFKGRERELQeLICIiCSgsLAQAREZGOt1v3W6NIyIikjMWQUREEhAWFgYAMJlMTvdbt1vjiIiI5IxFEBGRBGg0GqhUKmRnZ8NisTjss1gsyM7ORnh4ODQajUgZEhERtRwsgoiIJECpVCIhIQE5OTnQ6XTIy8tDeXk58vLyoNPpkJOTg/j4eDZFICIiArvDERFJirN1gsLDwxEfH891goiISNIaUhuwCCIikhiz2Qyj0YjCwkKEhYVBo9FwBIiIiCSvIbWBdzPlREREzUSpVCI6OlrsNIiIiFosFkFERBLDkSAiIqK6sQgiIpIQZ9cEqVQqJCQk8JogIiKi/4/d4YiIJMJgMECv10OtViMtLQ1r165FWloa1Go19Ho9DAaD2CkSERG1CGyMQEQkAWazGVqtFmq1GikpKfDy+uczLovFAp1OB5PJhKysLMlNjeP0PyIiAtgYgYhIdoxGIwoKCpCUlORQAAGAl5cXtFotEhMTYTQaJdU0gdP/iIioMTgdjohIAgoLCwEAkZGRTvdbt1vjpIDT/4iIqLFYBBERSUBYWBgAwGQyOd1v3W6N83Rmsxnp6emIiYlBSkoKoqKiEBgYiKioKKSkpCAmJgYZGRkwm81ip0pERC0QiyAiIgnQaDRQqVTIzs6GxWJx2GexWJCdnY3w8HBoNBqRMnQv6/Q/rVZb6/S//Px8GI1GkTIkIqKWjEUQEZEEKJVKJCQkICcnBzqdDnl5eSgvL0deXh50Oh1ycnIQHx8vmYYBcpz+R0RE7sPGCEREEhEXF4fk5GSkp6cjMTHRtj08PBzJycmSahRgP/0vKiqqxn6pTf8jIiL3YhFERCQhcXFxiI2NlXzLaPvpf85agktt+h8REbkXp8MREUmMUqlEdHQ07rzzTkRHR0uuAALkN/2PiIjci4ulEhGRx3K2TlB4eDji4+MlNf2PiIjq15DagEUQEZHEmM1myU+Hsye34yUiIucaUhvwmiAiIglxNjKiUqmQkJAg2ZER6/Q/IiIiV/GaICIiiTAYDNDr9VCr1UhLS8PatWuRlpYGtVoNvV4Pg8EgdopEREQtAqfDERFJgNlshlarhVqtdtotTafTwWQyISsri1PFiIhIkhpSG3AkiIhIAoxGIwoKCqDVah0KIADw8vKCVqtFfn4+jEajSBkSERG1HCyCiIgkoLCwEAAQGRnpdL91uzWOiIhIzlgEERFJQFhYGADAZDI53W/dbo0jIiKSMxZBREQSoNFooFKpkJ2dDYvF4rDPYrEgOzsb4eHh0Gg0ImVIRETUcohaBL300ktQKBQOX7179xYzJSIij6RUKpGQkICcnBzodDrk5eWhvLwceXl50Ol0yMnJQXx8PJsiEBERoQWsExQVFYUNGzbYbnt7i54SEZFHiouLQ3JyMtLT05GYmGjbHh4ejuTkZMmuE0RERNRQolcc3t7eUKlULsVWVlaisrLSdrukpKSp0iIi8khxcXGIjY2F0WhEYWEhwsLCoNFoOAJERERkR/Qi6NChQ4iIiIC/vz9iYmIwb948dOnSxWnsvHnzkJyc3MwZEhF5FqVSiejoaLHTICIiarFEXSz1+++/x4ULF9CrVy/k5+cjOTkZJ0+exN69exEUFFQj3tlIUOfOnblYKhERERGRzDVksVRRi6ArFRUVoWvXrnjzzTcxffr0euMbcqBERERERCRdDakNWlSL7JCQEPTs2ROHDx8WOxUiIiIiIpKoFlUEXbhwAUeOHEF4eLjYqRARERERkUSJWgT9+9//xubNm3Hs2DH89ttvuO+++6BUKjFp0iQx0yIiIiIiIgkTtTvcX3/9hUmTJuHvv/9G+/btceutt2LLli1o3769mGkREREREZGEiVoELV++XMynJyIiIiIiGWpR1wQRERERERE1NRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREsiJqi2wiInI/s9kMo9GIwsJChIWFQaPRQKlUip0WERFRi8EiiIhIQgwGA9LT01FQUGDbplKpkJCQgLi4OBEzIyIiajk4HY6ISCIMBgP0ej3UajXS0tKwdu1apKWlQa1WQ6/Xw2AwiJ0iERFRi6AQBEEQO4nGKikpQXBwMIqLi9GmTRux0yEiEo3ZbIZWq4VarUZKSgq8vP75jMtisUCn08FkMiErK4tT44iISJIaUhtwJIiISAKMRiMKCgqg1WodCiAA8PLyglarRX5+PoxGo0gZEhERtRwsgoiIJKCwsBAAEBkZ6XS/dbs1joiISM5YBBERSUBYWBgAwGQyOd1v3W6NIyIikjMWQUREEqDRaKBSqZCdnQ2LxeKwz2KxIDs7G+Hh4dBoNCJlSERE1HKwCCIikgClUomEhATk5ORAp9MhLy8P5eXlyMvLg06nQ05ODuLj49kUgYiICOwOR0QkKc7WCQoPD0d8fDzXCSIiIklrSG3AxVKJSPLMZjOMRiMKCwsRFhYGjUYj6RGRKz/bunJ6HBERkdyxCCIiSXM2MqJSqZCQkCC5kRHrYqkxMTGYO3cuIiMjYTKZkJ2dDb1ej+TkZMkdMxERUWNwOhwRSZZ9UaDVah2KgpycHEkVBVwslYiI5I6LpRKR7JnNZqSnpyMmJgYpKSmIiopCYGAgoqKikJKSgpiYGGRkZMBsNoudqltwsVQiIiLXsQgiIkmSW1HAxVKJiIhcxyKIiCRJbkUBF0slIiJyHYsgIpIkuRUFXCyViIjIdSyCiEiS5FYUcLFUIiIi17E7HBFJlpy6w1lxsVQiIpKrhtQGLIKISNLkWBTIbXFYIiIigEUQEZEDFgVERETS15DawLuZciIiEo1SqUR0dLTYaRAREVELwcYIREREREQkKyyCiIiIiIhIVlgEERERERGRrPCaICIiiWEjCCIiorqxCCIikhBnLcFVKhUSEhIk2xKciIiooTgdjohIIqyLw6rVaqSlpWHt2rVIS0uDWq2GXq+HwWAQO0UiIqIWgesEERFJgNlshlarhVqtRkpKCry8/vmMy2KxQKfTwWQyISsri1PjiIhIkhpSG3AkiIhIAoxGIwoKCqDVah0KIADw8vKCVqtFfn4+jEajSBkSERG1HCyCiIgkoLCwEAAQGRnpdL91uzWOiIhIzlgEERFJQFhYGADAZDI53W/dbo0jIiKSMxZBREQSoNFooFKpkJ2dDYvF4rDPYrEgOzsb4eHh0Gg0ImVIRETUcrAIIiKSAKVSiYSEBOTk5ECn0yEvLw/l5eXIy8uDTqdDTk4O4uPj2RSBiIgIV9kdbseOHfjiiy9w/PhxVFVVOez7+uuvrzq5+rA7HBGRI2frBIWHhyM+Pp7rBBERkaQ1pDZo9GKpy5cvx+TJkzF8+HD8+OOPGDZsGA4ePIjTp0/jvvvua+zDEhHRVYiLi0NsbCyMRiMKCwsRFhYGjUbDESAiIiI7jS6CXn31Vbz11ltITExEUFAQ3nnnHURGRuKJJ55AeHi4O3MkIqIGUCqViI6OFjsNIiKiFqvR1wQdOXIEo0aNAgD4+vqirKwMCoUCzz77LBYtWuS2BImIrpbZbMbu3buxceNG7N69G2azWeyUiIiISESNLoJCQ0NRWloKAOjUqRP27t0LACgqKkJ5eXmDH++1116DQqHAM88809iUiIhqMBgM0Gq1ePbZZ/HKK6/g2WefhVarhcFgEDs1IiIiEkmji6C4uDisX78eADBu3DjMnDkTjz/+OCZNmoQ777yzQY+1fft2fPjhh2zdSkRuZTAYoNfroVarkZaWhrVr1yItLQ1qtRp6vZ6FEBERkUw1ujtcYWEhKioqEBERAYvFgtdffx2//fYbevToAZ1Oh9DQUJce58KFC+jfvz/S09ORkpKCfv364e2333bpvuwOR0S1MZvN0Gq1UKvVSElJgZfXP5/5WCwW6HQ6mEwmZGVlsWkAERGRBDSkNmj0SFBYWBgiIiIuP4iXF1544QV8++23WLhwocsFEAAkJiZi1KhRGDp0aL2xlZWVKCkpcfgiInLGaDSioKAAWq3WoQACLv/N0mq1yM/Ph9FoFClDIiIiEkuju8PVV4C4MjKzfPly7Nq1C9u3b3fpOefNm4fk5GSXYolI3goLCwEAkZGRTvdbt1vjiIiISD4aXQSFhIRAoVDU2C4IAhQKRb3dl06cOIGZM2di/fr18Pf3d+k5Z8+ejVmzZtlul5SUoHPnzg1LnIhkISwsDABgMpkQFRVVY7/JZHKIIyIiIvlodBH0008/Abhc9IwcORJLlixBp06dXL7/zp07cebMGfTv39+2zWw2w2Aw4P3330dlZWWNefp+fn7w8/NrbMpEJCMajQYqlQrZ2dlOrwnKzs5GeHg4G7IQERHJUKMbI9gLCgrC77//DrVa7fJ9SktL8eeffzpse/TRR9G7d288//zz6NOnT72PwcYIRFQXa3e4W265BTfffDP8/PxQWVmJbdu2YcuWLUhOTkZcXJzYaRIREZEbNKQ2aPRI0NUKCgqqUei0atUKbdu2dakAIiKqT1xcHCZMmICVK1ciJyfHtl2pVGLChAksgIiIiGTKbUWQs+uDiIjEZDAYsGLFCqcjQStWrMD111/PQoiIiEiGGj0dLjo62lb4GI1G9O7dG76+vrb9u3btck+GdeB0OKLGM5vNMBqNKCwsRFhYGDQajaTWy5HzOkFSf22JiIicaZbpcPfee6/t+7Fjxzb2YYhIBAaDAenp6SgoKLBtU6lUSEhIkMzIiHWdoKSkpFrXCUpMTITRaER0dLRIWbqfHF5bIiKiq9XoIkiv17szDyJqJtZmATExMUhKSkJkZCRMJhOys7Oh1+sl0yxAjusEyeW1JSIiulpe9YfUrqioCEuWLMHs2bNtJxK7du3CyZMn3ZIcEbmX2WxGeno6YmJikJKSgqioKAQGBiIqKgopKSmIiYlBRkZGvet8eQL7dYKckdo6QXJ6bYmIiK5Wo4sgo9GInj17Yv78+ViwYAGKiooAAF9//TVmz57trvyIyI2sU8S0Wm2tU8Ty8/NhNBpFytB97NcJqq6uxu7du7Fx40bs3r0b1dXVklsnSE6vLRER0dVq9HS4WbNmYerUqXj99dcRFBRk2z5y5Eg89NBDbkmOiNxLTlPElEolEhISMHfuXIwePRqVlZW2fdYucS+//LJkGgbI6bUlIiK6Wo0eCdq+fTueeOKJGts7derkcEEuEbUccpsiBtTevl9qbf3l+NoSERE1VqOLID8/P5SUlNTYfvDgQbRv3/6qkiKipmE/RcxisTjss1gskpoiZn+NzHfffYe33noLSUlJeOutt/Ddd99J7hoZOb22REREV6vRRdA999yDl19+GdXV1QAuf6p6/PhxPP/883jggQfcliARuY91ilhOTg50Oh3y8vJQXl6OvLw86HQ65OTkID4+XhJTxOR2jYz9aztnzhysWrUKa9euxapVqzBnzhxJvbZXMpvNDtd8SaWwJSKiptPoa4IWLlyIBx98EB06dMDFixcxePBgFBQUICYmBqmpqe7MkYjcKC4uDsnJyUhPT0diYqJte3h4uKRaKFuvfTl16hReeeWVGuvmTJ8+3SFOCuLi4jBhwgR88cUXyMnJsW338vLChAkTJPPa2uO6SERE1BgKQRCEq3mAX375BUajERcuXED//v1xyy23YNeuXQCA1q1bo3///m5J1JmGrApLRI7MZjOMRiMKCwsRFhYGjUYjqVGC3bt349lnnwUADBo0CFqt1mHdnN9++w0A8NZbb0lmsVSDwYC5c+faGj9Y2TeCkFJhYL8u0pWvb05OjqSKeiIiql9DaoMGF0HOrgOyt2fPHsTFxaFLly6IiorCd99915CHbxAWQUSNJ/UiqKqqCiNGjECbNm2wcuVKeHv/M/B96dIljBs3DiUlJfj+++/h6+srYqbuYTab8cADD6CoqAgxMTF4+OGHbUVBVlYWcnJyEBISgq+++koSr7PZbIZWq4VarUZKSorDlEeLxQKdTmc7dikcLxER1a8htUGDp8OFhITU2VVJEAQoFIpaOxQRkfjkMIUoLy8PZrMZRUVFmDt3bo2RgqKiIgiCgLy8PEmMBOXm5qKoqAh9+/bFyy+/jL179yInJwdhYWF4+eWXMWvWLOzZswe5ubkYMGCA2OleNes1X0lJSbVe85WYmAij0SiJ15eIiNyrwUXQTz/9VOf+Q4cOOW2dTUQtg/0UoqSkJIfCQK/XS2YKkfVanxdffBEfffRRjeufXnzxRaSmpkrmmqDc3FwAwIABA/DII4/UKHCHDx8uqSKI6yIREdHVaHARNHjw4Dr3h4SENDYXImpi9m2j7acQRUVFISUlBTqdDhkZGYiNjfX4KUTW9XAiIiKQnZ1dY+rf/v37HeKk4pNPPnFa4H766adip+ZW9usiRUVF1djPdZGIiKgujW6RTUSeR05to+3XzVEoFIiOjsadd96J6OhoKBQKya2bYz2O1q1b4+WXX0ZUVBQCAwMRFRWFl19+Ga1bt3aI83RcF4mIiK4GiyAiGZHTFCI5rYkEwFbUlpaWIikpyeF4k5KSUFpa6hDn6eT2+hIRkXs1ep0gIvI8cptCJJc1kQCgqKjI9v2uXbsc1gny8/NzGufp5PT6EhGRezW4CLr//vvr3C+l/2CJpMZ+CpGztsJSnEIUFxeH2NhYSbcDB/4pXB9//HGsWbPGoTFCWFgYRo0ahSVLlkimwLWSy+tLRETu1eAiKDg4uN79kydPbnRCRNR0rFOI9Ho9dDpdrQtMSu0EUqlUSr5NsrXAzcvLw7Jly7B3715bUdCnTx/o9XrJFbhWcnh9iYjIvRq8WGpLwsVSiRrH2TpB4eHhiI+P5xQiD2bf/ry2ApevLxERSVVDagMWQUQyZTabOYVIgljgEhGRXDWkNmBjBCKSvKqqKqxevRqnTp1CREQExo4dC19fX7HTajJXfrZ1ZQtpIiIiueNIEJEMORstUKlUSEhIkNxowQcffICVK1fCbDbbtimVSowbNw7/+te/RMzM/TgdjoiI5IzT4YioVnI6Uf7ggw+wfPlyhIaGYvr06YiJiUFOTg4++ugjnD9/HhMnTpRMIWQ2m6HVaqFWq512/tPpdDCZTMjKyuK0RyIikqSG1AbSWDWPiFxiNpuRnp6OmJgYpKSkICoqCoGBgYiKikJKSgpiYmKQkZHhMGriqaqqqrBy5UqEhoZi5cqVGD16NNq2bYvRo0c7bK+qqhI7VbcwGo0oKCiAVquFIAjYvXs3Nm7ciN27d0MQBGi1WuTn58NoNIqdKhERkehYBBHJiP2Jsv1IAQB4eXlJ6kR59erVMJvNmD59Ory9HS9/9Pb2xrRp02A2m7F69WqRMnSvwsJCAMCpU6eg1Wrx7LPP4pVXXsGzzz4LrVaLU6dOOcQRERHJGRsjEMmI9QQ4MjLSaXe4yMhIhzhPZj3pj4mJcbrfut0a5+msi6C++uqrGDhwIGJjY1FVVQVfX1+cPHkSr776qkMcERGRnLEIIvr/5NAy2noCvGrVKqxZs6ZGY4QxY8Y4xHmyiIgIAEBOTg5Gjx5dY39OTo5DnKeLioqCUqmEt7c3tm3b5tARzsvLC76+vrh06RKioqJEzJKIiKhlYBFEBPl0S9NoNAgJCcHixYsRExODpKQkW2OErKwsLF68GCEhIdBoNGKnetXGjh2LDz74AB999BHuvvtuhylxly5dwscffwylUomxY8eKmKX75OXlwWw2w2w2w8fHBxMnTsTIkSOxdu1arFy5EpWVlba46OhokbMlIiISF68JItmzdktTq9VIS0vD2rVrkZaWBrVaDb1eD4PBIHaKzUqhUIidglv4+vpi3LhxOH/+PMaNG4c1a9bg3LlzWLNmjcN2qawXdObMGQBAYGAgQkND8dlnn+Hhhx/GZ599hrCwMAQGBjrEERERyRlHgkjWruyWZm0WYO2WptPpkJGRgdjYWElMjTMajSgqKsLjjz+ONWvWIDEx0bYvPDwcjz32GJYsWQKj0SiJ0QJr++uVK1di4cKFtu1KpVJS7bEB4I8//gAA3HvvvZg+fXqNqZ1LlizB559/jj/++APDhw8XOVuSs4qKChw/frzJn6dLly7w9/dv8uchIs/EIohkzdotLSkpqdZuaYmJiZIpCqwND+677z5MnDixxolyZWUllixZIonGCFb/+te/MGXKFHz44Yf466+/cM011+CJJ55AQECA2Kk1iYMHD0KhUDi8Xy0WCw4dOiRiVkT/OH78OGbMmNHkz7No0SL07NmzyZ+HiDwTiyCSNftuac5IqVsa8E/DA5PJhN69e9fYbzKZHOKk4MrrvXbs2IEtW7ZI7nqvTp06Abh8fDqdrsZCuDt27HCIkxI5NDWxksKxdunSBYsWLXI5/s8//0RqairmzJmDrl27Nuh5iIhqwyKIZM2+KHDWNUtqRYFGo4FKpcK7776LoqIinD592ravY8eOCAkJQXh4uCQaIwD/XO915XU/58+fh16vR3JysmQKIWsjCH9/fxw5csRhqqNKpUKrVq1QUVEhmUYQVnJpagJI51j9/f0bNULTtWtXjuwQkduwMQLJmrUoyM7OdmgpDFyeQpSdnS2pokCpVGLIkCE4cOAAqqqq8Nxzz+HLL7/Ec889h6qqKhw4cACDBw/2uE+WnTGbzXjzzTchCAL69+/v0PSif//+EAQBb731Fsxms9ipuoW1EURZWVmN5genT59GWVmZpBpBAPJqaiKnYyUiag4sgkjWlEolEhISkJOTA51Oh7y8PJSXlyMvLw86nQ45OTmIj4+XRFEAXC4MNm3ahF69esHPzw8LFy7Egw8+iIULF8Lf3x+9evXC5s2bJVEY5ObmoqioCH379kVqaiqioqIQGBiIqKgopKamom/fvjh//jxyc3PFTtVtrr/+egCAIAgO2623rful4MqmJvavb0pKCmJiYpCRkSGJ97KcjpWIqLmwCCLZi4uLQ3JyMo4ePYrExESMHDkSiYmJMJlMkpouBfzTCOLpp5/Gp59+isTERNx3331ITEzEJ598gqeffhr5+fkwGo1ip3rVrMXN1KlTnTa9mDp1qkOcp7OeKA8aNAg//PCDw2v7ww8/YNCgQZI6Uba+l7Vaba1NTaTyXpbTsRIRNRdeE0SEy4VQbGysx19wXB9rg4dTp07hlVdecbi24KuvvsL06dMd4qRAKuse1ce+06GPjw+6d++OsLAwhIWFwcfHR7KdDiMjI502C5BSUxO5NXAhImoOLIKI/j+lUimJk8O6WBs8pKamws/Pz2Hf+fPnkZqa6hDnyfr164dly5YhMzMTffv2xd69e20nyX369MHSpUttcVJQV4GrUqkkV+Ba36OrVq3CmjVrahzvmDFjHOI8mdwauBARNQcWQUQyEhUVBS8vL1gsFvTr1w+33HIL/P39UVFRgS1btmDr1q3w8vJyeqLlafr164eQkBDs2bMHo0aNQlVVlW2fr68vqqqqEBISIpkiyHoC/Oqrr+KWW27BhAkTbK/ttm3b8OqrrzrEeTqNRoOQkBAsXrwYMTExSEpKsrUEz8rKwuLFixESEiKJpib2DVzsF3UGpNnAhYioObAIIpKRPXv22Lrg7d69G1u3brXts3YNs1gs2LNnDwYMGCBKju6iVCpx9913Y/ny5bh06ZLDPuvtu+++WzJTHqOioqBUKuHv74+jR48iJyfHtq9jx44IDAxERUWFJApcV0llKqS1gYter3e6BlROTg6Sk5Ml814mImoObIxAJCP2TQCuPEG0vy2FZgH2nfDat2/vsK9Dhw6S6oQHAHl5eTCbzSgrK0NVVRX+/e9/46uvvsK///1vVFVVoaysDGazGXl5eWKn6hZGoxFFRUV4/PHHYTKZHJqaHDt2DI899hjOnz8vmWYBcmrgQkTUHDgSRCQj9q2S33nnnRrXycycORP79u2r0WLZE9k3Cujdu3eNC+f3798vqUYB586dAwD06NEDJSUlWLBggW2fSqVCjx49cOjQIVucp7Ne23Tfffdh4sSJNV7fyspKLFmyRDLXQAHyaeBCRNQcWAQRyUhQUBAAoLKy0un+iooKhzhPdmX3sMOHD+PUqVOIiIhAVFSU5DpqFRUVAQDGjh2LIUOGYN68ebbjnT17Nn766ScsXLjQFufprmwWcGUhK9VmAXJo4EJE1BxELYIyMjKQkZGBY8eOAbg8p33u3LkYMWKEmGkRSZb1hPDIkSMYPXq0QzHk5+dnuy2FE0frMbz55pv46aefHKa9ffDBB7j99tsd4jxdSEgIAGDJkiUOo0AmkwmjR4+27bf+6+nYLICIiK6GqNcEXXPNNXjttdewc+dO7NixA3fccQfGjh0rmTnrRC1Nu3btbN9XV1c77LO/bR/nqTQaDQIDA7FhwwYEBQWhX79+uOGGG9CvXz8EBQVhw4YNCAwMlMxJsvU1q22kx7pdCq8t8E+zgJycHOh0OuTl5aG8vBx5eXnQ6XTIyclBfHw8p4oREZFToo4EWddxsEpNTUVGRga2bNkiqw5GRM3FvoNYq1atcObMGdu+9u3b48KFC5LpIGY2m23T+4qKipw2e6ioqIDZbJbEiXKPHj3cGucJrM0C0tPTkZiYaNseHh7OZgFERFSnFnNNkNlsxsqVK1FWVoaYmBinMZWVlQ7Td0pKSporPSJJsO8gptFoMGnSJNs0uG3bttnaKufl5Xn8dQerV6+2tQOvjcViwerVqzFu3LhmyqrpLFq0yPa9dS0oZ7cXLVqEWbNmNXt+TYXNAoiIqDFEL4L27NmDmJgYVFRUoHXr1li1ahWuv/56p7Hz5s1DcnJyM2dIJB3WJgBz5szBRx995LCWTHh4OObMmYPU1FRJNAs4fvw4gMsFQNu2bXH27Fnbvvbt2+Pvv/+GxWKxxXm6/fv327739vZ2WBzW/rZ9nFSwWQARETWU6EVQr169kJubi+LiYnz55ZeYMmUKNm/e7LQQmj17tsMnmCUlJejcuXNzpkvk0axNACIiIpCdne20bbR9nCezdgezWCwOBRAAh9vWOE9nbWvu7+/vUAABlxeH9ff3R0VFhSTan8uZ2WzmqBcRkRuIXgT5+vqie/fuAIABAwZg+/bteOedd/Dhhx/WiPXz84Ofn19zp0gkGVd21LL/9FxqHbV8fHzcGtfSde7cGYcOHUJFRUWNhXAFQbBdH8UPjjyXwWBAeno6CgoKbNtUKhUSEhJ4/RMRUQOJ2h3OGYvFUusaJkR0deTUUcvf39/2vUKhwIABA/DYY49hwIABDkWCfZwn69Chg+17QRBw1113YdGiRbjrrrscRn/s48hzGAwG6PV6qNVqpKWlYe3atUhLS4NarYZer4fBYBA7RSIijyLqSNDs2bMxYsQIdOnSBaWlpfjss8+wadMmrFu3Tsy0iCRNLh217EeNBUHAzp07sXPnzjrjPFlZWZnD7fXr12P9+vX1xlHLZzabkZ6ejpiYGIc1kaKiopCSkgKdToeMjAzExsZK4gMMIqLmIGoRdObMGUyePBn5+fkIDg6GRqPBunXrcNddd4mZFpHkyaGjlnURZquePXuiU6dOOHnyJA4ePFhrnKeyb2bh7e2NS5cuOb0thaYXcmM0GlFQUICkpCSHRWGBy40/tFotEhMTYTQa2SCCiMhFohZBH330kZhPTyRrUu+o1apVKwD/tIc+ePCgQ/Fj3W6N83SBgYG2752dKDuLI89gLVwjIyOd7rduZ4FLROS6FndNEBGRO6jVagCXrzO8+eabERcXh+joaMTFxeHmm2+2rZtjjfN0w4YNA3C52Yz9KBBwuTucr6+vQxx5Dmu3xto6GVq3S6GrIxFRcxG9OxwRUVPo06cP1qxZAwDYvn27Q3MA+8YIffr0afbcmkL//v1tC996e3vjhhtuQNu2bfH3339jz549qKqqgp+fH/r37y92qtRAV3Z1tB/Zk1pXRyKi5sKRICKSpCu7pdmTare0gIAAAJdHfnbv3o0NGzZg9+7dtpEh637yLHLq6khE1Fw4EkREkqTRaBAYGIjy8vJaYwIDAyXz6bnRaERRURGAy1Pi7BdMtd4uKirixfMeSi5dHYmImguLICKSJLPZbFsgtDYVFRUwm82S+AT93LlzAICBAwdizpw5mD9/Pk6dOoWIiAg8//zzSE1NxdatW21x5Hnk0NWRiKi5sAgiIklavXq1rflBbSwWC1avXo1x48Y1U1ZNxzoKdO7cOdxzzz227SaTCffccw+6devmEEeeSepdHYmImguLICKSpL/++sv2fUhICIYNG2ZbJ+jHH3+0FQP2cZ4sJCQEAHDkyBGn+63brXFERERyxiKIiCTp7NmzAC5fD7NixQrs27cPhYWFiImJwfTp0zFmzBhUVVXZ4jxdmzZtbN8HBwdj+PDhiIiIwKlTp7Bu3ToUFxfXiCMiIpIrFkFEJEnWk35BEPDII4/gzJkztn0dOnSwdYizxnm6X375BcDl6VKlpaX44osvbPuUSiWUSiXMZjN++eUXDBw4UKw0m4TZbOZ1MkRE1CAsgohIkqxrAVVXV+P8+fOYNGkSRo4cibVr1+LLL79EdXW1Q5ynO3jwIIDLBUFoaCjuuusu20jQ+vXrcf78eYc4qTAYDEhPT0dBQYFtm0qlQkJCAjumERFRrVgEEcmU1D89HzRoEPbu3Qvg8ro5n3/+OT7//HMAcFhsctCgQaLk526tWrUCAISGhsLX19dhJEilUiE0NBTnz5+3xUmBwWCAXq9HTEwMkpKSEBkZCZPJhOzsbOj1eraOJiKiWrEIIpIhOXx63qNHD9v3Vy6Wat81zj7Ok914443YtWsXSkpKsHr1anz//fe2FtkjRozA2LFjbXFSYDabkZ6ejpiYGKSkpNgK26ioKKSkpECn0yEjIwOxsbGSKu6JiMg9vOoPISIpsX56rlarkZaWhrVr1yItLQ1qtRp6vR4Gg0HsFN2ipKTErXEtXfv27QFcLg5Gjx6NtLQ0rFq1CmlpaRg9ejTMZrNDnKczGo0oKCiAVqt1GNkDLo/0abVa5Ofnw2g0ipQhERG1ZCyCiGTkyk/Po6KiEBgYaPv0PCYmBhkZGbYTZk/maitoqbSMbteunVvjWrrCwkIAQGRkpNP91u3WOCIiInssgohkRE6fnldVVbk1rqWLioqCUqmEj4+P0/0+Pj5QKpWIiopq5syaRlhYGIDLi8E6Y91ujSMiIrLHIohIRuw/PTebzdi9ezc2btyI3bt3w2w2S+rT85UrV7o1rqXLy8uD2WxGdXU1vL29ER0djaFDhyI6Ohre3t6orq6G2WxGXl6e2Km6hUajgUqlQnZ2tsM1XsDla76ys7MRHh4OjUYjUoZERNSSsTECkYxYPxVftWoV1qxZU6MxwpgxYxziPJn9sbkjrqWzroPk5+eH6upq7N6927bPy8sLfn5+qKysdFgvyZMplUokJCRAr9dDp9NBq9U6dIfLyclBcnIymyIQEZFTLIKIZESj0SAkJASLFy/GLbfcggkTJthOjrdu3YrFixcjJCREEp+eX7hwwa1xLd0ff/wBAKisrISPj4/D6IhSqURlZaUtbvjw4aLk6G5xcXFITk5Geno6EhMTbdvDw8PZHpuIiOrEIohIpnbv3o0tW7bYbvv5+QGQzuKhV7bFvtq4ls6+6LnyNbS/feXUMU8XFxeH2NhYSa95ZU/q63sRETUXFkFEMmI0GlFUVFRnzPnz52E0GhEdHd08STWRS5cuuTXOk1RXV9d5W2qUSqXHv19dIYf1vYiImguLICIZOXfuHABg4MCBSElJwd69e22fKPfp0wc6nQ5bt261xXmyK7vfXW1cSxcYGGj7XqlUOhR39rft48hzWNf3iomJQVJSksP1T3q9ntP/iIgaSBr/+xORS6yjQLfddpvTFtm33nqrQ5wn8/X1dWtcS2dfuF45umV/WwoF7pWcdTqUEjmt70VE1Fw4EkQkI9aFQVevXo1PP/3UoVNYhw4dEBwc7BDnyQICAnD+/HmX4shzyWGKmHV9r6SkpFrX90pMTJTENFYioubCIohIRtq1awcAOHToUI2TqXPnztmKImucJ2vdurVb41o6V18zKby2VtYpYld2Oty2bZukpojZr+/ljJTW9yIiai4sgohkJCoqCl5eXrBYLPD29kZVVZVtn/W2l5cXoqKiRMzSPbp27YqDBw+6FCcFZWVlbo1r6axTxHr27AmTyYScnBzbPpVKhZ49eyIjIwOxsbEe3z3Num6XyWRy+rtpMpkc4oiIqH4sgohkZM+ePbYWyf3798fAgQMd1gnasmULLBYL9uzZgwEDBoic7dXJzc11a1xL9/fff9u+9/HxcegI5+vrayt47eM8mXWK2OnTp502C8jJyYEgCJKYIqbRaKBSqZCdnY2UlBSHUVyLxYLs7GyEh4dLYn0vIqLmwiKISEasJ/xTp07FDz/84LBOUHh4OKZMmYJPPvkEubm5Hl8EuXI9UEPiWjrrYqhAzZbY9iN+9nGezNrg4eabb3YoDKzNAmbPni2ZTodKpRIJCQnQ6/WYM2cObr75Zoepf1u2bEFycrLHj3gRETUnFkFEMtS3b1888sgjNRZdlMqoCCC/Ftk9e/bEzp07ATiO/Fx5u2fPnqLk526udDrcunWrJDodApcXhZ0wYQJWrlzpMPVPqVRiwoQJkrj2iYioOUnjf38ickm/fv0AAEuXLrVNi7OyWCxYunSpQ5wnu/baa90a19L179/f9n1gYCDGjx+PmTNnYvz48Q5rA9nHeTJrB8Off/7Z6Xv5l19+cYjzdAaDAStWrKgx2qNUKrFixQoYDAaRMiMi8kwcCSKSkX79+iEkJAR79uzB8OHDHU4erQ0TQkNDJVEE2Y+EuCOupbMfDSkqKsIXX3xRb5wns3a527p1K3Q6HbRarcM1QVu3bnWI82RmsxlvvvkmBEHAgAED8PDDD9uONSsrCzk5OXjrrbck0QSCiKi5sAgikhGlUom7774by5cvd/rpOQAMHz5cEidSJ0+edGtcS+fqtC+pTA+zNgsIDg7GkSNHkJiYaNunUqnQq1cvlJSUSKJZQG5uLoqKitC3b1+kpqY6XP+UmpqKmTNnYs+ePZK4lo+IqLlI4yNBInKJ2WzGl19+WWfMl19+KYmV5109BikcK+B6e2SptFG2Ngs4ePAg1Go1Zs6cif/+97+YOXMmIiMjcfDgQcTHx0uioLdvaOLs+qepU6c6xBERUf04EkQkI9u2bcOlS5fqjLl06RK2bduGmJiYZsqqafj6+qKiosKlOCno3bu37fsBAwbg5MmTuHDhAlq3bo1OnTrZmibYx3m6uLg4JCcnIz093aFZQHh4uGQWSrWnUCjEToGISDJYBBHJyJIlS1yO8/QiSKVS4dixYy7FScG3335r+95a8ADAhQsXUFBQ4BA3YcKEZs2tKcXFxeGWW27B6tWrcerUKURERGDs2LGSKW6By9fyLVu2DJmZmejXr1+NdYKk1NCEiKi5sAgikpHjx4/bvg8ODkZ0dDQCAgJw8eJF7N69G8XFxTXiPFV9I14NjWvp9uzZ43KclIogg8GA9PR0h0Lvq6++QkJCgmRGguwbmsyZM6dGY4Q9e/YgJCSERRARUQOwCCKSEfvpNKWlpdi0aZPttv2ny1KYdtOuXTv89ddfLsVJgbf3P3/O16xZg8OHD9vWgOrevTvGjBlTI87TGQwG6PV63HLLLZgwYQL8/f1RUVGBbdu2Qa/XS2ZKnFKpxKxZszB37lzs2rXLYeqfn58fAGDWrFmSuP6JiKi5SOd/QyKqV+vWrVFYWAgAtXaHs8Z5usrKSrfGtXQmkwnA5RPmgIAAREdH2/ZdunQJSqUSZrPZFufpzGYz0tPT0bNnTxw9etShMOjYsSN69uyJjIwMybSNjouLw8svv4y0tDScPn3atj00NFRSo15ERM2FRRCRjPTo0cO2fkp9cZ7u77//dmtcS1dWVgbgcnHw4IMPYtiwYQgPD0d+fj5+/PFHWxc8a5ynMxqNKCgoQEFBAQYNGoS5c+c6rBP022+/2eLsC0JPFhcXh9jYWBiNRtson0ajkUSRR0TU3FgEEcmIqydLUjipunKk62rjWjqVSoVz584BqHuxVKk0grAe68CBA5GSkuKwdk5KSgpmz56NrVu32uKkQqlUSqaoIyISE9cJIpIRQRDcGteSBQYG2r738fFx2Gd/2z7Ok6Wmptq+v7Izmv1t+zhPZl309bbbbnO6ds6tt97qEEdERGSPRRCRjMjpOhn75g7V1dUO++xvS6EJBHD5Oi5rMVBVVYX27dujV69eaN++PaqqqgBcLg6kcL0XAISEhAAAfv75Z6fXt/3yyy8OcURERPZYBBHJSGhoKIDLHcKuPPlXKBS2zmHWOE8WFhbm1riWzmg0wmKxICAgAABw9uxZHDhwAGfPngUABAQEwGKxwGg0ipmm21i7+m3btg06nQ55eXkoLy9HXl4edDodtm3b5hBHRERkj9cEEcmI9XqQS5cu2dYVsbYVzs3NtU0dksJ1Iz169MDu3btdipMCa9e/ixcvIigoCEFBQaioqIC/vz9KS0tRWlrqEOfpNBoNVCoVgoODcfToUSQmJtr2qVQq9OzZEyUlJdBoNCJmSURELRVHgoj+P7PZjN27d2Pjxo3YvXu3rZuWlPTv39/2fVFRETZt2oQffvgBmzZtcrh2wj7OU5WXl7s1rqVr06YNACAoKAjLli1DZGQkgoODERkZiWXLliEoKMghztMplUokJCTgwIEDNQq7wsJCHDhwAPHx8ZJo8kFERO4n6kjQvHnz8PXXX2P//v0ICAjAoEGDMH/+fPTq1UvMtEiGnK06r1KpJLf+hnXl+bouFg8NDZXEyvOurocjlXVzjh49CuDy9Vz33nuvbbvJZMKvv/5qa45w9OhR3HTTTWKk2CQUCoXTqZ1SudaLiIiahqgjQZs3b0ZiYiK2bNmC9evXo7q6GsOGDZPMOhbkGayrzqvVaqSlpWHt2rVIS0uDWq2GXq+HwWAQO0W3USqVaNWqVZ0xgYGBkvj03NW/I1L5e2Mt4K1NEDp37oxbb70VnTt3dthuX+h7MvvFUoODgx32BQcH2xZLldqIrhxGrImImoOoI0E//PCDw+2lS5eiQ4cO2Llzp6Q+faeWy3oiFRMT43StEZ1OJ6lV5y9cuICTJ0/WGXPy5ElcuHDB47uIde3aFceOHXMpTgqubPBw4sQJnDhxot44T3XlYql6vV7yi6XKZcSaiKg5tKhrgoqLiwHU/p90ZWUlSkpKHL6Irob1REqr1Tpda0Sr1SI/P18yHbXmzZtn+z40NBRDhgzBiBEjMGTIEIeOcPZxnurMmTNujWvp9u3b59a4lu7KxVKjoqIQGBho+wBj4MCBDnGeTk4j1kREzaHFdIezWCx45plnEBsbiz59+jiNmTdvHpKTk5s5M5Iy6wXVkZGRMJvNMBqNKCwsRFhYGDQaDSIjIx3iPJ11FCggIABKpRKbNm2y7WvXrh0CAgJw8eLFekeLPIGrxyCFYwWA/Px8t8a1dPaLpQqCgN27dzv87t56663YunWrJBZLlduINRFRc2gxRVBiYiL27t1rW+DOmdmzZ2PWrFm22yUlJbb57kSNYR11XLVqFdasWVNjmsno0aMd4qTi4sWLuHjxosM2qXxibuXqSLFURpQFQXBrXEtnXQR19erVWLZsGU6fPm3b17FjR1sXPCkslmodsU5KSqp1xDoxMVFSU/+IiJpaiyiCnnzySXz33XcwGAy45pprao3z8/ODn59fM2ZGUqfRaBASEoLFixcjJiYGSUlJtusKsrKysGTJEoSGhkpmrZHrr7/epetkrr/++qZPhtzKukiqu+JaOusiqIcOHUJoaCjGjx+PiIgInDp1CuvXr8ehQ4cc4jyZ/Yi1M1IbsSYiag6iFkGCIOCpp57CqlWrsGnTplr/wBOJSSqfnAOo8Sny1cZRy3H+/Hm3xrV0UVFRUCqV8Pb2RnFxMb744gvbPqVSCT8/P1y6dAlRUVEiZuke1pFok8nk9Hisbd6lNmJNRNSURD3TSUxMRFZWFj777DMEBQXZOv1cOU2HqKkYjUYUFRXh8ccfh8lkQmJiIkaOHInExEQcO3YMjz/+OIqKiiTTGEFu18nIiat/N6Xy9zUvLw9msxmVlZVo06YNxo8fj2eeeQbjx49HUFAQKisrYTabkZeXJ3aqV02j0UClUiE7OxsWi8Vhn8ViQXZ2NsLDwyUzYk1E1BxEHQnKyMgAAAwZMsRhe2ZmJqZOndr8CZHsWKeP3HfffZg4cWKNxgiVlZVYvHixZKaZnD171q1x1HJUV1c73Pb394eXlxcsFgsqKipqjfNU1mvYevTogZKSEoeRIJVKhR49euDQoUOSuNZNqVQiISEBer0eOp0OWq3WoR14Tk4OkpOT2RSBiKgBRJ8ORySmK6eZXHlRsdSmmQQGBro1jloOhULhcNu+8KkrzlNZu76NHTsWI0aMqPEBxtq1a7Fw4UJJdIcDgLi4OCQnJyM9PR2JiYm27eHh4UhOTuY6QUREDdQiGiMQicV+mol961lAmtNMNBoNDh486FIceRa5Xe9l7fr2888/Y+TIkQ4fYFgsFlunUSl0h7OKi4tDbGxsjYKPI0BERA3HIohkTW7TTGobHWhsHLUcQUFBuHDhgktxUmDt+rZt2za8+OKL6NSpEyorK+Hn54eTJ09i27ZtDnFSoVQq2QabiMgNWASR7MlpmsnRo0fdGkctR6tWrdwa19JZR3ErKyuxZcuWGvtDQ0Ph7+/PUU0iInKKRRAR5DPNxNWLxKVwMblCoXDpukOpXCPz119/uTWupVMqlejWrRt+/fVXeHt7Y/DgwejVqxcOHDiAzZs34/z584iNjZXc7zAREbkHiyAiGamqqrJ9r1QqYTabnd62j/NUrjZekUqDFle7vkmlO1xVVRW2bNmCVq1aoVWrVti4cSM2btwIAOjYsSMuXLiALVu2oKqqCr6+viJnS0RELQ2LICIABoMB6enpKCgosG1TqVRISEiQ1HQ4+6LH/vv69lHLFxQU5FInNKlcE7R69WqYzWbEx8dj2LBhWL16NU6dOoWIiAiMHTsW69atw8KFC7F69WqMGzdO7HSJiKiFYRFEsmcwGKDX6xETE4OkpCSHxgh6vV5S1wUFBgaitLTUpTjyLNdffz1+++03l+Kk4NSpUwAuT2ecPHmywwcYX331FR5++GGHOCIiInvS6JVK1Ehmsxnp6emIiYlBSkoKoqKiEBgYiKioKKSkpCAmJgYZGRmSGRnp3r27W+Oo5Th+/Lhb41q6iIgIAMAbb7xRYzHjwsJCLFiwwCGOiIjIHosgkjWj0YiCggJotdoa66d4eXlBq9UiPz8fRqNRpAzd68CBA26No5ZDbo0RRo8ebfveYrE47LO/bR9HRERkxSKIZM36CXJkZKTT/dbtV37S7KnOnz/v1jgisezbt8/2vUKhwKRJk7Bs2TJMmjTJoeOffRwREZEViyCStbCwMACAyWRyut+63Rrn6a4c7braOCKx7Nq1CwDQtm1bWCwWfP7553jkkUfw+eefw2KxoG3btg5xRERE9tgYgWTNuuBidnY2UlJSHE7+LRYLsrOzER4eLpkFF9u2betwAXldceRZWrVqhbKyMpfipODMmTMAgIEDB2LHjh2228Dl9++AAQPw/fffO2ynpnP69GkUFxc3yWP/+eefDv82heDgYHTs2LHJHp+IWh4WQSRrSqUSCQkJ0Ov1mDNnDm6++Wb4+fmhsrIS27Ztw5YtW5CcnCyZBRddbfAglUYQctKmTRuXiqA2bdo0QzZNr0OHDgCAtWvXIiYmBnq93tbZMSsrC99//71DHDWd06dP4+FHJqO6qrJJnyc1NbXJHtvH1w9Zyz5lIUQkIyyCSPbi4uIwYcIErFy5Ejk5ObbtSqUSEyZMkEx7bAAoLy93axw1n4qKijo7u3l7u/bn3NvbGwcPHqx1f5cuXeDv79/g/Jpbv379kJ2d7VIcNa3i4mJUV1XionowLP7BYqfTYF4VxcDRzSguLmYRRCQjLIJI9gwGA1asWIFbbrmlxkjQihUrcP3110umEGIR5LmOHz+OGTNmXPXjnDhxos7HWbRoEXr27HnVz9PU7Keu7tq1y+EDDD8/P6dx1LQs/sGwtGondhpERC5hEUSyZr9OUHJyMvbu3YvCwkKEh4dj9OjR0Ov1yMjIQGxsrCSmxAmC4NY4aj5dunTBokWLat1fVVWFJ598st7Hef/99+Hr61vn83iCoqIi2/dXvl/tb9vHERERWbEIIlmzrhM0ZswYPPLIIw5NA1QqFcaMGYPffvsNRqMR0dHRImZKcufv71/vCE1sbCx+/fXXOvf36dPH3amJwtqx8fHHH8eaNWscfnfbtm2L0aNHY/HixZLp7EhERO7FIohkzbr+z5IlSxATE4OkpCTbxdXZ2dlYsmSJQ5ynUyqVLjU9kMKolxylpqZizpw5Tguh2NjYJr2wvLlZOzvm5eVh2bJltlHcsLAw9OnTB3q9XlKdHYmIyL1YBJGshYSEAAD69OmDuXPnYs2aNdiwYQMiIiIwd+5c/Oc//8GePXtscZ4uKCjIpelBQUFBTZ8MNYnU1FRcvHgR8+fPx6ZNmzBkyBA8//zzCAgIEDu1BqmvEQQA3Hvvvfjwww/x3HPPYcSIEejUqRNOnjyJzMxM7NmzB0888QSOHDlS52N4SiMIIiJyLxZBRADy8/MxYsQIh2sJ0tPTJbdeTlVVlVvjqGUKCAjAQw89hE2bNuGhhx7yuAIIaFgjCKPRCKPRWGP7Bx98UO99PaURBBERuReLIJI166jIuXPnauwTBMG2XSoXV1+8eNGtcURNpb5GEPYsFgt++eUXZGVl4eGHH8att97qclc4T2kEQURE7sUiiGTN1YUjpbLAJLvDkadwpRGEPS8vL2RlZSEuLo4jO0REVC8WQSRrhw8ftn0fEhKCfv36ISAgABcvXkRubq5tBOjw4cO46aabRMqSiIiIiNyJRRDJ2p49e2zfl5WVYdOmTbbbPj4+DnGTJk1qztSIiIiIqIlwKW2SNftrgaqrqx322d92ds0QEREREXkmjgSRrNl3f2vTpg3uvvtuRERE4NSpU/jhhx9QUlJSI86TeXl5wWKxuBRHRERXz5V271eLrd6JGo5FUAOZzWYYjUbbonwajYYLS3ow++JGoVAA+KcpgPX2lXGezJUCqCFxRERUt4a0e28stnonajgWQQ1gMBiQnp6OgoIC2zaVSoWEhATExcWJmBk1ln0r6OLiYnzxxRf1xhEREbmqIe3e//zzT6SmpmLOnDno2rVrg56DiBqGRZCLDAYD9Ho9YmJikJSUhMjISJhMJmRnZ0Ov1yM5OZmFkAeqrKx0axw1H3dOMTl48GCt+zjNhIiuRkPbvQNA165dObJD1MRYBLnAbDYjPT0dMTExSElJsV0vERUVhZSUFOh0OmRkZCA2NlZyU+OkPv0vKioKv/zyi0tx1LK4c4pJXY/DaSZERETSwyLIBUajEQUFBUhKSqpxwbiXlxe0Wi0SExNhNBoRHR0tUpbuJ4fpf2q12q1x1Hzqm2Ly+eef46effqr3cW6//fY6259zmgkREZH0sAhyQWFhIQAgMjLS6X7rdmucFMhl+t/vv//uctzAgQObOBtqiPqmmMyePdulImj27Nnw9fV1Z2pERETUwrEPrgvCwsIAACaTyel+63ZrnKe7cvpfVFQUAgMDbdP/YmJikJGRAbPZLHaqV23//v1ujaOWw9fXFxMnTqwzZuLEiSyAiIiIZIhFkAs0Gg1UKhWys7NrtA62WCzIzs5GeHg4NBqNSBm6l3X6n1arrXX6X35+PoxGo0gZuo+14YFSqXR6rNbrn9gYwTP961//qrUQmjhxIv71r381c0ZERETUEnA6nAuUSiUSEhKg1+uh0+mg1Wodpofl5OQgOTlZMg0D5DT9z8/PDwCcjmrZF7zWOPI8//rXvzBt2jQsWbIEX3zxBcaPH4/HHnuMI0DUYjTHYpoAOx0SEdljEeSiuLg4JCcnIz09HYmJibbt4eHhkrk+xsp++p+zrmhSmv7XvXt37Nq1y6U48ly+vr4YOnQovvjiCwwdOpQFELUozbGYJsBOh0RE9lgENUBcXBxiY2Ml3TIacJz+Z98SHJDe9D9XF0HlYqlE1FQaspgmwAU1W4LTp0+juLjY7Y/7559/OvzbFIKDg9GxY8cme3wiT8EiqIGUSqWk2mA7I6fpf65O6ZPC1D8iapkas5gmwAU1xXL69Gk8/MhkVFc13bWiqampTfbYPr5+yFr2KQshkj0WQQ0k9cVDreQy/S8gIMCtcUREJG3FxcWorqrERfVgWPyDxU6nQbwqioGjm1FcXMwiiGSPRVADyGHxUHtymP7XvXt3bNiwwaU4IiIiK4t/MCyt2omdRovCJh/kSVgEuUgui4deSerT/0JDQ90aR0REJFds8kGehEWQC65cPNTaKMC6eKhOp0NGRgZiY2MlNUoiB0VFRW6NIyIikis2+SBPwiLIBdbFQ5OSkmpdPDQxMRFGo1HSoyZSxMYIRERE7sEmH+RJvOoPITktHio3O3bssH1/5Sie/W37OCIiIiLybKIWQQaDAWPGjEFERAQUCgW++eYbMdOplf3ioc5IafFQuTl79qzte7PZ7LDP/rZ9HBERERF5NlGLoLKyMtxwww1IS0sTM4162S8earFYHPZJbfFQufH19XVrHBERERG1fKIWQSNGjEBKSgruu+8+MdOol3Xx0JycHOh0OuTl5aG8vBx5eXnQ6XTIyclBfHw8myJ4oNqmODY2joiIiIhaPo9qjFBZWYnKyn9WaC4pKWm255bL4qFyc+7cObfGEREREVHL51FF0Lx585CcnCza88th8VC5YRFEREREJD8eVQTNnj0bs2bNst0uKSlB586dmzUHqS8eKjdXtjy/2jixuXO17oMHD9a6j6t1ExERkSfzqCLIz88Pfn5+YqdBEiK1Isidq3XX9ThcrZuobqdPn0ZxcXGTPPaff/7p8G9TCA4ORseOHZvs8YmIxOZRRRCRu1VVVbk1Tmz1rdadmJiI6urqeh/Hx8enzq6NXK2bqHanT5/Gw49MRnVVZf3BVyE1NbXJHtvH1w9Zyz5lIUREkiVqEXThwgUcPnzYdttkMiE3NxdhYWE8yaJmUVFR4dY4sdW3WveyZcswceLEeh9n2bJlUKlU7kyNnJDbaIEnH29DjrW4uBjVVZW4qB4Mi39wk+TTlLwqioGjm1FcXMwiiDz69xbgqCbVTtQiaMeOHbj99tttt63X+0yZMgVLly4VKSuSkysXSL3auJZOpVLB39+/zqLO39+fBVAzkNtogacfb2NGRiz+wbC0atck+RA1B0//vQU4qkm1E7UIGjJkCARBEDMFItn54YcfcPfddzsthPz9/fHDDz+IkJX8yG20wJOPlyMjJFee/HsL8HeX6sZrgohk6IcffkBBQQFmzJiBkpIStGnTBosWLeIIkAjkNlogt+MlkgL+3pIUeUbLKyJyO5VKhQULFgAAFixYwAKIiIiIZIMjQSRpXDeHiIiagtfFIrFTaDBPzJmoqbAIIknjujlERNQUAkwGsVMgoqvAIojqZDabYTQaUVhYiLCwMGg0GiiVSrHTcll96+Zs2bIFH3/8cb2PM23aNNxyyy11Pg8REcnHxcg4WAJCxE6jQbwuFrF4I/r/WARRrQwGA9LT01FQUGDbplKpkJCQgLi4OBEzc1196+Z069bNpSJIq9V6VPHnyZpqTQquR0FE7mQJCGGzACIPxiKInDIYDNDr9fD19XXYfv78eej1eiQnJ3tMIVQXpVKJl19+GXPnzq015uWXX2YB1EyaY00KrkdBRERELIKoBrPZjDfffBOCIOCGG26Av78/SktLERQUhIqKCmzbtg1vvfUWYmNjJVEcxMXF4eWXX8a7776Lc+fO2ba3a9cOTz/9tCSKPU/hyWtScD0KupKnXoTuqXkTETUEiyCqITc3F0VFRQgKCsK2bdtq7A8KCsL58+eRm5uLAQMGiJCh+8XFxSE2NhZr167FwoUL8dxzz2HkyJGSKPI8EdekICmQ27UXnlo8eWreRHR1WARRDbm5uQCA0tJS+Pj4YNy4cRg5ciTWrl2LlStXorS01BYnlSIIuDw1rlevXgCAXr16sQAioqviiRfOA42/eF5uRR8ReTYWQVRDdXU1gMtFwf/+9z/bdUEzZszA1KlTMWLECJjNZlscERHVJLcL5+VW9MmJp46WeWre1DxYBFENJ06cAACEhYXB29vxLeLt7Y3Q0FCcO3fOFkdE5CpPPCnxxJzFILeiT05YJJIUsQiSoYqKChw/frzW/YWFhQCAs2fP4plnnsGIESPQqVMnnDx5Et9//72teUBhYSEOHjzo9DG6dOkCf39/9ydPRB6NJ1MkFV4V7m/l39QamnNwcDC8fXxxqbqqiTJqet4+vggO9qxGO83l8OHDMJlMLsWWl5fjyJEjTZzR5aVLAgMDXYqNjIxE9+7dG/1cLIJk6Pjx45gxY4ZLsUajEUaj0em+P/74o9bHWbRoUZ3r8xDRZZ46ytDYvD1xyhSnS5G94OBg+Pj6AUc3i51Ko/j4+rlcFHTs2BHZWctcXrutsrLSYW3BpqJSqeDn5+dSLNdvq917772H33//Xew0Gu2GG27AO++80+j7swiSoS5dumDRokW17q+qqsKTTz4JLy8vBAUFOfzxCwkJQUlJCSwWC95///0a6wjZPwcR1U9uJ9ecMkWermPHjsha9mmTLeqcmpqKOXPmoGvXrm5/fKDhRUHHjh0bFN+3b9/GpEUieOqppzx+JOhqsAiSIX9//3pHaWJjY/Hrr7+ivLwcN910E7Zv346bbroJubm5sFgsiI2NRZ8+fZopYyLp8sSREYCjIyRvDS0MGqpr166cTUFNrnv37lc1nczTsQgip6yfRP3666/Yvn07ANj+jY2NRWpqqpjpycrp06eb5BNH4PKnjvb/NgVORagbR0aIiIiaH4sgqlVqaiouXryI+fPnY9OmTRgyZAief/55BAQEiJ2abJw+fRoPPzIZ1VWVTfo8TVnU+vj6IWvZpw0qhDzxOhlPzJmalideOA94bt5ERA3BIojqFBAQgIceegibNm3CQw89xAKomRUXF6O6qhIX1YNh8fe87jZeFcXA0c0oLi5uUBHEaVbkyTz9wnmgYRfPE0mJ3DumyQmLICIPYPEPltWUKU+8TobXyJBVU144D7TMi+eJpELuHdPkhEUQeRxeIyN9vE6GPF1TXzgPtLyL5z11Gp2n5k1NQ+4d0+SERRB5FLleI0NE1FJx+h9Jidw7pskJiyDyKHK9RoaIqKXi9L+6VVRU4Pjx4y7FNnY2QpcuXeDv79/g3IjkjEUQeSS5XSND0uWpU3Eam7cnHq8n5tzc5Dj9z1XHjx/HjBkzGnSfhs5GWLRokUf+bIjExCKIyAN4avtlT827OchtCpGnHy+nS1FjdenSBYsWLWry5yCihmERJBGe3CyAjQLqx65j0iO3KUSefrz8O0WN5e/vz1EaohaIRRAaNl/3ajTVnF1PbxYgl8U0gcbn7Ykto4HGt432xOlHjclZblOI5Ha8RETUckm2CGrIyIj1E8Sm1pBPKBvyqaMnNwvgYpqukUvLaE6ZIiIiouYgySKouUZGGqohhVZjRkfk1CxAbiMjctGUU6Za2vQwIrlq6OwLdkwjoqYgySLIk0dGALZRdoVcRkbkqKmnTHG6FJG4GtMtDWDHNCJyL0kWQVZyGhkBPPM6GU/MmYiIGq85uqVZn4eIqDaSLoI89QS7sXlzmpV0eWKjAMBz8yZqTnKbHsZuaUTUEki6CJJbUeCJ18nIqXsY0PC8Pb1RAMBmAUT14fQwIqLmJ+kiyBOLAqDxhYEcrpORW1Hg6WurAGwWQFQfTg8jImp+ki6CoFCInUHjeGrezUCORQHXViGSNk4PIyJqfpIsguQ2WmDliVPEuMAkERERETU3SRZBchst8PSij9eMEBEREVFzkmQRBMhrtMDTiz5eM0JEV5JbxzQiImpeki2C5EZORR8RSR87phERUVNiEQR+4khE1NKwYxoRETUlFkHgJ45E1PLJ7cMadkwjIqKmxCII/MSRyBPJrSjghzVERETuwyII/MSRpENOhYHcigJ+WENEROQ+LIJkqDlOlFvCSTIgr6IAkFdhILeigB/WEBERuY9CEARB7CTS0tLwxhtvoKCgADfccAPee+893HzzzfXer6SkBMHBwSguLkabNm2aIVNpOHjwYKNOlBuiJZwkA81zrEDLOd6GFn2N1VKKPiIiIiKrhtQGohdBK1aswOTJk/HBBx9g4MCBePvtt7Fy5UocOHAAHTp0qPO+LIIapzlOlFvKSTKLAiIiIiJ58KgiaODAgbjpppvw/vvvAwAsFgs6d+6Mp556Ci+88EKd92URREREREREQMNqA69mysmpqqoq7Ny5E0OHDrVt8/LywtChQ5GTk1MjvrKyEiUlJQ5fREREREREDSFqEXTu3DmYzWZ07NjRYXvHjh1RUFBQI37evHkIDg62fXXu3Lm5UiUiIiIiIokQtQhqqNmzZ6O4uNj2deLECbFTIiIiIiIiDyNqi+x27dpBqVTi9OnTDttPnz4NlUpVI97Pzw9+fn7NlR4REREREUmQqCNBvr6+GDBgADZu3GjbZrFYsHHjRsTExIiYGRERERERSZXoi6XOmjULU6ZMwY033oibb74Zb7/9NsrKyvDoo4+KnRoREREREUmQ6EXQhAkTcPbsWcydOxcFBQXo168ffvjhhxrNEoiIiIiIiNxB9HWCrgbXCSIiIiIiIsCD1gkiIiIiIiJqbiyCiIiIiIhIVlgEERERERGRrLAIIiIiIiIiWWERREREREREssIiiIiIiIiIZIVFEBERERERyQqLICIiIiIikhUWQUREREREJCveYidwNQRBAHB5dVgiIiIiIpIva01grRHq4tFFUGlpKQCgc+fOImdCREREREQtQWlpKYKDg+uMUQiulEotlMViwalTpxAUFASFQtFsz1tSUoLOnTvjxIkTaNOmTbM9r1jkdLxyOlZAXscrp2MFeLxSJqdjBeR1vHI6VoDHK2ViHasgCCgtLUVERAS8vOq+6sejR4K8vLxwzTXXiPb8bdq0kfyb2J6cjldOxwrI63jldKwAj1fK5HSsgLyOV07HCvB4pUyMY61vBMiKjRGIiIiIiEhWWAQREREREZGssAhqBD8/P+j1evj5+YmdSrOQ0/HK6VgBeR2vnI4V4PFKmZyOFZDX8crpWAEer5R5wrF6dGMEIiIiIiKihuJIEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBDVQWloarr32Wvj7+2PgwIHYtm2b2Ck1GYPBgDFjxiAiIgIKhQLffPON2Ck1mXnz5uGmm25CUFAQOnTogHvvvRcHDhwQO60mkZGRAY1GY1vALCYmBt9//73YaTWb1157DQqFAs8884zYqTSJl156CQqFwuGrd+/eYqfVZE6ePImHH34Ybdu2RUBAAPr27YsdO3aInVaTuPbaa2u8tgqFAomJiWKn5nZmsxlJSUmIjIxEQEAAunXrhldeeQVS7uVUWlqKZ555Bl27dkVAQAAGDRqE7du3i52WW9R3PiEIAubOnYvw8HAEBARg6NChOHTokDjJXqX6jvXrr7/GsGHD0LZtWygUCuTm5oqSp7vUd07xxBNPoFu3bggICED79u0xduxY7N+/X8SM/8EiqAFWrFiBWbNmQa/XY9euXbjhhhswfPhwnDlzRuzUmkRZWRluuOEGpKWliZ1Kk9u8eTMSExOxZcsWrF+/HtXV1Rg2bBjKysrETs3trrnmGrz22mvYuXMnduzYgTvuuANjx45FXl6e2Kk1ue3bt+PDDz+ERqMRO5UmFRUVhfz8fNvXL7/8InZKTeL8+fOIjY2Fj48Pvv/+e+zbtw8LFy5EaGio2Kk1ie3btzu8ruvXrwcAjBs3TuTM3G/+/PnIyMjA+++/jz/++APz58/H66+/jvfee0/s1JrMY489hvXr12PZsmXYs2cPhg0bhqFDh+LkyZNip3bV6jufeP311/Huu+/igw8+wNatW9GqVSsMHz4cFRUVzZzp1avvWMvKynDrrbdi/vz5zZxZ06jvnGLAgAHIzMzEH3/8gXXr1kEQBAwbNgxms1nkzAEI5LKbb75ZSExMtN02m81CRESEMG/ePBGzah4AhFWrVomdRrM5c+aMAEDYvHmz2Kk0i9DQUGHJkiVip9GkSktLhR49egjr168XBg8eLMycOVPslJqEXq8XbrjhBrHTaBbPP/+8cOutt4qdhmhmzpwpdOvWTbBYLGKn4najRo0Spk2b5rDt/vvvF7RarUgZNa3y8nJBqVQK3333ncP2/v37C3PmzBEpq6Zx5fmExWIRVCqV8MYbb9i2FRUVCX5+fsLnn38uQobuU9e5k8lkEgAIu3fvbtacmkNd5xS///67AEA4fPhwM2dVE0eCXFRVVYWdO3di6NChtm1eXl4YOnQocnJyRMyMmkJxcTEAICwsTORMmpbZbMby5ctRVlaGmJgYsdNpUomJiRg1apTD77BUHTp0CBEREVCr1dBqtTh+/LjYKTWJb7/9FjfeeCPGjRuHDh06IDo6GosXLxY7rWZRVVWFrKwsTJs2DQqFQux03G7QoEHYuHEjDh48CAD4/fff8csvv2DEiBEiZ9Y0Ll26BLPZDH9/f4ftAQEBkh3JtTKZTCgoKHD42xwcHIyBAwfy/MrD1HdOUVZWhszMTERGRqJz584iZOjIW+wEPMW5c+dgNpvRsWNHh+0dO3ZsMXMbyT0sFgueeeYZxMbGok+fPmKn0yT27NmDmJgYVFRUoHXr1li1ahWuv/56sdNqMsuXL8euXbskM7++LgMHDsTSpUvRq1cv5OfnIzk5Gbfddhv27t2LoKAgsdNzq6NHjyIjIwOzZs3Ciy++iO3bt+Ppp5+Gr68vpkyZInZ6Teqbb75BUVERpk6dKnYqTeKFF15ASUkJevfuDaVSCbPZjNTUVGi1WrFTaxJBQUGIiYnBK6+8guuuuw4dO3bE559/jpycHHTv3l3s9JpUQUEBADg9v7Luo5atvnOK9PR0/Pe//0VZWRl69eqF9evXw9fXV8SML+NIENEVEhMTsXfvXixfvlzsVJpMr169kJubi61btyI+Ph5TpkzBvn37xE6rSZw4cQIzZ85EdnZ2jU9ZpWjEiBEYN24cNBoNhg8fjrVr16KoqAhffPGF2Km5ncViQf/+/fHqq68iOjoaM2bMwOOPP44PPvhA7NSa3EcffYQRI0YgIiJC7FSaxBdffIHs7Gx89tln2LVrFz755BMsWLAAn3zyidipNZlly5ZBEAR06tQJfn5+ePfddzFp0iR4efFUjVq2+s4ptFotdu/ejc2bN6Nnz54YP358i7jei79ZLmrXrh2USiVOnz7tsP306dNQqVQiZUXu9uSTT+K7777DTz/9hGuuuUbsdJqMr68vunfvjgEDBmDevHm44YYb8M4774idVpPYuXMnzpw5g/79+8Pb2xve3t7YvHkz3n33XXh7e7eMizObUEhICHr27InDhw+LnYrbhYeH1xjBvO666yQ7/c/qzz//xIYNG/DYY4+JnUqT+c9//oMXXngBEydORN++ffHII4/g2Wefxbx588ROrcl069YNmzdvxoULF3DixAls27YN1dXVUKvVYqfWpKznUDy/8lz1nVMEBwejR48eiIuLw5dffon9+/dj1apVImZ8GYsgF/n6+mLAgAHYuHGjbZvFYsHGjRslfy2FHAiCgCeffBKrVq3C//3f/yEyMlLslJqVxWJBZWWl2Gk0iTvvvBN79uxBbm6u7evGG2+EVqtFbm4ulEql2Ck2qQsXLuDIkSMIDw8XOxW3i42NrdHK/uDBg+jatatIGTWPzMxMdOjQAaNGjRI7lSZTXl5eYwREqVTCYrGIlFHzadWqFcLDw3H+/HmsW7cOY8eOFTulJhUZGQmVSuVwflVSUoKtW7fy/MpD1XVOIQgCBEFoEeccvCaoAWbNmoUpU6bgxhtvxM0334y3334bZWVlePTRR8VOrUlcuHDB4dNjk8mE3NxchIWFoUuXLiJm5n6JiYn47LPPsHr1agQFBdnmIQcHByMgIEDk7Nxr9uzZGDFiBLp06YLS0lJ89tln2LRpE9atWyd2ak0iKCioxrVdrVq1Qtu2bSV5zde///1vjBkzBl27dsWpU6eg1+uhVCoxadIksVNzu2effRaDBg3Cq6++ivHjx2Pbtm1YtGgRFi1aJHZqTcZisSAzMxNTpkyBt7d0/wsfM2YMUlNT0aVLF0RFRWH37t148803MW3aNLFTazLW9sG9evXC4cOH8Z///Ae9e/eWxDlGfecTzzzzDFJSUtCjRw9ERkYiKSkJERERuPfee8VLupHqO9bCwkIcP34cp06dAgDbBzkqlcojR77qOqc4evQoVqxYgWHDhqF9+/b466+/8NprryEgIAAjR44UO3W2yG6o9957T+jSpYvg6+sr3HzzzcKWLVvETqnJ/PTTTwKAGl9TpkwROzW3c3acAITMzEyxU3O7adOmCV27dhV8fX2F9u3bC3feeafw448/ip1Ws5Jyi+wJEyYI4eHhgq+vr9CpUydhwoQJLaIVaVNZs2aN0KdPH8HPz0/o3bu3sGjRIrFTalLr1q0TAAgHDhwQO5UmVVJSIsycOVPo0qWL4O/vL6jVamHOnDlCZWWl2Kk1mRUrVghqtVrw9fUVVCqVkJiYKBQVFYmdllvUdz5hsViEpKQkoWPHjoKfn59w5513eux7vL5jzczMdLpfr9eLmndj1XVOcfLkSWHEiBFChw4dBB8fH+Gaa64RHnroIWH//v0iZ32ZQhAkvPwyERERERHRFXhNEBERERERyQqLICIiIiIikhUWQUREREREJCssgoiIiIiISFZYBBERERERkaywCCIiIiIiIllhEURERERERLLCIoiIiIiIiGSFRRAREREREckKiyAiIg8zdepU3HvvvQ7bzp49iz59+mDgwIEoLi4WJzEiIiIPwSKIiMjDnT17FnfccQcCAgLw448/Ijg4WOyUiIiIWjQWQUREHuzcuXO488474efnh/Xr1zsUQMePH8fYsWPRunVrtGnTBuPHj8fp06cd7n/s2DEoFIoaX0VFRQCAl156Cf369bPFV1VVoXv37g4xzkamFAoFvvnmG9vtEydOYPz48QgJCUFYWBjGjh2LY8eOOdzn448/RlRUFPz8/BAeHo4nn3wSAHDttdc6zVGhUGDp0qW257N+tWnTBnfddReOHDlie+zz589j8uTJCA0NRWBgIEaMGIFDhw7V+bMtKirCE088gY4dO8Lf3x99+vTBd999BwBYunRprTnl5uYCAMxmM6ZPn47IyEgEBASgV69eeOedd2o8z6ZNm2o8RkhIiEPMkiVLcN1118Hf3x+9e/dGenq6bZ/1NbQ+r9W1116Lt99+u85jJCKSKxZBREQe6u+//8bQoUPh7e2N9evXO5w4WywWjB07FoWFhdi8eTPWr1+Po0ePYsKECQ6PIQgCAGDDhg3Iz8/HV199Vedzvv/++zUKqfpUV1dj+PDhCAoKws8//4xff/0VrVu3xt13342qqioAQEZGBhITEzFjxgzs2bMH3377Lbp37w4A2L59O/Lz85Gfn49rrrkGb7/9tu22/fFkZmYiPz8fBoMBZ86cwYsvvmjbN3XqVOzYsQPffvstcnJyIAgCRo4cierqaqc5WywWjBgxAr/++iuysrKwb98+vPbaa1AqlbaYNm3a2PLIz8/Htm3bajzGNddcg5UrV2Lfvn2YO3cuXnzxRXzxxRcOcdbX4MCBA8jPz69RuGRnZ2Pu3LlITU3FH3/8gVdffRVJSUn45JNPGvQ6EBHRP7zFToCIiBru/PnzGDp0KPbt24cBAwagTZs2Dvs3btyIPXv2wGQyoXPnzgCATz/9FFFRUdi+fTtuuukmALAVASqVCiqVCmFhYbU+Z2FhIVJSUvD8888jKSnJtj0gIAD5+fm13m/FihWwWCxYsmQJFAoFgMsFS0hICDZt2oRhw4YhJSUFzz33HGbOnGm7nzXH9u3b27YplUoEBwdDpVLVeJ6QkBCoVCoEBAQgKCjINip26NAhfPvtt/j1118xaNAgAJcLi86dO+Obb77BuHHjajzWhg0bsG3bNvzxxx/o2bMnAECtVjvEKBQKhzwqKioc9vv4+CA5Odl2OzIyEjk5Ofjiiy8wfvx423bra9CpUye0atWqxnRGvV6PhQsX4v7777c9zr59+/Dhhx9iypQpNXInIqL6cSSIiMgDGQwGWCwW5Obm4vDhw3j99dcd9v/xxx/o3LmzrQACgOuvvx4hISH4448/bNtKSkoAAK1atar3OV9++WXcfvvtuPXWWx229+nTB1u2bIHJZHJ6v99//x2HDx9GUFAQWrdujdatWyMsLAwVFRU4cuQIzpw5g1OnTuHOO+90+fidmTRpElq3bo3Q0FCUlpZi3rx5AC7/LLy9vTFw4EBbbNu2bdGrVy+Hn4W93NxcXHPNNbYCqLHS0tIwYMAAtG/fHq1bt8aiRYtw/Phxh5iSkhJ4eXkhICCgxv3Lyspw5MgRTJ8+3faza926NVJSUhym+wHAoEGDHGKufB4iIvoHR4KIiDyQWq3Gxo0b0a5dO6Snp+Phhx/GqFGjoNFoGvQ4p06dgpeXl9ORFXuHDh3CkiVLkJubi7/++sth37Rp07Bq1Sqo1WqnxdSFCxcwYMAAZGdn19jXvn17eHm55/O4t956C0OHDkVRURHmzJmDqVOnYs2aNY16LGcFSUMtX74c//73v7Fw4ULExMQgKCgIb7zxBrZu3eoQd+rUKXTs2NHpz+HChQsAgMWLFzsUcQAcpuYBl0fcrrvuOtvtIUOGXPUxEBFJFYsgIiIP1LdvX7Rr1w4AMG7cOHz99deYPHkytm3bBl9fX1x33XU4ceIETpw4YRsN2rdvH4qKinD99dfbHmf79u3o3bs3/P3963y+559/Ho899hi6d+9eowgKCAjAhg0bcPr0aZSWlgIAevToYdvfv39/rFixAh06dKgxbc/q2muvxcaNG3H77bc3/Ifx/6lUKtt1RE899RTuueceVFdX47rrrsOlS5ewdetW23S4v//+GwcOHHD4WdjTaDT466+/cPDgwUaPBlmn3yUkJNi2XTl6A1x+DaKjo50+RseOHREREYGjR49Cq9XW+XydO3e2HT8AeHvzv3giotpwOhwRkQSkpaXhzJkztmtQhg4dir59+0Kr1WLXrl3Ytm0bJk+ejMGDB+PGG29EVVUVli1bhjfffBOPPvponY99+PBhbNq0CXPnzq0zrmPHjujevbvDiTgAaLVatGvXDmPHjsXPP/8Mk8mETZs24emnn7YVVC+99BIWLlyId999F4cOHcKuXbvw3nvvNehnUFRUhIKCAhw4cAAfffQR1Go1fHx80KNHD4wdOxaPP/44fvnlF/z+++94+OGH0alTJ4wdO9bpYw0ePBhxcXF44IEHsH79ephMJnz//ff44YcfXM6nR48e2LFjB9atW4eDBw8iKSkJ27dvt+2/cOEC3n77bXz22Wd1vgbJycmYN28e3n33XRw8eBB79uxBZmYm3nzzTdd/OERE5IBFEBGRBISFhWHx4sWYP38+tm7dCoVCgdWrVyM0NBRxcXEYOnQo1Go1VqxYAQDYs2cPXnrpJSQlJWHWrFl1PnZZWRnmzJlTZ9OEugQGBsJgMKBLly64//77cd1112H69OmoqKiwjQxNmTIFb7/9NtLT0xEVFYXRo0fX28L6So8++ijCw8Nx00034fz58/jyyy9t+zIzMzFgwACMHj0aMTExEAQBa9euhY+PT62P99VXX+Gmm27CpEmTcP311+O///0vzGazy/k88cQTuP/++zFhwgQMHDgQf//9t8Oo0Pr167F48WJ8+OGHePDBB2t9nMceewxLlixBZmYm+vbti8GDB2Pp0qWIjIx0ORciInKkEKy9OYmIiIiIiGSAI0FERERERCQrLIKIiIiIiEhWWAQREREREZGssAgiIiIiIiJZYRFERERERESywiKIiIiIiIhkhUUQERERERHJCosgIiIiIiKSFRZBREREREQkKyyCiIiIiIhIVlgEERERERGRrPw/cdMjxzujrDQAAAAASUVORK5CYII=",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "import seaborn as sns\n",
+ "\n",
+ "from typing import Tuple\n",
+ "import pandas as pd\n",
+ "from pandas import DataFrame\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "\n",
+ "# Создание целевого признака\n",
+ "median_price = df['price'].median()\n",
+ "df['above_median_price'] = np.where(df['price'] > median_price, 1, 0)\n",
+ "\n",
+ "# Разделение на признаки и целевую переменную\n",
+ "X = df.drop(columns=['id', 'date', 'price', 'above_median_price'])\n",
+ "y = df['above_median_price']\n",
+ "\n",
+ "# Примерная категоризация\n",
+ "df['price_category'] = pd.cut(df['price'], bins=[0, 300000, 700000, np.inf], labels=[0, 1, 2])\n",
+ "\n",
+ "# Выбор признаков и целевых переменных\n",
+ "X = df.drop(columns=['id', 'date', 'price', 'price_category'])\n",
+ "\n",
+ "\n",
+ "def split_stratified_into_train_val_test(\n",
+ " df_input,\n",
+ " stratify_colname=\"y\",\n",
+ " frac_train=0.6,\n",
+ " frac_val=0.15,\n",
+ " frac_test=0.25,\n",
+ " random_state=None,\n",
+ ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame, DataFrame, DataFrame]:\n",
+ " \n",
+ " if frac_train + frac_val + frac_test != 1.0:\n",
+ " raise ValueError(\n",
+ " \"fractions %f, %f, %f do not add up to 1.0\"\n",
+ " % (frac_train, frac_val, frac_test)\n",
+ " )\n",
+ " \n",
+ " if stratify_colname not in df_input.columns:\n",
+ " raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
+ " X = df_input # Contains all columns.\n",
+ " y = df_input[\n",
+ " [stratify_colname]\n",
+ " ] # Dataframe of just the column on which to stratify.\n",
+ " \n",
+ " # Split original dataframe into train and temp dataframes.\n",
+ " df_train, df_temp, y_train, y_temp = train_test_split(\n",
+ " X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
+ " )\n",
+ "\n",
+ " if frac_val <= 0:\n",
+ " assert len(df_input) == len(df_train) + len(df_temp)\n",
+ " return df_train, pd.DataFrame(), df_temp, y_train, pd.DataFrame(), y_temp\n",
+ " # Split the temp dataframe into val and test dataframes.\n",
+ " relative_frac_test = frac_test / (frac_val + frac_test)\n",
+ "\n",
+ " df_val, df_test, y_val, y_test = train_test_split(\n",
+ " df_temp,\n",
+ " y_temp,\n",
+ " stratify=y_temp,\n",
+ " test_size=relative_frac_test,\n",
+ " random_state=random_state,\n",
+ " )\n",
+ "\n",
+ " assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
+ " return df_train, df_val, df_test, y_train, y_val, y_test\n",
+ "\n",
+ "X_train, X_val, X_test, y_train, y_val, y_test = split_stratified_into_train_val_test(\n",
+ " df, stratify_colname=\"above_median_price\", frac_train=0.80, frac_val=0, frac_test=0.20, random_state=42\n",
+ ")\n",
+ "\n",
+ "display(\"X_train\", X_train)\n",
+ "display(\"y_train\", y_train)\n",
+ "\n",
+ "display(\"X_test\", X_test)\n",
+ "display(\"y_test\", y_test)\n",
+ "\n",
+ "\n",
+ "# Проверка преобразования\n",
+ "print(df.dtypes)\n",
+ "\n",
+ "# Визуализация распределения цен\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "sns.histplot(df['price'], bins=50, kde=True)\n",
+ "plt.title('Распределение цен на недвижимость')\n",
+ "plt.xlabel('Цена')\n",
+ "plt.ylabel('Частота')\n",
+ "plt.show()\n",
+ "\n",
+ "# Визуализация зависимости между ценой и количеством спален\n",
+ "plt.figure(figsize=(10, 6))\n",
+ "sns.boxplot(x='bedrooms', y='price', data=df)\n",
+ "plt.title('Зависимость цены от количества спален')\n",
+ "plt.xlabel('Количество спален')\n",
+ "plt.ylabel('Цена')\n",
+ "plt.show()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Построение конвейеров предобработки \n",
+ "Создадим пайплайн для числовых и категориальных данных. \n",
+ "\n",
+ "preprocessing_num -- конвейер для обработки числовых данных: заполнение пропущенных значений и стандартизация\n",
+ "\n",
+ "preprocessing_cat -- конвейер для обработки категориальных данных: заполнение пропущенных данных и унитарное кодирование\n",
+ "\n",
+ "features_preprocessing -- трансформер для предобработки признаков\n",
+ "\n",
+ "features_engineering -- трансформер для конструирования признаков\n",
+ "\n",
+ "drop_columns -- трансформер для удаления колонок\n",
+ "\n",
+ "pipeline_end -- основной конвейер предобработки данных и конструирования признаков"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 150,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn.base import BaseEstimator, TransformerMixin\n",
+ "from sklearn.compose import ColumnTransformer\n",
+ "from sklearn.discriminant_analysis import StandardScaler\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.preprocessing import OneHotEncoder\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "\n",
+ "pipeline_end = StandardScaler()\n",
+ "\n",
+ "\n",
+ "# Построение конвейеров предобработки\n",
+ "\n",
+ "class HouseFeatures(BaseEstimator, TransformerMixin):\n",
+ " def __init__(self):\n",
+ " pass\n",
+ " def fit(self, X, y=None):\n",
+ " return self\n",
+ " def transform(self, X, y=None):\n",
+ " # Создание новых признаков\n",
+ " X = X.copy()\n",
+ " X[\"Living_area_to_Lot_ratio\"] = X[\"sqft_living\"] / X[\"sqft_lot\"]\n",
+ " return X\n",
+ " def get_feature_names_out(self, features_in):\n",
+ " # Добавление имен новых признаков\n",
+ " new_features = [\"Living_area_to_Lot_ratio\"]\n",
+ " return np.append(features_in, new_features, axis=0)\n",
+ "\n",
+ "\n",
+ "# Обработка числовых данных. Числовой конвейр: заполнение пропущенных значений медианой и стандартизация\n",
+ "preprocessing_num_class = Pipeline(steps=[\n",
+ " ('imputer', SimpleImputer(strategy='median')),\n",
+ " ('scaler', StandardScaler())\n",
+ "])\n",
+ "\n",
+ "preprocessing_cat_class = Pipeline(steps=[\n",
+ " ('imputer', SimpleImputer(strategy='most_frequent')),\n",
+ " ('onehot', OneHotEncoder(handle_unknown='ignore'))\n",
+ "])\n",
+ "\n",
+ "columns_to_drop = [\"date\"]\n",
+ "numeric_columns = [\"sqft_living\", \"sqft_lot\", \"above_median_price\"]\n",
+ "cat_columns = []\n",
+ "\n",
+ "features_preprocessing = ColumnTransformer(\n",
+ " verbose_feature_names_out=False,\n",
+ " transformers=[\n",
+ " (\"prepocessing_num\", preprocessing_num_class, numeric_columns),\n",
+ " (\"prepocessing_cat\", preprocessing_cat_class, cat_columns),\n",
+ " ],\n",
+ " remainder=\"passthrough\"\n",
+ ")\n",
+ "\n",
+ "drop_columns = ColumnTransformer(\n",
+ " verbose_feature_names_out=False,\n",
+ " transformers=[\n",
+ " (\"drop_columns\", \"drop\", columns_to_drop),\n",
+ " ],\n",
+ " remainder=\"passthrough\",\n",
+ ")\n",
+ "\n",
+ "features_postprocessing = ColumnTransformer(\n",
+ " verbose_feature_names_out=False,\n",
+ " transformers=[\n",
+ " ('preprocessing_cat', preprocessing_cat_class, [\"price_category\"]),\n",
+ " ],\n",
+ " remainder=\"passthrough\",\n",
+ ")\n",
+ "\n",
+ "pipeline_end = Pipeline(\n",
+ " [\n",
+ " (\"features_preprocessing\", features_preprocessing),\n",
+ " (\"custom_features\", HouseFeatures()),\n",
+ " (\"drop_columns\", drop_columns),\n",
+ " ]\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Демонстрация работы конвейра для предобработки данных при классификации**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 151,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " above_median_price | \n",
+ " id | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " price_category | \n",
+ " Living_area_to_Lot_ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 20962 | \n",
+ " -1.360742 | \n",
+ " -0.262132 | \n",
+ " -0.994693 | \n",
+ " 1278000210 | \n",
+ " 110000.0 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1968 | \n",
+ " 2007 | \n",
+ " 98001 | \n",
+ " 47.2655 | \n",
+ " -122.244 | \n",
+ " 828 | \n",
+ " 5402 | \n",
+ " 0 | \n",
+ " 5.191063 | \n",
+ "
\n",
+ " \n",
+ " 12284 | \n",
+ " 0.794390 | \n",
+ " -0.094121 | \n",
+ " 1.005335 | \n",
+ " 2193300390 | \n",
+ " 624000.0 | \n",
+ " 4 | \n",
+ " 3.25 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1130 | \n",
+ " 1980 | \n",
+ " 0 | \n",
+ " 98052 | \n",
+ " 47.6920 | \n",
+ " -122.099 | \n",
+ " 2110 | \n",
+ " 11250 | \n",
+ " 1 | \n",
+ " -8.440052 | \n",
+ "
\n",
+ " \n",
+ " 7343 | \n",
+ " 0.837884 | \n",
+ " -0.272723 | \n",
+ " 1.005335 | \n",
+ " 4289900005 | \n",
+ " 1535000.0 | \n",
+ " 4 | \n",
+ " 3.25 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 3 | \n",
+ " ... | \n",
+ " 1030 | \n",
+ " 1908 | \n",
+ " 2003 | \n",
+ " 98122 | \n",
+ " 47.6147 | \n",
+ " -122.285 | \n",
+ " 2130 | \n",
+ " 4200 | \n",
+ " 2 | \n",
+ " -3.072292 | \n",
+ "
\n",
+ " \n",
+ " 14247 | \n",
+ " -0.782270 | \n",
+ " -0.196986 | \n",
+ " -0.994693 | \n",
+ " 316000145 | \n",
+ " 235000.0 | \n",
+ " 4 | \n",
+ " 1.00 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1941 | \n",
+ " 0 | \n",
+ " 98168 | \n",
+ " 47.5054 | \n",
+ " -122.301 | \n",
+ " 1280 | \n",
+ " 7175 | \n",
+ " 0 | \n",
+ " 3.971201 | \n",
+ "
\n",
+ " \n",
+ " 16670 | \n",
+ " 1.011860 | \n",
+ " 0.024330 | \n",
+ " 1.005335 | \n",
+ " 629400480 | \n",
+ " 775000.0 | \n",
+ " 4 | \n",
+ " 2.75 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1996 | \n",
+ " 0 | \n",
+ " 98075 | \n",
+ " 47.5895 | \n",
+ " -121.994 | \n",
+ " 3330 | \n",
+ " 12333 | \n",
+ " 2 | \n",
+ " 41.589045 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 88 | \n",
+ " -0.510432 | \n",
+ " -0.324180 | \n",
+ " -0.994693 | \n",
+ " 1332700270 | \n",
+ " 215000.0 | \n",
+ " 2 | \n",
+ " 2.25 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1979 | \n",
+ " 0 | \n",
+ " 98056 | \n",
+ " 47.5180 | \n",
+ " -122.194 | \n",
+ " 1950 | \n",
+ " 2025 | \n",
+ " 0 | \n",
+ " 1.574534 | \n",
+ "
\n",
+ " \n",
+ " 15031 | \n",
+ " 1.044481 | \n",
+ " -0.314813 | \n",
+ " 1.005335 | \n",
+ " 7129303070 | \n",
+ " 735000.0 | \n",
+ " 4 | \n",
+ " 2.75 | \n",
+ " 2.0 | \n",
+ " 1 | \n",
+ " 4 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1966 | \n",
+ " 0 | \n",
+ " 98118 | \n",
+ " 47.5188 | \n",
+ " -122.256 | \n",
+ " 2620 | \n",
+ " 2433 | \n",
+ " 2 | \n",
+ " -3.317784 | \n",
+ "
\n",
+ " \n",
+ " 5234 | \n",
+ " -0.456065 | \n",
+ " -0.136611 | \n",
+ " 1.005335 | \n",
+ " 2432000130 | \n",
+ " 675000.0 | \n",
+ " 3 | \n",
+ " 1.75 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1956 | \n",
+ " 0 | \n",
+ " 98033 | \n",
+ " 47.6503 | \n",
+ " -122.198 | \n",
+ " 2090 | \n",
+ " 9549 | \n",
+ " 1 | \n",
+ " 3.338418 | \n",
+ "
\n",
+ " \n",
+ " 19980 | \n",
+ " 0.566046 | \n",
+ " 1.239169 | \n",
+ " -0.994693 | \n",
+ " 774100475 | \n",
+ " 415000.0 | \n",
+ " 3 | \n",
+ " 2.75 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2009 | \n",
+ " 0 | \n",
+ " 98014 | \n",
+ " 47.7185 | \n",
+ " -121.405 | \n",
+ " 1740 | \n",
+ " 64626 | \n",
+ " 1 | \n",
+ " 0.456795 | \n",
+ "
\n",
+ " \n",
+ " 3671 | \n",
+ " 0.370323 | \n",
+ " 4.836825 | \n",
+ " 1.005335 | \n",
+ " 8847400115 | \n",
+ " 590000.0 | \n",
+ " 3 | \n",
+ " 2.00 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2005 | \n",
+ " 0 | \n",
+ " 98010 | \n",
+ " 47.3666 | \n",
+ " -121.978 | \n",
+ " 3180 | \n",
+ " 212137 | \n",
+ " 1 | \n",
+ " 0.076563 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17290 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sqft_living sqft_lot above_median_price id price \\\n",
+ "20962 -1.360742 -0.262132 -0.994693 1278000210 110000.0 \n",
+ "12284 0.794390 -0.094121 1.005335 2193300390 624000.0 \n",
+ "7343 0.837884 -0.272723 1.005335 4289900005 1535000.0 \n",
+ "14247 -0.782270 -0.196986 -0.994693 316000145 235000.0 \n",
+ "16670 1.011860 0.024330 1.005335 629400480 775000.0 \n",
+ "... ... ... ... ... ... \n",
+ "88 -0.510432 -0.324180 -0.994693 1332700270 215000.0 \n",
+ "15031 1.044481 -0.314813 1.005335 7129303070 735000.0 \n",
+ "5234 -0.456065 -0.136611 1.005335 2432000130 675000.0 \n",
+ "19980 0.566046 1.239169 -0.994693 774100475 415000.0 \n",
+ "3671 0.370323 4.836825 1.005335 8847400115 590000.0 \n",
+ "\n",
+ " bedrooms bathrooms floors waterfront view ... sqft_basement \\\n",
+ "20962 2 1.00 1.0 0 0 ... 0 \n",
+ "12284 4 3.25 1.0 0 0 ... 1130 \n",
+ "7343 4 3.25 2.0 0 3 ... 1030 \n",
+ "14247 4 1.00 1.5 0 0 ... 0 \n",
+ "16670 4 2.75 2.0 0 0 ... 0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "88 2 2.25 2.0 0 0 ... 0 \n",
+ "15031 4 2.75 2.0 1 4 ... 0 \n",
+ "5234 3 1.75 1.0 0 0 ... 0 \n",
+ "19980 3 2.75 1.5 0 0 ... 0 \n",
+ "3671 3 2.00 1.5 0 0 ... 0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "20962 1968 2007 98001 47.2655 -122.244 828 \n",
+ "12284 1980 0 98052 47.6920 -122.099 2110 \n",
+ "7343 1908 2003 98122 47.6147 -122.285 2130 \n",
+ "14247 1941 0 98168 47.5054 -122.301 1280 \n",
+ "16670 1996 0 98075 47.5895 -121.994 3330 \n",
+ "... ... ... ... ... ... ... \n",
+ "88 1979 0 98056 47.5180 -122.194 1950 \n",
+ "15031 1966 0 98118 47.5188 -122.256 2620 \n",
+ "5234 1956 0 98033 47.6503 -122.198 2090 \n",
+ "19980 2009 0 98014 47.7185 -121.405 1740 \n",
+ "3671 2005 0 98010 47.3666 -121.978 3180 \n",
+ "\n",
+ " sqft_lot15 price_category Living_area_to_Lot_ratio \n",
+ "20962 5402 0 5.191063 \n",
+ "12284 11250 1 -8.440052 \n",
+ "7343 4200 2 -3.072292 \n",
+ "14247 7175 0 3.971201 \n",
+ "16670 12333 2 41.589045 \n",
+ "... ... ... ... \n",
+ "88 2025 0 1.574534 \n",
+ "15031 2433 2 -3.317784 \n",
+ "5234 9549 1 3.338418 \n",
+ "19980 64626 1 0.456795 \n",
+ "3671 212137 1 0.076563 \n",
+ "\n",
+ "[17290 rows x 23 columns]"
+ ]
+ },
+ "execution_count": 151,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "preprocessing_result = pipeline_end.fit_transform(X_train)\n",
+ "preprocessed_df = pd.DataFrame(\n",
+ " preprocessing_result,\n",
+ " columns=pipeline_end.get_feature_names_out(),\n",
+ ")\n",
+ "\n",
+ "preprocessed_df"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Формирование набора моделей для классификации\n",
+ "\n",
+ "logistic -- логистическая регрессия\n",
+ "\n",
+ "ridge -- гребневая регрессия\n",
+ "\n",
+ "decision_tree -- дерево решений\n",
+ "\n",
+ "knn -- k-ближайших соседей\n",
+ "\n",
+ "naive_bayes -- наивный Байесовский классификатор\n",
+ "\n",
+ "gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n",
+ "\n",
+ "random_forest -- метод случайного леса (набор деревьев решений)\n",
+ "\n",
+ "mlp -- многослойный персептрон (нейронная сеть)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 152,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree, svm\n",
+ "\n",
+ "class_models = {\n",
+ " \"logistic\": {\"model\": linear_model.LogisticRegression(max_iter=150)},\n",
+ " \"ridge\": {\"model\": linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n",
+ " \"ridge\": {\"model\": linear_model.LogisticRegression(max_iter=150, solver='lbfgs', penalty=\"l2\", class_weight=\"balanced\")},\n",
+ " \"decision_tree\": {\n",
+ " \"model\": tree.DecisionTreeClassifier(max_depth=5, min_samples_split=10, random_state=random_state)\n",
+ " },\n",
+ "\n",
+ " \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n",
+ " \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n",
+ " \"gradient_boosting\": {\n",
+ " \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n",
+ " },\n",
+ "\n",
+ " \"random_forest\": {\n",
+ " \"model\": ensemble.RandomForestClassifier(\n",
+ " max_depth=5, class_weight=\"balanced\", random_state=random_state\n",
+ " )\n",
+ " },\n",
+ "\n",
+ " \"mlp\": {\n",
+ " \"model\": neural_network.MLPClassifier(\n",
+ " hidden_layer_sizes=(7,),\n",
+ " max_iter=200,\n",
+ " early_stopping=True,\n",
+ " random_state=random_state,\n",
+ " )\n",
+ " },\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Обучение моделей на обучающем наборе данных и оценка на тестовом**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 153,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: logistic\n",
+ "Model: ridge\n",
+ "Model: decision_tree\n",
+ "Model: knn\n",
+ "Model: naive_bayes\n",
+ "Model: gradient_boosting\n",
+ "Model: random_forest\n",
+ "Model: mlp\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn import metrics\n",
+ "\n",
+ "for model_name in class_models.keys():\n",
+ " print(f\"Model: {model_name}\")\n",
+ " model = class_models[model_name][\"model\"]\n",
+ "\n",
+ " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
+ " model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n",
+ "\n",
+ " y_train_predict = model_pipeline.predict(X_train)\n",
+ " y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n",
+ " y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n",
+ "\n",
+ " class_models[model_name][\"pipeline\"] = model_pipeline\n",
+ " class_models[model_name][\"probs\"] = y_test_probs\n",
+ " class_models[model_name][\"preds\"] = y_test_predict\n",
+ "\n",
+ " class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n",
+ " y_train, y_train_predict, zero_division=1\n",
+ " )\n",
+ " class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n",
+ " y_test, y_test_predict, zero_division=1\n",
+ " )\n",
+ " class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n",
+ " y_train, y_train_predict\n",
+ " )\n",
+ " class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n",
+ " y_test, y_test_predict\n",
+ " )\n",
+ " class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n",
+ " y_train, y_train_predict\n",
+ " )\n",
+ " class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n",
+ " y_test, y_test_predict\n",
+ " )\n",
+ " class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n",
+ " y_test, y_test_probs\n",
+ " )\n",
+ " class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n",
+ " class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n",
+ " class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n",
+ " y_test, y_test_predict\n",
+ " )\n",
+ " class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n",
+ " y_test, y_test_predict\n",
+ " )\n",
+ " class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n",
+ " y_test, y_test_predict\n",
+ " )"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Сводная таблица оценок качества для использованных моделей классификации¶\n",
+ "Матрица неточностей**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 154,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from sklearn.metrics import ConfusionMatrixDisplay\n",
+ "import matplotlib.pyplot as plt\n",
+ "\n",
+ "_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)\n",
+ "for index, key in enumerate(class_models.keys()):\n",
+ " c_matrix = class_models[key][\"Confusion_matrix\"]\n",
+ " disp = ConfusionMatrixDisplay(\n",
+ " confusion_matrix=c_matrix, display_labels=[\"Less\", \"More\"]\n",
+ " ).plot(ax=ax.flat[index])\n",
+ " disp.ax_.set_title(key)\n",
+ "\n",
+ "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Значение 2173 в желтом квадрате представляет собой количество объектов, относимых к классу \"Less\", которые модель правильно классифицировала. Это свидетельствует о высоком уровне точности в идентификации этого класса. Значение 2150 в жёлтом нижнем правом квадрате указывает на количество правильно классифицированных объектов класса \"More\". Хотя это также является положительным результатом, мы можем заметить, что он местами ниже, чем для класса \"Less\", а местами и выше.\n",
+ "\n",
+ "Точность, полнота, верность (аккуратность), F-мера"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 155,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Precision_train | \n",
+ " Precision_test | \n",
+ " Recall_train | \n",
+ " Recall_test | \n",
+ " Accuracy_train | \n",
+ " Accuracy_test | \n",
+ " F1_train | \n",
+ " F1_test | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " logistic | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.999767 | \n",
+ " 1.000000 | \n",
+ " 0.999884 | \n",
+ " 1.000000 | \n",
+ " 0.999884 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " ridge | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.999651 | \n",
+ " 1.000000 | \n",
+ " 0.999826 | \n",
+ " 1.000000 | \n",
+ " 0.999826 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " decision_tree | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " gradient_boosting | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " random_forest | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " naive_bayes | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 0.786719 | \n",
+ " 0.793953 | \n",
+ " 0.893927 | \n",
+ " 0.897525 | \n",
+ " 0.880630 | \n",
+ " 0.885144 | \n",
+ "
\n",
+ " \n",
+ " knn | \n",
+ " 0.872486 | \n",
+ " 0.827473 | \n",
+ " 0.857774 | \n",
+ " 0.820930 | \n",
+ " 0.866917 | \n",
+ " 0.825815 | \n",
+ " 0.865068 | \n",
+ " 0.824189 | \n",
+ "
\n",
+ " \n",
+ " mlp | \n",
+ " 0.687500 | \n",
+ " 0.615385 | \n",
+ " 0.002558 | \n",
+ " 0.003721 | \n",
+ " 0.503355 | \n",
+ " 0.503354 | \n",
+ " 0.005098 | \n",
+ " 0.007397 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 155,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
+ " [\n",
+ " \"Precision_train\",\n",
+ " \"Precision_test\",\n",
+ " \"Recall_train\",\n",
+ " \"Recall_test\",\n",
+ " \"Accuracy_train\",\n",
+ " \"Accuracy_test\",\n",
+ " \"F1_train\",\n",
+ " \"F1_test\",\n",
+ " ]\n",
+ "]\n",
+ "class_metrics.sort_values(\n",
+ " by=\"Accuracy_test\", ascending=False\n",
+ ").style.background_gradient(\n",
+ " cmap=\"plasma\",\n",
+ " low=0.3,\n",
+ " high=1,\n",
+ " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
+ ").background_gradient(\n",
+ " cmap=\"viridis\",\n",
+ " low=1,\n",
+ " high=0.3,\n",
+ " subset=[\n",
+ " \"Precision_train\",\n",
+ " \"Precision_test\",\n",
+ " \"Recall_train\",\n",
+ " \"Recall_test\",\n",
+ " ],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Действительно, если модели, включая логистическую регрессию (есть исключения), ридж-регрессию (есть исключения), дерево решений, случайный лес и градиентный бустинг, показывают 100% точность на обучающей выборке, это может свидетельствовать о переобучении. Переобучение (overfitting) происходит, когда модель слишком хорошо подстраивается под обучающие данные, включая шум и случайные вариации, и начинает плохо работать на новых данных (например, на тестовой выборке). \n",
+ "\n",
+ "ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 156,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Accuracy_test | \n",
+ " F1_test | \n",
+ " ROC_AUC_test | \n",
+ " Cohen_kappa_test | \n",
+ " MCC_test | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " logistic | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " ridge | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " decision_tree | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " gradient_boosting | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " random_forest | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " naive_bayes | \n",
+ " 0.897525 | \n",
+ " 0.885144 | \n",
+ " 0.999566 | \n",
+ " 0.794820 | \n",
+ " 0.812098 | \n",
+ "
\n",
+ " \n",
+ " knn | \n",
+ " 0.825815 | \n",
+ " 0.824189 | \n",
+ " 0.910823 | \n",
+ " 0.651606 | \n",
+ " 0.651627 | \n",
+ "
\n",
+ " \n",
+ " mlp | \n",
+ " 0.503354 | \n",
+ " 0.007397 | \n",
+ " 0.497071 | \n",
+ " 0.001427 | \n",
+ " 0.012966 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 156,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n",
+ " [\n",
+ " \"Accuracy_test\",\n",
+ " \"F1_test\",\n",
+ " \"ROC_AUC_test\",\n",
+ " \"Cohen_kappa_test\",\n",
+ " \"MCC_test\",\n",
+ " ]\n",
+ "]\n",
+ "class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False).style.background_gradient(\n",
+ " cmap=\"plasma\",\n",
+ " low=0.3,\n",
+ " high=1,\n",
+ " subset=[\n",
+ " \"ROC_AUC_test\",\n",
+ " \"MCC_test\",\n",
+ " \"Cohen_kappa_test\",\n",
+ " ],\n",
+ ").background_gradient(\n",
+ " cmap=\"viridis\",\n",
+ " low=1,\n",
+ " high=0.3,\n",
+ " subset=[\n",
+ " \"Accuracy_test\",\n",
+ " \"F1_test\",\n",
+ " ],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 157,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'logistic'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "best_model = str(class_metrics.sort_values(by=\"MCC_test\", ascending=False).iloc[0].name)\n",
+ "\n",
+ "display(best_model)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Вывод данных с ошибкой предсказания для оценки**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 158,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'Error items count: 0'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " Predicted | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " above_median_price | \n",
+ " price_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ "
\n",
+ "
0 rows × 24 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ "Empty DataFrame\n",
+ "Columns: [id, Predicted, date, price, bedrooms, bathrooms, sqft_living, sqft_lot, floors, waterfront, view, condition, grade, sqft_above, sqft_basement, yr_built, yr_renovated, zipcode, lat, long, sqft_living15, sqft_lot15, above_median_price, price_category]\n",
+ "Index: []\n",
+ "\n",
+ "[0 rows x 24 columns]"
+ ]
+ },
+ "execution_count": 158,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "preprocessing_result = pipeline_end.transform(X_test)\n",
+ "preprocessed_df = pd.DataFrame(\n",
+ " preprocessing_result,\n",
+ " columns=pipeline_end.get_feature_names_out(),\n",
+ ")\n",
+ "\n",
+ "y_pred = class_models[best_model][\"preds\"]\n",
+ "\n",
+ "error_index = y_test[y_test[\"above_median_price\"] != y_pred].index.tolist()\n",
+ "display(f\"Error items count: {len(error_index)}\")\n",
+ "\n",
+ "error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n",
+ "error_df = X_test.loc[error_index].copy()\n",
+ "error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n",
+ "error_df.sort_index()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 159,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " above_median_price | \n",
+ " price_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6863 | \n",
+ " 1124000050 | \n",
+ " 20140729T000000 | \n",
+ " 461000.0 | \n",
+ " 4 | \n",
+ " 1.0 | \n",
+ " 1260 | \n",
+ " 8505 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1951 | \n",
+ " 0 | \n",
+ " 98177 | \n",
+ " 47.7181 | \n",
+ " -122.371 | \n",
+ " 1480 | \n",
+ " 8100 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms sqft_living \\\n",
+ "6863 1124000050 20140729T000000 461000.0 4 1.0 1260 \n",
+ "\n",
+ " sqft_lot floors waterfront view ... sqft_basement yr_built yr_renovated \\\n",
+ "6863 8505 1.5 0 0 ... 0 1951 0 \n",
+ "\n",
+ " zipcode lat long sqft_living15 sqft_lot15 above_median_price \\\n",
+ "6863 98177 47.7181 -122.371 1480 8100 1 \n",
+ "\n",
+ " price_category \n",
+ "6863 1 \n",
+ "\n",
+ "[1 rows x 23 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " above_median_price | \n",
+ " id | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " price_category | \n",
+ " Living_area_to_Lot_ratio | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6863 | \n",
+ " -0.891006 | \n",
+ " -0.162689 | \n",
+ " 1.005335 | \n",
+ " 1.124000e+09 | \n",
+ " 461000.0 | \n",
+ " 4.0 | \n",
+ " 1.0 | \n",
+ " 1.5 | \n",
+ " 0.0 | \n",
+ " 0.0 | \n",
+ " ... | \n",
+ " 0.0 | \n",
+ " 1951.0 | \n",
+ " 0.0 | \n",
+ " 98177.0 | \n",
+ " 47.7181 | \n",
+ " -122.371 | \n",
+ " 1480.0 | \n",
+ " 8100.0 | \n",
+ " 1.0 | \n",
+ " 5.476729 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
1 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " sqft_living sqft_lot above_median_price id price \\\n",
+ "6863 -0.891006 -0.162689 1.005335 1.124000e+09 461000.0 \n",
+ "\n",
+ " bedrooms bathrooms floors waterfront view ... sqft_basement \\\n",
+ "6863 4.0 1.0 1.5 0.0 0.0 ... 0.0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "6863 1951.0 0.0 98177.0 47.7181 -122.371 1480.0 \n",
+ "\n",
+ " sqft_lot15 price_category Living_area_to_Lot_ratio \n",
+ "6863 8100.0 1.0 5.476729 \n",
+ "\n",
+ "[1 rows x 23 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'predicted: 1 (proba: [0. 1.])'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'real: 1'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "model = class_models[best_model][\"pipeline\"]\n",
+ "\n",
+ "example_id = 6863\n",
+ "test = pd.DataFrame(X_test.loc[example_id, :]).T\n",
+ "test_preprocessed = pd.DataFrame(preprocessed_df.loc[example_id, :]).T\n",
+ "display(test)\n",
+ "display(test_preprocessed)\n",
+ "result_proba = model.predict_proba(test)[0]\n",
+ "result = model.predict(test)[0]\n",
+ "real = int(y_test.loc[example_id].values[0])\n",
+ "display(f\"predicted: {result} (proba: {result_proba})\")\n",
+ "display(f\"real: {real}\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Подбор гиперпараметров методом поиска по сетке**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 160,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "e:\\MII\\laboratory\\mai\\Lib\\site-packages\\numpy\\ma\\core.py:2881: RuntimeWarning: invalid value encountered in cast\n",
+ " _data = np.array(data, dtype=dtype, copy=copy,\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'model__criterion': 'gini',\n",
+ " 'model__max_depth': 5,\n",
+ " 'model__max_features': 'sqrt',\n",
+ " 'model__n_estimators': 10}"
+ ]
+ },
+ "execution_count": 160,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from sklearn.model_selection import GridSearchCV\n",
+ "\n",
+ "optimized_model_type = \"random_forest\"\n",
+ "\n",
+ "random_forest_model = class_models[optimized_model_type][\"pipeline\"]\n",
+ "\n",
+ "param_grid = {\n",
+ " \"model__n_estimators\": [10, 50, 100],\n",
+ " \"model__max_features\": [\"sqrt\", \"log2\"],\n",
+ " \"model__max_depth\": [5, 7, 10],\n",
+ " \"model__criterion\": [\"gini\", \"entropy\"],\n",
+ "}\n",
+ "\n",
+ "gs_optomizer = GridSearchCV(\n",
+ " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n",
+ ")\n",
+ "gs_optomizer.fit(X_train, y_train.values.ravel())\n",
+ "gs_optomizer.best_params_"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Обучение модели с новыми гиперпараметрами"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 161,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimized_model = ensemble.RandomForestClassifier(\n",
+ " random_state=random_state,\n",
+ " criterion=\"gini\",\n",
+ " max_depth=5,\n",
+ " max_features=\"log2\",\n",
+ " n_estimators=10,\n",
+ ")\n",
+ "\n",
+ "result = {}\n",
+ "\n",
+ "result[\"pipeline\"] = Pipeline([(\"pipeline\", pipeline_end), (\"model\", optimized_model)]).fit(X_train, y_train.values.ravel())\n",
+ "result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n",
+ "result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n",
+ "result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n",
+ "\n",
+ "result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n",
+ "result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n",
+ "result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n",
+ "result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n",
+ "result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n",
+ "result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n",
+ "result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n",
+ "result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n",
+ "result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n",
+ "result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n",
+ "result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n",
+ "result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Формирование данных для оценки старой и новой версии модели**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 162,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n",
+ "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
+ " data=class_models[optimized_model_type]\n",
+ ")\n",
+ "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n",
+ " data=result\n",
+ ")\n",
+ "optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n",
+ "optimized_metrics = optimized_metrics.set_index(\"Name\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Оценка параметров старой и новой модели**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 163,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Precision_train | \n",
+ " Precision_test | \n",
+ " Recall_train | \n",
+ " Recall_test | \n",
+ " Accuracy_train | \n",
+ " Accuracy_test | \n",
+ " F1_train | \n",
+ " F1_test | \n",
+ "
\n",
+ " \n",
+ " Name | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Old | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " New | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 163,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "optimized_metrics[\n",
+ " [\n",
+ " \"Precision_train\",\n",
+ " \"Precision_test\",\n",
+ " \"Recall_train\",\n",
+ " \"Recall_test\",\n",
+ " \"Accuracy_train\",\n",
+ " \"Accuracy_test\",\n",
+ " \"F1_train\",\n",
+ " \"F1_test\",\n",
+ " ]\n",
+ "].style.background_gradient(\n",
+ " cmap=\"plasma\",\n",
+ " low=0.3,\n",
+ " high=1,\n",
+ " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n",
+ ").background_gradient(\n",
+ " cmap=\"viridis\",\n",
+ " low=1,\n",
+ " high=0.3,\n",
+ " subset=[\n",
+ " \"Precision_train\",\n",
+ " \"Precision_test\",\n",
+ " \"Recall_train\",\n",
+ " \"Recall_test\",\n",
+ " ],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Как для обучающей (Precision_train), так и для тестовой (Precision_test) выборки обе модели достигли идеальных значений 1.000000. Это указывает на то, что модели очень точно классифицируют положительные образцы, не пропуская их."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 164,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Accuracy_test | \n",
+ " F1_test | \n",
+ " ROC_AUC_test | \n",
+ " Cohen_kappa_test | \n",
+ " MCC_test | \n",
+ "
\n",
+ " \n",
+ " Name | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ " | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " Old | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ " New | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ " 1.000000 | \n",
+ "
\n",
+ " \n",
+ "
\n"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 164,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "optimized_metrics[\n",
+ " [\n",
+ " \"Accuracy_test\",\n",
+ " \"F1_test\",\n",
+ " \"ROC_AUC_test\",\n",
+ " \"Cohen_kappa_test\",\n",
+ " \"MCC_test\",\n",
+ " ]\n",
+ "].style.background_gradient(\n",
+ " cmap=\"plasma\",\n",
+ " low=0.3,\n",
+ " high=1,\n",
+ " subset=[\n",
+ " \"ROC_AUC_test\",\n",
+ " \"MCC_test\",\n",
+ " \"Cohen_kappa_test\",\n",
+ " ],\n",
+ ").background_gradient(\n",
+ " cmap=\"viridis\",\n",
+ " low=1,\n",
+ " high=0.3,\n",
+ " subset=[\n",
+ " \"Accuracy_test\",\n",
+ " \"F1_test\",\n",
+ " ],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Оба варианта модели продемонстрировали безупречную точность классификации, достигнув значения 1.000000. Это свидетельствует о том, что модели точно классифицировали все тестовые примеры, не допустив никаких ошибок в предсказаниях."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 165,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False\n",
+ ")\n",
+ "\n",
+ "for index in range(0, len(optimized_metrics)):\n",
+ " c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n",
+ " disp = ConfusionMatrixDisplay(\n",
+ " confusion_matrix=c_matrix, display_labels=[\"Less\", \"More\"]\n",
+ " ).plot(ax=ax.flat[index])\n",
+ "\n",
+ "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n",
+ "plt.show()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "В желтом квадрате мы видим значение 2173, что обозначает количество правильно классифицированных объектов, отнесенных к классу \"Less\". Это свидетельствует о том, что модель успешно идентифицирует объекты этого класса, минимизируя количество ложных положительных срабатываний.\n",
+ "\n",
+ "В правом нижнем жёлтом квадрате значение 2150 указывает на количество правильно классифицированных объектов, отнесенных к классу \"More\". Это также является показателем высокой точности модели в определении объектов данного класса."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Задача регресии: предсказание цены дома (price).\n",
+ "\n",
+ "Описание: Оценить, какая будет цена дома (price) на основе исторических данных о характеристиках домов, таких как площадь. Целевая переменная: Цена дома (price). (среднее значение)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 166,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Среднее значение поля: 2079.8997362698374\n",
+ " id date price bedrooms bathrooms sqft_living \\\n",
+ "0 7129300520 20141013T000000 221900.0 3 1.00 1180 \n",
+ "1 6414100192 20141209T000000 538000.0 3 2.25 2570 \n",
+ "2 5631500400 20150225T000000 180000.0 2 1.00 770 \n",
+ "3 2487200875 20141209T000000 604000.0 4 3.00 1960 \n",
+ "4 1954400510 20150218T000000 510000.0 3 2.00 1680 \n",
+ "\n",
+ " sqft_lot floors waterfront view ... yr_built yr_renovated zipcode \\\n",
+ "0 5650 1.0 0 0 ... 1955 0 98178 \n",
+ "1 7242 2.0 0 0 ... 1951 1991 98125 \n",
+ "2 10000 1.0 0 0 ... 1933 0 98028 \n",
+ "3 5000 1.0 0 0 ... 1965 0 98136 \n",
+ "4 8080 1.0 0 0 ... 1987 0 98074 \n",
+ "\n",
+ " lat long sqft_living15 sqft_lot15 above_median_price \\\n",
+ "0 47.5112 -122.257 1340 5650 0 \n",
+ "1 47.7210 -122.319 1690 7639 1 \n",
+ "2 47.7379 -122.233 2720 8062 0 \n",
+ "3 47.5208 -122.393 1360 5000 1 \n",
+ "4 47.6168 -122.045 1800 7503 1 \n",
+ "\n",
+ " price_category average_price \n",
+ "0 0 0 \n",
+ "1 1 1 \n",
+ "2 0 0 \n",
+ "3 1 0 \n",
+ "4 1 0 \n",
+ "\n",
+ "[5 rows x 24 columns]\n",
+ "Статистическое описание DataFrame:\n",
+ " id price bedrooms bathrooms sqft_living \\\n",
+ "count 2.161300e+04 2.161300e+04 21613.000000 21613.000000 21613.000000 \n",
+ "mean 4.580302e+09 5.400881e+05 3.370842 2.114757 2079.899736 \n",
+ "std 2.876566e+09 3.671272e+05 0.930062 0.770163 918.440897 \n",
+ "min 1.000102e+06 7.500000e+04 0.000000 0.000000 290.000000 \n",
+ "25% 2.123049e+09 3.219500e+05 3.000000 1.750000 1427.000000 \n",
+ "50% 3.904930e+09 4.500000e+05 3.000000 2.250000 1910.000000 \n",
+ "75% 7.308900e+09 6.450000e+05 4.000000 2.500000 2550.000000 \n",
+ "max 9.900000e+09 7.700000e+06 33.000000 8.000000 13540.000000 \n",
+ "\n",
+ " sqft_lot floors waterfront view condition \\\n",
+ "count 2.161300e+04 21613.000000 21613.000000 21613.000000 21613.000000 \n",
+ "mean 1.510697e+04 1.494309 0.007542 0.234303 3.409430 \n",
+ "std 4.142051e+04 0.539989 0.086517 0.766318 0.650743 \n",
+ "min 5.200000e+02 1.000000 0.000000 0.000000 1.000000 \n",
+ "25% 5.040000e+03 1.000000 0.000000 0.000000 3.000000 \n",
+ "50% 7.618000e+03 1.500000 0.000000 0.000000 3.000000 \n",
+ "75% 1.068800e+04 2.000000 0.000000 0.000000 4.000000 \n",
+ "max 1.651359e+06 3.500000 1.000000 4.000000 5.000000 \n",
+ "\n",
+ " ... sqft_basement yr_built yr_renovated zipcode \\\n",
+ "count ... 21613.000000 21613.000000 21613.000000 21613.000000 \n",
+ "mean ... 291.509045 1971.005136 84.402258 98077.939805 \n",
+ "std ... 442.575043 29.373411 401.679240 53.505026 \n",
+ "min ... 0.000000 1900.000000 0.000000 98001.000000 \n",
+ "25% ... 0.000000 1951.000000 0.000000 98033.000000 \n",
+ "50% ... 0.000000 1975.000000 0.000000 98065.000000 \n",
+ "75% ... 560.000000 1997.000000 0.000000 98118.000000 \n",
+ "max ... 4820.000000 2015.000000 2015.000000 98199.000000 \n",
+ "\n",
+ " lat long sqft_living15 sqft_lot15 \\\n",
+ "count 21613.000000 21613.000000 21613.000000 21613.000000 \n",
+ "mean 47.560053 -122.213896 1986.552492 12768.455652 \n",
+ "std 0.138564 0.140828 685.391304 27304.179631 \n",
+ "min 47.155900 -122.519000 399.000000 651.000000 \n",
+ "25% 47.471000 -122.328000 1490.000000 5100.000000 \n",
+ "50% 47.571800 -122.230000 1840.000000 7620.000000 \n",
+ "75% 47.678000 -122.125000 2360.000000 10083.000000 \n",
+ "max 47.777600 -121.315000 6210.000000 871200.000000 \n",
+ "\n",
+ " above_median_price average_price \n",
+ "count 21613.000000 21613.00000 \n",
+ "mean 0.497340 0.42752 \n",
+ "std 0.500004 0.49473 \n",
+ "min 0.000000 0.00000 \n",
+ "25% 0.000000 0.00000 \n",
+ "50% 0.000000 0.00000 \n",
+ "75% 1.000000 1.00000 \n",
+ "max 1.000000 1.00000 \n",
+ "\n",
+ "[8 rows x 22 columns]\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "from sklearn import set_config\n",
+ "\n",
+ "set_config(transform_output=\"pandas\")\n",
+ "\n",
+ "# Опция для настройки генерации случайных чисел (если это нужно для других частей кода)\n",
+ "random_state = 42\n",
+ "\n",
+ "# Вычисление среднего значения поля \"Close\"\n",
+ "average_price = df['sqft_living'].mean()\n",
+ "print(f\"Среднее значение поля: {average_price}\")\n",
+ "\n",
+ "# Создание новой колонки, указывающей, выше или ниже среднего значение цена закрытия\n",
+ "df['average_price'] = (df['sqft_living'] > average_price).astype(int)\n",
+ "\n",
+ "# Удаление последней строки, где нет значения для следующего дня\n",
+ "df.dropna(inplace=True)\n",
+ "\n",
+ "# Вывод DataFrame с новой колонкой\n",
+ "print(df.head())\n",
+ "\n",
+ "# Примерный анализ данных\n",
+ "print(\"Статистическое описание DataFrame:\")\n",
+ "print(df.describe())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 167,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "'X_train'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " above_median_price | \n",
+ " price_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6325 | \n",
+ " 5467910190 | \n",
+ " 20140527T000000 | \n",
+ " 325000.0 | \n",
+ " 3 | \n",
+ " 1.75 | \n",
+ " 1780 | \n",
+ " 13095 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1983 | \n",
+ " 0 | \n",
+ " 98042 | \n",
+ " 47.3670 | \n",
+ " -122.152 | \n",
+ " 2750 | \n",
+ " 13095 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 13473 | \n",
+ " 9331800580 | \n",
+ " 20150310T000000 | \n",
+ " 257000.0 | \n",
+ " 2 | \n",
+ " 1.00 | \n",
+ " 1000 | \n",
+ " 3700 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 200 | \n",
+ " 1929 | \n",
+ " 0 | \n",
+ " 98118 | \n",
+ " 47.5520 | \n",
+ " -122.290 | \n",
+ " 1270 | \n",
+ " 5000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17614 | \n",
+ " 2407000405 | \n",
+ " 20150226T000000 | \n",
+ " 228500.0 | \n",
+ " 3 | \n",
+ " 1.00 | \n",
+ " 1080 | \n",
+ " 7486 | \n",
+ " 1.5 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 90 | \n",
+ " 1942 | \n",
+ " 0 | \n",
+ " 98146 | \n",
+ " 47.4838 | \n",
+ " -122.335 | \n",
+ " 1170 | \n",
+ " 7800 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 16970 | \n",
+ " 5466700290 | \n",
+ " 20150108T000000 | \n",
+ " 288000.0 | \n",
+ " 3 | \n",
+ " 2.25 | \n",
+ " 2090 | \n",
+ " 7500 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 810 | \n",
+ " 1977 | \n",
+ " 0 | \n",
+ " 98031 | \n",
+ " 47.3951 | \n",
+ " -122.172 | \n",
+ " 1800 | \n",
+ " 7350 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 20868 | \n",
+ " 3026059361 | \n",
+ " 20150417T000000 | \n",
+ " 479000.0 | \n",
+ " 2 | \n",
+ " 2.50 | \n",
+ " 1741 | \n",
+ " 1439 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 295 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 98034 | \n",
+ " 47.7043 | \n",
+ " -122.209 | \n",
+ " 2090 | \n",
+ " 10454 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 11964 | \n",
+ " 5272200045 | \n",
+ " 20141113T000000 | \n",
+ " 378000.0 | \n",
+ " 3 | \n",
+ " 1.50 | \n",
+ " 1000 | \n",
+ " 6914 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1947 | \n",
+ " 0 | \n",
+ " 98125 | \n",
+ " 47.7144 | \n",
+ " -122.319 | \n",
+ " 1000 | \n",
+ " 6947 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 21575 | \n",
+ " 9578500790 | \n",
+ " 20141111T000000 | \n",
+ " 399950.0 | \n",
+ " 3 | \n",
+ " 2.50 | \n",
+ " 3087 | \n",
+ " 5002 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2014 | \n",
+ " 0 | \n",
+ " 98023 | \n",
+ " 47.2974 | \n",
+ " -122.349 | \n",
+ " 2927 | \n",
+ " 5183 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 5390 | \n",
+ " 7202350480 | \n",
+ " 20140930T000000 | \n",
+ " 575000.0 | \n",
+ " 3 | \n",
+ " 2.50 | \n",
+ " 2120 | \n",
+ " 4780 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2004 | \n",
+ " 0 | \n",
+ " 98053 | \n",
+ " 47.6810 | \n",
+ " -122.032 | \n",
+ " 1690 | \n",
+ " 2650 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 860 | \n",
+ " 1723049033 | \n",
+ " 20140620T000000 | \n",
+ " 245000.0 | \n",
+ " 1 | \n",
+ " 0.75 | \n",
+ " 380 | \n",
+ " 15000 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1963 | \n",
+ " 0 | \n",
+ " 98168 | \n",
+ " 47.4810 | \n",
+ " -122.323 | \n",
+ " 1170 | \n",
+ " 15000 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15795 | \n",
+ " 6147650280 | \n",
+ " 20150325T000000 | \n",
+ " 315000.0 | \n",
+ " 4 | \n",
+ " 2.50 | \n",
+ " 3130 | \n",
+ " 5999 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2006 | \n",
+ " 0 | \n",
+ " 98042 | \n",
+ " 47.3837 | \n",
+ " -122.099 | \n",
+ " 3020 | \n",
+ " 5997 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17290 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms \\\n",
+ "6325 5467910190 20140527T000000 325000.0 3 1.75 \n",
+ "13473 9331800580 20150310T000000 257000.0 2 1.00 \n",
+ "17614 2407000405 20150226T000000 228500.0 3 1.00 \n",
+ "16970 5466700290 20150108T000000 288000.0 3 2.25 \n",
+ "20868 3026059361 20150417T000000 479000.0 2 2.50 \n",
+ "... ... ... ... ... ... \n",
+ "11964 5272200045 20141113T000000 378000.0 3 1.50 \n",
+ "21575 9578500790 20141111T000000 399950.0 3 2.50 \n",
+ "5390 7202350480 20140930T000000 575000.0 3 2.50 \n",
+ "860 1723049033 20140620T000000 245000.0 1 0.75 \n",
+ "15795 6147650280 20150325T000000 315000.0 4 2.50 \n",
+ "\n",
+ " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n",
+ "6325 1780 13095 1.0 0 0 ... 0 \n",
+ "13473 1000 3700 1.0 0 0 ... 200 \n",
+ "17614 1080 7486 1.5 0 0 ... 90 \n",
+ "16970 2090 7500 1.0 0 0 ... 810 \n",
+ "20868 1741 1439 2.0 0 0 ... 295 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "11964 1000 6914 1.0 0 0 ... 0 \n",
+ "21575 3087 5002 2.0 0 0 ... 0 \n",
+ "5390 2120 4780 2.0 0 0 ... 0 \n",
+ "860 380 15000 1.0 0 0 ... 0 \n",
+ "15795 3130 5999 2.0 0 0 ... 0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "6325 1983 0 98042 47.3670 -122.152 2750 \n",
+ "13473 1929 0 98118 47.5520 -122.290 1270 \n",
+ "17614 1942 0 98146 47.4838 -122.335 1170 \n",
+ "16970 1977 0 98031 47.3951 -122.172 1800 \n",
+ "20868 2007 0 98034 47.7043 -122.209 2090 \n",
+ "... ... ... ... ... ... ... \n",
+ "11964 1947 0 98125 47.7144 -122.319 1000 \n",
+ "21575 2014 0 98023 47.2974 -122.349 2927 \n",
+ "5390 2004 0 98053 47.6810 -122.032 1690 \n",
+ "860 1963 0 98168 47.4810 -122.323 1170 \n",
+ "15795 2006 0 98042 47.3837 -122.099 3020 \n",
+ "\n",
+ " sqft_lot15 above_median_price price_category \n",
+ "6325 13095 0 1 \n",
+ "13473 5000 0 0 \n",
+ "17614 7800 0 0 \n",
+ "16970 7350 0 0 \n",
+ "20868 10454 1 1 \n",
+ "... ... ... ... \n",
+ "11964 6947 0 1 \n",
+ "21575 5183 0 1 \n",
+ "5390 2650 1 1 \n",
+ "860 15000 0 0 \n",
+ "15795 5997 0 1 \n",
+ "\n",
+ "[17290 rows x 23 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'y_train'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " average_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 6325 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 13473 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 17614 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 16970 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 20868 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 11964 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 21575 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 5390 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 860 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 15795 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
17290 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " average_price\n",
+ "6325 0\n",
+ "13473 0\n",
+ "17614 0\n",
+ "16970 1\n",
+ "20868 0\n",
+ "... ...\n",
+ "11964 0\n",
+ "21575 1\n",
+ "5390 1\n",
+ "860 0\n",
+ "15795 1\n",
+ "\n",
+ "[17290 rows x 1 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'X_test'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " id | \n",
+ " date | \n",
+ " price | \n",
+ " bedrooms | \n",
+ " bathrooms | \n",
+ " sqft_living | \n",
+ " sqft_lot | \n",
+ " floors | \n",
+ " waterfront | \n",
+ " view | \n",
+ " ... | \n",
+ " sqft_basement | \n",
+ " yr_built | \n",
+ " yr_renovated | \n",
+ " zipcode | \n",
+ " lat | \n",
+ " long | \n",
+ " sqft_living15 | \n",
+ " sqft_lot15 | \n",
+ " above_median_price | \n",
+ " price_category | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 735 | \n",
+ " 2591820310 | \n",
+ " 20141006T000000 | \n",
+ " 365000.0 | \n",
+ " 4 | \n",
+ " 2.25 | \n",
+ " 2070 | \n",
+ " 8893 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1986 | \n",
+ " 0 | \n",
+ " 98058 | \n",
+ " 47.4388 | \n",
+ " -122.162 | \n",
+ " 2390 | \n",
+ " 7700 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2830 | \n",
+ " 7974200820 | \n",
+ " 20140821T000000 | \n",
+ " 865000.0 | \n",
+ " 5 | \n",
+ " 3.00 | \n",
+ " 2900 | \n",
+ " 6730 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 1070 | \n",
+ " 1977 | \n",
+ " 0 | \n",
+ " 98115 | \n",
+ " 47.6784 | \n",
+ " -122.285 | \n",
+ " 2370 | \n",
+ " 6283 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 4106 | \n",
+ " 7701450110 | \n",
+ " 20140815T000000 | \n",
+ " 1038000.0 | \n",
+ " 4 | \n",
+ " 2.50 | \n",
+ " 3770 | \n",
+ " 10893 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1997 | \n",
+ " 0 | \n",
+ " 98006 | \n",
+ " 47.5646 | \n",
+ " -122.129 | \n",
+ " 3710 | \n",
+ " 9685 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 16218 | \n",
+ " 9522300010 | \n",
+ " 20150331T000000 | \n",
+ " 1490000.0 | \n",
+ " 3 | \n",
+ " 3.50 | \n",
+ " 4560 | \n",
+ " 14608 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 2 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1990 | \n",
+ " 0 | \n",
+ " 98034 | \n",
+ " 47.6995 | \n",
+ " -122.228 | \n",
+ " 4050 | \n",
+ " 14226 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " 19964 | \n",
+ " 9510861140 | \n",
+ " 20140714T000000 | \n",
+ " 711000.0 | \n",
+ " 3 | \n",
+ " 2.50 | \n",
+ " 2550 | \n",
+ " 5376 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2004 | \n",
+ " 0 | \n",
+ " 98052 | \n",
+ " 47.6647 | \n",
+ " -122.083 | \n",
+ " 2250 | \n",
+ " 4050 | \n",
+ " 1 | \n",
+ " 2 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 13674 | \n",
+ " 6163900333 | \n",
+ " 20141110T000000 | \n",
+ " 338000.0 | \n",
+ " 3 | \n",
+ " 1.75 | \n",
+ " 1250 | \n",
+ " 7710 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1947 | \n",
+ " 0 | \n",
+ " 98155 | \n",
+ " 47.7623 | \n",
+ " -122.317 | \n",
+ " 1340 | \n",
+ " 7710 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 20377 | \n",
+ " 3528960020 | \n",
+ " 20140708T000000 | \n",
+ " 673000.0 | \n",
+ " 3 | \n",
+ " 2.75 | \n",
+ " 2830 | \n",
+ " 3496 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2012 | \n",
+ " 0 | \n",
+ " 98029 | \n",
+ " 47.5606 | \n",
+ " -122.011 | \n",
+ " 2160 | \n",
+ " 3501 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8805 | \n",
+ " 1687000220 | \n",
+ " 20141016T000000 | \n",
+ " 285000.0 | \n",
+ " 4 | \n",
+ " 2.50 | \n",
+ " 2434 | \n",
+ " 4400 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2007 | \n",
+ " 0 | \n",
+ " 98001 | \n",
+ " 47.2874 | \n",
+ " -122.283 | \n",
+ " 2434 | \n",
+ " 4400 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 10168 | \n",
+ " 4141400030 | \n",
+ " 20141201T000000 | \n",
+ " 605000.0 | \n",
+ " 4 | \n",
+ " 1.75 | \n",
+ " 2250 | \n",
+ " 10108 | \n",
+ " 1.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 1967 | \n",
+ " 0 | \n",
+ " 98008 | \n",
+ " 47.5922 | \n",
+ " -122.118 | \n",
+ " 2050 | \n",
+ " 9750 | \n",
+ " 1 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2522 | \n",
+ " 1822500160 | \n",
+ " 20141212T000000 | \n",
+ " 356500.0 | \n",
+ " 4 | \n",
+ " 2.50 | \n",
+ " 2570 | \n",
+ " 11473 | \n",
+ " 2.0 | \n",
+ " 0 | \n",
+ " 0 | \n",
+ " ... | \n",
+ " 0 | \n",
+ " 2008 | \n",
+ " 0 | \n",
+ " 98003 | \n",
+ " 47.2809 | \n",
+ " -122.296 | \n",
+ " 2430 | \n",
+ " 5997 | \n",
+ " 0 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4323 rows × 23 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " id date price bedrooms bathrooms \\\n",
+ "735 2591820310 20141006T000000 365000.0 4 2.25 \n",
+ "2830 7974200820 20140821T000000 865000.0 5 3.00 \n",
+ "4106 7701450110 20140815T000000 1038000.0 4 2.50 \n",
+ "16218 9522300010 20150331T000000 1490000.0 3 3.50 \n",
+ "19964 9510861140 20140714T000000 711000.0 3 2.50 \n",
+ "... ... ... ... ... ... \n",
+ "13674 6163900333 20141110T000000 338000.0 3 1.75 \n",
+ "20377 3528960020 20140708T000000 673000.0 3 2.75 \n",
+ "8805 1687000220 20141016T000000 285000.0 4 2.50 \n",
+ "10168 4141400030 20141201T000000 605000.0 4 1.75 \n",
+ "2522 1822500160 20141212T000000 356500.0 4 2.50 \n",
+ "\n",
+ " sqft_living sqft_lot floors waterfront view ... sqft_basement \\\n",
+ "735 2070 8893 2.0 0 0 ... 0 \n",
+ "2830 2900 6730 1.0 0 0 ... 1070 \n",
+ "4106 3770 10893 2.0 0 2 ... 0 \n",
+ "16218 4560 14608 2.0 0 2 ... 0 \n",
+ "19964 2550 5376 2.0 0 0 ... 0 \n",
+ "... ... ... ... ... ... ... ... \n",
+ "13674 1250 7710 1.0 0 0 ... 0 \n",
+ "20377 2830 3496 2.0 0 0 ... 0 \n",
+ "8805 2434 4400 2.0 0 0 ... 0 \n",
+ "10168 2250 10108 1.0 0 0 ... 0 \n",
+ "2522 2570 11473 2.0 0 0 ... 0 \n",
+ "\n",
+ " yr_built yr_renovated zipcode lat long sqft_living15 \\\n",
+ "735 1986 0 98058 47.4388 -122.162 2390 \n",
+ "2830 1977 0 98115 47.6784 -122.285 2370 \n",
+ "4106 1997 0 98006 47.5646 -122.129 3710 \n",
+ "16218 1990 0 98034 47.6995 -122.228 4050 \n",
+ "19964 2004 0 98052 47.6647 -122.083 2250 \n",
+ "... ... ... ... ... ... ... \n",
+ "13674 1947 0 98155 47.7623 -122.317 1340 \n",
+ "20377 2012 0 98029 47.5606 -122.011 2160 \n",
+ "8805 2007 0 98001 47.2874 -122.283 2434 \n",
+ "10168 1967 0 98008 47.5922 -122.118 2050 \n",
+ "2522 2008 0 98003 47.2809 -122.296 2430 \n",
+ "\n",
+ " sqft_lot15 above_median_price price_category \n",
+ "735 7700 0 1 \n",
+ "2830 6283 1 2 \n",
+ "4106 9685 1 2 \n",
+ "16218 14226 1 2 \n",
+ "19964 4050 1 2 \n",
+ "... ... ... ... \n",
+ "13674 7710 0 1 \n",
+ "20377 3501 1 1 \n",
+ "8805 4400 0 0 \n",
+ "10168 9750 1 1 \n",
+ "2522 5997 0 1 \n",
+ "\n",
+ "[4323 rows x 23 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/plain": [
+ "'y_test'"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " average_price | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " 735 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 2830 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 4106 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 16218 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 19964 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " ... | \n",
+ " ... | \n",
+ "
\n",
+ " \n",
+ " 13674 | \n",
+ " 0 | \n",
+ "
\n",
+ " \n",
+ " 20377 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 8805 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 10168 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ " 2522 | \n",
+ " 1 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
4323 rows × 1 columns
\n",
+ "
"
+ ],
+ "text/plain": [
+ " average_price\n",
+ "735 0\n",
+ "2830 1\n",
+ "4106 1\n",
+ "16218 1\n",
+ "19964 1\n",
+ "... ...\n",
+ "13674 0\n",
+ "20377 1\n",
+ "8805 1\n",
+ "10168 1\n",
+ "2522 1\n",
+ "\n",
+ "[4323 rows x 1 columns]"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "from typing import Tuple\n",
+ "from pandas import DataFrame\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "\n",
+ "def split_into_train_test(\n",
+ " df_input: DataFrame,\n",
+ " target_colname: str = \"average_price\",\n",
+ " frac_train: float = 0.8,\n",
+ " random_state: int = None,\n",
+ ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame]:\n",
+ " \n",
+ " if not (0 < frac_train < 1):\n",
+ " raise ValueError(\"Fraction must be between 0 and 1.\")\n",
+ " \n",
+ " # Проверка наличия целевого признака\n",
+ " if target_colname not in df_input.columns:\n",
+ " raise ValueError(f\"{target_colname} is not a column in the DataFrame.\")\n",
+ " \n",
+ " # Разделяем данные на признаки и целевую переменную\n",
+ " X = df_input.drop(columns=[target_colname]) # Признаки\n",
+ " y = df_input[[target_colname]] # Целевая переменная\n",
+ "\n",
+ " # Разделяем данные на обучающую и тестовую выборки\n",
+ " X_train, X_test, y_train, y_test = train_test_split(\n",
+ " X, y,\n",
+ " test_size=(1.0 - frac_train),\n",
+ " random_state=random_state\n",
+ " )\n",
+ " \n",
+ " return X_train, X_test, y_train, y_test\n",
+ "\n",
+ "# Применение функции для разделения данных\n",
+ "X_train, X_test, y_train, y_test = split_into_train_test(\n",
+ " df, \n",
+ " target_colname=\"average_price\", \n",
+ " frac_train=0.8, \n",
+ " random_state=42 # Убедитесь, что вы задали нужное значение random_state\n",
+ ")\n",
+ "\n",
+ "# Для отображения результатов\n",
+ "display(\"X_train\", X_train)\n",
+ "display(\"y_train\", y_train)\n",
+ "\n",
+ "display(\"X_test\", X_test)\n",
+ "display(\"y_test\", y_test)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Формирование конвейера для решения задачи регрессии"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 168,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn.base import BaseEstimator, TransformerMixin\n",
+ "from sklearn.compose import ColumnTransformer\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "from sklearn.impute import SimpleImputer\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "from sklearn.preprocessing import OneHotEncoder\n",
+ "from sklearn.ensemble import RandomForestRegressor # Пример регрессионной модели\n",
+ "from sklearn.model_selection import train_test_split\n",
+ "from sklearn.pipeline import make_pipeline\n",
+ "\n",
+ "class HouseFeatures(BaseEstimator, TransformerMixin):\n",
+ " def __init__(self):\n",
+ " pass\n",
+ " def fit(self, X, y=None):\n",
+ " return self\n",
+ " def transform(self, X, y=None):\n",
+ " # Создание новых признаков\n",
+ " X = X.copy()\n",
+ " X[\"Square\"] = X[\"sqft_living\"] / X[\"sqft_lot\"]\n",
+ " return X\n",
+ " def get_feature_names_out(self, features_in):\n",
+ " # Добавление имен новых признаков\n",
+ " new_features = [\"Square\"]\n",
+ " return np.append(features_in, new_features, axis=0)\n",
+ "\n",
+ "# Указываем столбцы, которые нужно удалить и обрабатывать\n",
+ "columns_to_drop = [\"date\"]\n",
+ "num_columns = [\"bathrooms\", \"floors\", \"waterfront\", \"view\"]\n",
+ "cat_columns = [] \n",
+ "\n",
+ "# Определяем предобработку для численных данных\n",
+ "num_imputer = SimpleImputer(strategy=\"median\")\n",
+ "num_scaler = StandardScaler()\n",
+ "preprocessing_num = Pipeline(\n",
+ " [\n",
+ " (\"imputer\", num_imputer),\n",
+ " (\"scaler\", num_scaler),\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "# Определяем предобработку для категориальных данных\n",
+ "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n",
+ "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n",
+ "preprocessing_cat = Pipeline(\n",
+ " [\n",
+ " (\"imputer\", cat_imputer),\n",
+ " (\"encoder\", cat_encoder),\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "# Подготовка признаков с использованием ColumnTransformer\n",
+ "features_preprocessing = ColumnTransformer(\n",
+ " verbose_feature_names_out=False,\n",
+ " transformers=[\n",
+ " (\"preprocessing_num\", preprocessing_num, num_columns),\n",
+ " (\"preprocessing_cat\", preprocessing_cat, cat_columns),\n",
+ " ],\n",
+ " remainder=\"passthrough\"\n",
+ ")\n",
+ "\n",
+ "# Удаление нежелательных столбцов\n",
+ "drop_columns = ColumnTransformer(\n",
+ " verbose_feature_names_out=False,\n",
+ " transformers=[\n",
+ " (\"drop_columns\", \"drop\", columns_to_drop),\n",
+ " ],\n",
+ " remainder=\"passthrough\",\n",
+ ")\n",
+ "\n",
+ "# Постобработка признаков\n",
+ "features_postprocessing = ColumnTransformer(\n",
+ " verbose_feature_names_out=False,\n",
+ " transformers=[\n",
+ " (\"preprocessing_cat\", preprocessing_cat, [\"price_category\"]), \n",
+ " ],\n",
+ " remainder=\"passthrough\",\n",
+ ")\n",
+ "\n",
+ "# Создание окончательного конвейера\n",
+ "pipeline = Pipeline(\n",
+ " [\n",
+ " (\"features_preprocessing\", features_preprocessing),\n",
+ " (\"drop_columns\", drop_columns),\n",
+ " (\"custom_features\", HouseFeatures()),\n",
+ " (\"model\", RandomForestRegressor()) # Выбор модели для обучения\n",
+ " ]\n",
+ ")\n",
+ "\n",
+ "# Использование конвейера\n",
+ "def train_pipeline(X, y):\n",
+ " pipeline.fit(X, y)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Формирование набора моделей для регрессии \n",
+ "Определение перечня алгоритмов решения задачи аппроксимации (регрессии)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 169,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.preprocessing import PolynomialFeatures\n",
+ "from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
+ "\n",
+ "random_state = 9\n",
+ "\n",
+ "models = {\n",
+ " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
+ " \"linear_poly\": {\n",
+ " \"model\": make_pipeline(\n",
+ " PolynomialFeatures(degree=2),\n",
+ " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
+ " )\n",
+ " },\n",
+ " \"linear_interact\": {\n",
+ " \"model\": make_pipeline(\n",
+ " PolynomialFeatures(interaction_only=True),\n",
+ " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
+ " )\n",
+ " },\n",
+ " \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
+ " \"decision_tree\": {\n",
+ " \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
+ " },\n",
+ " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
+ " \"random_forest\": {\n",
+ " \"model\": ensemble.RandomForestRegressor(\n",
+ " max_depth=7, random_state=random_state, n_jobs=-1\n",
+ " )\n",
+ " },\n",
+ " \"mlp\": {\n",
+ " \"model\": neural_network.MLPRegressor(\n",
+ " activation=\"tanh\",\n",
+ " hidden_layer_sizes=(3,),\n",
+ " max_iter=500,\n",
+ " early_stopping=True,\n",
+ " random_state=random_state,\n",
+ " )\n",
+ " },\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Формирование набора моделей для регрессии"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 170,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Random Forest: Mean Score = 1.0, Standard Deviation = 0.0\n",
+ "Linear Regression: Mean Score = 0.6396438910587428, Standard Deviation = 0.006348300027629372\n",
+ "Gradient Boosting: Mean Score = 0.9999999992943781, Standard Deviation = 6.609300428326041e-14\n",
+ "Support Vector Regression: Mean Score = -0.4335265257004087, Standard Deviation = 0.012071668862264313\n"
+ ]
+ }
+ ],
+ "source": [
+ "from sklearn.linear_model import LinearRegression\n",
+ "from sklearn.ensemble import GradientBoostingRegressor\n",
+ "from sklearn.svm import SVR\n",
+ "from sklearn.model_selection import cross_val_score\n",
+ "\n",
+ "def train_multiple_models(X, y, models):\n",
+ " results = {}\n",
+ "\n",
+ " # Преобразуем y в одномерный массив numpy только при необходимости\n",
+ " if hasattr(y, 'values'):\n",
+ " y = y.values.ravel() # Если y - DataFrame, преобразуем в numpy array\n",
+ " else:\n",
+ " y = y.ravel() # Если y - numpy array, просто используем ravel()\n",
+ "\n",
+ " for model_name, model in models.items():\n",
+ " # Создаем конвейер для каждой модели\n",
+ " model_pipeline = Pipeline(\n",
+ " [\n",
+ " (\"features_preprocessing\", features_preprocessing),\n",
+ " (\"drop_columns\", drop_columns),\n",
+ " (\"model\", model) # Используем текущую модель\n",
+ " ]\n",
+ " )\n",
+ " \n",
+ " # Обучаем модель и вычисляем кросс-валидацию\n",
+ " scores = cross_val_score(model_pipeline, X, y, cv=5, error_score='raise') # 5-кратная кросс-валидация\n",
+ " results[model_name] = {\n",
+ " \"mean_score\": scores.mean(),\n",
+ " \"std_dev\": scores.std()\n",
+ " }\n",
+ " \n",
+ " return results\n",
+ "\n",
+ "models = {\n",
+ " \"Random Forest\": RandomForestRegressor(),\n",
+ " \"Linear Regression\": LinearRegression(),\n",
+ " \"Gradient Boosting\": GradientBoostingRegressor(),\n",
+ " \"Support Vector Regression\": SVR()\n",
+ "}\n",
+ "\n",
+ "results = train_multiple_models(X_train, y_train, models)\n",
+ "\n",
+ "# Вывод результатов\n",
+ "for model_name, scores in results.items():\n",
+ " print(f\"{model_name}: Mean Score = {scores['mean_score']}, Standard Deviation = {scores['std_dev']}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 171,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: logistic\n",
+ "MSE (train): 0.24060150375939848\n",
+ "MSE (test): 0.23455933379597502\n",
+ "MAE (train): 0.24060150375939848\n",
+ "MAE (test): 0.23455933379597502\n",
+ "R2 (train): 0.015780807725750634\n",
+ "R2 (test): 0.045807954005714024\n",
+ "STD (train): 0.48387852043102103\n",
+ "STD (test): 0.4780359236045559\n",
+ "----------------------------------------\n",
+ "Model: ridge\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "e:\\MII\\laboratory\\mai\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
+ "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+ "\n",
+ "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
+ " https://scikit-learn.org/stable/modules/preprocessing.html\n",
+ "Please also refer to the documentation for alternative solver options:\n",
+ " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
+ " n_iter_i = _check_optimize_result(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MSE (train): 0.11596298438403702\n",
+ "MSE (test): 0.11265325005783021\n",
+ "MAE (train): 0.11596298438403702\n",
+ "MAE (test): 0.11265325005783021\n",
+ "R2 (train): 0.5256347402620505\n",
+ "R2 (test): 0.541724332939628\n",
+ "STD (train): 0.3405113334365492\n",
+ "STD (test): 0.3356321137822519\n",
+ "----------------------------------------\n",
+ "Model: decision_tree\n",
+ "MSE (train): 0.0\n",
+ "MSE (test): 0.0\n",
+ "MAE (train): 0.0\n",
+ "MAE (test): 0.0\n",
+ "R2 (train): 1.0\n",
+ "R2 (test): 1.0\n",
+ "STD (train): 0.0\n",
+ "STD (test): 0.0\n",
+ "----------------------------------------\n",
+ "Model: knn\n",
+ "MSE (train): 0.1949681897050318\n",
+ "MSE (test): 0.27989821882951654\n",
+ "MAE (train): 0.1949681897050318\n",
+ "MAE (test): 0.27989821882951654\n",
+ "R2 (train): 0.20245122664507342\n",
+ "R2 (test): -0.13863153417464114\n",
+ "STD (train): 0.43948973967967464\n",
+ "STD (test): 0.5264647910268833\n",
+ "----------------------------------------\n",
+ "Model: naive_bayes\n",
+ "MSE (train): 0.26928860613071137\n",
+ "MSE (test): 0.2690261392551469\n",
+ "MAE (train): 0.26928860613071137\n",
+ "MAE (test): 0.2690261392551469\n",
+ "R2 (train): -0.10156840366079445\n",
+ "R2 (test): -0.09440369772322943\n",
+ "STD (train): 0.47316941542228536\n",
+ "STD (test): 0.47206502931490235\n",
+ "----------------------------------------\n",
+ "Model: gradient_boosting\n",
+ "MSE (train): 0.0\n",
+ "MSE (test): 0.0\n",
+ "MAE (train): 0.0\n",
+ "MAE (test): 0.0\n",
+ "R2 (train): 1.0\n",
+ "R2 (test): 1.0\n",
+ "STD (train): 0.0\n",
+ "STD (test): 0.0\n",
+ "----------------------------------------\n",
+ "Model: random_forest\n",
+ "MSE (train): 0.0\n",
+ "MSE (test): 0.0\n",
+ "MAE (train): 0.0\n",
+ "MAE (test): 0.0\n",
+ "R2 (train): 1.0\n",
+ "R2 (test): 1.0\n",
+ "STD (train): 0.0\n",
+ "STD (test): 0.0\n",
+ "----------------------------------------\n",
+ "Model: mlp\n",
+ "MSE (train): 0.4253903990746096\n",
+ "MSE (test): 0.4353458246588018\n",
+ "MAE (train): 0.4253903990746096\n",
+ "MAE (test): 0.4353458246588018\n",
+ "R2 (train): -0.7401279228791116\n",
+ "R2 (test): -0.7709954936501442\n",
+ "STD (train): 0.4959884986820156\n",
+ "STD (test): 0.49782384226978177\n",
+ "----------------------------------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn import metrics\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "\n",
+ "# Проверка наличия необходимых переменных\n",
+ "if 'class_models' not in locals():\n",
+ " raise ValueError(\"class_models is not defined\")\n",
+ "if 'X_train' not in locals() or 'X_test' not in locals() or 'y_train' not in locals() or 'y_test' not in locals():\n",
+ " raise ValueError(\"Train/test data is not defined\")\n",
+ "\n",
+ "\n",
+ "y_train = np.ravel(y_train) \n",
+ "y_test = np.ravel(y_test) \n",
+ "\n",
+ "# Инициализация списка для хранения результатов\n",
+ "results = []\n",
+ "\n",
+ "# Проход по моделям и оценка их качества\n",
+ "for model_name in class_models.keys():\n",
+ " print(f\"Model: {model_name}\")\n",
+ " \n",
+ " # Извлечение модели из словаря\n",
+ " model = class_models[model_name][\"model\"]\n",
+ " \n",
+ " # Создание пайплайна\n",
+ " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
+ " \n",
+ " # Обучение модели\n",
+ " model_pipeline.fit(X_train, y_train)\n",
+ "\n",
+ " # Предсказание для обучающей и тестовой выборки\n",
+ " y_train_predict = model_pipeline.predict(X_train)\n",
+ " y_test_predict = model_pipeline.predict(X_test)\n",
+ "\n",
+ " # Сохранение пайплайна и предсказаний\n",
+ " class_models[model_name][\"pipeline\"] = model_pipeline\n",
+ " class_models[model_name][\"preds\"] = y_test_predict\n",
+ "\n",
+ " # Вычисление метрик для регрессии\n",
+ " class_models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_predict)\n",
+ " class_models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_predict)\n",
+ " class_models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_predict)\n",
+ " class_models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_predict)\n",
+ " class_models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_predict)\n",
+ " class_models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_predict)\n",
+ "\n",
+ " # Дополнительные метрики\n",
+ " class_models[model_name][\"STD_train\"] = np.std(y_train - y_train_predict)\n",
+ " class_models[model_name][\"STD_test\"] = np.std(y_test - y_test_predict)\n",
+ "\n",
+ " # Вывод результатов для текущей модели\n",
+ " print(f\"MSE (train): {class_models[model_name]['MSE_train']}\")\n",
+ " print(f\"MSE (test): {class_models[model_name]['MSE_test']}\")\n",
+ " print(f\"MAE (train): {class_models[model_name]['MAE_train']}\")\n",
+ " print(f\"MAE (test): {class_models[model_name]['MAE_test']}\")\n",
+ " print(f\"R2 (train): {class_models[model_name]['R2_train']}\")\n",
+ " print(f\"R2 (test): {class_models[model_name]['R2_test']}\")\n",
+ " print(f\"STD (train): {class_models[model_name]['STD_train']}\")\n",
+ " print(f\"STD (test): {class_models[model_name]['STD_test']}\")\n",
+ " print(\"-\" * 40) # Разделитель для разных моделей"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 172,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from sklearn.pipeline import make_pipeline\n",
+ "from sklearn.preprocessing import PolynomialFeatures\n",
+ "from sklearn import linear_model, tree, neighbors, ensemble, neural_network\n",
+ "\n",
+ "random_state = 9\n",
+ "\n",
+ "models = {\n",
+ " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n",
+ " \"linear_poly\": {\n",
+ " \"model\": make_pipeline(\n",
+ " PolynomialFeatures(degree=2),\n",
+ " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
+ " )\n",
+ " },\n",
+ " \"linear_interact\": {\n",
+ " \"model\": make_pipeline(\n",
+ " PolynomialFeatures(interaction_only=True),\n",
+ " linear_model.LinearRegression(fit_intercept=False, n_jobs=-1),\n",
+ " )\n",
+ " },\n",
+ " \"ridge\": {\"model\": linear_model.RidgeCV()},\n",
+ " \"decision_tree\": {\n",
+ " \"model\": tree.DecisionTreeRegressor(max_depth=7, random_state=random_state)\n",
+ " },\n",
+ " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n",
+ " \"random_forest\": {\n",
+ " \"model\": ensemble.RandomForestRegressor(\n",
+ " max_depth=7, random_state=random_state, n_jobs=-1\n",
+ " )\n",
+ " },\n",
+ " \"mlp\": {\n",
+ " \"model\": neural_network.MLPRegressor(\n",
+ " activation=\"tanh\",\n",
+ " hidden_layer_sizes=(3,),\n",
+ " max_iter=500,\n",
+ " early_stopping=True,\n",
+ " random_state=random_state,\n",
+ " )\n",
+ " },\n",
+ "}"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Обучение и оценка моделей с помощью различных алгоритмов"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 173,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: logistic\n",
+ "MSE (train): 0.24060150375939848\n",
+ "MSE (test): 0.23455933379597502\n",
+ "MAE (train): 0.24060150375939848\n",
+ "MAE (test): 0.23455933379597502\n",
+ "R2 (train): 0.015780807725750634\n",
+ "R2 (test): 0.045807954005714024\n",
+ "STD (train): 0.48387852043102103\n",
+ "STD (test): 0.4780359236045559\n",
+ "----------------------------------------\n",
+ "Model: ridge\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "e:\\MII\\laboratory\\mai\\Lib\\site-packages\\sklearn\\linear_model\\_logistic.py:469: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
+ "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
+ "\n",
+ "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
+ " https://scikit-learn.org/stable/modules/preprocessing.html\n",
+ "Please also refer to the documentation for alternative solver options:\n",
+ " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
+ " n_iter_i = _check_optimize_result(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "MSE (train): 0.11596298438403702\n",
+ "MSE (test): 0.11265325005783021\n",
+ "MAE (train): 0.11596298438403702\n",
+ "MAE (test): 0.11265325005783021\n",
+ "R2 (train): 0.5256347402620505\n",
+ "R2 (test): 0.541724332939628\n",
+ "STD (train): 0.3405113334365492\n",
+ "STD (test): 0.3356321137822519\n",
+ "----------------------------------------\n",
+ "Model: decision_tree\n",
+ "MSE (train): 0.0\n",
+ "MSE (test): 0.0\n",
+ "MAE (train): 0.0\n",
+ "MAE (test): 0.0\n",
+ "R2 (train): 1.0\n",
+ "R2 (test): 1.0\n",
+ "STD (train): 0.0\n",
+ "STD (test): 0.0\n",
+ "----------------------------------------\n",
+ "Model: knn\n",
+ "MSE (train): 0.1949681897050318\n",
+ "MSE (test): 0.27989821882951654\n",
+ "MAE (train): 0.1949681897050318\n",
+ "MAE (test): 0.27989821882951654\n",
+ "R2 (train): 0.20245122664507342\n",
+ "R2 (test): -0.13863153417464114\n",
+ "STD (train): 0.43948973967967464\n",
+ "STD (test): 0.5264647910268833\n",
+ "----------------------------------------\n",
+ "Model: naive_bayes\n",
+ "MSE (train): 0.26928860613071137\n",
+ "MSE (test): 0.2690261392551469\n",
+ "MAE (train): 0.26928860613071137\n",
+ "MAE (test): 0.2690261392551469\n",
+ "R2 (train): -0.10156840366079445\n",
+ "R2 (test): -0.09440369772322943\n",
+ "STD (train): 0.47316941542228536\n",
+ "STD (test): 0.47206502931490235\n",
+ "----------------------------------------\n",
+ "Model: gradient_boosting\n",
+ "MSE (train): 0.0\n",
+ "MSE (test): 0.0\n",
+ "MAE (train): 0.0\n",
+ "MAE (test): 0.0\n",
+ "R2 (train): 1.0\n",
+ "R2 (test): 1.0\n",
+ "STD (train): 0.0\n",
+ "STD (test): 0.0\n",
+ "----------------------------------------\n",
+ "Model: random_forest\n",
+ "MSE (train): 0.0\n",
+ "MSE (test): 0.0\n",
+ "MAE (train): 0.0\n",
+ "MAE (test): 0.0\n",
+ "R2 (train): 1.0\n",
+ "R2 (test): 1.0\n",
+ "STD (train): 0.0\n",
+ "STD (test): 0.0\n",
+ "----------------------------------------\n",
+ "Model: mlp\n",
+ "MSE (train): 0.4253903990746096\n",
+ "MSE (test): 0.4353458246588018\n",
+ "MAE (train): 0.4253903990746096\n",
+ "MAE (test): 0.4353458246588018\n",
+ "R2 (train): -0.7401279228791116\n",
+ "R2 (test): -0.7709954936501442\n",
+ "STD (train): 0.4959884986820156\n",
+ "STD (test): 0.49782384226978177\n",
+ "----------------------------------------\n"
+ ]
+ }
+ ],
+ "source": [
+ "import numpy as np\n",
+ "from sklearn import metrics\n",
+ "from sklearn.pipeline import Pipeline\n",
+ "\n",
+ "# Проверка наличия необходимых переменных\n",
+ "if 'class_models' not in locals():\n",
+ " raise ValueError(\"class_models is not defined\")\n",
+ "if 'X_train' not in locals() or 'X_test' not in locals() or 'y_train' not in locals() or 'y_test' not in locals():\n",
+ " raise ValueError(\"Train/test data is not defined\")\n",
+ "\n",
+ "\n",
+ "y_train = np.ravel(y_train) \n",
+ "y_test = np.ravel(y_test) \n",
+ "\n",
+ "# Инициализация списка для хранения результатов\n",
+ "results = []\n",
+ "\n",
+ "# Проход по моделям и оценка их качества\n",
+ "for model_name in class_models.keys():\n",
+ " print(f\"Model: {model_name}\")\n",
+ " \n",
+ " # Извлечение модели из словаря\n",
+ " model = class_models[model_name][\"model\"]\n",
+ " \n",
+ " # Создание пайплайна\n",
+ " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n",
+ " \n",
+ " # Обучение модели\n",
+ " model_pipeline.fit(X_train, y_train)\n",
+ "\n",
+ " # Предсказание для обучающей и тестовой выборки\n",
+ " y_train_predict = model_pipeline.predict(X_train)\n",
+ " y_test_predict = model_pipeline.predict(X_test)\n",
+ "\n",
+ " # Сохранение пайплайна и предсказаний\n",
+ " class_models[model_name][\"pipeline\"] = model_pipeline\n",
+ " class_models[model_name][\"preds\"] = y_test_predict\n",
+ "\n",
+ " # Вычисление метрик для регрессии\n",
+ " class_models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_predict)\n",
+ " class_models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_predict)\n",
+ " class_models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_predict)\n",
+ " class_models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_predict)\n",
+ " class_models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_predict)\n",
+ " class_models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_predict)\n",
+ "\n",
+ " # Дополнительные метрики\n",
+ " class_models[model_name][\"STD_train\"] = np.std(y_train - y_train_predict)\n",
+ " class_models[model_name][\"STD_test\"] = np.std(y_test - y_test_predict)\n",
+ "\n",
+ " # Вывод результатов для текущей модели\n",
+ " print(f\"MSE (train): {class_models[model_name]['MSE_train']}\")\n",
+ " print(f\"MSE (test): {class_models[model_name]['MSE_test']}\")\n",
+ " print(f\"MAE (train): {class_models[model_name]['MAE_train']}\")\n",
+ " print(f\"MAE (test): {class_models[model_name]['MAE_test']}\")\n",
+ " print(f\"R2 (train): {class_models[model_name]['R2_train']}\")\n",
+ " print(f\"R2 (test): {class_models[model_name]['R2_test']}\")\n",
+ " print(f\"STD (train): {class_models[model_name]['STD_train']}\")\n",
+ " print(f\"STD (test): {class_models[model_name]['STD_test']}\")\n",
+ " print(\"-\" * 40) # Разделитель для разных моделей"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Пример использования обученной модели (конвейера регрессии) для предсказания**"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Подбор гиперпараметров методом поиска по сетке**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 174,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n",
+ "Best parameters: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}\n",
+ "Best MSE: 0.14752641202600872\n"
+ ]
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "from sklearn.model_selection import train_test_split, GridSearchCV\n",
+ "from sklearn.ensemble import RandomForestRegressor\n",
+ "from sklearn.preprocessing import StandardScaler\n",
+ "\n",
+ "# Convert the date column to a datetime object and extract numeric features\n",
+ "df['date'] = pd.to_datetime(df['date'], errors='coerce') # Coerce invalid dates to NaT\n",
+ "df.dropna(subset=['date'], inplace=True) # Drop rows with invalid dates\n",
+ "df['year'] = df['date'].dt.year\n",
+ "df['month'] = df['date'].dt.month\n",
+ "df['day'] = df['date'].dt.day\n",
+ "\n",
+ "# Prepare predictors and target\n",
+ "X = df[['yr_built', 'year', 'month', 'day', 'price', 'price_category']]\n",
+ "y = df['average_price']\n",
+ "\n",
+ "# Split data into training and testing sets\n",
+ "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
+ "\n",
+ "# Define model and parameter grid\n",
+ "model = RandomForestRegressor()\n",
+ "param_grid = {\n",
+ " 'n_estimators': [50, 100, 200],\n",
+ " 'max_depth': [None, 10, 20, 30],\n",
+ " 'min_samples_split': [2, 5, 10]\n",
+ "}\n",
+ "\n",
+ "# Hyperparameter tuning with GridSearchCV\n",
+ "grid_search = GridSearchCV(estimator=model, param_grid=param_grid,\n",
+ " scoring='neg_mean_squared_error', cv=5, n_jobs=-1, verbose=2)\n",
+ "\n",
+ "# Fit the model\n",
+ "grid_search.fit(X_train, y_train)\n",
+ "\n",
+ "# Output the best parameters and score\n",
+ "print(\"Best parameters:\", grid_search.best_params_)\n",
+ "print(\"Best MSE:\", -grid_search.best_score_)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "**Обучение модели с новыми гиперпараметрами и сравнение новых и старых данных**"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 175,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Fitting 5 folds for each of 36 candidates, totalling 180 fits\n",
+ "Старые параметры: {'max_depth': 10, 'min_samples_split': 15, 'n_estimators': 200}\n",
+ "Лучший результат (MSE) на старых параметрах: 0.14727400921908354\n",
+ "\n",
+ "Новые параметры: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}\n",
+ "Лучший результат (MSE) на новых параметрах: 0.148833681322309\n",
+ "Среднеквадратическая ошибка (MSE) на тестовых данных: 0.14451630134635543\n",
+ "Корень среднеквадратичной ошибки (RMSE) на тестовых данных: 0.3801529972870863\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ "