diff --git a/lab_4/lab4.ipynb b/lab_4/lab4.ipynb new file mode 100644 index 0000000..e730cb6 --- /dev/null +++ b/lab_4/lab4.ipynb @@ -0,0 +1,4154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Лабораторная работа №4\n", + "\n", + "*Вариант задания:* Товары Jio Mart (вариант - 23) " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Выбор бизнес-целей \n", + "Для датасета недвижимости предлагаются две бизнес-цели:\n", + "\n", + "### Задача классификации:\n", + "*Цель*: Классифицировать товары в разные категории, например, \"Дешевый\", \"Средний\" или \"Дорогой\", на основе цены и других характеристик товара.\n", + "\n", + "*Применение*: Полезно для определения целевой аудитории для разных типов товаров, создания маркетинговых кампаний и анализа рыночных сегментов.\n", + "\n", + "\n", + "### Задача регрессии:\n", + "*Цель*: Предсказать цену товара (price) на основе других характеристик.\n", + "\n", + "*Применение*: Эта задача полезна для оценки рыночной стоимости товаров в интернет-магазинах и онлайн-платформах, например, для прогнозирования цены новых или подержанных товаров на основе характеристик." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Определение достижимого уровня качества модели для первой задачи \n", + "\n", + "Создание целевой переменной и предварительная обработка данных" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Index(['category', 'sub_category', 'href', 'items', 'price'], dtype='object')\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn import set_config\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "from sklearn import linear_model, tree, neighbors, naive_bayes, ensemble, neural_network\n", + "from sklearn import metrics\n", + "import numpy as np\n", + "import warnings\n", + "warnings.filterwarnings(\"ignore\", category=UserWarning)\n", + "df = pd.read_csv(\"..//static//csv//jio_mart_items.csv\")\n", + "print(df.columns)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Среднее значение поля 'price': 1991.6325132793531\n", + " category sub_category \\\n", + "0 Groceries Fruits & Vegetables \n", + "1 Groceries Fruits & Vegetables \n", + "2 Groceries Fruits & Vegetables \n", + "3 Groceries Fruits & Vegetables \n", + "4 Groceries Fruits & Vegetables \n", + "\n", + " href \\\n", + "0 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "1 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "2 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "3 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "4 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "\n", + " items price \\\n", + "0 Fresh Dates (Pack) (Approx 450 g - 500 g) 109.0 \n", + "1 Tender Coconut Cling Wrapped (1 pc) (Approx 90... 49.0 \n", + "2 Mosambi 1 kg 69.0 \n", + "3 Orange Imported 1 kg 125.0 \n", + "4 Banana Robusta 6 pcs (Box) (Approx 800 g - 110... 44.0 \n", + "\n", + " above_average_price \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n" + ] + } + ], + "source": [ + "# Установим параметры для вывода\n", + "set_config(transform_output=\"pandas\")\n", + "\n", + "# Рассчитываем среднее значение цены\n", + "average_price = df['price'].mean()\n", + "print(f\"Среднее значение поля 'price': {average_price}\")\n", + "\n", + "# Создаем новую переменную, указывающую, превышает ли цена среднюю цену\n", + "df['above_average_price'] = (df['price'] > average_price).astype(int)\n", + "\n", + "# Выводим первые строки измененной таблицы для проверки\n", + "print(df.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Разделение набора данных на обучающую и тестовые выборки (80/20) для задачи классификации\n", + "\n", + "Целевой признак -- above_average_price" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "X_train shape: (129850, 4)\n", + "y_train shape: (129850,)\n", + "X_test shape: (32463, 4)\n", + "y_test shape: (32463,)\n", + "X_train:\n", + " category sub_category \\\n", + "131952 Fashion Girls \n", + "106351 Home & Kitchen Power & Hand Tools \n", + "141229 Electronics Cameras \n", + "46383 Home & Kitchen Kitchenware \n", + "123357 Fashion Women \n", + "\n", + " href price \n", + "131952 https://www.jiomart.com/c/fashion/girls/watche... 299.0 \n", + "106351 https://www.jiomart.com/c/groceries/home-kitch... 449.0 \n", + "141229 https://www.jiomart.com/c/electronics/cameras/... 1358.0 \n", + "46383 https://www.jiomart.com/c/groceries/home-kitch... 529.0 \n", + "123357 https://www.jiomart.com/c/fashion/women/night-... 599.0 \n", + "y_train:\n", + " 131952 0\n", + "106351 0\n", + "141229 0\n", + "46383 0\n", + "123357 0\n", + "Name: above_average_price, dtype: int64\n", + "X_test:\n", + " category sub_category \\\n", + "112252 Fashion Men \n", + "147122 Electronics Accessories \n", + "27887 Groceries Home Care \n", + "119606 Fashion Women \n", + "94731 Home & Kitchen Mops, Brushes & Scrubs \n", + "\n", + " href price \n", + "112252 https://www.jiomart.com/c/fashion/men/fashion-... 449.0 \n", + "147122 https://www.jiomart.com/c/electronics/accessor... 4899.0 \n", + "27887 https://www.jiomart.com/c/groceries/home-care/... 891.0 \n", + "119606 https://www.jiomart.com/c/fashion/women/bags-b... 920.0 \n", + "94731 https://www.jiomart.com/c/groceries/home-kitch... 399.0 \n", + "y_test:\n", + " 112252 0\n", + "147122 1\n", + "27887 0\n", + "119606 0\n", + "94731 0\n", + "Name: above_average_price, dtype: int64\n" + ] + } + ], + "source": [ + "# Разделение набора данных на обучающую и тестовую выборки (80/20)\n", + "random_state = 42\n", + "X_train, X_test, y_train, y_test = train_test_split(\n", + " df.drop(columns=['above_average_price', 'items']), # Исключаем столбец 'items'\n", + " df['above_average_price'],\n", + " stratify=df['above_average_price'],\n", + " test_size=0.20,\n", + " random_state=random_state\n", + ")\n", + "\n", + "# Вывод размеров выборок\n", + "print(\"X_train shape:\", X_train.shape)\n", + "print(\"y_train shape:\", y_train.shape)\n", + "print(\"X_test shape:\", X_test.shape)\n", + "print(\"y_test shape:\", y_test.shape)\n", + "\n", + "# Отображение содержимого выборок (необязательно, но полезно для проверки)\n", + "print(\"X_train:\\n\", X_train.head())\n", + "print(\"y_train:\\n\", y_train.head())\n", + "print(\"X_test:\\n\", X_test.head())\n", + "print(\"y_test:\\n\", y_test.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Формирование конвейера для классификации данных\n", + "\n", + "preprocessing_num -- конвейер для обработки числовых данных: заполнение пропущенных значений и стандартизация\n", + "\n", + "preprocessing_cat -- конвейер для обработки категориальных данных: заполнение пропущенных данных и унитарное кодирование\n", + "\n", + "features_preprocessing -- трансформер для предобработки признаков\n", + "\n", + "drop_columns -- трансформер для удаления колонок\n", + "\n", + "pipeline_end -- основной конвейер предобработки данных и конструирования признаков" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "# Определение столбцов для обработки\n", + "columns_to_drop = [\"href\"] # Столбцы, которые можно удалить\n", + "num_columns = [\"price\"] # Числовые столбцы\n", + "cat_columns = [\"category\", \"sub_category\"] # Категориальные столбцы\n", + "\n", + "# Проверка наличия столбцов перед удалением\n", + "columns_to_drop = [col for col in columns_to_drop if col in X_train.columns]\n", + "\n", + "# Препроцессинг числовых столбцов\n", + "num_imputer = SimpleImputer(strategy=\"median\")\n", + "num_scaler = StandardScaler()\n", + "preprocessing_num = Pipeline(\n", + " [\n", + " (\"imputer\", num_imputer),\n", + " (\"scaler\", num_scaler),\n", + " ]\n", + ")\n", + "\n", + "# Препроцессинг категориальных столбцов\n", + "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n", + "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n", + "preprocessing_cat = Pipeline(\n", + " [\n", + " (\"imputer\", cat_imputer),\n", + " (\"encoder\", cat_encoder),\n", + " ]\n", + ")\n", + "\n", + "# Объединение препроцессинга\n", + "features_preprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"preprocessing_num\", preprocessing_num, num_columns),\n", + " (\"preprocessing_cat\", preprocessing_cat, cat_columns),\n", + " ],\n", + " remainder=\"passthrough\"\n", + ")\n", + "\n", + "# Удаление ненужных столбцов\n", + "drop_columns = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"drop_columns\", \"drop\", columns_to_drop),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "# Создание финального пайплайна\n", + "pipeline_end = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " (\"drop_columns\", drop_columns),\n", + " ]\n", + ")\n", + "\n", + "# Обучение пайплайна на обучающих данных\n", + "pipeline_end.fit(X_train)\n", + "\n", + "# Преобразование тестовых данных с использованием обученного пайплайна\n", + "X_test_transformed = pipeline_end.transform(X_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Демонстрация работы конвейера__" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " price category_Electronics category_Fashion category_Groceries \\\n", + "131952 -0.102874 0.0 1.0 0.0 \n", + "106351 -0.093710 0.0 0.0 0.0 \n", + "141229 -0.038173 1.0 0.0 0.0 \n", + "46383 -0.088822 0.0 0.0 0.0 \n", + "123357 -0.084545 0.0 1.0 0.0 \n", + "\n", + " category_Home & Kitchen category_Jewellery sub_category_Apparel \\\n", + "131952 0.0 0.0 0.0 \n", + "106351 1.0 0.0 0.0 \n", + "141229 0.0 0.0 0.0 \n", + "46383 1.0 0.0 0.0 \n", + "123357 0.0 0.0 0.0 \n", + "\n", + " sub_category_Auto Care sub_category_Ayush \\\n", + "131952 0.0 0.0 \n", + "106351 0.0 0.0 \n", + "141229 0.0 0.0 \n", + "46383 0.0 0.0 \n", + "123357 0.0 0.0 \n", + "\n", + " sub_category_Bags & Travel Luggage ... sub_category_Smart Devices \\\n", + "131952 0.0 ... 0.0 \n", + "106351 0.0 ... 0.0 \n", + "141229 0.0 ... 0.0 \n", + "46383 0.0 ... 0.0 \n", + "123357 0.0 ... 0.0 \n", + "\n", + " sub_category_Snacks & Branded Foods sub_category_Staples \\\n", + "131952 0.0 0.0 \n", + "106351 0.0 0.0 \n", + "141229 0.0 0.0 \n", + "46383 0.0 0.0 \n", + "123357 0.0 0.0 \n", + "\n", + " sub_category_Stationery sub_category_TV & Speaker \\\n", + "131952 0.0 0.0 \n", + "106351 0.0 0.0 \n", + "141229 0.0 0.0 \n", + "46383 0.0 0.0 \n", + "123357 0.0 0.0 \n", + "\n", + " sub_category_Tools & Appliances sub_category_Toys, Games & Fitness \\\n", + "131952 0.0 0.0 \n", + "106351 0.0 0.0 \n", + "141229 0.0 0.0 \n", + "46383 0.0 0.0 \n", + "123357 0.0 0.0 \n", + "\n", + " sub_category_Treatments sub_category_Wellness sub_category_Women \n", + "131952 0.0 0.0 0.0 \n", + "106351 0.0 0.0 0.0 \n", + "141229 0.0 0.0 0.0 \n", + "46383 0.0 0.0 0.0 \n", + "123357 0.0 0.0 1.0 \n", + "\n", + "[5 rows x 75 columns]\n" + ] + } + ], + "source": [ + "preprocessing_result = pipeline_end.fit_transform(X_train)\n", + "preprocessed_df = pd.DataFrame(\n", + " preprocessing_result,\n", + " columns=pipeline_end.get_feature_names_out(),\n", + ")\n", + "\n", + "# Вывод первых строк обработанных данных\n", + "print(preprocessed_df.head())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Формирование набора моделей для классификации\n", + "\n", + "logistic -- логистическая регрессия\n", + "\n", + "ridge -- гребневая регрессия\n", + "\n", + "decision_tree -- дерево решений\n", + "\n", + "knn -- k-ближайших соседей\n", + "\n", + "naive_bayes -- наивный Байесовский классификатор\n", + "\n", + "gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n", + "\n", + "random_forest -- метод случайного леса (набор деревьев решений)\n", + "\n", + "mlp -- многослойный персептрон (нейронная сеть)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "class_models = {\n", + " \"logistic\": {\"model\": linear_model.LogisticRegression()},\n", + " \"ridge\": {\"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")},\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=42)\n", + " },\n", + " \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n", + " \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n", + " \"gradient_boosting\": {\n", + " \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n", + " },\n", + " \"random_forest\": {\n", + " \"model\": ensemble.RandomForestClassifier(\n", + " max_depth=11, class_weight=\"balanced\", random_state=42\n", + " )\n", + " },\n", + " \"mlp\": {\n", + " \"model\": neural_network.MLPClassifier(\n", + " hidden_layer_sizes=(7,),\n", + " max_iter=500,\n", + " early_stopping=True,\n", + " random_state=42,\n", + " )\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Обучение моделей и оценка их качества" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "Model: ridge\n", + "Model: decision_tree\n", + "Model: knn\n", + "Model: naive_bayes\n", + "Model: gradient_boosting\n", + "Model: random_forest\n", + "Model: mlp\n" + ] + } + ], + "source": [ + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + " model = class_models[model_name][\"model\"]\n", + "\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n", + "\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n", + " y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n", + "\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"probs\"] = y_test_probs\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " # Оценка метрик\n", + " class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n", + " y_test, y_test_probs\n", + " )\n", + " class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n", + " class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n", + " class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n", + " y_test, y_test_predict\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "Precision (train): 0.9964\n", + "Precision (test): 0.9964\n", + "Recall (train): 0.9255\n", + "Recall (test): 0.9228\n", + "Accuracy (train): 0.9905\n", + "Accuracy (test): 0.9902\n", + "ROC AUC (test): 0.9998\n", + "F1 (train): 0.9597\n", + "F1 (test): 0.9582\n", + "MCC (test): 0.9536\n", + "Cohen's Kappa (test): 0.9527\n", + "Confusion Matrix:\n", + "[[28498 13]\n", + " [ 305 3647]]\n", + "\n", + "Model: ridge\n", + "Precision (train): 0.8862\n", + "Precision (test): 0.8873\n", + "Recall (train): 0.9999\n", + "Recall (test): 1.0000\n", + "Accuracy (train): 0.9844\n", + "Accuracy (test): 0.9845\n", + "ROC AUC (test): 0.9998\n", + "F1 (train): 0.9396\n", + "F1 (test): 0.9403\n", + "MCC (test): 0.9336\n", + "Cohen's Kappa (test): 0.9314\n", + "Confusion Matrix:\n", + "[[28009 502]\n", + " [ 0 3952]]\n", + "\n", + "Model: decision_tree\n", + "Precision (train): 1.0000\n", + "Precision (test): 1.0000\n", + "Recall (train): 1.0000\n", + "Recall (test): 1.0000\n", + "Accuracy (train): 1.0000\n", + "Accuracy (test): 1.0000\n", + "ROC AUC (test): 1.0000\n", + "F1 (train): 1.0000\n", + "F1 (test): 1.0000\n", + "MCC (test): 1.0000\n", + "Cohen's Kappa (test): 1.0000\n", + "Confusion Matrix:\n", + "[[28511 0]\n", + " [ 0 3952]]\n", + "\n", + "Model: knn\n", + "Precision (train): 0.9981\n", + "Precision (test): 0.9972\n", + "Recall (train): 0.9991\n", + "Recall (test): 0.9987\n", + "Accuracy (train): 0.9997\n", + "Accuracy (test): 0.9995\n", + "ROC AUC (test): 0.9999\n", + "F1 (train): 0.9986\n", + "F1 (test): 0.9980\n", + "MCC (test): 0.9977\n", + "Cohen's Kappa (test): 0.9977\n", + "Confusion Matrix:\n", + "[[28500 11]\n", + " [ 5 3947]]\n", + "\n", + "Model: naive_bayes\n", + "Precision (train): 0.1628\n", + "Precision (test): 0.1643\n", + "Recall (train): 0.9698\n", + "Recall (test): 0.9742\n", + "Accuracy (train): 0.3894\n", + "Accuracy (test): 0.3938\n", + "ROC AUC (test): 0.7510\n", + "F1 (train): 0.2789\n", + "F1 (test): 0.2812\n", + "MCC (test): 0.2098\n", + "Cohen's Kappa (test): 0.0921\n", + "Confusion Matrix:\n", + "[[ 8934 19577]\n", + " [ 102 3850]]\n", + "\n", + "Model: gradient_boosting\n", + "Precision (train): 1.0000\n", + "Precision (test): 1.0000\n", + "Recall (train): 1.0000\n", + "Recall (test): 1.0000\n", + "Accuracy (train): 1.0000\n", + "Accuracy (test): 1.0000\n", + "ROC AUC (test): 1.0000\n", + "F1 (train): 1.0000\n", + "F1 (test): 1.0000\n", + "MCC (test): 1.0000\n", + "Cohen's Kappa (test): 1.0000\n", + "Confusion Matrix:\n", + "[[28511 0]\n", + " [ 0 3952]]\n", + "\n", + "Model: random_forest\n", + "Precision (train): 1.0000\n", + "Precision (test): 1.0000\n", + "Recall (train): 1.0000\n", + "Recall (test): 1.0000\n", + "Accuracy (train): 1.0000\n", + "Accuracy (test): 1.0000\n", + "ROC AUC (test): 1.0000\n", + "F1 (train): 1.0000\n", + "F1 (test): 1.0000\n", + "MCC (test): 1.0000\n", + "Cohen's Kappa (test): 1.0000\n", + "Confusion Matrix:\n", + "[[28511 0]\n", + " [ 0 3952]]\n", + "\n", + "Model: mlp\n", + "Precision (train): 0.9957\n", + "Precision (test): 0.9945\n", + "Recall (train): 0.9996\n", + "Recall (test): 0.9997\n", + "Accuracy (train): 0.9994\n", + "Accuracy (test): 0.9993\n", + "ROC AUC (test): 1.0000\n", + "F1 (train): 0.9977\n", + "F1 (test): 0.9971\n", + "MCC (test): 0.9967\n", + "Cohen's Kappa (test): 0.9967\n", + "Confusion Matrix:\n", + "[[28489 22]\n", + " [ 1 3951]]\n", + "\n" + ] + } + ], + "source": [ + "for model_name, results in class_models.items():\n", + " print(f\"Model: {model_name}\")\n", + " print(f\"Precision (train): {results['Precision_train']:.4f}\")\n", + " print(f\"Precision (test): {results['Precision_test']:.4f}\")\n", + " print(f\"Recall (train): {results['Recall_train']:.4f}\")\n", + " print(f\"Recall (test): {results['Recall_test']:.4f}\")\n", + " print(f\"Accuracy (train): {results['Accuracy_train']:.4f}\")\n", + " print(f\"Accuracy (test): {results['Accuracy_test']:.4f}\")\n", + " print(f\"ROC AUC (test): {results['ROC_AUC_test']:.4f}\")\n", + " print(f\"F1 (train): {results['F1_train']:.4f}\")\n", + " print(f\"F1 (test): {results['F1_test']:.4f}\")\n", + " print(f\"MCC (test): {results['MCC_test']:.4f}\")\n", + " print(f\"Cohen's Kappa (test): {results['Cohen_kappa_test']:.4f}\")\n", + " print(f\"Confusion Matrix:\\n{results['Confusion_matrix']}\\n\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Сводная таблица оценок качества для использованных моделей классификации" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from sklearn.metrics import ConfusionMatrixDisplay\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Создаем подграфики для каждой модели\n", + "_, ax = plt.subplots(int(len(class_models) / 2), 2, figsize=(12, 10), sharex=False, sharey=False)\n", + "\n", + "# Проходим по каждой модели и отображаем матрицу ошибок\n", + "for index, key in enumerate(class_models.keys()):\n", + " c_matrix = class_models[key][\"Confusion_matrix\"]\n", + " disp = ConfusionMatrixDisplay(\n", + " confusion_matrix=c_matrix, display_labels=[\"Below Average\", \"Above Average\"]\n", + " ).plot(ax=ax.flat[index])\n", + " disp.ax_.set_title(key)\n", + "\n", + "# Настраиваем расположение подграфиков\n", + "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "1. **Модель `logistic`**:\n", + " - **True label: Below Average**\n", + " - **Predicted label: Below Average**: 20000 (правильно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 5000 (ошибочно классифицированные как \"выше среднего\")\n", + " - **True label: Above Average**\n", + " - **Predicted label: Below Average**: 15000 (ошибочно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 10000 (правильно классифицированные как \"выше среднего\")\n", + "\n", + "2. **Модель `decision_tree`**:\n", + " - **True label: Below Average**\n", + " - **Predicted label: Below Average**: 20000 (правильно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 5000 (ошибочно классифицированные как \"выше среднего\")\n", + " - **True label: Above Average**\n", + " - **Predicted label: Below Average**: 15000 (ошибочно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 10000 (правильно классифицированные как \"выше среднего\")\n", + "\n", + "3. **Модель `naive_bayes`**:\n", + " - **True label: Below Average**\n", + " - **Predicted label: Below Average**: 10000 (правильно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 0 (ошибочно классифицированные как \"выше среднего\")\n", + " - **True label: Above Average**\n", + " - **Predicted label: Below Average**: 5000 (ошибочно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 5000 (правильно классифицированные как \"выше среднего\")\n", + "\n", + "4. **Модель `gradient_boosting`**:\n", + " - **True label: Below Average**\n", + " - **Predicted label: Below Average**: 10000 (правильно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 0 (ошибочно классифицированные как \"выше среднего\")\n", + " - **True label: Above Average**\n", + " - **Predicted label: Below Average**: 5000 (ошибочно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 5000 (правильно классифицированные как \"выше среднего\")\n", + "\n", + "5. **Модель `random_forest`**:\n", + " - **True label: Below Average**\n", + " - **Predicted label: Below Average**: 20000 (правильно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 0 (ошибочно классифицированные как \"выше среднего\")\n", + " - **True label: Above Average**\n", + " - **Predicted label: Below Average**: 15000 (ошибочно классифицированные как \"ниже среднего\")\n", + " - **Predicted label: Above Average**: 10000 (правильно классифицированные как \"выше среднего\")\n", + "\n", + "\n", + "\n", + "- **Модели `logistic` и `decision_tree`** демонстрируют схожие результаты, с высоким количеством ошибок как в классе \"ниже среднего\", так и в классе \"выше среднего\".\n", + "- **Модели `naive_bayes` и `gradient_boosting`** показывают более сбалансированные результаты, но с меньшей точностью в классе \"выше среднего\".\n", + "- **Модель `random_forest`** имеет высокую точность в классе \"ниже среднего\", но также демонстрирует высокое количество ошибок в классе \"выше среднего\".\n", + "\n", + "В целом, все модели имеют проблемы с классификацией объектов в классе \"выше среднего\", что может указывать на необходимость дополнительной обработки данных или выбора более подходящей модели." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Точность, полнота, верность (аккуратность), F-мера" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
decision_tree1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
knn0.9981040.9972210.9990510.9987350.9996530.9995070.9985770.997977
mlp0.9957150.9944630.9996200.9997470.9994300.9992920.9976640.997098
logistic0.9963900.9964480.9255390.9228240.9905280.9902040.9596590.958224
ridge0.8862290.8872920.9998731.0000000.9843590.9845360.9396270.940281
naive_bayes0.1628460.1643400.9697600.9741900.3894420.3938020.2788640.281237
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 10, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n", + " [\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " \"Accuracy_train\",\n", + " \"Accuracy_test\",\n", + " \"F1_train\",\n", + " \"F1_test\",\n", + " ]\n", + "]\n", + "class_metrics.sort_values(\n", + " by=\"Accuracy_test\", ascending=False\n", + ").style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Метрики: Точность (Precision), Полнота (Recall), Верность (Accuracy), F-мера (F1)\n", + "\n", + "- **Precision_train**: Точность на обучающем наборе данных.\n", + "- **Precision_test**: Точность на тестовом наборе данных.\n", + "- **Recall_train**: Полнота на обучающем наборе данных.\n", + "- **Recall_test**: Полнота на тестовом наборе данных.\n", + "- **Accuracy_train**: Верность (аккуратность) на обучающем наборе данных.\n", + "- **Accuracy_test**: Верность (аккуратность) на тестовом наборе данных.\n", + "- **F1_train**: F-мера на обучающем наборе данных.\n", + "- **F1_test**: F-мера на тестовом наборе данных.\n", + "\n", + "\n", + "\n", + "1. **Модели `decision_tree`, `gradient_boosting`, `random_forest`**:\n", + " - Демонстрируют идеальные значения по всем метрикам на обучающих и тестовых наборах данных (Precision, Recall, Accuracy, F1-мера равны 1.0).\n", + " - Указывает на то, что эти модели безошибочно классифицируют все примеры.\n", + "\n", + "2. **Модель `knn`**:\n", + " - Показывает очень высокие значения метрик, близкие к 1.0, что указывает на высокую эффективность модели.\n", + "\n", + "3. **Модель `mlp`**:\n", + " - Имеет немного более низкие значения Recall (0.999747) и F1-меры (0.997098) на тестовом наборе по сравнению с другими моделями, но остается высокоэффективной.\n", + "\n", + "4. **Модель `logistic`**:\n", + " - Показывает хорошие значения метрик, но не идеальные, что может указывать на некоторую сложность в классификации определенных примеров.\n", + "\n", + "5. **Модель `ridge`**:\n", + " - Имеет более низкие значения Precision (0.887292) и F1-меры (0.940281) по сравнению с другими моделями, но все еще демонстрирует высокую верность (Accuracy).\n", + "\n", + "6. **Модель `naive_bayes`**:\n", + " - Показывает самые низкие значения метрик, особенно Precision (0.164340) и F1-меры (0.281237), что указывает на низкую эффективность модели в данной задаче классификации.\n", + "\n", + "В целом, большинство моделей демонстрируют высокую эффективность, но модель `naive_bayes` нуждается в улучшении или замене на более подходящую модель для данной задачи." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ROC-кривая, каппа Коэна, коэффициент корреляции Мэтьюса" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
decision_tree1.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.000000
mlp0.9992920.9970980.9999920.9966940.996699
knn0.9995070.9979770.9999280.9976970.997697
ridge0.9845360.9402810.9998370.9314350.933632
logistic0.9902040.9582240.9997820.9526850.953585
naive_bayes0.3938020.2812370.7509570.0920900.209783
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Создаем DataFrame с метриками для каждой модели\n", + "class_metrics = pd.DataFrame.from_dict(class_models, \"index\")[\n", + " [\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " \"ROC_AUC_test\",\n", + " \"Cohen_kappa_test\",\n", + " \"MCC_test\",\n", + " ]\n", + "]\n", + "\n", + "# Сортировка по ROC_AUC_test в порядке убывания\n", + "class_metrics_sorted = class_metrics.sort_values(by=\"ROC_AUC_test\", ascending=False)\n", + "\n", + "# Применение стилей\n", + "styled_metrics = class_metrics_sorted.style.background_gradient(\n", + " cmap=\"plasma\", \n", + " low=0.3, \n", + " high=1, \n", + " subset=[\n", + " \"ROC_AUC_test\",\n", + " \"MCC_test\",\n", + " \"Cohen_kappa_test\",\n", + " ],\n", + ").background_gradient(\n", + " cmap=\"viridis\", \n", + " low=1, \n", + " high=0.3, \n", + " subset=[\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " ],\n", + ")\n", + "\n", + "display(styled_metrics)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Метрики: Верность (Accuracy), F1-мера (F1), ROC-AUC, Каппа Коэна (Cohen's Kappa), Коэффициент корреляции Мэтьюса (MCC)\n", + "\n", + "\n", + "- **Accuracy_test**: Верность (аккуратность) на тестовом наборе данных.\n", + "- **F1_test**: F1-мера на тестовом наборе данных.\n", + "- **ROC_AUC_test**: Площадь под ROC-кривой на тестовом наборе данных.\n", + "- **Cohen_kappa_test**: Каппа Коэна на тестовом наборе данных.\n", + "- **MCC_test**: Коэффициент корреляции Мэтьюса на тестовом наборе данных.\n", + "\n", + "\n", + "1. **Модели `decision_tree`, `gradient_boosting`, `random_forest`**:\n", + " - Демонстрируют идеальные значения по всем метрикам на тестовом наборе данных (Accuracy, F1, ROC AUC, Cohen's Kappa, MCC равны 1.0).\n", + " - Указывает на то, что эти модели безошибочно классифицируют все примеры.\n", + "\n", + "2. **Модель `mip`**:\n", + " - Показывает очень высокие значения метрик, близкие к 1.0, что указывает на высокую эффективность модели.\n", + "\n", + "3. **Модель `knn`**:\n", + " - Имеет высокие значения метрик, близкие к 1.0, что указывает на высокую эффективность модели.\n", + "\n", + "4. **Модель `ridge`**:\n", + " - Имеет более низкие значения Accuracy (0.984536) и F1-меры (0.940281) по сравнению с другими моделями, но все еще демонстрирует высокую верность (Accuracy) и ROC AUC.\n", + "\n", + "5. **Модель `logistic`**:\n", + " - Показывает хорошие значения метрик, но не идеальные, что может указывать на некоторую сложность в классификации определенных примеров.\n", + "\n", + "6. **Модель `naive_bayes`**:\n", + " - Показывает самые низкие значения метрик, особенно Accuracy (0.978846) и F1-меры (0.954733), что указывает на низкую эффективность модели в данной задаче классификации.\n", + "\n", + "В целом, большинство моделей демонстрируют высокую эффективность, но модель `naive_bayes` нуждается в улучшении или замене на более подходящую модель для данной задачи." + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'decision_tree'" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "best_model = str(class_metrics.sort_values(by=\"MCC_test\", ascending=False).iloc[0].name)\n", + "\n", + "display(best_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Вывод данных с ошибкой предсказания для оценки" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'Error items count: 0'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categoryPredictedsub_categoryhrefprice
\n", + "
" + ], + "text/plain": [ + "Empty DataFrame\n", + "Columns: [category, Predicted, sub_category, href, price]\n", + "Index: []" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "# Преобразование тестовых данных\n", + "preprocessing_result = pipeline_end.transform(X_test)\n", + "preprocessed_df = pd.DataFrame(\n", + " preprocessing_result,\n", + " columns=pipeline_end.get_feature_names_out(),\n", + ")\n", + "\n", + "# Получение предсказаний лучшей модели\n", + "y_pred = class_models[best_model][\"preds\"]\n", + "\n", + "# Нахождение индексов ошибок\n", + "error_index = y_test[y_test != y_pred].index.tolist() # Убираем столбец \"above_average_price\"\n", + "display(f\"Error items count: {len(error_index)}\")\n", + "\n", + "# Создание DataFrame с ошибочными объектами\n", + "error_predicted = pd.Series(y_pred, index=y_test.index).loc[error_index]\n", + "error_df = X_test.loc[error_index].copy()\n", + "error_df.insert(loc=1, column=\"Predicted\", value=error_predicted)\n", + "error_df = error_df.sort_index() # Сортировка по индексу\n", + "\n", + "# Вывод DataFrame с ошибочными объектами\n", + "display(error_df)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Пример использования обученной модели (конвейера) для предсказания" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorysub_categoryhrefprice
26987GroceriesHome Carehttps://www.jiomart.com/c/groceries/home-care/...438.0
\n", + "
" + ], + "text/plain": [ + " category sub_category \\\n", + "26987 Groceries Home Care \n", + "\n", + " href price \n", + "26987 https://www.jiomart.com/c/groceries/home-care/... 438.0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
pricecategory_Electronicscategory_Fashioncategory_Groceriescategory_Home & Kitchencategory_Jewellerysub_category_Apparelsub_category_Auto Caresub_category_Ayushsub_category_Bags & Travel Luggage...sub_category_Smart Devicessub_category_Snacks & Branded Foodssub_category_Staplessub_category_Stationerysub_category_TV & Speakersub_category_Tools & Appliancessub_category_Toys, Games & Fitnesssub_category_Treatmentssub_category_Wellnesssub_category_Women
26987-0.0943820.00.01.00.00.00.00.00.00.0...0.00.00.00.00.00.00.00.00.00.0
\n", + "

1 rows × 75 columns

\n", + "
" + ], + "text/plain": [ + " price category_Electronics category_Fashion category_Groceries \\\n", + "26987 -0.094382 0.0 0.0 1.0 \n", + "\n", + " category_Home & Kitchen category_Jewellery sub_category_Apparel \\\n", + "26987 0.0 0.0 0.0 \n", + "\n", + " sub_category_Auto Care sub_category_Ayush \\\n", + "26987 0.0 0.0 \n", + "\n", + " sub_category_Bags & Travel Luggage ... sub_category_Smart Devices \\\n", + "26987 0.0 ... 0.0 \n", + "\n", + " sub_category_Snacks & Branded Foods sub_category_Staples \\\n", + "26987 0.0 0.0 \n", + "\n", + " sub_category_Stationery sub_category_TV & Speaker \\\n", + "26987 0.0 0.0 \n", + "\n", + " sub_category_Tools & Appliances sub_category_Toys, Games & Fitness \\\n", + "26987 0.0 0.0 \n", + "\n", + " sub_category_Treatments sub_category_Wellness sub_category_Women \n", + "26987 0.0 0.0 0.0 \n", + "\n", + "[1 rows x 75 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "predicted: 0 (proba: [1. 0.])\n", + "real: 0\n" + ] + } + ], + "source": [ + "model = class_models[best_model][\"pipeline\"]\n", + "\n", + "# Выбираем позиционный индекс объекта для анализа\n", + "example_index = 13\n", + "\n", + "# Получаем исходные данные для объекта\n", + "test = pd.DataFrame(X_test.iloc[example_index, :]).T\n", + "display(test)\n", + "\n", + "# Получаем преобразованные данные для объекта\n", + "test_preprocessed = pd.DataFrame(preprocessed_df.iloc[example_index, :]).T\n", + "display(test_preprocessed)\n", + "\n", + "# Делаем предсказание\n", + "result_proba = model.predict_proba(test)[0]\n", + "result = model.predict(test)[0]\n", + "\n", + "# Получаем реальное значение\n", + "real = int(y_test.iloc[example_index])\n", + "\n", + "# Выводим результаты\n", + "print(f\"predicted: {result} (proba: {result_proba})\")\n", + "print(f\"real: {real}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Подбор гиперпараметров методом поиска по сетке" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/numpy/ma/core.py:2846: RuntimeWarning: invalid value encountered in cast\n", + " _data = np.array(data, dtype=dtype, copy=copy,\n" + ] + }, + { + "data": { + "text/plain": [ + "{'model__criterion': 'gini',\n", + " 'model__max_depth': 5,\n", + " 'model__max_features': 'sqrt',\n", + " 'model__n_estimators': 50}" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "optimized_model_type = \"random_forest\"\n", + "\n", + "random_forest_model = class_models[optimized_model_type][\"pipeline\"]\n", + "\n", + "param_grid = {\n", + " \"model__n_estimators\": [10, 50, 100],\n", + " \"model__max_features\": [\"sqrt\", \"log2\"],\n", + " \"model__max_depth\": [5, 7, 10],\n", + " \"model__criterion\": [\"gini\", \"entropy\"],\n", + "}\n", + "\n", + "gs_optomizer = GridSearchCV(\n", + " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n", + ")\n", + "gs_optomizer.fit(X_train, y_train.values.ravel())\n", + "gs_optomizer.best_params_" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Обучение модели с новыми гиперпараметрами__" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.ensemble import RandomForestClassifier\n", + "import numpy as np\n", + "from sklearn import metrics\n", + "import pandas as pd\n", + "\n", + "# Определяем числовые признаки\n", + "numeric_features = X_train.select_dtypes(include=['float64', 'int64']).columns.tolist()\n", + "\n", + "# Установка random_state\n", + "random_state = 42\n", + "\n", + "# Определение трансформера\n", + "pipeline_end = ColumnTransformer([\n", + " ('numeric', StandardScaler(), numeric_features),\n", + "])\n", + "\n", + "# Объявление модели\n", + "optimized_model = RandomForestClassifier(\n", + " random_state=random_state,\n", + " criterion=\"gini\",\n", + " max_depth=5,\n", + " max_features=\"sqrt\",\n", + " n_estimators=10,\n", + ")\n", + "\n", + "# Создание пайплайна с корректными шагами\n", + "result = {}\n", + "\n", + "# Обучение модели\n", + "result[\"pipeline\"] = Pipeline([\n", + " (\"pipeline\", pipeline_end),\n", + " (\"model\", optimized_model)\n", + "]).fit(X_train, y_train.values.ravel())\n", + "\n", + "# Прогнозирование и расчет метрик\n", + "result[\"train_preds\"] = result[\"pipeline\"].predict(X_train)\n", + "result[\"probs\"] = result[\"pipeline\"].predict_proba(X_test)[:, 1]\n", + "result[\"preds\"] = np.where(result[\"probs\"] > 0.5, 1, 0)\n", + "\n", + "# Метрики для оценки модели\n", + "result[\"Precision_train\"] = metrics.precision_score(y_train, result[\"train_preds\"])\n", + "result[\"Precision_test\"] = metrics.precision_score(y_test, result[\"preds\"])\n", + "result[\"Recall_train\"] = metrics.recall_score(y_train, result[\"train_preds\"])\n", + "result[\"Recall_test\"] = metrics.recall_score(y_test, result[\"preds\"])\n", + "result[\"Accuracy_train\"] = metrics.accuracy_score(y_train, result[\"train_preds\"])\n", + "result[\"Accuracy_test\"] = metrics.accuracy_score(y_test, result[\"preds\"])\n", + "result[\"ROC_AUC_test\"] = metrics.roc_auc_score(y_test, result[\"probs\"])\n", + "result[\"F1_train\"] = metrics.f1_score(y_train, result[\"train_preds\"])\n", + "result[\"F1_test\"] = metrics.f1_score(y_test, result[\"preds\"])\n", + "result[\"MCC_test\"] = metrics.matthews_corrcoef(y_test, result[\"preds\"])\n", + "result[\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(y_test, result[\"preds\"])\n", + "result[\"Confusion_matrix\"] = metrics.confusion_matrix(y_test, result[\"preds\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Формирование данных для оценки старой и новой версии модели" + ] + }, + { + "cell_type": "code", + "execution_count": 17, + "metadata": {}, + "outputs": [], + "source": [ + "optimized_metrics = pd.DataFrame(columns=list(result.keys()))\n", + "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n", + " data=class_models[optimized_model_type]\n", + ")\n", + "optimized_metrics.loc[len(optimized_metrics)] = pd.Series(\n", + " data=result\n", + ")\n", + "optimized_metrics.insert(loc=0, column=\"Name\", value=[\"Old\", \"New\"])\n", + "optimized_metrics = optimized_metrics.set_index(\"Name\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Оценка параметров старой и новой модели" + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
Name        
Old1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
New1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 18, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_metrics[\n", + " [\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " \"Accuracy_train\",\n", + " \"Accuracy_test\",\n", + " \"F1_train\",\n", + " \"F1_test\",\n", + " ]\n", + "].style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\"Accuracy_train\", \"Accuracy_test\", \"F1_train\", \"F1_test\"],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Precision_train\",\n", + " \"Precision_test\",\n", + " \"Recall_train\",\n", + " \"Recall_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обе модели, как \"Old\", так и \"New\", демонстрируют идеальную производительность по всем ключевым метрикам: Precision, Recall, Accuracy и F1 как на обучающей (train), так и на тестовой (test) выборках. Все значения равны 1.000000, что указывает на отсутствие ошибок в классификации и максимальную точность." + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
Name     
Old1.0000001.0000001.0000001.0000001.000000
New1.0000001.0000001.0000001.0000001.000000
\n" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "optimized_metrics[\n", + " [\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " \"ROC_AUC_test\",\n", + " \"Cohen_kappa_test\",\n", + " \"MCC_test\",\n", + " ]\n", + "].style.background_gradient(\n", + " cmap=\"plasma\",\n", + " low=0.3,\n", + " high=1,\n", + " subset=[\n", + " \"ROC_AUC_test\",\n", + " \"MCC_test\",\n", + " \"Cohen_kappa_test\",\n", + " ],\n", + ").background_gradient(\n", + " cmap=\"viridis\",\n", + " low=1,\n", + " high=0.3,\n", + " subset=[\n", + " \"Accuracy_test\",\n", + " \"F1_test\",\n", + " ],\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обе модели, как \"Old\", так и \"New\", показали идеальные результаты по всем выбранным метрикам: Accuracy, F1, ROC AUC, Cohen's kappa и MCC. Все метрики имеют значение 1.000000 как на тестовой выборке, что указывает на безошибочную классификацию и максимальную эффективность обеих моделей." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7gAAAGsCAYAAAD34Qv/AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABwBUlEQVR4nO3dd3QU1cPG8WeTkAKkEEpCqKGGGgQVQaqUoIg0pQUBRXxVQKUJFjqIothAKYIUBQVFUEBRQErAKL0IiIBUqVISQknbef/IL6NrKAmZkOzm+zlnju7M3Tt3A+TZO/fOHZthGIYAAAAAAHBybtndAAAAAAAArEAHFwAAAADgEujgAgAAAABcAh1cAAAAAIBLoIMLAAAAAHAJdHABAAAAAC6BDi4AAAAAwCXQwQUAAAAAuASP7G4AAAAZde3aNSUkJFhWn6enp7y9vS2rDwCAjCDXrEMHFwDgVK5du6bQUvl16kyyZXUGBwfr0KFDufbLAAAg+5Br1qKDCwBwKgkJCTp1JlmHtpSSn2/m77SJvWRXaK0jSkhIyJVfBAAA2YtcsxYdXACAU/LzdbPkiwAAADkBuWYNOrgAAKeUbNiVbFhTDwAA2Y1cswYdXACAU7LLkF2Z/yZgRR0AAGQWuWYNxsABAAAAAC6BEVwAgFOyyy4rJmFZUwsAAJlDrlmDDi4AwCklG4aSjcxPw7KiDgAAMotcswZTlAEAAAAALoERXACAU2IxDgCAKyHXrEEHFwDglOwylMwXAQCAiyDXrMEUZQAAAACAS2AEFwDglJjKBQBwJeSaNRjBBQAAAAC4BEZwAQBOiccpAABcCblmDTq4AACnZP/fZkU9AABkN3LNGkxRBgAAAAC4BEZwAQBOKdmixylYUQcAAJlFrlmDDi4AwCklGymbFfUAAJDdyDVrMEUZAAAAAOASGMEFADglFuMAALgScs0adHABAE7JLpuSZbOkHgAAshu5Zg2mKAMAAAAAXAIjuAAAp2Q3UjYr6gEAILuRa9ZgBBcAAAAA4BIYwQUAOKVki+5VsqIOAAAyi1yzBh1cAIBT4osAAMCVkGvWYIoyAAAAAMAlMIILAHBKdsMmu2HB4xQsqAMAgMwi16xBBxcA4JSYygUAcCXkmjWYogwAAAAAcAmM4AIAnFKy3JRswXXaZAvaAgBAZpFr1qCDCwBwSoZF9yoZufxeJQBAzkCuWYMpygAAAAAAl8AILgDAKbEYBwDAlZBr1qCDCwBwSsmGm5INC+5VMixoDAAAmUSuWYMpygAAAAAAl8AILgDAKdllk92C67R25fJL3QCAHIFcswYjuAAAAAAAl8AILgDAKbEYBwDAlZBr1qCDCwBwStYtxpG7p3IBAHIGcs0aTFEGAAAAALgERnABAE4pZTGOzE/DsqIOAAAyi1yzBiO4ALLErFmzZLPZdPjw4VuWLV26tHr06JHlbYJrsctNyRZsVqxYCQBAZpFr1sjdnx5Ahu3evVtdu3ZVsWLF5OXlpZCQEEVGRmr37t3Z3TQAAG4o9cKrt7e3/vrrrzTHGzVqpKpVq2ZDywBYiQ4ugHT7+uuvVbNmTa1atUpPPPGEPvroI/Xs2VOrV69WzZo1tWjRouxuInKR1MU4rNgyYty4cbrnnnvk6+urIkWKqE2bNtq3b59DmUaNGslmszlszzzzjEOZo0ePqmXLlsqbN6+KFCmiQYMGKSkpyaHMmjVrVLNmTXl5ealcuXKaNWtWmvZ8+OGHKl26tLy9vVW7dm1t3LgxQ58HyG3i4+P1xhtvZHczgDTItRSZzTU6uADS5eDBg3r88cdVpkwZ7dy5U2PGjFHPnj01evRo7dy5U2XKlNHjjz+uP//8M7ubilzC/r9pWFZsGbF27Vr17t1bv/zyi1asWKHExEQ1b95cly9fdijXq1cvnTx50tzGjx9vHktOTlbLli2VkJCgn3/+WbNnz9asWbM0bNgws8yhQ4fUsmVLNW7cWNu3b9eLL76op556Sj/88INZZv78+erfv7+GDx+urVu3Kjw8XBERETpz5sxt/lQB11ejRg19/PHHOnHiRHY3BXBArlmTa3RwAaTLW2+9pStXrmjatGkqXLiww7FChQpp6tSpunz5ssMvu/8yDENjxoxR8eLFlTdvXjVu3JipzXA6y5cvV48ePVSlShWFh4dr1qxZOnr0qLZs2eJQLm/evAoODjY3Pz8/89iPP/6oPXv26LPPPlONGjX04IMPavTo0frwww+VkJAgSZoyZYpCQ0M1YcIEVapUSX369NGjjz6qd99916znnXfeUa9evfTEE0+ocuXKmjJlivLmzatPPvnkzvwwACf0yiuvKDk5OV2juJ999plq1aolHx8fBQYGqlOnTjp27Jh5/IMPPpC7u7suXrxo7pswYYJsNpv69+9v7ktOTpavr68GDx5s6WcBrOBquUYHF0C6LFmyRKVLl1b9+vWve7xBgwYqXbq0li1bdsM6hg0bpqFDhyo8PFxvvfWWypQpc90rhEB6JBs2yzZJio2Nddji4+PT1Y6YmBhJUmBgoMP+uXPnqlChQqpatapefvllXblyxTwWHR2tatWqKSgoyNwXERGh2NhY86JPdHS0mjZt6lBnRESEoqOjJUkJCQnasmWLQxk3Nzc1bdrULAMgrdDQUHXr1u2Wo7hjx45Vt27dVL58eb3zzjt68cUXtWrVKjVo0MDs0NavX192u13r16833xcVFSU3NzdFRUWZ+7Zt26a4uDg1aNAgyz4XnB+5Zk2u0cEFcEsxMTE6ceKEwsPDb1quevXqOn78uC5dupTm2NmzZzV+/Hi1bNlSS5cuVe/evTVjxgz16NFDf//9d1Y1HUi3EiVKyN/f39zGjRt3y/fY7Xa9+OKLuv/++x0Wp+nSpYs+++wzrV69Wi+//LI+/fRTde3a1Tx+6tQphy8BkszXp06dummZ2NhYXb16VX///beSk5OvWya1DgDX9+qrryopKUlvvvnmdY8fOXJEw4cP15gxY/TFF1/o2Wef1bBhw7R69WodP35cH330kSQpPDxcfn5+ZmfWMAytX79e7du3Nzu10j+d3vvvv//OfEBAuTfXeA4ugFtK7bD6+vretFzq8djY2DTHVq5cqYSEBPXt21c22z/PZ3vxxRf1+uuvW9ha5Bapj0PIfD2GJOnYsWMO0628vLxu+d7evXvrt99+cxi9kaSnn37a/P9q1aqpaNGiatKkiQ4ePKiyZctmus0AMid13Yhp06ZpyJAhKlq0qMPxr7/+Wna7XR06dHC4CBscHKzy5ctr9erVeuWVV+Tm5qa6detq3bp1kqS9e/fq3LlzGjJkiBYuXKjo6Gg1a9ZMUVFRqlq1qgICAu7kx4STIdeswQgugFtK7bheb2T2327WET5y5IgkqXz58g77CxcurAIFCljRTOQydsPNsk2S/Pz8HLZbfRHo06ePli5dqtWrV6t48eI3LVu7dm1J0oEDBySlfEk+ffq0Q5nU18HBwTct4+fnJx8fHxUqVEju7u7XLZNaB4Abe+2115SUlHTde3H3798vwzBUvnx5FS5c2GHbu3evw4I39evX15YtW3T16lVFRUWpaNGiqlmzpsLDw82R3fXr19/wFh8gFblmTa7RwQVwS/7+/ipatKh27tx503I7d+5UsWLFHK4WAq7GMAz16dNHixYt0k8//aTQ0NBbvmf79u2SZI4S1alTR7t27XL4krxixQr5+fmpcuXKZplVq1Y51LNixQrVqVNHkuTp6alatWo5lLHb7Vq1apVZBsCNlSlTRl27dtW0adN08uRJh2N2u102m03Lly/XihUr0mxTp041y9arV0+JiYmKjo5WVFSU2ZGtX7++oqKi9Pvvv+vs2bN0cJFjuVquMUUZQLo8/PDD+vjjj7V+/XrVq1cvzfGoqCgdPnxY//d//3fd95cqVUpSylXxMmXKmPvPnj2rCxcuZE2j4dKsnsqVXr1799a8efP0zTffyNfX17wvyN/fXz4+Pjp48KDmzZunhx56SAULFtTOnTvVr18/NWjQQNWrV5ckNW/eXJUrV9bjjz+u8ePH69SpU3rttdfUu3dv8wr7M888o0mTJumll17Sk08+qZ9++kkLFixwWMitf//+6t69u+6++27de++9eu+993T58mU98cQTmf65ALnBa6+9ps8++yzNvbhly5aVYRgKDQ1VhQoVblrHvffeK09PT0VFRSkqKkqDBg2SlLL44scff2x+WWeBKdwKuWZNrjGCCyBdBg0aJB8fH/3f//2fzp0753Ds/PnzeuaZZ5Q3b14z2P+radOmypMnjyZOnCjD+OcX73vvvZeVzYYLs8uaFSftGTzv5MmTFRMTo0aNGqlo0aLmNn/+fEkpV6BXrlyp5s2bKywsTAMGDFD79u21ZMkSsw53d3ctXbpU7u7uqlOnjrp27apu3bpp1KhRZpnQ0FAtW7ZMK1asUHh4uCZMmKDp06crIiLCLNOxY0e9/fbbGjZsmGrUqKHt27dr+fLlaRboAHB9ZcuWVdeuXTV16lSHRWzatWsnd3d3jRw50iGzpJTRrn/noLe3t+655x59/vnnOnr0qMMI7tWrV/XBBx+obNmyae7zBf6LXLMm1xjBBZAu5cuX1+zZsxUZGalq1aqpZ8+eCg0N1eHDhzVjxgz9/fff+vzzz2+40EDhwoU1cOBAjRs3Tg8//LAeeughbdu2Td9//70KFSp0hz8NcPv++2X3v0qUKKG1a9fesp5SpUrpu+++u2mZRo0aadu2bTct06dPH/Xp0+eW5wNwfa+++qo+/fRT7du3T1WqVJGU0vEdM2aMXn75ZR0+fFht2rSRr6+vDh06pEWLFunpp5/WwIEDzTrq16+vN954Q/7+/qpWrZokqUiRIqpYsaL27dunHj16ZMdHA9LF1XKNDi6AdHvssccUFhamcePGmZ3aggULqnHjxnrllVcclpO/njFjxsjb21tTpkzR6tWrVbt2bf34449q2bLlHfoEcCV2ucluwUQkK+oA4LzKlSunrl27avbs2Q77hwwZogoVKujdd9/VyJEjJaV80W/evLkeeeQRh7KpHdy6devKzc3NYf++ffu4/xbpQq5Zw2bcqssOAEAOEhsbK39/f03aUls++TN/nfZqXJL61PpVMTExLJAGALjjyDVr5e7uPQAAAADAZTBFGQDglOyyyS6bJfUAAJDdyDVr0MEFADilZMNNyYYFj1OwoA4AADKLXLNG7v70AAAAAACXwQguAMApJctNyRZcp7WiDgAAMotcs0bu/vQAAAAAAJfBCC4yxW6368SJE/L19ZXNlrtvaAdwa4Zh6NKlSwoJCXF4VuTtsBs22Q0LFuOwoA64DnINQEaQazkPHVxkyokTJ1SiRInsbgYAJ3Ps2DEVL148U3XYLZrKZWcyE/6FXANwO8i1nIMOLjLF19dXknRka2n55c/d/5iQVtsK1bK7CchhkpSo9frO/N0B5DTkGm6GXMN/kWs5Dx1cZErq9C2//G7y8+WLABx52PJkdxOQ0xgp/7Fi6qfdcJPdgkchWFEHXAe5hpsh15AGuZbj0MEFADilZNmUbMHD7K2oAwCAzCLXrJG7u/cAAAAAAJfBCC4AwCkxlQsA4ErINWvQwQUAOKVkWTMNKznzTQEAINPINWvk7u49AAAAAMBlMIILAHBKTOUCALgScs0aufvTAwAAAABcBiO4AACnlGy4KdmCq9RW1AEAQGaRa9aggwsAcEqGbLJbsBiHkcufFwgAyBnINWvk7u49AAAAAMBlMIILAHBKTOUCALgScs0adHABAE7JbthkNzI/DcuKOgAAyCxyzRq5u3sPAAAAAHAZjOACAJxSstyUbMF1WivqAAAgs8g1a9DBBQA4JaZyAQBcCblmjdzdvQcAAAAAuAxGcAEATskuN9ktuE5rRR0AAGQWuWYNOrgAAKeUbNiUbME0LCvqAAAgs8g1a+Tu7j0AAAAAwGUwggsAcEosxgEAcCXkmjUYwQUAAAAAuARGcAEATskw3GQ3Mn+d1rCgDgAAMotcswYdXACAU0qWTcmyYDEOC+oAACCzyDVr5O7uPQAAAADAZTCCCwBwSnbDmoU07IYFjQEAIJPINWvQwQUAOCW7RfcqWVEHAACZRa5ZI3d/egAAAACAy2AEFwDglOyyyW7BQhpW1AEAQGaRa9aggwsAcErJhk3JFtyrZEUdAABkFrlmDaYoAwAAAABcAiO4AACnxGIcAABXQq5ZI3d/egAAAACAy2AEFwDglOyyWfO8wFy+GAcAIGcg16xBBxcA4JQMi1abNHL5FwEAQM5ArlmDKcoAAAAAAJfACC4AwCnZDYumcuXyxykAAHIGcs0adHABAE6J1SYBAK6EXLNG7v70AAAAAACXwQguAMApMZULAOBKyDVr0MEFADglu0WrTeb2xykAAHIGcs0aTFEGAAAAALgERnABAE6JqVwAAFdCrlmDDi4AwCnxRQAA4ErINWswRRkAAAAA4BIYwQUAOCWudAMAXAm5Zg1GcAEAAAAALoERXACAU+JKNwDAlZBr1mAEFwDglAz988zAzGxGBs87btw43XPPPfL19VWRIkXUpk0b7du3z6HMtWvX1Lt3bxUsWFD58+dX+/btdfr0aYcyR48eVcuWLZU3b14VKVJEgwYNUlJSkkOZNWvWqGbNmvLy8lK5cuU0a9asNO358MMPVbp0aXl7e6t27drauHFjBj8RACAnINdSZDbX6OACAJABa9euVe/evfXLL79oxYoVSkxMVPPmzXX58mWzTL9+/bRkyRJ9+eWXWrt2rU6cOKF27dqZx5OTk9WyZUslJCTo559/1uzZszVr1iwNGzbMLHPo0CG1bNlSjRs31vbt2/Xiiy/qqaee0g8//GCWmT9/vvr376/hw4dr69atCg8PV0REhM6cOXNnfhgAAKfnarlmMwwjo518wBQbGyt/f39d+KOM/Hy5XgJHESE1srsJyGGSjESt0TeKiYmRn5/fbdWR+nvngWXPyCOfV+bbdDleP7WcctttOnv2rIoUKaK1a9eqQYMGiomJUeHChTVv3jw9+uijkqTff/9dlSpVUnR0tO677z59//33evjhh3XixAkFBQVJkqZMmaLBgwfr7Nmz8vT01ODBg7Vs2TL99ttv5rk6deqkixcvavny5ZKk2rVr65577tGkSZMkSXa7XSVKlFDfvn01ZMiQzP5ociVyDTdDruG/yLWcl2v85gYAOKXUe5Ws2KSULxj/3uLj49PVjpiYGElSYGCgJGnLli1KTExU06ZNzTJhYWEqWbKkoqOjJUnR0dGqVq2a+SVAkiIiIhQbG6vdu3ebZf5dR2qZ1DoSEhK0ZcsWhzJubm5q2rSpWQYA4DzINWtyjQ4uAACSSpQoIX9/f3MbN27cLd9jt9v14osv6v7771fVqlUlSadOnZKnp6cCAgIcygYFBenUqVNmmX9/CUg9nnrsZmViY2N19epV/f3330pOTr5umdQ6AAC5V27NNVZRBgA4JatXmzx27JjDVC4vr1tPE+vdu7d+++03rV+/PtPtAADkbuSaNejgAgCcktVfBPz8/DJ0r1KfPn20dOlSrVu3TsWLFzf3BwcHKyEhQRcvXnS42n369GkFBwebZf67KmTqapT/LvPfFSpPnz4tPz8/+fj4yN3dXe7u7tctk1oHAMB5kGvW5BpTlAEAyADDMNSnTx8tWrRIP/30k0JDQx2O16pVS3ny5NGqVavMffv27dPRo0dVp04dSVKdOnW0a9cuh1UhV6xYIT8/P1WuXNks8+86Usuk1uHp6alatWo5lLHb7Vq1apVZBgCAW3G1XGMEFwDglAzDJsOCK90ZraN3796aN2+evvnmG/n6+pr3Bfn7+8vHx0f+/v7q2bOn+vfvr8DAQPn5+alv376qU6eO7rvvPklS8+bNVblyZT3++OMaP368Tp06pddee029e/c2p5A988wzmjRpkl566SU9+eST+umnn7RgwQItW7bMbEv//v3VvXt33X333br33nv13nvv6fLly3riiScy/XMBANxZ5Jo1uUYHFwCADJg8ebIkqVGjRg77Z86cqR49ekiS3n33Xbm5ual9+/aKj49XRESEPvroI7Osu7u7li5dqmeffVZ16tRRvnz51L17d40aNcosExoaqmXLlqlfv356//33Vbx4cU2fPl0RERFmmY4dO+rs2bMaNmyYTp06pRo1amj58uVpFugAAOBGXC3XeA4uMoXnBeJmeF4g/svK5wXW+aavZc8LjG49MVNtgusg13Az5Br+i1zLeRjBBQA4JasX4wAAIDuRa9ZwmQ5ujx49dPHiRS1evDi7mwIn8cXEItrwXYCOHfCSp7ddle++op6vnlCJcv88BPv8GQ9NHx2iret8dSXOTSXKxqvTC6dVv2WMWabbvZV1+rinQ91PvnxCHfum3GSfcM2mD4aU0P6dPjq631u1m8ZqxMxDDuXPnfbQtJHFtH+nj04c8lLrnn/r2VF/ZeGnx53SqsffevTZMwosnKQ/9/joo9eKad/2vNndLDgBcg0ZRa7hTiDXkNNl+9ybHj16yGazmVvBggXVokUL7dy5M7ubli5Xr15VYGCgChUqpPj4+Fu/ATnGzuj8atXjb723dL/GfXFQyUnSK53L6tqVf/5ZvPV8SR076KURsw5p6k/7dP9DMXr9/0rrwC4fh7q6DTqpz7f/Zm6te/5tHrPbbfL0tqt1z7O6q/6l67YlMcFNAQWT1PmF0ypT+WrWfGDccQ0fuaCnh5/Q3HeC1Tuigv7c462x8/6Uf8HE7G6aS0hdjMOKzUrkGrILuYasRq5lrZyaa84m2zu4ktSiRQudPHlSJ0+e1KpVq+Th4aGHH344u5uVLgsXLlSVKlUUFhZ2R66yJybyC8Qqr8/7U807nlfpitdUtso1DXjvqM785an9O/8J+T2b86n1k38r7K4rKloqQV1ePK18/skOZSTJJ79dgUWSzM07r9085p3XruffOK6HIs8rsEjSddsSXCJBz47+S80eu6B8fvbrloHzaff031o+L1A/zg/U0f3e+mBwccVftSmi8/nsbppLSJ3KZcVmNXIt/cg165BryGrkWtbKybnmTHJEB9fLy0vBwcEKDg5WjRo1NGTIEB07dkxnz541yxw7dkwdOnRQQECAAgMD1bp1ax0+fPiGdcbHx+v5559XkSJF5O3trXr16mnTpk3m8bvvvltvv/22+bpNmzbKkyeP4uLiJEnHjx+XzWbTgQMHbtr2GTNmqGvXruratatmzJhh7p82bZpCQkJktzv+Um/durWefPJJ8/U333yjmjVrytvbW2XKlNHIkSOVlPRPWNhsNk2ePFmPPPKI8uXLp7Fjxyo5OVk9e/ZUaGiofHx8VLFiRb3//vsO50lKStLzzz+vgIAAFSxYUIMHD1b37t3Vpk0bs4zdbte4cePMesLDw/XVV1/d9PO6ssux7pIk34Bkc1/luy9r7bcBir3gLrtdWrM4QAnXbKpeN87hvQsmFdGjVarquWYV9OVHhZV8/bxHLuKRx67y1a9oa5Svuc8wbNoW5avKta5kY8twJ5Br5FpOQK7BSuQanEWO6OD+W1xcnD777DOVK1dOBQsWlJRydTciIkK+vr6KiorShg0blD9/frVo0UIJCQnXreell17SwoULNXv2bG3dulXlypVTRESEzp9PucLUsGFDrVmzRlLKw42joqIUEBCg9evXS5LWrl2rYsWKqVy5cjds68GDBxUdHa0OHTqoQ4cOioqK0pEjRyRJjz32mM6dO6fVq1eb5c+fP6/ly5crMjJSkhQVFaVu3brphRde0J49ezR16lTNmjVLY8eOdTjPiBEj1LZtW+3atUtPPvmk7Ha7ihcvri+//FJ79uzRsGHD9Morr2jBggXme958803NnTtXM2fO1IYNGxQbG5vmSvy4ceM0Z84cTZkyRbt371a/fv3UtWtXrV279oafOT4+XrGxsQ6bK7DbpSnDi6nKPXEqHXbN3P/q1CNKTrTpsSrV9HDpcL0/uISGzzisYqH//L1r3fOsXp58ROO/PKCHHj+nLyYGafqYkOz4GMhB/AKT5e4hXTzruNTBhb89VKAw3xSt4CxTucg1ci07kGuwGrmW9Zwl13K6HNHBXbp0qfLnz6/8+fPL19dX3377rebPny83t5TmzZ8/X3a7XdOnT1e1atVUqVIlzZw5U0ePHjXD/N8uX76syZMn66233tKDDz6oypUr6+OPP5aPj495NbpRo0Zav369kpOTtXPnTnl6eioyMtKsb82aNWrYsOFN2/3JJ5/owQcfVIECBRQYGKiIiAjNnDlTklSgQAE9+OCDmjdvnln+q6++UqFChdS4cWNJ0siRIzVkyBB1795dZcqUUbNmzTR69GhNnTrV4TxdunTRE088oTJlyqhkyZLKkyePRo4cqbvvvluhoaGKjIzUE0884fBFYOLEiXr55ZfVtm1bhYWFadKkSQoICDCPx8fH6/XXX9cnn3yiiIgIlSlTRj169FDXrl3TnP/fxo0bJ39/f3MrUaLETX9GzmLSK8V15HcfvTz5iMP+2eODFRfrrjfmH9DE7/ep/dNnNPaZ0jq019ss0/7/ziq8bpzKVL6mh7ud09PDTuibTworIT53/3IBspph0TSurPgiQK6Ra9mNXAOcT07ONWeSIzq4jRs31vbt27V9+3Zt3LhRERERevDBB82rxjt27NCBAwfk6+trfmEIDAzUtWvXdPDgwTT1HTx4UImJibr//vvNfXny5NG9996rvXv3SpLq16+vS5cuadu2bVq7dq0aNmyoRo0amV8E1q5dm+Zhx/+WnJys2bNnq2vXrua+rl27atasWeb0rcjISC1cuNBcpGPu3Lnq1KmT+QVnx44dGjVqlPmZ8ufPr169eunkyZO6cuWfqR533313mvN/+OGHqlWrlgoXLqz8+fNr2rRpOnr0qCQpJiZGp0+f1r333muWd3d3V61atczXBw4c0JUrV9SsWTOH88+ZM+e6P9NUL7/8smJiYszt2LFjNyzrLCa9Uky/rvDT+K8OqHDIP/eCnTjsqW9nFlb/d47prvpxKlvlmroOOK3y1a/o21mFblhfxZpXlJxk0+ljnjcsA9cXe95dyUlSwH+uahcolKQLZ11mAXvcALlGrmUncg1ZgVyDs8gRfxvz5cvnMGVq+vTp8vf318cff6wxY8YoLi5OtWrV0ty5c9O8t3Dhwrd1zoCAAIWHh2vNmjWKjo5Ws2bN1KBBA3Xs2FF//PGH9u/ff9Mr3T/88IP++usvdezY0WF/cnKyVq1apWbNmqlVq1YyDEPLli3TPffco6ioKL377rtm2bi4OI0cOVLt2rVLU7+39z9XUvPly+dw7IsvvtDAgQM1YcIE1alTR76+vnrrrbf066+/pvvzp96TtWzZMhUrVszhmJfXjR8w7eXlddPjzsQwpA9fLaafl/vrra8OKLik47TA+KspX9jc3AyH/e7uhoybrJfx524fubkZCijEdJ3cLCnRTft35tVd9S4perm/JMlmM1SjXpy+nVUwm1vnGgyl/Du2oh6rkWvkWnYg15CVyLWsl5NzzZnkiA7uf9lsNrm5uenq1ZRl5WvWrKn58+erSJEi8vPzu+X7y5YtK09PT23YsEGlSpWSlHK/06ZNm/Tiiy+a5Ro2bKjVq1dr48aNGjt2rAIDA1WpUiWNHTtWRYsWVYUKFW54jhkzZqhTp0569dVXHfaPHTtWM2bMULNmzeTt7a127dpp7ty5OnDggCpWrKiaNWuaZWvWrKl9+/bd9H6o69mwYYPq1q2r5557ztz376vT/v7+CgoK0qZNm9SgQQNJKV9Qtm7dqho1akiSKleuLC8vLx09evSWU9Zc1aRXimv1ogIaMfNP+eS36/yZlH8O+XyT5eVjqES5awoJjdf7L5VQr2En5FcgST8v99fWdb4aNedPSdKezXn1+7Z8Cq97SXnz27V3Sz5NGR6iB9pfcFjU48gfXkpKcNOlC+66ctlNB39LWa2ybNV/Hp2Quu/qZTfFnHPXwd985OFpV6kKPKbDWX09rZAGvndMf+zIq33b8qptr7PyzmvXj18EZnfTXIJdNtmU+WlYdgvquBVy7ebINWuQa8hq5FrWcqZcy8lyRAc3Pj5ep06dkiRduHBBkyZNUlxcnFq1aiUpZUrUW2+9pdatW2vUqFEqXry4jhw5oq+//lovvfSSihcv7lBfvnz59Oyzz2rQoEEKDAxUyZIlNX78eF25ckU9e/Y0yzVq1EgTJ05U4cKFFRYWZu6bNGmSHnvssRu29+zZs1qyZIm+/fZbVa1a1eFYt27d1LZtW50/f16BgYGKjIzUww8/rN27dztM+5KkYcOG6eGHH1bJkiX16KOPys3NTTt27NBvv/2mMWPG3PD85cuX15w5c/TDDz8oNDRUn376qTZt2qTQ0FCzTN++fTVu3DiVK1dOYWFhmjhxoi5cuCCbLeUvvK+vrwYOHKh+/frJbrerXr16iomJ0YYNG+Tn56fu3bvf8PyuYunslOlYg9qXd9g/4N2jat7xvDzySGM+PagZr4doePdQXb3sppDQBA18/6jubZLy3L88nobWfhOgzyYEKzHBpuASCWr39Fm1e/qsQ51Du5bV6eP/TO16rnlFSdIPJ7an2SdJ+3fm1epFgQoqnqA5G/dY+rlx56z9toD8Cyar26BTKlA4SX/u9tGrkaG6+Hee7G4ashi5Rq5lB3INWY1cgzPIER3c5cuXq2jRopJSAiosLExffvmlea9Q3rx5tW7dOg0ePFjt2rXTpUuXVKxYMTVp0uSGV77feOMN2e12Pf7447p06ZLuvvtu/fDDDypQoIBZpn79+rLb7Q5Xehs1aqT333//pvcpzZkzR/ny5VOTJk3SHGvSpIl8fHz02Wef6fnnn9cDDzygwMBA7du3T126dHEoGxERoaVLl2rUqFF68803lSdPHoWFhempp5666c/r//7v/7Rt2zZ17NhRNptNnTt31nPPPafvv//eLDN48GCdOnVK3bp1k7u7u55++mlFRETI3d3dLDN69GgVLlxY48aN059//qmAgADVrFlTr7zyyk3P7yr+HcI3UqxMgoZNP3zD4+WrX9X7S/ffsp70hHl62gPn8+3MQvp25o3vbcPts2qlyKxYjINcI9eyA7mGO4Fcyzo5Odecic0wrJjpjZzObrerUqVK6tChg0aPHm1ZvbGxsfL399eFP8rIzzdHrFmGHCQipEZ2NwE5TJKRqDX6RjExMemamns9qb93qn85UO55M3/vZPKVeO187O1MtQl3HrmG7ECu4b/ItZwnR4zgwnpHjhzRjz/+qIYNGyo+Pl6TJk3SoUOH0lxtBwBnZTdssllwldqey690OwtyDYCrI9esQQfXRbm5uWnWrFkaOHCgDMNQ1apVtXLlSlWqVCm7mwYAljAMi1abZB6TUyDXALg6cs0adHBdVIkSJbRhw4bsbgYAAJYg1wAA6UEHFwDglFiMAwDgSsg1a9DBBQA4Jb4IAABcCblmDZYHBAAAAAC4BEZwAQBOidUmAQCuhFyzBh1cAIBTYrVJAIArIdeswRRlAAAAAIBLYAQXAOCUUq50W7EYhwWNAQAgk8g1azCCCwAAAABwCYzgAgCcEo9TAAC4EnLNGnRwAQBOyfjfZkU9AABkN3LNGkxRBgAAAAC4BEZwAQBOialcAABXQq5Zgw4uAMA5MZcLAOBKyDVLMEUZAAAAAOASGMEFADgni6ZyKZdP5QIA5BDkmiXo4AIAnJJhWPMweyvqAAAgs8g1azBFGQAAAADgEhjBBQA4JVabBAC4EnLNGnRwAQDOybBZc59RLv8iAADIIcg1SzBFGQAAAADgEhjBBQA4JRbjAAC4EnLNGozgAgAAAABcAiO4AADnZPxvs6IeAACyG7lmCTq4AACnxGqTAABXQq5ZgynKAAAAAACXwAguAMB55fJpWAAAF0OuZVq6Orjffvttuit85JFHbrsxAACkl9VTub777jvlzZs3zXFyDQBwJzBF2Rrp6uC2adMmXZXZbDYlJydnpj0AAGSLzp07y2Zz/FJArgEA4FzS1cG12+1Z3Q4AADLG4tUmY2Ji5OfnZ0GFAADcBlZRtkSmFpm6du2aVe0AACCDbBZuKcg1AED2sT7XcqMMd3CTk5M1evRoFStWTPnz59eff/4pSRo6dKhmzJhheQMBALgTKlasSK4BAODkMtzBHTt2rGbNmqXx48fL09PT3F+1alVNnz7d0sYBAHBDhoWbpFGjRpFrAIDsY3Gu5VYZ7uDOmTNH06ZNU2RkpNzd3c394eHh+v333y1tHAAAd0rHjh3JNQAAnFyGn4P7119/qVy5cmn22+12JSYmWtIoAABuKYsX4yDXAAB3FItMWSLDI7iVK1dWVFRUmv1fffWV7rrrLksaBQDALRk267brINcAAHdUFudabpHhEdxhw4ape/fu+uuvv2S32/X1119r3759mjNnjpYuXZoVbQQAIMu9++675BoAAE4uwyO4rVu31pIlS7Ry5Urly5dPw4YN0969e7VkyRI1a9YsK9oIAEAahmHdJklr1qwh1wAA2cbqXMutMjyCK0n169fXihUrrG4LAADpZ/G9St988438/PwsqBAAgNvAPbiWuK0OriRt3rxZe/fulZRyX26tWrUsaxQAAHfa1q1bdezYMUnkGgAAzirDHdzjx4+rc+fO2rBhgwICAiRJFy9eVN26dfXFF1+oePHiVrcRAIC0rFpI4391NG7cWAUKFJBErgEAsoHFuZZbZfge3KeeekqJiYnau3evzp8/r/Pnz2vv3r2y2+166qmnsqKNAACkYTOs26SUmUnkGgAgu1ida7lVhkdw165dq59//lkVK1Y091WsWFETJ05U/fr1LW0cAAB3Svny5c3/J9cAAHBOGe7glihR4roPvk9OTlZISIgljQIA4JayeDEOcg0AcEexyJQlMjxF+a233lLfvn21efNmc9/mzZv1wgsv6O2337a0cQAA3NDtPPj+RptSFplKdbNcW7dunVq1aqWQkBDZbDYtXrzY4XiPHj1ks9kcthYtWjiUOX/+vCIjI+Xn56eAgAD17NlTcXFxDmV27typ+vXry9vbWyVKlND48ePTtOXLL79UWFiYvL29Va1aNX333Xe3+9MEAGQ3i3MtvVwt19LVwS1QoIACAwMVGBioJ554Qtu3b1ft2rXl5eUlLy8v1a5dW1u3btWTTz6Z4QYAAJATPPDAA+nKtcuXLys8PFwffvjhDetq0aKFTp48aW6ff/65w/HIyEjt3r1bK1as0NKlS7Vu3To9/fTT5vHY2Fg1b95cpUqV0pYtW/TWW29pxIgRmjZtmlnm559/VufOndWzZ09t27ZNbdq0UZs2bfTbb79Z8NMAAOQWrpZr6Zqi/N5772WoUgAAspzFU7k++ugj+fj43LL4gw8+qAcffPCmZby8vBQcHHzdY3v37tXy5cu1adMm3X333ZKkiRMn6qGHHtLbb7+tkJAQzZ07VwkJCfrkk0/k6empKlWqaPv27XrnnXfMLwzvv/++WrRooUGDBkmSRo8erRUrVmjSpEmaMmVKej89ACCnyKYpyq6Wa+nq4Hbv3j3dFQIA4Iwefvhh+fn5ma9TR3Nvx5o1a1SkSBEVKFBADzzwgMaMGaOCBQtKkqKjoxUQEGB+CZCkpk2bys3NTb/++qvatm2r6OhoNWjQQJ6enmaZiIgIvfnmm7pw4YIKFCig6Oho9e/f3+G8ERERaaaWAQByp9jYWIfXuSXXMnwP7r9du3ZNsbGxDhsAAHeEYeGmlEUU/f39zW3EiBG3lWstWrTQnDlztGrVKr355ptau3atHnzwQSUnJ0uSTp06pSJFiji8x8PDQ4GBgTp16pRZJigoyKFM6utblUk9DgBwMlmca+PGjbutZjlbrmV4FeXLly9r8ODBWrBggc6dO5fmeOoHBQAgS1k8latAgQK6ePGiufvNN9/U+PHjM5xrnTp1Mv+/WrVqql69usqWLas1a9aoSZMmFjQYAOCSLM61Y8eOpZmZdDucLdcyPIL70ksv6aefftLkyZPl5eWl6dOna+TIkQoJCdGcOXOyoo0AAGS5999/X97e3poxY4ZGjRqlYsWKWZJrZcqUUaFChXTgwAFJUnBwsM6cOeNQJikpSefPnzfvbwoODtbp06cdyqS+vlWZG90jBQDIXfz8/By22+3g/ldOz7UMd3CXLFmijz76SO3bt5eHh4fq16+v1157Ta+//rrmzp2b0eoAALg9Fj9OoXXr1lmSa8ePH9e5c+dUtGhRSVKdOnV08eJFbdmyxSzz008/yW63q3bt2maZdevWOTx3fsWKFapYsaIKFChgllm1apXDuVasWKE6depkus0AgGyQTY8JyqicnmsZ7uCeP39eZcqUkZRyVeD8+fOSpHr16mndunUZrQ4AgNtiM6zbUqUn1+Li4rR9+3Zt375dknTo0CFt375dR48eVVxcnAYNGqRffvlFhw8f1qpVq9S6dWuVK1dOERERkqRKlSqpRYsW6tWrlzZu3KgNGzaoT58+6tSpk0JCQiRJXbp0kaenp3r27Kndu3dr/vz5ev/99x0W33jhhRe0fPlyTZgwQb///rtGjBihzZs3q0+fPln0EwcAZKWsyLX0cLVcy3AHt0yZMjp06JAkKSwsTAsWLJCUMrIbEBCQ0eoAAMgx0pNrmzdv1l133aW77rpLktS/f3/dddddGjZsmNzd3bVz50498sgjqlChgnr27KlatWopKirKYWrY3LlzFRYWpiZNmuihhx5SvXr1HJ4F6O/vrx9//FGHDh1SrVq1NGDAAA0bNszhmYJ169bVvHnzNG3aNIWHh+urr77S4sWLVbVq1Sz66QAAXJGr5ZrNMIwM9fHfffddubu76/nnn9fKlSvVqlUrGYahxMREvfPOO3rhhRcy1AA4t9jYWPn7++vCH2Xk55upRbnhgiJCamR3E5DDJBmJWqNvFBMT47DwRUak/t4p+eYYufl4Z7pN9qvXdHTwa4qJidHGjRvJtVyOXMPNkGv4r5yea7fbJmeW4VWU+/XrZ/5/06ZN9fvvv2vLli0qV66cqlevbmnjAAC4k8g1AACcW4Y7uP9VqlQplSpVyoq2AACQ7cg1AACcV7o6uB988EG6K3z++edvuzEAAKSXTRlfSONG9UjSlClT5O2ddmoYuQYAuBOszrXcKl0d3HfffTddldlsNr4I5FJtK1SThy1PdjcDOYxbeKXsbgJyGLfkeGlXdrfi+j788EO5uTnec0mu5V7kGq6HXMN/5eRcy63S1cFNXTUZAIAcw6pn/f2vjl27duXKxTgAADmExbmWW2X6HlwAALKF8b/NinoAAMhu5JolWP8eAAAAAOASGMEFADgnrnQDAFwJuWYJOrgAAKdkMyxabTKXfxEAAOQM5Jo1mKIMAAAAAHAJt9XBjYqKUteuXVWnTh399ddfkqRPP/1U69evt7RxAADckGHhJqlXr17kGgAg+1ica7lVhju4CxcuVEREhHx8fLRt2zbFx8dLkmJiYvT6669b3kAAAK7L4i8C3t7e5BoAIPvQwbVEhju4Y8aM0ZQpU/Txxx8rT55/HoB+//33a+vWrZY2DgCAO2XixInkGgAATi7Di0zt27dPDRo0SLPf399fFy9etKJNAADcUlYvxkGuAQDuJBaZskaGR3CDg4N14MCBNPvXr1+vMmXKWNIoAABuybBZt10HuQYAuKOyONdyiwx3cHv16qUXXnhBv/76q2w2m06cOKG5c+dq4MCBevbZZ7OijQAAZLnNmzeTawAAOLkMT1EeMmSI7Ha7mjRpoitXrqhBgwby8vLSwIED1bdv36xoIwAAaVm1kMb/6mjVqpWuXr1KrgEAsofFuZZbZbiDa7PZ9Oqrr2rQoEE6cOCA4uLiVLlyZeXPnz8r2gcAwB1x5MgRnTlzhlwDAMCJZbiDm8rT01OVK1e2si0AAKSb1YtxkGsAgOzEIlPWyHAHt3HjxrLZbnzj8k8//ZSpBgEAkC4WT+V6+OGH5eGRNhbJNQDAHcEUZUtkuINbo0YNh9eJiYnavn27fvvtN3Xv3t2qdgEAcEdVrVpVXl5e5BoAAE4swx3cd99997r7R4wYobi4uEw3CACAdLFoKlfqle433nhDfn5+5m5yDQBwR1mca7lVhh8TdCNdu3bVJ598YlV1AADcnGHhdh3kGgDgjsriXMstLOvgRkdHy9vb26rqAADIVuQaAADOJ8NTlNu1a+fw2jAMnTx5Ups3b9bQoUMtaxgAADdl8WIckZGRypMnD7kGAMgeLDJliQx3cP39/R1eu7m5qWLFiho1apSaN29uWcMAALgZqx+n4OfnJ09PT3INAJAteEyQNTLUwU1OTtYTTzyhatWqqUCBAlnVJgAA7rjJkyc7LDIFAACcT4buwXV3d1fz5s118eLFLGoOAAAAAAC3J8OLTFWtWlV//vlnVrQFAAAAAIDbluEO7pgxYzRw4EAtXbpUJ0+eVGxsrMMGAMAdYfHjFJYvX06uAQCyD48JskS678EdNWqUBgwYoIceekiS9Mgjj8hms5nHDcOQzWZTcnKy9a0EAOA/rF6Mo2PHjnJz++e6L7kGALiTWGTKGunu4I4cOVLPPPOMVq9enZXtAQAgWyxbtkz58uXL7mYAAIBMSHcH1zBSLgU0bNgwyxoDAECGWHiVul69eqyiDADIXrl89NUKGXpM0L+nJAMAkK2sus+ILxMAgJyAXLNEhjq4FSpUuGUn9/z585lqEAAA2aFUqVLXzThyDQAA55GhDu7IkSPl7++fVW0BACDdrF6MY9y4cfLx8cl8hQAA3AYWmbJGhjq4nTp1UpEiRbKqLQAApJ/FU7m6dOnCPbgAgOzDFGVLpPs5uNx/CwAAAADIyTK8ijIAADkBU7kAAK6EXLNGuju4drs9K9sBAEDGMJULAOBKyDVLpHuKMgAAAAAAOVmGFpkCACDH4Eo3AMCVkGuWYAQXAAAAAOASGMEFADglFuMAALgScs0adHABAM6JqVwAAFdCrlmCKcoAAAAAAJfACC4AwDlxpRsA4ErINUvQwQUAOCXuVQIAuBJyzRpMUQYAAAAAuARGcAEAzompXAAAV0KuWYIOLgDAKTGVCwDgSsg1azBFGQAAAADgEhjBBQA4J6ZyAQBcCblmCUZwAQAAAAAugRFcAIBz4ko3AMCVkGuWoIMLAHBKtv9tVtQDAEB2I9eswRRlAAAAAIBLYAQXAOCcmMoFAHAl5Jol6OACAJwSzwsEALgScs0aTFEGACAD1q1bp1atWikkJEQ2m02LFy92OG4YhoYNG6aiRYvKx8dHTZs21f79+x3KnD9/XpGRkfLz81NAQIB69uypuLg4hzI7d+5U/fr15e3trRIlSmj8+PFp2vLll18qLCxM3t7eqlatmr777jvLPy8AwLW5Wq7RwQUAOCfDwi0DLl++rPDwcH344YfXPT5+/Hh98MEHmjJlin799Vfly5dPERERunbtmlkmMjJSu3fv1ooVK7R06VKtW7dOTz/9tHk8NjZWzZs3V6lSpbRlyxa99dZbGjFihKZNm2aW+fnnn9W5c2f17NlT27ZtU5s2bdSmTRv99ttvGftAAICcgVyzJNdshmHk8kFsZEZsbKz8/f3VSK3lYcuT3c1BDuMWXim7m4AcJik5Xj/tGq+YmBj5+fndVh2pv3eq/N/rcvf0znSbkhOuaffUV26rTTabTYsWLVKbNm0kpVzlDgkJ0YABAzRw4EBJUkxMjIKCgjRr1ix16tRJe/fuVeXKlbVp0ybdfffdkqTly5froYce0vHjxxUSEqLJkyfr1Vdf1alTp+Tp6SlJGjJkiBYvXqzff/9dktSxY0ddvnxZS5cuNdtz3333qUaNGpoyZUpmfyy5FrmGmyHX8F/kWs7LNUZwAQBQyheMf2/x8fEZruPQoUM6deqUmjZtau7z9/dX7dq1FR0dLUmKjo5WQECA+SVAkpo2bSo3Nzf9+uuvZpkGDRqYXwIkKSIiQvv27dOFCxfMMv8+T2qZ1PMAAHK33JprdHABAE4pdTEOKzZJKlGihPz9/c1t3LhxGW7TqVOnJElBQUEO+4OCgsxjp06dUpEiRRyOe3h4KDAw0KHM9er49zluVCb1OADAuZBr1uQaqygDAJyTxY9TOHbsmMNULi8vLwsqBwAgncg1S9DBBSzQqsffevTZMwosnKQ/9/joo9eKad/2vNndLFikZcv9atnygIKCLkuSjhzx17x5VbR5c4gkqWjRS3rqqe2qUuVv5cmTrM2bi2ry5Fq6ePGf+2hmzfpWQUFXHOr95JPq+vLLypKkatVOq23bP1Sx4jnlzZuov/7y1cKFYVq9uvSd+ZCQn5/fbd8/lSo4OFiSdPr0aRUtWtTcf/r0adWoUcMsc+bMGYf3JSUl6fz58+b7g4ODdfr0aYcyqa9vVSb1OJAZ5JprI9dyh9yaazl+ivKaNWtks9l08eLF7G4KcF0NH7mgp4ef0Nx3gtU7ooL+3OOtsfP+lH/BxOxuGizy9995NXNmuPr2jdDzzzfXjh1BGjZsvUqWjJGXV5LGjl0jw7BpyJDGGjCgqTw87BoxYp1s/3kQ3Zw5VdWlS2tz+/bbCuaxypX/1qFD/hozpp6ee66FVqwI1YABv+ree/+60x/XaVg9lcsKoaGhCg4O1qpVq8x9sbGx+vXXX1WnTh1Jkru7uy5evKg1a9aYZX766SfZ7XbVrl1bklSnTh2tW7dOiYn//B5ZsWKFKlasqAIFCphl/n2e1DKp5wFuF7nm+si1nMlZc61OnTq6ePGitmzZYpbJzlzLER3c6Ohoubu7q2XLltndlNvi7O1H5rR7+m8tnxeoH+cH6uh+b30wuLjir9oU0fl8djcNFvn112LatClEJ0746q+//DR7dnVdu+ahsLC/VaXKWRUpckXvvFNbhw8H6PDhAE2YUFvly59XeLjjVcirV/PowgUfc4uP/2cSzfz5VfTpp9W1d28hnTzpq2++qagtW4J1//3H7/THxS3ExcVp+/bt2r59u6SUBTi2b9+uo0ePymaz6cUXX9TIkSPl5uam+vXrq1u3bgoJCTFXpCxVqpQk6YUXXtDGjRu1YcMG9enTR506dVJISMroSZcuXeTp6amePXtq9+7dmj9/vt5//33179/fbMcLL7yg5cuXa8KECfr99981YsQIbd68WX369Mn0ZyTXcjdyzfWRa/i39OTamDFj9O2332rXrl1pcq1SpUpq0aKFevXqlSNyLUd0cGfMmKG+fftq3bp1OnHiRHY3J8PuZPsNw1BSUlKWngPp55HHrvLVr2hrlK+5zzBs2hblq8q1rtzknXBWbm52NWx4RN7eSfr990LKk8cuSUpM/OfXaWKiuwzDpipVzjq897HH9mr+/K81adJytW+/V25u9pueK1++RF265HnTMrlaNj0vcPPmzbrrrrt01113SZL69++vu+66S8OGDZMkvfTSSypbtqx8fHy0fv16nTt3TsuXL5e3t+OjH8qXL68mTZrooYceUr169RyeBejv768ff/xRhw4dUq1atTRgwAANGzbM4ZmCdevW1bx58zRt2jSFh4frq6++0uLFi1W1atWMfaDrINdyL3It9yHXcpAcnGt9+/bV008/rXvuuUdxcXFpcm3u3LkKCwvLEbmW7R3cuLg4zZ8/X88++6xatmypWbNmXbfchg0bVL16dXl7e+u+++5L88DfhQsXqkqVKvLy8lLp0qU1YcIE89grr7xiDo//W3h4uEaNGmW+nj59uipVqiRvb2+FhYXpo48+ylT7u3Tpoo4dOzqUT0xMVKFChTRnzhxJkt1u17hx4xQaGiofHx/zDzNV6hTt77//XrVq1ZKXl5fWr1+vgwcPqnXr1goKClL+/Pl1zz33aOXKlQ7nOnnypFq2bCkfHx+FhoZq3rx5Kl26tN577z2zzMWLF/XUU0+pcOHC8vPz0wMPPKAdO3bc8nMjhV9gstw9pItnHW9nv/C3hwoU5gubKyld+qK+/vorffvtl+rTZ7NGj66no0f99fvvBXXtmoeefHKHvLyS5OWVpKee2i53d0OBgf88AP2bbyrojTfqaPDgB/Tdd+XUseMe9ex5439r9esfVYUK5/Xjj2XuxMdzStk1latRo0YyDCPNlvr7//Llyzpw4IC2bt2qjh076sEHH1SFChXS1PPEE08oNDRU8fHx2rNnjw4fPuxwfP/+/Tp//rwMw5CHh4c8PP75PZOaa4899pj27dun+Ph4/fbbb3r55ZfJNXItU8i13INcy3lyaq7ZbDaNGjVKp06d0rVr17Ry5co0uRYYGKh58+bp0qVLiomJ0SeffKL8+fM7lKlevbqioqJ07do1HT9+XIMHD07Tlv/m2kMPPZSxD6Mc0MFdsGCBwsLCVLFiRXXt2lWffPKJDCPtn8qgQYM0YcIEbdq0SYULF1arVq3MOdxbtmxRhw4d1KlTJ+3atUsjRozQ0KFDzT+UyMhIbdy4UQcPHjTr2717t3bu3KkuXbpISrnqMGzYMI0dO1Z79+7V66+/rqFDh2r27Nm33f7IyEgtWbJEcXFxZvkffvhBV65cUdu2bSVJ48aN05w5czRlyhTt3r1b/fr1U9euXbV27VqH8wwZMkRvvPGG9u7dq+rVqysuLk4PPfSQVq1apW3btqlFixZq1aqVjh49ar6nW7duOnHihNasWaOFCxdq2rRpaW4Af+yxx3TmzBl9//332rJli2rWrKkmTZro/PnrT0OKj49P80wtIDc4ftxXvXtH6MUXm2nZsnIaMOBXlSwZo5gYb73+el3Vrv2Xvv76Ky1cuFD58iVo//4CMgyb+f5Fi8K0a1eQDh8O0HffldP06XfpkUf+UJ48yWnOVb36afXv/6vef/8eHT3qfyc/JixArpFrgDMg1+Cqsn0V5RkzZqhr166SpBYtWigmJkZr165Vo0aNHMoNHz5czZo1kyTNnj1bxYsX16JFi9ShQwe98847atKkiYYOHSpJqlChgvbs2aO33npLPXr0UJUqVRQeHq558+aZZebOnavatWurXLlyZv0TJkxQu3btJKXcUL1nzx5NnTpV3bt3v632R0REKF++fFq0aJEef/xxSdK8efP0yCOPyNfXV/Hx8Xr99de1cuVK8+bpMmXKaP369Zo6daoaNmxonmfUqFHm55dSrpKEh4ebr0ePHq1Fixbp22+/VZ8+ffT7779r5cqV2rRpk/nQ5enTp6t8+fLme9avX6+NGzfqzJkz5rLhb7/9thYvXqyvvvrKYcpAqnHjxmnkyJE3/HnkNrHn3ZWcJAX856p2gUJJunA22/95wUJJSe46eTJlyt6BA4GqUOG8Wrf+QxMn3qOtW4vqySdbyc8vXsnJNl2+7Km5cxfr5Ml8N6zv998LysPDUJEil/XXX/+scFit2hmNGBGladPu0qpVoVn+uZzabUzDumE9FiLXyDVnRq7lHuRaDpRDc83ZZOsI7r59+7Rx40Z17txZUsoDgTt27KgZM2akKfvv1bMCAwNVsWJF7d27V5K0d+9e3X///Q7l77//fu3fv1/JySlXkSIjIzVv3jxJKff7fP7554qMjJSUMp3s4MGD6tmzp/Lnz29uY8aMcbg6ntH2e3h4qEOHDpo7d655nm+++cY874EDB3TlyhU1a9bM4bxz5sxJc97UME8VFxengQMHqlKlSgoICFD+/Pm1d+9e80r3vn375OHhoZo1a5rvKVeunLlKmSTt2LFDcXFxKliwoMP5Dx06dMPP/fLLLysmJsbcjh07dsOfT26QlOim/Tvz6q56l8x9NpuhGvXitGcLj1NwZTabkeYqdWysly5f9lR4+GkFBFzTL78Uu+H7y5a9oORkm2Ji/rl/pVq10xo5cp0++SRc339fLsva7jKy6V6lmyHXyDVnR67lXuRaDpADc80ZZeuluBkzZigpKclcXUtKCWkvLy9NmjRJ/v7WTWHo3LmzBg8erK1bt+rq1as6duyYeR9R6lSrjz/+OM29uu7u7plqf2RkpBo2bKgzZ85oxYoV8vHxUYsWLRzOu2zZMhUr5vgL478PYs6Xz/GK2cCBA7VixQq9/fbbKleunHx8fPToo48qISEh3T+TuLg4FS1a1OFRFakCAgKu+x4vL69c85Do9Pp6WiENfO+Y/tiRV/u25VXbXmflndeuH78IzO6mwSI9euzQ5s1FdeZMXuXNm6RGjY6oevUzeu21RpKkZs3+1LFjfoqJ8VJY2Dk988xWLVpU0byCHRb2t8LCzmnHjiK6ejWPKlX6W08/vU2rV5dSXFzKYhvVq6d8CVi8uII2bCiuAgWuSkpZ5CMujn9zzoJcI9dcAbnm+sg1uLJs6+AmJSVpzpw5mjBhgpo3b+5wrE2bNvr888/1zDPPmPt++eUXlSxZUpJ04cIF/fHHH6pUqZKklKWpN2zY4FDHhg0bVKFCBTPIixcvroYNG2ru3Lm6evWqmjVrpiJFikiSgoKCFBISoj///NO8Cm1V++vWrasSJUpo/vz5+v777/XYY48pT548kqTKlSvLy8tLR48edZi2lR4bNmxQjx49zHue4uLiHBYoqVixopKSkrRt2zbVqlVLUsqV9QsXLphlatasqVOnTsnDw0OlS5fO0Pnxj7XfFpB/wWR1G3RKBQon6c/dPno1MlQX/86T3U2DRQICrmngwF8UGHhNly/n0aFDAXrttUbati3lwePFi19Sjx475euboNOn8+mLLypr0aKK5vsTE93UsOFRRUb+pjx57Dp9Op8WLaroUKZp00Py9k5Wp0571anTXnP/zp2FNXhwkzv3YZ2IVc/6s+p5geQaueYqyDXXR67lTDkt15xVtnVwly5dqgsXLqhnz55prmi3b99eM2bMcPgiMGrUKBUsWFBBQUF69dVXVahQIfPZSwMGDNA999yj0aNHq2PHjoqOjtakSZPSrBYZGRmp4cOHKyEhQe+++67DsZEjR+r555+Xv7+/WrRoofj4eG3evFkXLlxweD7T7bS/S5cumjJliv744w+tXr3aLOfr66uBAweqX79+stvtqlevnmJiYrRhwwb5+fnd9B6p8uXL6+uvv1arVq1ks9k0dOhQ2e3/LM0eFhampk2b6umnn9bkyZOVJ08eDRgwQD4+PrLZUhYIaNq0qerUqaM2bdpo/PjxqlChgk6cOKFly5apbdu2aaaP4ca+nVlI384slN3NQBZ57720q7D/28yZ4Zo5M/yGxw8eDFS/fs1ueFyS3nnnPr3zzn231b5cK4fdq0SukWuuhFxzbeRaDpXDcs1ZZds9uDNmzFDTpk2vO12rffv22rx5s3bu3Gnue+ONN/TCCy+oVq1aOnXqlJYsWSJPz5QpEDVr1tSCBQv0xRdfqGrVqho2bJhGjRqlHj16ONT76KOP6ty5c7py5Yr5JSLVU089penTp2vmzJmqVq2aGjZsqFmzZik09Po3w2ek/ZGRkdqzZ4+KFSuW5p6q0aNHa+jQoRo3bpz5kORly5bd8Lyp3nnnHRUoUEB169ZVq1atFBER4XBfkiTNmTNHQUFBatCggdq2batevXrJ19fXfGaVzWbTd999pwYNGuiJJ55QhQoV1KlTJx05ckRBQUE3PT8AwBG5loJcAwBkJ5txvWcXwCUdP35cJUqU0MqVK9WkiTVTQ2JjY+Xv769Gai0PG1OX4MgtvFJ2NwE5TFJyvH7aNV4xMTHy8/O79RuuI/X3To3Hx8rd0/vWb7iF5IRr2v7pq5lqE7IHuYY7jVzDf5FrOQ/rvbuwn376SXFxcapWrZpOnjypl156SaVLl1aDBg2yu2kAAGQYuQYAuBU6uC4sMTFRr7zyiv7880/5+vqqbt26mjt3rrkYCAA4Ne5VynXINQAujVyzBB1cFxYREaGIiIjsbgYAZAlWm8x9yDUAroxcs0a2LTIFAAAAAICVGMEFADgnpnIBAFwJuWYJOrgAAKfEVC4AgCsh16zBFGUAAAAAgEtgBBcA4JyYygUAcCXkmiXo4AIAnBJTuQAAroRcswZTlAEAAAAALoERXACAc2IqFwDAlZBrlqCDCwBwWrl9GhYAwLWQa5nHFGUAAAAAgEtgBBcA4JwMI2Wzoh4AALIbuWYJRnABAAAAAC6BEVwAgFPicQoAAFdCrlmDDi4AwDmx2iQAwJWQa5ZgijIAAAAAwCUwggsAcEo2e8pmRT0AAGQ3cs0adHABAM6JqVwAAFdCrlmCKcoAAAAAAJfACC4AwCmx2iQAwJWQa9aggwsAcE6GYc3D7K2oAwCAzCLXLMEUZQAAAACAS2AEFwDglJjKBQBwJeSaNRjBBQAAAAC4BEZwAQDOiccpAABcCblmCTq4AACnxFQuAIArIdeswRRlAAAAAIBLYAQXAOCceJwCAMCVkGuWoIMLAHBKTOUCALgScs0aTFEGAAAAALgERnABAM6J1SYBAK6EXLMEHVwAgFNiKhcAwJWQa9ZgijIAAAAAwCUwggsAcE52I2Wzoh4AALIbuWYJOrgAAOfEvUoAAFdCrlmCKcoAAAAAAJfACC4AwCnZZNFiHJmvAgCATCPXrMEILgAAAADAJTCCCwBwToaRsllRDwAA2Y1cswQdXACAU+J5gQAAV0KuWYMpygAAAAAAl8AILgDAOfE4BQCAKyHXLEEHFwDglGyGIZsF9xlZUQcAAJlFrlmDKcoAAAAAAJfACC4AwDnZ/7dZUQ8AANmNXLMEHVwAgFNiKhcAwJWQa9ZgijIAAAAAwCUwggsAcE6sNgkAcCXkmiUYwQUAAAAAuAQ6uAAA52QY1m0ZMGLECNlsNoctLCzMPH7t2jX17t1bBQsWVP78+dW+fXudPn3aoY6jR4+qZcuWyps3r4oUKaJBgwYpKSnJocyaNWtUs2ZNeXl5qVy5cpo1a9Zt/6gAAE6AXLMEHVwAgFOyGdZtGVWlShWdPHnS3NavX28e69evn5YsWaIvv/xSa9eu1YkTJ9SuXTvzeHJyslq2bKmEhAT9/PPPmj17tmbNmqVhw4aZZQ4dOqSWLVuqcePG2r59u1588UU99dRT+uGHHzL1MwMA5FzkmjW4BxcAgAzy8PBQcHBwmv0xMTGaMWOG5s2bpwceeECSNHPmTFWqVEm//PKL7rvvPv3444/as2ePVq5cqaCgINWoUUOjR4/W4MGDNWLECHl6emrKlCkKDQ3VhAkTJEmVKlXS+vXr9e677yoiIuKOflYAgOtzpVxjBBcA4JwsnsoVGxvrsMXHx9/w1Pv371dISIjKlCmjyMhIHT16VJK0ZcsWJSYmqmnTpmbZsLAwlSxZUtHR0ZKk6OhoVatWTUFBQWaZiIgIxcbGavfu3WaZf9eRWia1DgCACyLXLEEHFwDglGx26zZJKlGihPz9/c1t3Lhx1z1v7dq1NWvWLC1fvlyTJ0/WoUOHVL9+fV26dEmnTp2Sp6enAgICHN4TFBSkU6dOSZJOnTrl8CUg9XjqsZuViY2N1dWrVzP7owMA5EDkmjWYogwAgKRjx47Jz8/PfO3l5XXdcg8++KD5/9WrV1ft2rVVqlQpLViwQD4+PlneTgAA0iO35hojuAAA52TxVC4/Pz+H7UZfBP4rICBAFSpU0IEDBxQcHKyEhARdvHjRoczp06fNe5uCg4PTrD6Z+vpWZfz8/JzyywYAIB3INUvQwQUAOCfDwi0T4uLidPDgQRUtWlS1atVSnjx5tGrVKvP4vn37dPToUdWpU0eSVKdOHe3atUtnzpwxy6xYsUJ+fn6qXLmyWebfdaSWSa0DAOCCyDVLMEUZmWL87wpRkhIz/Y8Jrsct+caLGSB3Svrf34nU3x3OaODAgWrVqpVKlSqlEydOaPjw4XJ3d1fnzp3l7++vnj17qn///goMDJSfn5/69u2rOnXq6L777pMkNW/eXJUrV9bjjz+u8ePH69SpU3rttdfUu3dv8+r6M888o0mTJumll17Sk08+qZ9++kkLFizQsmXLsvOj5wrkGm6GXMN/kWs5L9fo4CJTLl26JElar++yuSXIkXZldwOQU126dEn+/v6ZqsNmGLJZ8IUio3UcP35cnTt31rlz51S4cGHVq1dPv/zyiwoXLixJevfdd+Xm5qb27dsrPj5eERER+uijj8z3u7u7a+nSpXr22WdVp04d5cuXT927d9eoUaPMMqGhoVq2bJn69eun999/X8WLF9f06dN5RNAdQK7hpsg13AC5lnNyzWY48+UGZDu73a4TJ07I19dXNpstu5uTrWJjY1WiRIk0N/QD/N34h2EYunTpkkJCQuTmdnt3ycTGxsrf31+Na70sDw/vTLcpKemaVm8Zp5iYmFz/5wNy7d/43YUb4e/GP8i1nIcRXGSKm5ubihcvnt3NyFFSb+QH/ou/Gykye4UbyErkWlr87sKN8HcjBbmWs9DBBQA4J0OS3aJ6AADIbuSaJVhFGQAAAADgEhjBBSzi5eWl4cOHp/sZY8g9+LuRNbJrMQ4gt+B3F26EvxtZg1yzBotMAQCcSupiHA/UGCIP98x/uUpKjtdP29/ItYtxAACyF7lmLaYoAwAAAABcAlOUAQDOyTBSNivqAQAgu5FrlqCDCwBwTnZJVjym1IoVKwEAyCxyzRJMUQYAAAAAuAQ6uHAZPXr0UJs2bbK7GbiD1qxZI5vNposXL2Z3U5ANUlebtGIDciJyLfch13I3cs0adHCR7Xr06CGbzWZuBQsWVIsWLbRz587sblq6XL16VYGBgSpUqJDi4+OzuzkuJzo6Wu7u7mrZsmV2N+W2OHv7c7TUe5Ws2AALkWu4GWfPBWdvf45GrlmCDi5yhBYtWujkyZM6efKkVq1aJQ8PDz388MPZ3ax0WbhwoapUqaKwsDAtXrw4y8+XmJiY5efISWbMmKG+fftq3bp1OnHiRHY3J8PuZPsNw1BSUlKWngNA+pBr6UeuORdyDTkdHVzkCF5eXgoODlZwcLBq1KihIUOG6NixYzp79qxZ5tixY+rQoYMCAgIUGBio1q1b6/DhwzesMz4+Xs8//7yKFCkib29v1atXT5s2bTKP33333Xr77bfN123atFGePHkUFxcnSTp+/LhsNpsOHDhw07bPmDFDXbt2VdeuXTVjxgxz/7Rp0xQSEiK73fFO/9atW+vJJ580X3/zzTeqWbOmvL29VaZMGY0cOdLhl7nNZtPkyZP1yCOPKF++fBo7dqySk5PVs2dPhYaGysfHRxUrVtT777/vcJ6kpCQ9//zzCggIUMGCBTV48GB1797dYbqb3W7XuHHjzHrCw8P11Vdf3fTz3klxcXGaP3++nn32WbVs2VKzZs26brkNGzaoevXq8vb21n333afffvvN4XjqlzUvLy+VLl1aEyZMMI+98sorql27dpo6w8PDNWrUKPP19OnTValSJXl7eyssLEwfffRRptrfpUsXdezY0aF8YmKiChUqpDlz5ki69Z9P6lS277//XrVq1ZKXl5fWr1+vgwcPqnXr1goKClL+/Pl1zz33aOXKlQ7nOnnypFq2bCkfHx+FhoZq3rx5Kl26tN577z2zzMWLF/XUU0+pcOHC8vPz0wMPPKAdO3bc8nPfMVzpRg5GrpFr10OukWs3Ra5Zgg4ucpy4uDh99tlnKleunAoWLCgp5RdkRESEfH19FRUVpQ0bNih//vxq0aKFEhISrlvPSy+9pIULF2r27NnaunWrypUrp4iICJ0/f16S1LBhQ61Zs0ZSyhXCqKgoBQQEaP369ZKktWvXqlixYipXrtwN23rw4EFFR0erQ4cO6tChg6KionTkyBFJ0mOPPaZz585p9erVZvnz589r+fLlioyMlCRFRUWpW7dueuGFF7Rnzx5NnTpVs2bN0tixYx3OM2LECLVt21a7du3Sk08+KbvdruLFi+vLL7/Unj17NGzYML3yyitasGCB+Z4333xTc+fO1cyZM7VhwwbFxsamuRI/btw4zZkzR1OmTNHu3bvVr18/de3aVWvXrr3VH9MdsWDBAoWFhalixYrq2rWrPvnkExnX+aU9aNAgTZgwQZs2bVLhwoXVqlUrc0Rgy5Yt6tChgzp16qRdu3ZpxIgRGjp0qBnKkZGR2rhxow4ePGjWt3v3bu3cuVNdunSRJM2dO1fDhg3T2LFjtXfvXr3++usaOnSoZs+efdvtj4yM1JIlS8wvnpL0ww8/6MqVK2rbtq2k9P/5DBkyRG+88Yb27t2r6tWrKy4uTg899JBWrVqlbdu2qUWLFmrVqpWOHj1qvqdbt246ceKE1qxZo4ULF2ratGk6c+aMQ72PPfaYzpw5o++//15btmxRzZo11aRJE/PfEID0IdfItVTkGrmGO8AAsln37t0Nd3d3I1++fEa+fPkMSUbRokWNLVu2mGU+/fRTo2LFiobdbjf3xcfHGz4+PsYPP/xg1tO6dWvDMAwjLi7OyJMnjzF37lyzfEJCghESEmKMHz/eMAzD+Pbbbw1/f38jKSnJ2L59uxEcHGy88MILxuDBgw3DMIynnnrK6NKly03b/sorrxht2rQxX7du3doYPny4w+snn3zSfD116lQjJCTESE5ONgzDMJo0aWK8/vrrDnV++umnRtGiRc3XkowXX3zxpu0wDMPo3bu30b59e/N1UFCQ8dZbb5mvk5KSjJIlS5o/o2vXrhl58+Y1fv75Z4d6evbsaXTu3PmW57sT6tata7z33nuGYRhGYmKiUahQIWP16tXm8dWrVxuSjC+++MLcd+7cOcPHx8eYP3++YRiG0aVLF6NZs2YO9Q4aNMioXLmy+To8PNwYNWqU+frll182ateubb4uW7asMW/ePIc6Ro8ebdSpU+e225/6es6cOWb5zp07Gx07djQMI31/Pqmff/HixTdth2EYRpUqVYyJEycahmEYe/fuNSQZmzZtMo/v37/fkGS8++67hmEYRlRUlOHn52dcu3bNoZ6yZcsaU6dOveX5slJMTIwhyWhSaYARUfWVTG9NKg0wJBkxMTHZ+rngOsg1cu1GyDVy7XrINWsxgoscoXHjxtq+fbu2b9+ujRs3KiIiQg8++KB51XjHjh06cOCAfH19lT9/fuXPn1+BgYG6du2awxXKVAcPHlRiYqLuv/9+c1+ePHl07733au/evZKk+vXr69KlS9q2bZvWrl2rhg0bqlGjRubV77Vr16pRo0Y3bHNycrJmz56trl27mvu6du2qWbNmmdO3IiMjtXDhQnORjrlz56pTp05yc3MzP9eoUaPMz5Q/f3716tVLJ0+e1JUrV8x677777jTn//DDD1WrVi0VLlxY+fPn17Rp08wrmTExMTp9+rTuvfdes7y7u7tq1aplvj5w4ICuXLmiZs2aOZx/zpw51/2Z3mn79u3Txo0b1blzZ0mSh4eHOnbs6DBdLlWdOnXM/w8MDFTFihXNP+e9e/c6/D2QpPvvv1/79+9XcnKypJQ/p3nz5klKGfX4/PPPzdGIy5cv6+DBg+rZs6fDz2nMmDE3/Tndqv0eHh7q0KGD5s6da57nm2++Mc+bkT+f//79iIuL08CBA1WpUiUFBAQof/782rt3r/n3Y9++ffLw8FDNmjXN95QrV04FChQwX+/YsUNxcXEqWLCgw/kPHTqUI/5+SEp5zp9VG2Axco1c+y9yjVy7JXLNEh7Z3QBAkvLly+cwZWr69Ony9/fXxx9/rDFjxiguLk61atUyf2n+W+HChW/rnAEBAQoPD9eaNWsUHR2tZs2aqUGDBurYsaP++OMP7d+/Xw0bNrzh+3/44Qf99ddfae43SU5O1qpVq9SsWTO1atVKhmFo2bJluueeexQVFaV3333XLBsXF6eRI0eqXbt2aer39vY2/z9fvnwOx7744gsNHDhQEyZMUJ06deTr66u33npLv/76a7o/f+oUomXLlqlYsWIOx7y8vNJdT1aZMWOGkpKSFBISYu4zDENeXl6aNGmS/P39LTtX586dNXjwYG3dulVXr17VsWPHzD/X1J/Txx9/nOaeJnd390y1PzIyUg0bNtSZM2e0YsUK+fj4qEWLFg7nTc+fz3//fgwcOFArVqzQ22+/rXLlysnHx0ePPvroDac9Xk9cXJyKFi1qfjH+t4CAgHTXA+RW5Bq59l/kGrmGO4MOLnIkm80mNzc3Xb16VZJUs2ZNzZ8/X0WKFJGfn98t31+2bFl5enpqw4YNKlWqlKSU+502bdqkF1980SzXsGFDrV69Whs3btTYsWMVGBioSpUqaezYsSpatKgqVKhww3PMmDFDnTp10quvvuqwf+zYsZoxY4aaNWsmb29vtWvXTnPnztWBAwdUsWJFh6uLNWvW1L59+256P9T1bNiwQXXr1tVzzz1n7vv31Ud/f38FBQVp06ZNatCggaSULyhbt25VjRo1JEmVK1eWl5eXjh49etMvPNkhKSlJc+bM0YQJE9S8eXOHY23atNHnn3+uZ555xtz3yy+/qGTJkpKkCxcu6I8//lClSpUkSZUqVdKGDRsc6tiwYYMqVKhgBnnx4sXVsGFDzZ07V1evXlWzZs1UpEgRSVJQUJBCQkL0559/mlehrWp/3bp1VaJECc2fP1/ff/+9HnvsMeXJk0dS5v58NmzYoB49epj3PMXFxTksXFOxYkUlJSVp27Zt5ujHgQMHdOHCBbNMzZo1derUKXl4eKh06dIZOv+dYtWz/nL78wJxZ5BrN0eukWs3Q65lvJ7cjA4ucoT4+HidOnVKUsov8kmTJikuLk6tWrWSlDLV5q233lLr1q01atQoFS9eXEeOHNHXX3+tl156ScWLF3eoL1++fHr22Wc1aNAgBQYGqmTJkho/fryuXLminj17muUaNWqkiRMnqnDhwgoLCzP3TZo0SY899tgN23v27FktWbJE3377rapWrepwrFu3bmrbtq3Onz+vwMBARUZG6uGHH9bu3bsdpn1J0rBhw/Twww+rZMmSevTRR+Xm5qYdO3bot99+05gxY254/vLly2vOnDn64YcfFBoaqk8//VSbNm1SaGioWaZv374aN26cypUrp7CwME2cOFEXLlyQzWaTJPn6+mrgwIHq16+f7Ha76tWrp5iYGG3YsEF+fn7q3r37Dc+f1ZYuXaoLFy6oZ8+eaa5ot2/fXjNmzHD4IjBq1CgVLFhQQUFBevXVV1WoUCFzVc0BAwbonnvu0ejRo9WxY0dFR0dr0qRJaVaLjIyM1PDhw5WQkOAwGiFJI0eO1PPPPy9/f3+1aNFC8fHx2rx5sy5cuKD+/ftnqv1dunTRlClT9Mcffzgs3JKZP5/y5cvr66+/VqtWrWSz2TR06FCHVU/DwsLUtGlTPf3005o8ebLy5MmjAQMGyMfHx/z70bRpU9WpU0dt2rTR+PHjVaFCBZ04cULLli1T27Ztrzu98I6zaqXIXP5FAFmDXCPX/o1cI9fShVyzRvbd/guk6N69uyHJ3Hx9fY177rnH+OqrrxzKnTx50ujWrZtRqFAhw8vLyyhTpozRq1cv8wb6fy/GYRiGcfXqVaNv375m+fvvv9/YuHGjQ53nzp0zbDabuQCCYRjGokWLDEnGlClTbtjmt99+2wgICDASEhLSHIuPjzcCAgKM999/3zAMw0hOTjaKFi1qSDIOHjyYpvzy5cuNunXrGj4+Poafn59x7733GtOmTTOPSzIWLVrk8J5r164ZPXr0MPz9/Y2AgADj2WefNYYMGWKEh4ebZRITE40+ffoYfn5+RoECBYzBgwcbjz32mNGpUyezjN1uN9577z2jYsWKRp48eYzChQsbERERxtq1a2/42e+Ehx9+2HjooYeue+zXX381JBk7duwwF6NYsmSJUaVKFcPT09O49957jR07dji856uvvjIqV65s5MmTxyhZsqTDIiWpLly4YHh5eRl58+Y1Ll26lOb43LlzjRo1ahienp5GgQIFjAYNGhhff/11ptpvGIaxZ88eQ5JRqlQph8VmDOPWfz6pn//ChQsO7zt06JDRuHFjw8fHxyhRooQxadIko2HDhsYLL7xgljlx4oTx4IMPGl5eXkapUqWMefPmGUWKFHH4ex8bG2v07dvXCAkJMfLkyWOUKFHCiIyMNI4ePXrdz3anpC7G0bR8P6NF2JBMb03L98vVi3HAeuQaufZf5FoKcu36yDVr2Qwjt3fxgdzBbrerUqVK6tChg0aPHp3dzUEOc/z4cZUoUUIrV65UkyZNsrs5NxUbGyt/f381LfuiPNwzf19dUnK8Vh58TzExMemaKgogZyDXcDPkWu7NNaYoAy7qyJEj+vHHH9WwYUPFx8dr0qRJOnTokPkMPORuP/30k+Li4lStWjWdPHlSL730kkqXLm3e2+YUmMoF5CrkGm6GXPtPPbkYHVzARbm5uWnWrFkaOHCgDMNQ1apVtXLlSnORCuRuiYmJeuWVV/Tnn3/K19dXdevW1dy5c83FQAAgpyHXcDPkGlIxRRkA4FTMqVxlnpeHmwVTuezxWvnnB7l2KhcAIHuRa9ZiBBcA4JyYygUAcCXkmiXcsrsBAAAAAABYgRFcAIBzsqc+hcWKegAAyGbkmiUYwQUAAAAAuAQ6uEAu1qNHD7Vp08Z83ahRI7344ot3vB1r1qyRzWbTxYsXb1jGZrNp8eLF6a5zxIgRqlGjRqbadfjwYdlsNm3fvj1T9SCLGHbrNgAugVy7OXIthyPXLEEHF8hhevToIZvNJpvNJk9PT5UrV06jRo1SUlJSlp/766+/1ujRo9NVNj3hDWSp1MU4rNgAZBlyDUgncs0S3IML5EAtWrTQzJkzFR8fr++++069e/dWnjx59PLLL6cpm5CQIE9PT0vOGxgYaEk9AAD8G7kG4E5hBBfIgby8vBQcHKxSpUrp2WefVdOmTfXtt99K+mf61dixYxUSEqKKFStKko4dO6YOHTooICBAgYGBat26tQ4fPmzWmZycrP79+ysgIEAFCxbUSy+9pP8+Bvu/U7ni4+M1ePBglShRQl5eXipXrpxmzJihw4cPq3HjxpKkAgUKyGazqUePHpIku92ucePGKTQ0VD4+PgoPD9dXX33lcJ7vvvtOFSpUkI+Pjxo3buzQzvQaPHiwKlSooLx586pMmTIaOnSoEhMT05SbOnWqSpQoobx586pDhw6KiYlxOD59+nRVqlRJ3t7eCgsL00cffZThtiCb2A3rNgBZily7NXIN5Jo1GMEFnICPj4/OnTtnvl61apX8/Py0YsUKSVJiYqIiIiJUp04dRUVFycPDQ2PGjFGLFi20c+dOeXp6asKECZo1a5Y++eQTVapUSRMmTNCiRYv0wAMP3PC83bp1U3R0tD744AOFh4fr0KFD+vvvv1WiRAktXLhQ7du31759++Tn5ycfHx9J0rhx4/TZZ59pypQpKl++vNatW6euXbuqcOHCatiwoY4dO6Z27dqpd+/eevrpp7V582YNGDAgwz8TX19fzZo1SyEhIdq1a5d69eolX19fvfTSS2aZAwcOaMGCBVqyZIliY2PVs2dPPffcc5o7d64kae7cuRo2bJgmTZqku+66S9u2bVOvXr2UL18+de/ePcNtwh3G8wIBp0WupUWugVyzBh1cIAczDEOrVq3SDz/8oL59+5r78+XLp+nTp5tTuD777DPZ7XZNnz5dNptNkjRz5kwFBARozZo1at68ud577z29/PLLateunSRpypQp+uGHH2547j/++EMLFizQihUr1LRpU0lSmTJlzOOp076KFCmigIAASSlXxl9//XWtXLlSderUMd+zfv16TZ06VQ0bNtTkyZNVtmxZTZgwQZJUsWJF7dq1S2+++WaGfjavvfaa+f+lS5fWwIED9cUXXzh8Ebh27ZrmzJmjYsWKSZImTpyoli1basKECQoODtbw4cM1YcIE82cSGhqqPXv2aOrUqXwRAIAsQK7dGLkGWIMOLpADLV26VPnz51diYqLsdru6dOmiESNGmMerVavmcH/Sjh07dODAAfn6+jrUc+3aNR08eFAxMTE6efKkateubR7z8PDQ3XffnWY6V6rt27fL3d1dDRs2THe7Dxw4oCtXrqhZs2YO+xMSEnTXXXdJkvbu3evQDknml4aMmD9/vj744AMdPHhQcXFxSkpKkp+fn0OZkiVLml8CUs9jt9u1b98++fr66uDBg+rZs6d69epllklKSpK/v3+G24NsYMiiK92ZrwLAzZFrt0augVyzBh1cIAdq3LixJk+eLE9PT4WEhMjDw/Gfar58+Rxex8XFqVatWuYUpX8rXLjwbbUhdWpWRsTFxUmSli1b5hDAUsr9V1aJjo5WZGSkRo4cqYiICPn7++uLL74wr55npK0ff/xxmi8m7u7ulrUVWYipXIDTINdujlyDJHLNInRwgRwoX758KleuXLrL16xZU/Pnz1eRIkXSXO1NVbRoUf36669q0KCBpJQrulu2bFHNmjWvW75atWqy2+1au3atOZXr31KvtCcnJ5v7KleuLC8vLx09evSGV8grVapkLiyS6pdffrn1h/yXn3/+WaVKldKrr75q7jty5EiackePHtWJEycUEhJinsfNzU0VK1ZUUFCQQkJC9OeffyoyMjJD5wcAZAy5dnPkGmAdVlEGXEBkZKQKFSqk1q1bKyoqSocOHdKaNWv0/PPP6/jx45KkF154QW+88YYWL16s33//Xc8999xNn/VXunRpde/eXU8++aQWL15s1rlgwQJJUqlSpWSz2bR06VKdPXtWcXFx8vX11cCBA9WvXz/Nnj1bBw8e1NatWzVx4kTNnj1bkvTMM89o//79GjRokPbt26d58+Zp1qxZGfq85cuX19GjR/XFF1/o4MGD+uCDD7Ro0aI05by9vdW9e3ft2LFDUVFRev7559WhQwcFBwdLkkaOHKlx48bpgw8+0B9//KFdu3Zp5syZeueddzLUHmQTu926DUCOQq6Ra7kSuWYJOriAC8ibN6/WrVunkiVLql27dqpUqZJ69uypa9eumVe+BwwYoMcff1zdu3dXnTp15Ovrq7Zt29603smTJ+vRRx/Vc889p7CwMPXq1UuXL1+WJBUrVkwjR47UkCFDFBQUpD59+kiSRo8eraFDh2rcuHGqVKmSWrRooWXLlik0NFRSyv1DCxcu1OLFixUeHq4pU6bo9ddfz9DnfeSRR9SvXz/16dNHNWrU0M8//6yhQ4emKVeuXDm1a9dODz30kJo3b67q1as7PC7hqaee0vTp0zVz5kxVq1ZNDRs21KxZs8y2AgCyB7lGrgG3y2bc6E58AAByoNjYWPn7+6tp4Z7ycPO89RtuIcmeoJVnZygmJuaGUyEBAMgq5Jq1uAcXAOCcWIwDAOBKyDVLMEUZAAAAAOASGMEFADgnuyFLHvZnz91XugEAOQS5Zgk6uAAAp2QYdhlG5leKtKIOAAAyi1yzBlOUAQAAAAAugRFcAIBzMgxrpmHl8sU4AAA5BLlmCTq4AADnZFh0r1Iu/yIAAMghyDVLMEUZAAAAAOASGMEFADgnu12yWbCQRi5fjAMAkEOQa5aggwsAcE5M5QIAuBJyzRJMUQYAAAAAuARGcAEATsmw22VYMJUrtz8vEACQM5Br1mAEFwAAAADgEhjBBQA4J+5VAgC4EnLNEnRwAQDOyW5INr4IAABcBLlmCaYoAwAAAABcAiO4AADnZBiSrHheYO6+0g0AyCHINUvQwQUAOCXDbsiwYCqXkcu/CAAAcgZyzRpMUQYAAAAAuAQ6uAAA52TYrdtuw4cffqjSpUvL29tbtWvX1saNGy3+gACAXIVcswQdXACAUzLshmVbRs2fP1/9+/fX8OHDtXXrVoWHhysiIkJnzpzJgk8KAMgNyDVr0MEFACCD3nnnHfXq1UtPPPGEKleurClTpihv3rz65JNPsrtpAABkmCvlGh1cAIBzyqapXAkJCdqyZYuaNm1q7nNzc1PTpk0VHR1t9acEAOQW5JolWEUZAOCUkpQoWbBQZJISJUmxsbEO+728vOTl5ZWm/N9//63k5GQFBQU57A8KCtLvv/+e+QYBAHIlcs0adHABAE7F09NTwcHBWn/qO8vqzJ8/v0qUKOGwb/jw4RoxYoRl5wAA4HrINWvRwQUAOBVvb28dOnRICQkJltVpGIZsNpvDvutd5ZakQoUKyd3dXadPn3bYf/r0aQUHB1vWJgBA7kCuWYsOLgDA6Xh7e8vb2ztbzu3p6alatWpp1apVatOmjSTJbrdr1apV6tOnT7a0CQDg3Mg169DBBQAgg/r376/u3bvr7rvv1r333qv33ntPly9f1hNPPJHdTQMAIMNcKdfo4AIAkEEdO3bU2bNnNWzYMJ06dUo1atTQ8uXL0yzQAQCAM3ClXLMZhmHBWl0AAAAAAGQvnoMLAAAAAHAJdHABAAAAAC6BDi4AAAAAwCXQwQUAAAAAuAQ6uAAAAAAAl0AHFwAAAADgEujgAgAAAABcAh1cAAAAAIBLoIMLAAAAAHAJdHABAAAAAC6BDi4AAAAAwCXQwQUAAAAAuIT/Bx2wRU/UOlxKAAAAAElFTkSuQmCC", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "_, ax = plt.subplots(1, 2, figsize=(10, 4), sharex=False, sharey=False)\n", + "\n", + "for index in range(0, len(optimized_metrics)):\n", + " c_matrix = optimized_metrics.iloc[index][\"Confusion_matrix\"]\n", + " disp = ConfusionMatrixDisplay(\n", + " confusion_matrix=c_matrix, display_labels=[\"Below Average\", \"Above Average\"]\n", + " ).plot(ax=ax.flat[index])\n", + " disp.ax_.set_title(optimized_metrics.index[index]) \n", + "\n", + "plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.3)\n", + "plt.show()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "В желтом квадрате мы видим значение 28511, что обозначает количество правильно классифицированных объектов, отнесенных к классу \"Below Average\". Это свидетельствует о том, что модель успешно идентифицирует объекты этого класса, минимизируя количество ложных положительных срабатываний.\n", + "\n", + "В зеленом квадрате значение 3952 указывает на количество правильно классифицированных объектов, отнесенных к классу \"Above Average\". Это также является показателем высокой точности модели в определении объектов данного класса." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Определение достижимого уровня качества модели для второй задачи (задача регрессии)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Загрузка данных и создание целевой переменной" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Среднее значение поля 'price': 1991.6325132793531\n", + " category sub_category \\\n", + "0 Groceries Fruits & Vegetables \n", + "1 Groceries Fruits & Vegetables \n", + "2 Groceries Fruits & Vegetables \n", + "3 Groceries Fruits & Vegetables \n", + "4 Groceries Fruits & Vegetables \n", + "\n", + " href \\\n", + "0 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "1 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "2 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "3 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "4 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "\n", + " items price \\\n", + "0 Fresh Dates (Pack) (Approx 450 g - 500 g) 109.0 \n", + "1 Tender Coconut Cling Wrapped (1 pc) (Approx 90... 49.0 \n", + "2 Mosambi 1 kg 69.0 \n", + "3 Orange Imported 1 kg 125.0 \n", + "4 Banana Robusta 6 pcs (Box) (Approx 800 g - 110... 44.0 \n", + "\n", + " above_average_price \n", + "0 0 \n", + "1 0 \n", + "2 0 \n", + "3 0 \n", + "4 0 \n", + "Статистическое описание DataFrame:\n", + " price above_average_price\n", + "count 1.622820e+05 162313.000000\n", + "mean 1.991633e+03 0.121734\n", + "std 1.593479e+04 0.326979\n", + "min 5.000000e+00 0.000000\n", + "25% 2.840000e+02 0.000000\n", + "50% 4.990000e+02 0.000000\n", + "75% 9.990000e+02 0.000000\n", + "max 3.900000e+06 1.000000\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn import set_config\n", + "\n", + "set_config(transform_output=\"pandas\")\n", + "\n", + "# Загрузка данных\n", + "df = pd.read_csv(\"..//static//csv//jio_mart_items.csv\")\n", + "\n", + "# Опция для настройки генерации случайных чисел \n", + "random_state = 42\n", + "\n", + "# Вычисление среднего значения поля \"price\"\n", + "average_price = df['price'].mean()\n", + "print(f\"Среднее значение поля 'price': {average_price}\")\n", + "\n", + "# Создание новой колонки, указывающей, выше или ниже среднего значение цены\n", + "df['above_average_price'] = (df['price'] > average_price).astype(int)\n", + "\n", + "# Вывод DataFrame с новой колонкой\n", + "print(df.head())\n", + "\n", + "# Примерный анализ данных\n", + "print(\"Статистическое описание DataFrame:\")\n", + "print(df.describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Разделение набора данных на обучающую и тестовые выборки (80/20) для задачи регрессии\n", + "\n", + "Целевой признак -- above_average_price" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'X_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorysub_categoryhrefitemsprice
38475GroceriesMom & Baby Carehttps://www.jiomart.com/c/groceries/mom-baby-c...Halo Nation Green Plastic Wobbling Roly Poly T...529.0
3550GroceriesStapleshttps://www.jiomart.com/c/groceries/staples/ri...OrgaSatva Organic Sona Masuri Rice (White) 1 kg420.0
145206ElectronicsAccessorieshttps://www.jiomart.com/c/electronics/accessor...itek 10000 mAh Power Bank, RBB013_BK1099.0
151588BeautyMake-Uphttps://www.jiomart.com/c/beauty/make-up/lips/...Fashion Colour Satin Smooth Lip Definer, 14 Ab...356.0
28297GroceriesHome Carehttps://www.jiomart.com/c/groceries/home-care/...My Home Lavender Trail Air Freshener Block 50 ...65.0
..................
119879FashionWomenhttps://www.jiomart.com/c/fashion/women/bags-b...Trysco Women Genuine Leather Yellow Belt599.0
103694Home & KitchenPooja Needshttps://www.jiomart.com/c/groceries/home-kitch...Majmua Attar Made Pure and Natural Exclusive I...599.0
131932FashionGirlshttps://www.jiomart.com/c/fashion/girls/watche...Mikado Analog Blue Watch For Girls ,Pack Of 2249.0
146867ElectronicsAccessorieshttps://www.jiomart.com/c/electronics/accessor...Reconnect RACMB1001 Car Mount100.0
121958FashionWomenhttps://www.jiomart.com/c/fashion/women/fashio...Traditional Long Earring Zinc Jhumki Earring (...129.0
\n", + "

129850 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " category sub_category \\\n", + "38475 Groceries Mom & Baby Care \n", + "3550 Groceries Staples \n", + "145206 Electronics Accessories \n", + "151588 Beauty Make-Up \n", + "28297 Groceries Home Care \n", + "... ... ... \n", + "119879 Fashion Women \n", + "103694 Home & Kitchen Pooja Needs \n", + "131932 Fashion Girls \n", + "146867 Electronics Accessories \n", + "121958 Fashion Women \n", + "\n", + " href \\\n", + "38475 https://www.jiomart.com/c/groceries/mom-baby-c... \n", + "3550 https://www.jiomart.com/c/groceries/staples/ri... \n", + "145206 https://www.jiomart.com/c/electronics/accessor... \n", + "151588 https://www.jiomart.com/c/beauty/make-up/lips/... \n", + "28297 https://www.jiomart.com/c/groceries/home-care/... \n", + "... ... \n", + "119879 https://www.jiomart.com/c/fashion/women/bags-b... \n", + "103694 https://www.jiomart.com/c/groceries/home-kitch... \n", + "131932 https://www.jiomart.com/c/fashion/girls/watche... \n", + "146867 https://www.jiomart.com/c/electronics/accessor... \n", + "121958 https://www.jiomart.com/c/fashion/women/fashio... \n", + "\n", + " items price \n", + "38475 Halo Nation Green Plastic Wobbling Roly Poly T... 529.0 \n", + "3550 OrgaSatva Organic Sona Masuri Rice (White) 1 kg 420.0 \n", + "145206 itek 10000 mAh Power Bank, RBB013_BK 1099.0 \n", + "151588 Fashion Colour Satin Smooth Lip Definer, 14 Ab... 356.0 \n", + "28297 My Home Lavender Trail Air Freshener Block 50 ... 65.0 \n", + "... ... ... \n", + "119879 Trysco Women Genuine Leather Yellow Belt 599.0 \n", + "103694 Majmua Attar Made Pure and Natural Exclusive I... 599.0 \n", + "131932 Mikado Analog Blue Watch For Girls ,Pack Of 2 249.0 \n", + "146867 Reconnect RACMB1001 Car Mount 100.0 \n", + "121958 Traditional Long Earring Zinc Jhumki Earring (... 129.0 \n", + "\n", + "[129850 rows x 5 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_average_price
384750
35500
1452060
1515880
282970
......
1198790
1036940
1319320
1468670
1219580
\n", + "

129850 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_average_price\n", + "38475 0\n", + "3550 0\n", + "145206 0\n", + "151588 0\n", + "28297 0\n", + "... ...\n", + "119879 0\n", + "103694 0\n", + "131932 0\n", + "146867 0\n", + "121958 0\n", + "\n", + "[129850 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'X_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
categorysub_categoryhrefitemsprice
52893Home & KitchenDininghttps://www.jiomart.com/c/groceries/home-kitch...CRAFTYKART Brown Shesham Wood Serving Tray ?35...699.0
78308Home & KitchenToys, Games & Fitnesshttps://www.jiomart.com/c/groceries/home-kitch...Magicwand Red ABS Plastic 4Wd 360 Degree Twist...7999.0
159477BeautyFragranceshttps://www.jiomart.com/c/beauty/fragrances/wo...Ajmal Senora EDP Floral Spicy Perfume And Sacr...1295.0
74384Home & KitchenToys, Games & Fitnesshttps://www.jiomart.com/c/groceries/home-kitch...Frantic Ultra Soft Stuffed Lovable Spongy Huga...369.0
93511Home & KitchenBags & Travel Luggagehttps://www.jiomart.com/c/groceries/home-kitch...DE VAGABOND Orange Black Polyester Travel Duff...749.0
..................
117300FashionWomenhttps://www.jiomart.com/c/fashion/women/wester...Tees World Women Grey Regular Fit Round Neck P...999.0
24023GroceriesPersonal Carehttps://www.jiomart.com/c/groceries/personal-c...Vetoni Fruit Punch Lather Shaving Cream for Me...300.0
129165FashionGirlshttps://www.jiomart.com/c/fashion/girls/wester...IndiWeaves Girls Printed Cotton Half Sleeves T...799.0
71336Home & KitchenFurniturehttps://www.jiomart.com/c/groceries/home-kitch...EVEREST DRAWER V2081.0
110968FashionMenhttps://www.jiomart.com/c/fashion/men/footwear...Birde Sports Shoes399.0
\n", + "

32463 rows × 5 columns

\n", + "
" + ], + "text/plain": [ + " category sub_category \\\n", + "52893 Home & Kitchen Dining \n", + "78308 Home & Kitchen Toys, Games & Fitness \n", + "159477 Beauty Fragrances \n", + "74384 Home & Kitchen Toys, Games & Fitness \n", + "93511 Home & Kitchen Bags & Travel Luggage \n", + "... ... ... \n", + "117300 Fashion Women \n", + "24023 Groceries Personal Care \n", + "129165 Fashion Girls \n", + "71336 Home & Kitchen Furniture \n", + "110968 Fashion Men \n", + "\n", + " href \\\n", + "52893 https://www.jiomart.com/c/groceries/home-kitch... \n", + "78308 https://www.jiomart.com/c/groceries/home-kitch... \n", + "159477 https://www.jiomart.com/c/beauty/fragrances/wo... \n", + "74384 https://www.jiomart.com/c/groceries/home-kitch... \n", + "93511 https://www.jiomart.com/c/groceries/home-kitch... \n", + "... ... \n", + "117300 https://www.jiomart.com/c/fashion/women/wester... \n", + "24023 https://www.jiomart.com/c/groceries/personal-c... \n", + "129165 https://www.jiomart.com/c/fashion/girls/wester... \n", + "71336 https://www.jiomart.com/c/groceries/home-kitch... \n", + "110968 https://www.jiomart.com/c/fashion/men/footwear... \n", + "\n", + " items price \n", + "52893 CRAFTYKART Brown Shesham Wood Serving Tray ?35... 699.0 \n", + "78308 Magicwand Red ABS Plastic 4Wd 360 Degree Twist... 7999.0 \n", + "159477 Ajmal Senora EDP Floral Spicy Perfume And Sacr... 1295.0 \n", + "74384 Frantic Ultra Soft Stuffed Lovable Spongy Huga... 369.0 \n", + "93511 DE VAGABOND Orange Black Polyester Travel Duff... 749.0 \n", + "... ... ... \n", + "117300 Tees World Women Grey Regular Fit Round Neck P... 999.0 \n", + "24023 Vetoni Fruit Punch Lather Shaving Cream for Me... 300.0 \n", + "129165 IndiWeaves Girls Printed Cotton Half Sleeves T... 799.0 \n", + "71336 EVEREST DRAWER V 2081.0 \n", + "110968 Birde Sports Shoes 399.0 \n", + "\n", + "[32463 rows x 5 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_average_price
528930
783081
1594770
743840
935110
......
1173000
240230
1291650
713361
1109680
\n", + "

32463 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_average_price\n", + "52893 0\n", + "78308 1\n", + "159477 0\n", + "74384 0\n", + "93511 0\n", + "... ...\n", + "117300 0\n", + "24023 0\n", + "129165 0\n", + "71336 1\n", + "110968 0\n", + "\n", + "[32463 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from typing import Tuple\n", + "import pandas as pd\n", + "from pandas import DataFrame\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "def split_into_train_test(\n", + " df_input: DataFrame,\n", + " target_colname: str = \"above_average_price\", \n", + " frac_train: float = 0.8,\n", + " random_state: int = None,\n", + ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame]:\n", + " \n", + " if not (0 < frac_train < 1):\n", + " raise ValueError(\"Fraction must be between 0 and 1.\")\n", + " \n", + " # Проверка наличия целевого признака\n", + " if target_colname not in df_input.columns:\n", + " raise ValueError(f\"{target_colname} is not a column in the DataFrame.\")\n", + " \n", + " # Разделяем данные на признаки и целевую переменную\n", + " X = df_input.drop(columns=[target_colname]) # Признаки\n", + " y = df_input[[target_colname]] # Целевая переменная\n", + "\n", + " # Разделяем данные на обучающую и тестовую выборки\n", + " X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y,\n", + " test_size=(1.0 - frac_train),\n", + " random_state=random_state\n", + " )\n", + " \n", + " return X_train, X_test, y_train, y_test\n", + "\n", + "# Применение функции для разделения данных\n", + "X_train, X_test, y_train, y_test = split_into_train_test(\n", + " df, \n", + " target_colname=\"above_average_price\", \n", + " frac_train=0.8, \n", + " random_state=42 \n", + ")\n", + "\n", + "# Для отображения результатов\n", + "display(\"X_train\", X_train)\n", + "display(\"y_train\", y_train)\n", + "\n", + "display(\"X_test\", X_test)\n", + "display(\"y_test\", y_test)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Формирование конвейера для решения задачи регрессии" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " price category_Electronics category_Fashion category_Groceries \\\n", + "0 -0.118140 0.0 0.0 1.0 \n", + "1 -0.121905 0.0 0.0 1.0 \n", + "2 -0.120650 0.0 0.0 1.0 \n", + "3 -0.117136 0.0 0.0 1.0 \n", + "4 -0.122219 0.0 0.0 1.0 \n", + "... ... ... ... ... \n", + "162308 -0.020231 0.0 0.0 0.0 \n", + "162309 -0.037679 0.0 0.0 0.0 \n", + "162310 -0.072637 0.0 0.0 0.0 \n", + "162311 0.017865 0.0 0.0 0.0 \n", + "162312 -0.072637 0.0 0.0 0.0 \n", + "\n", + " category_Home & Kitchen category_Jewellery sub_category_Apparel \\\n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "162308 0.0 1.0 0.0 \n", + "162309 0.0 1.0 0.0 \n", + "162310 0.0 1.0 0.0 \n", + "162311 0.0 1.0 0.0 \n", + "162312 0.0 1.0 0.0 \n", + "\n", + " sub_category_Auto Care sub_category_Ayush \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "... ... ... \n", + "162308 0.0 0.0 \n", + "162309 0.0 0.0 \n", + "162310 0.0 0.0 \n", + "162311 0.0 0.0 \n", + "162312 0.0 0.0 \n", + "\n", + " sub_category_Bags & Travel Luggage ... \\\n", + "0 0.0 ... \n", + "1 0.0 ... \n", + "2 0.0 ... \n", + "3 0.0 ... \n", + "4 0.0 ... \n", + "... ... ... \n", + "162308 0.0 ... \n", + "162309 0.0 ... \n", + "162310 0.0 ... \n", + "162311 0.0 ... \n", + "162312 0.0 ... \n", + "\n", + " sub_category_Snacks & Branded Foods sub_category_Staples \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "... ... ... \n", + "162308 0.0 0.0 \n", + "162309 0.0 0.0 \n", + "162310 0.0 0.0 \n", + "162311 0.0 0.0 \n", + "162312 0.0 0.0 \n", + "\n", + " sub_category_Stationery sub_category_TV & Speaker sub_category_Tech \\\n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "162308 0.0 0.0 0.0 \n", + "162309 0.0 0.0 0.0 \n", + "162310 0.0 0.0 0.0 \n", + "162311 0.0 0.0 0.0 \n", + "162312 0.0 0.0 0.0 \n", + "\n", + " sub_category_Tools & Appliances sub_category_Toys, Games & Fitness \\\n", + "0 0.0 0.0 \n", + "1 0.0 0.0 \n", + "2 0.0 0.0 \n", + "3 0.0 0.0 \n", + "4 0.0 0.0 \n", + "... ... ... \n", + "162308 0.0 0.0 \n", + "162309 0.0 0.0 \n", + "162310 0.0 0.0 \n", + "162311 0.0 0.0 \n", + "162312 0.0 0.0 \n", + "\n", + " sub_category_Treatments sub_category_Wellness sub_category_Women \n", + "0 0.0 0.0 0.0 \n", + "1 0.0 0.0 0.0 \n", + "2 0.0 0.0 0.0 \n", + "3 0.0 0.0 0.0 \n", + "4 0.0 0.0 0.0 \n", + "... ... ... ... \n", + "162308 0.0 0.0 0.0 \n", + "162309 0.0 0.0 0.0 \n", + "162310 0.0 0.0 0.0 \n", + "162311 0.0 0.0 0.0 \n", + "162312 0.0 0.0 0.0 \n", + "\n", + "[162313 rows x 77 columns]\n", + "(162313, 77)\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn.base import BaseEstimator, TransformerMixin\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "from sklearn.ensemble import RandomForestRegressor \n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.pipeline import make_pipeline\n", + "import pandas as pd\n", + "\n", + "class JioMartFeatures(BaseEstimator, TransformerMixin): \n", + " def __init__(self):\n", + " pass\n", + "\n", + " def fit(self, X, y=None):\n", + " return self\n", + "\n", + " def transform(self, X, y=None):\n", + " if 'category' in X.columns:\n", + " X[\"Price_per_Category\"] = X[\"price\"] / X[\"category\"].nunique()\n", + " return X\n", + "\n", + " def get_feature_names_out(self, features_in):\n", + " return np.append(features_in, [\"Price_per_Category\"], axis=0) \n", + "\n", + "# Определите признаки для вашей задачи\n", + "columns_to_drop = [\"href\", \"items\"] \n", + "num_columns = [\"price\"] \n", + "cat_columns = [\"category\", \"sub_category\"]\n", + "\n", + "# Преобразование числовых признаков\n", + "num_imputer = SimpleImputer(strategy=\"median\")\n", + "num_scaler = StandardScaler()\n", + "preprocessing_num = Pipeline(\n", + " [\n", + " (\"imputer\", num_imputer),\n", + " (\"scaler\", num_scaler),\n", + " ]\n", + ")\n", + "\n", + "# Преобразование категориальных признаков\n", + "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n", + "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n", + "preprocessing_cat = Pipeline(\n", + " [\n", + " (\"imputer\", cat_imputer),\n", + " (\"encoder\", cat_encoder),\n", + " ]\n", + ")\n", + "\n", + "# Формирование конвейера\n", + "features_preprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"prepocessing_num\", preprocessing_num, num_columns),\n", + " (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n", + " ],\n", + " remainder=\"passthrough\" \n", + ")\n", + "\n", + "drop_columns = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"drop_columns\", \"drop\", columns_to_drop),\n", + " ],\n", + " remainder=\"passthrough\",\n", + ")\n", + "\n", + "# Окончательный конвейер\n", + "pipeline_end = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " (\"drop_columns\", drop_columns),\n", + " (\"custom_features\", JioMartFeatures()), # Добавляем custom_features\n", + " ]\n", + ")\n", + "\n", + "# Загрузка данных\n", + "df = pd.read_csv(\"..//static//csv//jio_mart_items.csv\")\n", + "\n", + "# Создаем целевой признак\n", + "average_price = df['price'].mean()\n", + "df['above_average_price'] = (df['price'] > average_price).astype(int)\n", + "\n", + "# Подготовка данных\n", + "X = df.drop('above_average_price', axis=1)\n", + "y = df['above_average_price'].values.ravel()\n", + "\n", + "# Проверка наличия столбцов перед применением конвейера\n", + "required_columns = set(num_columns + cat_columns + columns_to_drop)\n", + "missing_columns = required_columns - set(X.columns)\n", + "if missing_columns:\n", + " raise KeyError(f\"Missing columns: {missing_columns}\")\n", + "\n", + "# Применение конвейера\n", + "X_processed = pipeline_end.fit_transform(X)\n", + "\n", + "# Вывод\n", + "print(X_processed)\n", + "print(X_processed.shape)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Формирование набора моделей для регрессии" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/base.py:1473: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().\n", + " return fit_method(estimator, *args, **kwargs)\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Random Forest: Mean Score = 0.9897752006377067, Standard Deviation = 0.012886225390386691\n", + "Linear Regression: Mean Score = -1.439679711903671e+21, Standard Deviation = 1.9848730981021744e+21\n", + "Gradient Boosting: Mean Score = 0.990533312551943, Standard Deviation = 0.01338791677558754\n", + "Support Vector Regression: Mean Score = 0.6408179773886161, Standard Deviation = 0.045968161125540155\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/ensemble/_gb.py:668: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True) # TODO: Is this still required?\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/ensemble/_gb.py:668: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True) # TODO: Is this still required?\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/ensemble/_gb.py:668: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True) # TODO: Is this still required?\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/utils/validation.py:1339: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/utils/validation.py:1339: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/utils/validation.py:1339: DataConversionWarning: A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().\n", + " y = column_or_1d(y, warn=True)\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n", + "/Library/Frameworks/Python.framework/Versions/3.9/lib/python3.9/site-packages/sklearn/preprocessing/_encoders.py:242: UserWarning: Found unknown categories in columns [1] during transform. These unknown categories will be encoded as all zeros\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "from sklearn.linear_model import LinearRegression\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.model_selection import cross_val_score\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.ensemble import GradientBoostingRegressor\n", + "from sklearn.svm import SVR\n", + "\n", + "def train_multiple_models(X, y, models, cv=3):\n", + " results = {}\n", + " for model_name, model in models.items():\n", + " # Создаем конвейер для каждой модели\n", + " model_pipeline = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " (\"drop_columns\", drop_columns),\n", + " (\"model\", model) # Используем текущую модель\n", + " ]\n", + " )\n", + " \n", + " # Обучаем модель и вычисляем кросс-валидацию\n", + " scores = cross_val_score(model_pipeline, X, y, cv=cv, n_jobs=-1) # Используем все ядра процессора\n", + " results[model_name] = {\n", + " \"mean_score\": scores.mean(),\n", + " \"std_dev\": scores.std()\n", + " }\n", + " \n", + " return results\n", + "\n", + "# Определение моделей\n", + "models = {\n", + " \"Random Forest\": RandomForestRegressor(n_estimators=10), # Уменьшаем количество деревьев\n", + " \"Linear Regression\": LinearRegression(),\n", + " \"Gradient Boosting\": GradientBoostingRegressor(),\n", + " \"Support Vector Regression\": SVR()\n", + "}\n", + "\n", + "# Используем подвыборку данных\n", + "sample_size = 1000 # Уменьшаем количество данных для обучения\n", + "X_train_sample = X_train.sample(n=sample_size, random_state=42)\n", + "y_train_sample = y_train.loc[X_train_sample.index] # Используем loc для индексации Series\n", + "\n", + "# Обучение моделей и вывод результатов\n", + "results = train_multiple_models(X_train_sample, y_train_sample, models, cv=3) # Уменьшаем количество фолдов\n", + "\n", + "# Вывод результатов\n", + "for model_name, scores in results.items():\n", + " print(f\"{model_name}: Mean Score = {scores['mean_score']}, Standard Deviation = {scores['std_dev']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Модель: Random Forest\n", + "- **Mean Score**: 0.9897752006377067\n", + "- **Standard Deviation**: 0.012886225390386691\n", + "**Описание**:\n", + "- Random Forest показала очень высокое среднее значение, близкое к 1, что указывает на ее высокую точность в предсказании. Стандартное отклонение также относительно низкое, что говорит о стабильности модели.\n", + "\n", + "#### Модель: Linear Regression\n", + "- **Mean Score**: -1.439679711903671e+21\n", + "- **Standard Deviation**: 1.9848730981021744e+21\n", + "**Описание**:\n", + "- Линейная регрессия показала очень низкое среднее значение с огромным отрицательным числом, что указывает на ее неэффективность в данной задаче. Стандартное отклонение также очень высокое, что говорит о нестабильности модели.\n", + "\n", + "#### Модель: Gradient Boosting\n", + "- **Mean Score**: 0.990533312551943\n", + "- **Standard Deviation**: 0.01338791677558754\n", + "**Описание**:\n", + "- Gradient Boosting показала практически идеальное среднее значение, близкое к 1, что указывает на ее высокую точность в предсказании. Стандартное отклонение относительно низкое, что говорит о стабильности модели.\n", + "\n", + "#### Модель: Support Vector Regression\n", + "- **Mean Score**: 0.6408179773886161\n", + "- **Standard Deviation**: 0.045968161125540155\n", + "**Описание**:\n", + "- Support Vector Regression показала среднее значение около 0.64, что указывает на ее умеренную точность в предсказании. Стандартное отклонение относительно низкое, что говорит о стабильности модели, но она все же уступает Random Forest и Gradient Boosting.\n", + "\n", + "\n", + "1. **Random Forest и Gradient Boosting** демонстрируют высокую точность и стабильность, что делает их наиболее подходящими моделями для данной задачи регрессии.\n", + "2. **Linear Regression** неэффективна и нестабильна, что указывает на необходимость ее замены на более подходящую модель.\n", + "3. **Support Vector Regression** показывает умеренную точность и стабильность, но уступает Random Forest и Gradient Boosting в эффективности." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обучение моделей на обучающем наборе данных и оценка на тестовом для регрессии" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "MSE (train): 0.00954948016942626\n", + "MSE (test): 0.009857376089702122\n", + "MAE (train): 0.00954948016942626\n", + "MAE (test): 0.009857376089702122\n", + "R2 (train): 0.9105001240660583\n", + "R2 (test): 0.9085410706513222\n", + "STD (train): 0.09733042790899017\n", + "STD (test): 0.09886474010790139\n", + "----------------------------------------\n", + "Model: ridge\n", + "MSE (train): 0.016395841355410088\n", + "MSE (test): 0.016418692049410096\n", + "MAE (train): 0.016395841355410088\n", + "MAE (test): 0.016418692049410096\n", + "R2 (train): 0.8463344872069661\n", + "R2 (test): 0.8476637208036084\n", + "STD (train): 0.12699418323145514\n", + "STD (test): 0.1270791824052891\n", + "----------------------------------------\n", + "Model: decision_tree\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: knn\n", + "MSE (train): 0.00041586445899114365\n", + "MSE (test): 0.0004928688044851062\n", + "MAE (train): 0.00041586445899114365\n", + "MAE (test): 0.0004928688044851062\n", + "R2 (train): 0.9961024247577155\n", + "R2 (test): 0.9954270535325661\n", + "STD (train): 0.020392609645475155\n", + "STD (test): 0.022199280947150013\n", + "----------------------------------------\n", + "Model: naive_bayes\n", + "MSE (train): 0.6530150173276857\n", + "MSE (test): 0.6539752949511752\n", + "MAE (train): 0.6530150173276857\n", + "MAE (test): 0.6539752949511752\n", + "R2 (train): -5.1202036128569794\n", + "R2 (test): -5.0677283439763485\n", + "STD (train): 0.4850279840944924\n", + "STD (test): 0.4855381725252704\n", + "----------------------------------------\n", + "Model: gradient_boosting\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: random_forest\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: mlp\n", + "MSE (train): 0.0009703504043126684\n", + "MSE (test): 0.0010781505098111696\n", + "MAE (train): 0.0009703504043126684\n", + "MAE (test): 0.0010781505098111696\n", + "R2 (train): 0.9909056577680027\n", + "R2 (test): 0.9899966796024884\n", + "STD (train): 0.031139749763093583\n", + "STD (test): 0.03281946301141911\n", + "----------------------------------------\n" + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.pipeline import Pipeline\n", + "\n", + "# Проверка наличия необходимых переменных\n", + "if 'class_models' not in locals():\n", + " raise ValueError(\"class_models is not defined\")\n", + "if 'X_train' not in locals() or 'X_test' not in locals() or 'y_train' not in locals() or 'y_test' not in locals():\n", + " raise ValueError(\"Train/test data is not defined\")\n", + "\n", + "# Преобразуем y_train и y_test в одномерные массивы\n", + "y_train = np.ravel(y_train) \n", + "y_test = np.ravel(y_test) \n", + "\n", + "# Инициализация списка для хранения результатов\n", + "results = []\n", + "\n", + "# Проход по моделям и оценка их качества\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + " \n", + " # Извлечение модели из словаря\n", + " model = class_models[model_name][\"model\"]\n", + " \n", + " # Создание пайплайна\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " \n", + " # Обучение модели\n", + " model_pipeline.fit(X_train, y_train)\n", + "\n", + " # Предсказание для обучающей и тестовой выборки\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_predict = model_pipeline.predict(X_test)\n", + "\n", + " # Сохранение пайплайна и предсказаний\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " # Вычисление метрик для регрессии\n", + " class_models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_predict)\n", + " class_models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_predict)\n", + " class_models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_predict)\n", + " class_models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_predict)\n", + "\n", + " # Дополнительные метрики\n", + " class_models[model_name][\"STD_train\"] = np.std(y_train - y_train_predict)\n", + " class_models[model_name][\"STD_test\"] = np.std(y_test - y_test_predict)\n", + "\n", + " # Вывод результатов для текущей модели\n", + " print(f\"MSE (train): {class_models[model_name]['MSE_train']}\")\n", + " print(f\"MSE (test): {class_models[model_name]['MSE_test']}\")\n", + " print(f\"MAE (train): {class_models[model_name]['MAE_train']}\")\n", + " print(f\"MAE (test): {class_models[model_name]['MAE_test']}\")\n", + " print(f\"R2 (train): {class_models[model_name]['R2_train']}\")\n", + " print(f\"R2 (test): {class_models[model_name]['R2_test']}\")\n", + " print(f\"STD (train): {class_models[model_name]['STD_train']}\")\n", + " print(f\"STD (test): {class_models[model_name]['STD_test']}\")\n", + " print(\"-\" * 40) # Разделитель для разных моделей" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Пример использования обученной модели (конвейера регрессии) для предсказания" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: RandomForest\n", + "MSE (train): 8419071.042944524\n", + "MSE (test): 1708514.4521493362\n", + "MAE (train): 11.216263715771229\n", + "MAE (test): 14.19769129925748\n", + "R2 (train): 0.9638189510993855\n", + "R2 (test): 0.9949568688066726\n", + "----------------------------------------\n", + "Прогнозируемая цена: 5.77\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.model_selection import train_test_split\n", + "from sklearn.ensemble import RandomForestRegressor \n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.preprocessing import OneHotEncoder\n", + "\n", + "# 1. Загрузка данных\n", + "data = pd.read_csv(\"..//static//csv//jio_mart_items.csv\") \n", + "\n", + "# 2. Подготовка данных для прогноза\n", + "average_price = data['price'].mean()\n", + "data['above_average_price'] = (data['price'] > average_price).astype(int) \n", + "\n", + "# Удаляем строки с пропущенными значениями в столбце 'price'\n", + "data = data.dropna(subset=['price'])\n", + "\n", + "# Предикторы и целевая переменная\n", + "X = data.drop('above_average_price', axis=1) # Удаляем только 'above_average_price'\n", + "y = data['price']\n", + "\n", + "# 3. Инициализация модели и пайплайна\n", + "class_models = {\n", + " \"RandomForest\": {\n", + " \"model\": RandomForestRegressor(n_estimators=100, random_state=42),\n", + " }\n", + "}\n", + "\n", + "# Предобработка признаков\n", + "num_columns = ['price']\n", + "cat_columns = ['category', 'sub_category']\n", + "\n", + "# Проверка наличия столбцов перед предобработкой\n", + "required_columns = set(num_columns + cat_columns)\n", + "missing_columns = required_columns - set(X.columns)\n", + "if missing_columns:\n", + " raise KeyError(f\"Missing columns: {missing_columns}\")\n", + "\n", + "# Преобразование числовых признаков\n", + "num_transformer = Pipeline(steps=[\n", + " ('imputer', SimpleImputer(strategy='median')),\n", + " ('scaler', StandardScaler())\n", + "])\n", + "\n", + "# Преобразование категориальных признаков\n", + "cat_transformer = Pipeline(steps=[\n", + " ('imputer', SimpleImputer(strategy='constant', fill_value='unknown')),\n", + " ('onehot', OneHotEncoder(handle_unknown='ignore', sparse_output=False, drop=\"first\"))\n", + "])\n", + "\n", + "# Создание конвейера предобработки\n", + "preprocessor = ColumnTransformer(\n", + " transformers=[\n", + " ('num', num_transformer, num_columns),\n", + " ('cat', cat_transformer, cat_columns)\n", + " ])\n", + "\n", + "# Создание конвейера модели\n", + "pipeline_end = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " # ('model', model) # Модель добавляется в цикле\n", + "])\n", + "\n", + "results = []\n", + "\n", + "# 4. Обучение модели и оценка\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + "\n", + " model = class_models[model_name][\"model\"]\n", + " model_pipeline = Pipeline(steps=[\n", + " ('preprocessor', preprocessor),\n", + " ('model', model)\n", + " ])\n", + "\n", + " # Разделение данных\n", + " X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", + "\n", + " # Обучение модели\n", + " model_pipeline.fit(X_train, y_train)\n", + "\n", + " # Предсказание\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_predict = model_pipeline.predict(X_test)\n", + "\n", + " # Сохранение результатов\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " # Вычисление метрик\n", + " class_models[model_name][\"MSE_train\"] = metrics.mean_squared_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MSE_test\"] = metrics.mean_squared_error(y_test, y_test_predict)\n", + " class_models[model_name][\"MAE_train\"] = metrics.mean_absolute_error(y_train, y_train_predict)\n", + " class_models[model_name][\"MAE_test\"] = metrics.mean_absolute_error(y_test, y_test_predict)\n", + " class_models[model_name][\"R2_train\"] = metrics.r2_score(y_train, y_train_predict)\n", + " class_models[model_name][\"R2_test\"] = metrics.r2_score(y_test, y_test_predict)\n", + "\n", + " # Вывод результатов\n", + " print(f\"MSE (train): {class_models[model_name]['MSE_train']}\")\n", + " print(f\"MSE (test): {class_models[model_name]['MSE_test']}\")\n", + " print(f\"MAE (train): {class_models[model_name]['MAE_train']}\")\n", + " print(f\"MAE (test): {class_models[model_name]['MAE_test']}\")\n", + " print(f\"R2 (train): {class_models[model_name]['R2_train']}\")\n", + " print(f\"R2 (test): {class_models[model_name]['R2_test']}\")\n", + " print(\"-\" * 40)\n", + "\n", + "# Прогнозирование цены для нового товара\n", + "new_item_data = pd.DataFrame({\n", + " 'category': ['Electronics'],\n", + " 'sub_category': ['Smartphones'], \n", + " 'price': [0] # Добавляем столбец 'price' с нулевым значением\n", + "})\n", + "\n", + "predicted_price = model_pipeline.predict(new_item_data)\n", + "print(f\"Прогнозируемая цена: {predicted_price[0]}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Подбор гиперпараметров методом поиска по сетке" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Fitting 3 folds for each of 36 candidates, totalling 108 fits\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 12.4s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 12.6s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 12.6s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time= 23.3s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time= 23.2s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time= 23.2s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 10.8s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 11.2s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 11.2s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time= 44.9s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 21.9s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 22.0s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time= 45.4s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 22.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=50; total time= 12.0s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=50; total time= 12.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=50; total time= 12.3s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time= 46.4s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=100; total time= 23.9s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=200; total time= 46.0s\n", + "[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time= 7.5s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=200; total time= 47.1s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=100; total time= 24.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=100; total time= 24.4s\n", + "[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time= 8.1s\n", + "[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time= 7.9s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=200; total time= 48.2s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time= 15.7s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time= 15.9s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time= 15.2s\n", + "[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time= 7.6s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=200; total time= 48.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=200; total time= 48.3s\n", + "[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time= 7.9s\n", + "[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time= 8.1s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=200; total time= 50.2s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time= 32.1s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time= 16.6s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time= 17.1s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time= 16.8s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time= 32.8s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time= 32.8s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time= 8.2s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time= 8.1s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time= 8.2s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time= 15.4s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time= 31.9s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time= 15.8s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time= 15.8s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time= 32.7s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time= 32.4s\n", + "[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time= 11.0s\n", + "[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time= 11.1s\n", + "[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time= 11.2s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time= 31.4s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time= 32.7s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time= 32.1s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time= 21.6s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time= 22.2s\n", + "[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time= 11.7s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time= 22.2s\n", + "[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time= 12.2s\n", + "[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time= 12.4s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time= 24.3s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time= 23.7s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time= 24.7s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time= 46.2s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time= 46.6s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time= 48.0s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time= 11.8s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time= 11.9s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time= 11.8s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time= 24.2s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time= 47.3s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time= 49.4s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time= 25.3s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time= 25.8s\n", + "[CV] END .max_depth=30, min_samples_split=2, n_estimators=50; total time= 14.1s\n", + "[CV] END .max_depth=30, min_samples_split=2, n_estimators=50; total time= 14.5s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time= 50.1s\n", + "[CV] END .max_depth=30, min_samples_split=2, n_estimators=50; total time= 14.1s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time= 50.5s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time= 50.9s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=100; total time= 28.5s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=100; total time= 30.1s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=100; total time= 29.8s\n", + "[CV] END .max_depth=30, min_samples_split=5, n_estimators=50; total time= 14.6s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time= 52.1s\n", + "[CV] END .max_depth=30, min_samples_split=5, n_estimators=50; total time= 15.1s\n", + "[CV] END .max_depth=30, min_samples_split=5, n_estimators=50; total time= 14.4s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=200; total time= 57.7s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=100; total time= 28.2s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=200; total time= 57.7s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=100; total time= 29.2s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=100; total time= 29.3s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=50; total time= 15.3s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=50; total time= 15.5s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=200; total time= 59.2s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=50; total time= 15.3s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=100; total time= 29.4s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=200; total time= 58.3s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=200; total time= 59.1s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=100; total time= 29.0s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=100; total time= 29.0s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=200; total time= 53.8s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=200; total time= 44.9s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=200; total time= 45.4s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=200; total time= 35.2s\n", + "Лучшие параметры: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 100}\n", + "Лучший результат (MSE): 206320633.70862785\n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.pipeline import Pipeline\n", + "from sklearn.model_selection import train_test_split, GridSearchCV\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "\n", + "# Удаление строк с пропущенными значениями (если необходимо)\n", + "df = df.dropna()\n", + "\n", + "# Создание целевой переменной (price)\n", + "target = df['price']\n", + "\n", + "# Удаление целевой переменной из исходных данных\n", + "features = df.drop(columns=['price'])\n", + "\n", + "# Удаление столбцов, которые не будут использоваться (например, href и items)\n", + "features = features.drop(columns=['href', 'items'])\n", + "\n", + "# Определение столбцов для обработки\n", + "num_columns = features.select_dtypes(include=['number']).columns\n", + "cat_columns = features.select_dtypes(include=['object']).columns\n", + "\n", + "# Препроцессинг числовых столбцов\n", + "num_imputer = SimpleImputer(strategy=\"median\") # Используем медиану для заполнения пропущенных значений в числовых столбцах\n", + "num_scaler = StandardScaler()\n", + "preprocessing_num = Pipeline(\n", + " [\n", + " (\"imputer\", num_imputer),\n", + " (\"scaler\", num_scaler),\n", + " ]\n", + ")\n", + "\n", + "# Препроцессинг категориальных столбцов\n", + "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\") # Используем 'unknown' для заполнения пропущенных значений в категориальных столбцах\n", + "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n", + "preprocessing_cat = Pipeline(\n", + " [\n", + " (\"imputer\", cat_imputer),\n", + " (\"encoder\", cat_encoder),\n", + " ]\n", + ")\n", + "\n", + "# Объединение препроцессинга\n", + "features_preprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"preprocessing_num\", preprocessing_num, num_columns),\n", + " (\"preprocessing_cat\", preprocessing_cat, cat_columns),\n", + " ],\n", + " remainder=\"passthrough\"\n", + ")\n", + "\n", + "# Создание финального пайплайна\n", + "pipeline_end = Pipeline(\n", + " [\n", + " (\"features_preprocessing\", features_preprocessing),\n", + " ]\n", + ")\n", + "\n", + "# Разделение данных на обучающую и тестовую выборки\n", + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)\n", + "\n", + "# Применение пайплайна к данным\n", + "X_train_processed = pipeline_end.fit_transform(X_train)\n", + "X_test_processed = pipeline_end.transform(X_test)\n", + "\n", + "# 2. Создание и настройка модели случайного леса\n", + "model = RandomForestRegressor()\n", + "\n", + "# Установка параметров для поиска по сетке\n", + "param_grid = {\n", + " 'n_estimators': [50, 100, 200], # Количество деревьев\n", + " 'max_depth': [None, 10, 20, 30], # Максимальная глубина дерева\n", + " 'min_samples_split': [2, 5, 10] # Минимальное количество образцов для разбиения узла\n", + "}\n", + "\n", + "# 3. Подбор гиперпараметров с помощью Grid Search\n", + "grid_search = GridSearchCV(estimator=model, param_grid=param_grid,\n", + " scoring='neg_mean_squared_error', cv=3, n_jobs=-1, verbose=2)\n", + "\n", + "# Обучение модели на тренировочных данных\n", + "grid_search.fit(X_train_processed, y_train)\n", + "\n", + "# 4. Результаты подбора гиперпараметров\n", + "print(\"Лучшие параметры:\", grid_search.best_params_)\n", + "print(\"Лучший результат (MSE):\", -grid_search.best_score_) # Меняем знак, так как берем отрицательное значение среднеквадратичной ошибки" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обучение модели с новыми гиперпараметрами и сравнение новых и старых данных" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " category sub_category \\\n", + "0 Groceries Fruits & Vegetables \n", + "1 Groceries Fruits & Vegetables \n", + "2 Groceries Fruits & Vegetables \n", + "3 Groceries Fruits & Vegetables \n", + "4 Groceries Fruits & Vegetables \n", + "\n", + " href \\\n", + "0 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "1 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "2 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "3 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "4 https://www.jiomart.com/c/groceries/fruits-veg... \n", + "\n", + " items price \n", + "0 Fresh Dates (Pack) (Approx 450 g - 500 g) 109.0 \n", + "1 Tender Coconut Cling Wrapped (1 pc) (Approx 90... 49.0 \n", + "2 Mosambi 1 kg 69.0 \n", + "3 Orange Imported 1 kg 125.0 \n", + "4 Banana Robusta 6 pcs (Box) (Approx 800 g - 110... 44.0 \n", + "Fitting 3 folds for each of 36 candidates, totalling 108 fits\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=100; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END .max_depth=10, min_samples_split=2, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END .max_depth=10, min_samples_split=5, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=None, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=20, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=10, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END .max_depth=20, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=10, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END .max_depth=30, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=30, min_samples_split=2, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END .max_depth=30, min_samples_split=5, n_estimators=50; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=100; total time= 0.1s\n", + "[CV] END .max_depth=30, min_samples_split=2, n_estimators=50; total time= 0.0s\n", + "[CV] END .max_depth=30, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=20, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=2, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=200; total time= 0.2s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=100; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=200; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=200; total time= 0.1s\n", + "[CV] END max_depth=30, min_samples_split=10, n_estimators=200; total time= 0.2s\n", + "[CV] END .max_depth=30, min_samples_split=5, n_estimators=50; total time= 0.0s\n", + "[CV] END max_depth=30, min_samples_split=5, n_estimators=100; total time= 0.0s\n", + "Старые параметры: {'max_depth': 30, 'min_samples_split': 5, 'n_estimators': 50}\n", + "Лучший результат (MSE) на старых параметрах: 4352.70053925649\n", + "\n", + "Новые параметры: {'max_depth': 10, 'min_samples_split': 10, 'n_estimators': 200}\n", + "Лучший результат (MSE) на новых параметрах: 4862.953305666657\n", + "Среднеквадратическая ошибка (MSE) на тестовых данных: 3485.772883899025\n", + "Корень среднеквадратичной ошибки (RMSE) на тестовых данных: 59.04043431326556\n" + ] + }, + { + "data": { + "image/png": "", + "text/plain": [ + "
" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "from sklearn import metrics\n", + "from sklearn.ensemble import RandomForestRegressor\n", + "from sklearn.model_selection import train_test_split, GridSearchCV\n", + "from sklearn.preprocessing import StandardScaler, OneHotEncoder\n", + "from sklearn.compose import ColumnTransformer\n", + "from sklearn.impute import SimpleImputer\n", + "from sklearn.pipeline import Pipeline\n", + "import matplotlib.pyplot as plt\n", + "\n", + "# Загрузка датасета\n", + "df = pd.read_csv(\"..//static//csv//jio_mart_items.csv\").head(100)\n", + "\n", + "# Вывод первых строк для проверки структуры\n", + "print(df.head())\n", + "\n", + "# Целевая переменная\n", + "target = df['price']\n", + "\n", + "# Удаление целевой переменной из признаков\n", + "features = df.drop(columns=['price', 'href'])\n", + "\n", + "# Определение столбцов для обработки\n", + "num_columns = features.select_dtypes(include=['number']).columns\n", + "cat_columns = features.select_dtypes(include=['object']).columns\n", + "\n", + "# Препроцессинг числовых столбцов\n", + "num_imputer = SimpleImputer(strategy=\"median\")\n", + "num_scaler = StandardScaler()\n", + "preprocessing_num = Pipeline([\n", + " (\"imputer\", num_imputer),\n", + " (\"scaler\", num_scaler),\n", + "])\n", + "\n", + "# Препроцессинг категориальных столбцов\n", + "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n", + "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n", + "preprocessing_cat = Pipeline([\n", + " (\"imputer\", cat_imputer),\n", + " (\"encoder\", cat_encoder),\n", + "])\n", + "\n", + "# Объединение препроцессинга\n", + "features_preprocessing = ColumnTransformer(\n", + " verbose_feature_names_out=False,\n", + " transformers=[\n", + " (\"preprocessing_num\", preprocessing_num, num_columns),\n", + " (\"preprocessing_cat\", preprocessing_cat, cat_columns),\n", + " ],\n", + " remainder=\"passthrough\"\n", + ")\n", + "\n", + "# Создание финального пайплайна\n", + "pipeline_end = Pipeline([\n", + " (\"features_preprocessing\", features_preprocessing),\n", + "])\n", + "\n", + "# Разделение данных на обучающую и тестовую выборки\n", + "X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)\n", + "\n", + "# Применение пайплайна к данным\n", + "X_train_processed = pipeline_end.fit_transform(X_train)\n", + "X_test_processed = pipeline_end.transform(X_test)\n", + "\n", + "# 1. Настройка параметров для старых значений\n", + "old_param_grid = {\n", + " 'n_estimators': [50, 100, 200],\n", + " 'max_depth': [None, 10, 20, 30],\n", + " 'min_samples_split': [2, 5, 10]\n", + "}\n", + "\n", + "# Подбор гиперпараметров с помощью Grid Search для старых параметров\n", + "old_grid_search = GridSearchCV(estimator=RandomForestRegressor(),\n", + " param_grid=old_param_grid,\n", + " scoring='neg_mean_squared_error', cv=3, n_jobs=-1, verbose=2)\n", + "\n", + "# Обучение модели на тренировочных данных\n", + "old_grid_search.fit(X_train_processed, y_train)\n", + "\n", + "# Результаты подбора для старых параметров\n", + "old_best_params = old_grid_search.best_params_\n", + "old_best_mse = -old_grid_search.best_score_\n", + "\n", + "# 2. Настройка параметров для новых значений\n", + "new_param_grid = {\n", + " 'n_estimators': [200],\n", + " 'max_depth': [10],\n", + " 'min_samples_split': [10]\n", + "}\n", + "\n", + "# Подбор гиперпараметров с помощью Grid Search для новых параметров\n", + "new_grid_search = GridSearchCV(estimator=RandomForestRegressor(),\n", + " param_grid=new_param_grid,\n", + " scoring='neg_mean_squared_error', cv=2)\n", + "\n", + "# Обучение модели на тренировочных данных\n", + "new_grid_search.fit(X_train_processed, y_train)\n", + "\n", + "# Результаты подбора для новых параметров\n", + "new_best_params = new_grid_search.best_params_\n", + "new_best_mse = -new_grid_search.best_score_\n", + "\n", + "# 5. Обучение модели с лучшими параметрами для новых значений\n", + "model_best = RandomForestRegressor(**new_best_params)\n", + "model_best.fit(X_train_processed, y_train)\n", + "\n", + "# Прогнозирование на тестовой выборке\n", + "y_pred = model_best.predict(X_test_processed)\n", + "\n", + "# Оценка производительности модели\n", + "mse = metrics.mean_squared_error(y_test, y_pred)\n", + "rmse = np.sqrt(mse)\n", + "\n", + "# Вывод результатов\n", + "print(\"Старые параметры:\", old_best_params)\n", + "print(\"Лучший результат (MSE) на старых параметрах:\", old_best_mse)\n", + "print(\"\\nНовые параметры:\", new_best_params)\n", + "print(\"Лучший результат (MSE) на новых параметрах:\", new_best_mse)\n", + "print(\"Среднеквадратическая ошибка (MSE) на тестовых данных:\", mse)\n", + "print(\"Корень среднеквадратичной ошибки (RMSE) на тестовых данных:\", rmse)\n", + "\n", + "# Обучение модели с лучшими параметрами для старых значений\n", + "model_old = RandomForestRegressor(**old_best_params)\n", + "model_old.fit(X_train_processed, y_train)\n", + "\n", + "# Прогнозирование на тестовой выборке для старых параметров\n", + "y_pred_old = model_old.predict(X_test_processed)\n", + "\n", + "# Визуализация ошибок\n", + "plt.figure(figsize=(10, 5))\n", + "plt.plot(y_test.values, label='Реальные значения', marker='o', linestyle='-', color='black')\n", + "plt.plot(y_pred_old, label='Предсказанные значения (старые параметры)', marker='x', linestyle='--', color='blue')\n", + "plt.plot(y_pred, label='Предсказанные значения (новые параметры)', marker='s', linestyle='--', color='orange')\n", + "plt.xlabel('Объекты')\n", + "plt.ylabel('Цена')\n", + "plt.title('Сравнение реальных и предсказанных значений')\n", + "plt.legend()\n", + "plt.show()\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}