631 lines
107 KiB
Plaintext
Raw Normal View History

2024-12-20 14:14:59 +04:00
{
"cells": [
{
"cell_type": "code",
2024-12-21 11:21:05 +04:00
"execution_count": 1,
2024-12-20 14:14:59 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
2024-12-21 02:12:15 +04:00
"<class 'pandas.core.frame.DataFrame'>\n",
2024-12-20 14:14:59 +04:00
"RangeIndex: 162313 entries, 0 to 162312\n",
"Data columns (total 5 columns):\n",
2024-12-21 02:12:15 +04:00
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 category 162313 non-null object \n",
" 1 sub_category 162313 non-null object \n",
" 2 href 162313 non-null object \n",
" 3 items 162280 non-null object \n",
2024-12-20 14:14:59 +04:00
" 4 price 162282 non-null float64\n",
"dtypes: float64(1), object(4)\n",
2024-12-21 02:12:15 +04:00
"memory usage: 6.2+ MB\n"
2024-12-20 14:14:59 +04:00
]
}
],
"source": [
2024-12-21 02:12:15 +04:00
"import pandas as pd\n",
"import seaborn as sns\n",
2024-12-20 14:14:59 +04:00
"import matplotlib.pyplot as plt\n",
"from sklearn.preprocessing import LabelEncoder\n",
2024-12-21 02:12:15 +04:00
"from sklearn import metrics\n",
"from imblearn.over_sampling import RandomOverSampler\n",
"from imblearn.under_sampling import RandomUnderSampler\n",
"from sklearn.preprocessing import StandardScaler, OneHotEncoder\n",
"from sklearn.metrics import ConfusionMatrixDisplay\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.impute import SimpleImputer\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier\n",
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
"from sklearn.metrics import (\n",
" precision_score, recall_score, accuracy_score, roc_auc_score, f1_score,\n",
" matthews_corrcoef, cohen_kappa_score, confusion_matrix\n",
")\n",
"from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error\n",
"import numpy as np\n",
"import featuretools as ft\n",
"from sklearn.metrics import accuracy_score, classification_report\n",
2024-12-20 14:14:59 +04:00
"\n",
2024-12-21 02:12:15 +04:00
"# Random over-sampling helper built on imbalanced-learn's RandomOverSampler.\n",
"def apply_oversampling(X, y):\n",
"    \"\"\"Resample (X, y) with RandomOverSampler(random_state=42) and return the resampled pair.\"\"\"\n",
"    sampler = RandomOverSampler(random_state=42)\n",
"    return sampler.fit_resample(X, y)\n",
"\n",
"# Random under-sampling helper built on imbalanced-learn's RandomUnderSampler.\n",
"def apply_undersampling(X, y):\n",
"    \"\"\"Resample (X, y) with RandomUnderSampler(random_state=42) and return the resampled pair.\"\"\"\n",
"    sampler = RandomUnderSampler(random_state=42)\n",
"    return sampler.fit_resample(X, y)\n",
2024-12-20 14:14:59 +04:00
"\n",
2024-12-21 02:12:15 +04:00
"def split_stratified_into_train_val_test(\n",
"    df_input,\n",
"    stratify_colname=\"y\",\n",
"    frac_train=0.6,\n",
"    frac_val=0.15,\n",
"    frac_test=0.25,\n",
"    random_state=None,\n",
"):\n",
"    \"\"\"\n",
"    Split a dataframe into stratified train, validation and test subsets.\n",
"\n",
"    The split is done with two consecutive train_test_split() calls, both\n",
"    stratified on ``stratify_colname``: the first separates the training\n",
"    part, the second divides the remainder into validation and test parts.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    df_input : pandas.DataFrame\n",
"        Dataframe to split; all of its columns are kept in every subset.\n",
"    stratify_colname : str\n",
"        Name of the column used for stratification (usually the label).\n",
"    frac_train : float\n",
"    frac_val : float\n",
"    frac_test : float\n",
"        Fractions of the rows assigned to train, val and test. They must\n",
"        sum to 1.0 (checked with a floating-point tolerance).\n",
"    random_state : int, None, or RandomState instance\n",
"        Passed unchanged to both train_test_split() calls.\n",
"\n",
"    Returns\n",
"    -------\n",
"    df_train, df_val, df_test :\n",
"        Dataframes containing the three splits.\n",
"\n",
"    Raises\n",
"    ------\n",
"    ValueError\n",
"        If the fractions do not sum to 1.0 or ``stratify_colname`` is not\n",
"        a column of ``df_input``.\n",
"    \"\"\"\n",
"\n",
"    fractions = (frac_train, frac_val, frac_test)\n",
"    # Compare with a tolerance: sums such as 0.7 + 0.2 + 0.1 evaluate to\n",
"    # 0.9999999999999999, which an exact `!= 1.0` test would reject.\n",
"    if not np.isclose(sum(fractions), 1.0):\n",
"        raise ValueError(\n",
"            \"fractions %f, %f, %f do not add up to 1.0\" % fractions\n",
"        )\n",
"\n",
"    if stratify_colname not in df_input.columns:\n",
"        raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname,))\n",
"\n",
"    X = df_input  # Contains all columns.\n",
"    y = df_input[[stratify_colname]]  # Dataframe of just the stratification column.\n",
"\n",
"    # First split: training part versus everything else.\n",
"    df_train, df_temp, y_train, y_temp = train_test_split(\n",
"        X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
"    )\n",
"\n",
"    # Second split: divide the remainder into val and test. The test share\n",
"    # is expressed relative to the remainder, not to the whole dataframe.\n",
"    relative_frac_test = frac_test / (frac_val + frac_test)\n",
"    df_val, df_test, y_val, y_test = train_test_split(\n",
"        df_temp,\n",
"        y_temp,\n",
"        stratify=y_temp,\n",
"        test_size=relative_frac_test,\n",
"        random_state=random_state,\n",
"    )\n",
"\n",
"    # Sanity check: no row was lost or duplicated by the two splits.\n",
"    assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
"\n",
"    return df_train, df_val, df_test\n",
"\n",
"\n",
2024-12-21 11:21:05 +04:00
"# Load the JioMart item catalogue and print its schema before sampling.\n",
"df = pd.read_csv('../data/jio_mart_items.csv')\n",
"df.info()\n",
"# Continue with a reproducible 10,000-row random sample (presumably to keep model fitting fast).\n",
"df = df.sample(random_state=42, n=10000)"
]
},
{
"cell_type": "code",
2024-12-21 11:21:05 +04:00
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['Dining' 'Toys, Games & Fitness' 'Fragrances' 'Bags & Travel Luggage'\n",
" 'Girls' 'Home Decor' 'Boys' 'Stationery' 'Beverages' 'Staples' 'Men'\n",
" 'Mobiles & Tablets' 'Personal Care' 'Dairy & Bakery' 'Mom & Baby Care'\n",
" 'Snacks & Branded Foods' 'Women' 'Books' 'Auto Care' 'Electrical'\n",
" 'Furnishing' 'Accessories' 'Pets' 'Home Care' 'Mops, Brushes & Scrubs'\n",
" 'Furniture' 'Computers' 'Kitchen Appliances' 'Home Appliances' 'Cameras'\n",
" 'Make-Up' 'Garden & Outdoor' 'Disposables' 'Carpentry & work accessories'\n",
" 'Mom & Baby' 'Kitchenware' 'Power & Hand Tools' 'Pooja Needs'\n",
" 'Bathroom & Laundry Accessories' 'Office Products' 'TV & Speaker'\n",
" 'Personal Care & Grooming' 'Hair' 'Skin Care'\n",
" 'Paint, Wall Treatments & Supplies' 'Industrial & Scientific Supplies'\n",
" 'Infants' 'Kitchen & Bath Fixtures' 'Home Safety & Automation'\n",
" 'Fine Jewellery' 'Fruits & Vegetables' 'Apparel' 'Premium Fruits'\n",
" 'Phones' 'Bathroom & Laundry' 'Junior Boys' 'Tools & Appliances'\n",
" 'Smart Devices' \"Men's Grooming\" 'Gaming' 'Health Care Devices'\n",
" 'Handloom & Handicraft' 'Hardware & Plumbing' 'Wellness' 'Treatments']\n"
]
}
],
"source": [
"# Show the distinct sub-category labels present in the sample.\n",
"unique_sub_categories = df['sub_category'].unique()\n",
"print(unique_sub_categories)"
2024-12-20 14:14:59 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-12-21 02:12:15 +04:00
"Как бизнес-цели выделим следующие 2 варианта:\n",
" 1) Регрессия - предсказание цены по категории (для аналитических систем или улучшения алгоритмов ценообразования)\n",
" 2) Классификация - определение категории продукта по его подкатегории (для логистических или аналитических систем)\n",
2024-12-21 02:12:15 +04:00
" \n",
"Однако данный датасет весьма плохо подходит для подобных задач."
2024-12-20 14:14:59 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
2024-12-21 02:12:15 +04:00
"Выполним подготовку данных"
2024-12-20 14:14:59 +04:00
]
},
{
"cell_type": "code",
2024-12-21 11:21:05 +04:00
"execution_count": 3,
2024-12-20 14:14:59 +04:00
"metadata": {},
"outputs": [],
2024-12-20 14:14:59 +04:00
"source": [
"# Keep only complete rows; `data` is the working copy used by the cells below.\n",
"df = df.dropna()\n",
"data = df.copy()\n",
"\n",
"# Remove categories represented by fewer than 100 rows.\n",
"category_sizes = data[\"category\"].value_counts()\n",
"rare_categories = category_sizes.loc[category_sizes < 100].index\n",
"data = data.loc[~data[\"category\"].isin(rare_categories)]\n",
"\n",
"# One-hot encode both categorical columns (drop_first=True omits the first level of each).\n",
"data1 = pd.get_dummies(data, columns=['category', 'sub_category'], drop_first=True)"
2024-12-20 14:14:59 +04:00
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Определить достижимый уровень качества модели для каждой задачи. На основе имеющихся данных уровень качества моделей регрессии будет низким, поскольку цена слабо коррелирует с категорией (кроме некоторых исключений)."
2024-12-20 14:14:59 +04:00
]
},
{
2024-12-21 02:12:15 +04:00
"cell_type": "markdown",
2024-12-20 14:14:59 +04:00
"metadata": {},
"source": [
"Построим конвейер."
2024-12-20 14:14:59 +04:00
]
},
{
"cell_type": "code",
2024-12-21 11:21:05 +04:00
"execution_count": 4,
2024-12-20 14:14:59 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Index(['href', 'items', 'price', 'category_Electronics', 'category_Fashion',\n",
" 'category_Groceries', 'category_Home & Kitchen', 'sub_category_Apparel',\n",
" 'sub_category_Auto Care', 'sub_category_Bags & Travel Luggage',\n",
" 'sub_category_Bathroom & Laundry',\n",
" 'sub_category_Bathroom & Laundry Accessories', 'sub_category_Beverages',\n",
" 'sub_category_Books', 'sub_category_Boys', 'sub_category_Cameras',\n",
" 'sub_category_Carpentry & work accessories', 'sub_category_Computers',\n",
" 'sub_category_Dairy & Bakery', 'sub_category_Dining',\n",
" 'sub_category_Disposables', 'sub_category_Electrical',\n",
" 'sub_category_Fragrances', 'sub_category_Fruits & Vegetables',\n",
" 'sub_category_Furnishing', 'sub_category_Furniture',\n",
" 'sub_category_Gaming', 'sub_category_Garden & Outdoor',\n",
" 'sub_category_Girls', 'sub_category_Hair',\n",
" 'sub_category_Handloom & Handicraft',\n",
" 'sub_category_Hardware & Plumbing', 'sub_category_Health Care Devices',\n",
" 'sub_category_Home Appliances', 'sub_category_Home Care',\n",
" 'sub_category_Home Decor', 'sub_category_Home Safety & Automation',\n",
" 'sub_category_Industrial & Scientific Supplies', 'sub_category_Infants',\n",
" 'sub_category_Junior Boys', 'sub_category_Kitchen & Bath Fixtures',\n",
" 'sub_category_Kitchen Appliances', 'sub_category_Kitchenware',\n",
" 'sub_category_Make-Up', 'sub_category_Men',\n",
" 'sub_category_Men's Grooming', 'sub_category_Mobiles & Tablets',\n",
" 'sub_category_Mom & Baby', 'sub_category_Mom & Baby Care',\n",
" 'sub_category_Mops, Brushes & Scrubs', 'sub_category_Office Products',\n",
" 'sub_category_Paint, Wall Treatments & Supplies',\n",
" 'sub_category_Personal Care', 'sub_category_Personal Care & Grooming',\n",
" 'sub_category_Pets', 'sub_category_Phones', 'sub_category_Pooja Needs',\n",
" 'sub_category_Power & Hand Tools', 'sub_category_Premium Fruits',\n",
" 'sub_category_Skin Care', 'sub_category_Smart Devices',\n",
" 'sub_category_Snacks & Branded Foods', 'sub_category_Staples',\n",
" 'sub_category_Stationery', 'sub_category_TV & Speaker',\n",
" 'sub_category_Tools & Appliances', 'sub_category_Toys, Games & Fitness',\n",
" 'sub_category_Wellness', 'sub_category_Women'],\n",
" dtype='object')\n",
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9995 entries, 52893 to 146053\n",
"Data columns (total 69 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 href 9995 non-null object \n",
" 1 items 9995 non-null object \n",
" 2 price 9995 non-null float64\n",
" 3 category_Electronics 9995 non-null bool \n",
" 4 category_Fashion 9995 non-null bool \n",
" 5 category_Groceries 9995 non-null bool \n",
" 6 category_Home & Kitchen 9995 non-null bool \n",
" 7 sub_category_Apparel 9995 non-null bool \n",
" 8 sub_category_Auto Care 9995 non-null bool \n",
" 9 sub_category_Bags & Travel Luggage 9995 non-null bool \n",
" 10 sub_category_Bathroom & Laundry 9995 non-null bool \n",
" 11 sub_category_Bathroom & Laundry Accessories 9995 non-null bool \n",
" 12 sub_category_Beverages 9995 non-null bool \n",
" 13 sub_category_Books 9995 non-null bool \n",
" 14 sub_category_Boys 9995 non-null bool \n",
" 15 sub_category_Cameras 9995 non-null bool \n",
" 16 sub_category_Carpentry & work accessories 9995 non-null bool \n",
" 17 sub_category_Computers 9995 non-null bool \n",
" 18 sub_category_Dairy & Bakery 9995 non-null bool \n",
" 19 sub_category_Dining 9995 non-null bool \n",
" 20 sub_category_Disposables 9995 non-null bool \n",
" 21 sub_category_Electrical 9995 non-null bool \n",
" 22 sub_category_Fragrances 9995 non-null bool \n",
" 23 sub_category_Fruits & Vegetables 9995 non-null bool \n",
" 24 sub_category_Furnishing 9995 non-null bool \n",
" 25 sub_category_Furniture 9995 non-null bool \n",
" 26 sub_category_Gaming 9995 non-null bool \n",
" 27 sub_category_Garden & Outdoor 9995 non-null bool \n",
" 28 sub_category_Girls 9995 non-null bool \n",
" 29 sub_category_Hair 9995 non-null bool \n",
" 30 sub_category_Handloom & Handicraft 9995 non-null bool \n",
" 31 sub_category_Hardware & Plumbing 9995 non-null bool \n",
" 32 sub_category_Health Care Devices 9995 non-null bool \n",
" 33 sub_category_Home Appliances 9995 non-null bool \n",
" 34 sub_category_Home Care 9995 non-null bool \n",
" 35 sub_category_Home Decor 9995 non-null bool \n",
" 36 sub_category_Home Safety & Automation 9995 non-null bool \n",
" 37 sub_category_Industrial & Scientific Supplies 9995 non-null bool \n",
" 38 sub_category_Infants 9995 non-null bool \n",
" 39 sub_category_Junior Boys 9995 non-null bool \n",
" 40 sub_category_Kitchen & Bath Fixtures 9995 non-null bool \n",
" 41 sub_category_Kitchen Appliances 9995 non-null bool \n",
" 42 sub_category_Kitchenware 9995 non-null bool \n",
" 43 sub_category_Make-Up 9995 non-null bool \n",
" 44 sub_category_Men 9995 non-null bool \n",
" 45 sub_category_Men's Grooming 9995 non-null bool \n",
" 46 sub_category_Mobiles & Tablets 9995 non-null bool \n",
" 47 sub_category_Mom & Baby 9995 non-null bool \n",
" 48 sub_category_Mom & Baby Care 9995 non-null bool \n",
" 49 sub_category_Mops, Brushes & Scrubs 9995 non-null bool \n",
" 50 sub_category_Office Products 9995 non-null bool \n",
" 51 sub_category_Paint, Wall Treatments & Supplies 9995 non-null bool \n",
" 52 sub_category_Personal Care 9995 non-null bool \n",
" 53 sub_category_Personal Care & Grooming 9995 non-null bool \n",
" 54 sub_category_Pets 9995 non-null bool \n",
" 55 sub_category_Phones 9995 non-null bool \n",
" 56 sub_category_Pooja Needs 9995 non-null bool \n",
" 57 sub_category_Power & Hand Tools 9995 non-null bool \n",
" 58 sub_category_Premium Fruits 9995 non-null bool \n",
" 59 sub_category_Skin Care 9995 non-null bool \n",
" 60 sub_category_Smart Devices 9995 non-null bool \n",
" 61 sub_category_Snacks & Branded Foods 9995 non-null bool \n",
" 62 sub_category_Staples 9995 non-null bool \n",
" 63 sub_category_Stationery 9995 non-null bool \n",
" 64 sub_category_TV & Speaker 9995 non-null bool \n",
" 65 sub_category_Tools & Appliances 9995 non-null bool \n",
" 66 sub_category_Toys, Games & Fitness 9995 non-null bool \n",
" 67 sub_category_Wellness 9995 non-null bool \n",
" 68 sub_category_Women 9995 non-null bool \n",
"dtypes: bool(66), float64(1), object(2)\n",
"memory usage: 956.6+ KB\n"
]
}
],
2024-12-20 14:14:59 +04:00
"source": [
"# Inspect the one-hot encoded frame: column names first, then dtypes and non-null counts.\n",
"encoded_columns = data1.columns\n",
"print(encoded_columns)\n",
"data1.info()"
2024-12-20 14:14:59 +04:00
]
},
{
"cell_type": "code",
"execution_count": 83,
2024-12-20 14:14:59 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Best parameters for Linear Regression: {}\n",
"Best parameters for Random Forest Regressor: {'model__max_depth': None, 'model__n_estimators': 300}\n",
"Best parameters for Gradient Boosting Regressor: {'model__learning_rate': 0.01, 'model__max_depth': 7, 'model__n_estimators': 300}\n",
"Model: Linear Regression\n",
"Model: Random Forest Regressor\n",
"Model: Gradient Boosting Regressor\n"
2024-12-20 14:14:59 +04:00
]
}
],
"source": [
"# Regression task: predict `price` from the one-hot encoded category / sub-category columns.\n",
"X_reg = data1.drop(['href', 'items', 'price'], axis=1)\n",
"y_reg = data1['price']\n",
"\n",
"# Hold out 20% of the rows for the final evaluation.\n",
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
"\n",
"# Candidate regression models.\n",
"models_reg = {\n",
"    'Linear Regression': LinearRegression(),\n",
"    'Random Forest Regressor': RandomForestRegressor(random_state=42),\n",
"    'Gradient Boosting Regressor': GradientBoostingRegressor(random_state=42)\n",
"}\n",
"\n",
"# One scaler + model pipeline per candidate.\n",
"pipelines_reg = {\n",
"    name: Pipeline([('scaler', StandardScaler()), ('model', model)])\n",
"    for name, model in models_reg.items()\n",
"}\n",
"\n",
"# Hyper-parameter grids; keys use the `model__` prefix to address the pipeline step.\n",
"param_grids_reg = {\n",
"    'Linear Regression': {},\n",
"    'Random Forest Regressor': {\n",
"        'model__n_estimators': [100, 200, 300],\n",
"        'model__max_depth': [None, 10, 20, 30]\n",
"    },\n",
"    'Gradient Boosting Regressor': {\n",
"        'model__n_estimators': [100, 200, 300],\n",
"        'model__learning_rate': [0.01, 0.1, 0.2],\n",
"        'model__max_depth': [3, 5, 7]\n",
"    }\n",
"}\n",
"\n",
"# 5-fold grid search per pipeline, selecting by (negated) mean squared error.\n",
"best_models_reg = {}\n",
"for name, pipeline in pipelines_reg.items():\n",
"    grid_search = GridSearchCV(pipeline, param_grids_reg[name], cv=5, scoring='neg_mean_squared_error')\n",
"    grid_search.fit(X_train_reg, y_train_reg)\n",
"    best_models_reg[name] = {\n",
"        'pipeline': grid_search.best_estimator_,\n",
"        'best_params': grid_search.best_params_\n",
"    }\n",
"    print(f'Best parameters for {name}: {grid_search.best_params_}')\n",
"\n",
"# Evaluate the tuned pipelines. GridSearchCV (refit=True by default) has already refit\n",
"# each best_estimator_ on the whole training split, so no extra fit() call is needed.\n",
"for model_name, model_info in best_models_reg.items():\n",
"    print(f\"Model: {model_name}\")\n",
"    model_pipeline = model_info['pipeline']\n",
"\n",
"    y_train_predict = model_pipeline.predict(X_train_reg)\n",
"    y_test_predict = model_pipeline.predict(X_test_reg)\n",
"\n",
"    model_info[\"preds_train\"] = y_train_predict\n",
"    model_info[\"preds_test\"] = y_test_predict\n",
"\n",
"    model_info[\"MSE_train\"] = mean_squared_error(y_train_reg, y_train_predict)\n",
"    model_info[\"MSE_test\"] = mean_squared_error(y_test_reg, y_test_predict)\n",
"    model_info[\"R2_train\"] = r2_score(y_train_reg, y_train_predict)\n",
"    model_info[\"R2_test\"] = r2_score(y_test_reg, y_test_predict)\n",
"    model_info[\"MAE_train\"] = mean_absolute_error(y_train_reg, y_train_predict)\n",
"    model_info[\"MAE_test\"] = mean_absolute_error(y_test_reg, y_test_predict)\n",
"\n",
"    # Show the held-out metrics so the conclusions below can be checked against numbers.\n",
"    print(\n",
"        f\"  test MSE={model_info['MSE_test']:.2f}, \"\n",
"        f\"MAE={model_info['MAE_test']:.2f}, R2={model_info['R2_test']:.4f}\"\n",
"    )"
2024-12-20 14:14:59 +04:00
]
2024-12-21 02:12:15 +04:00
},
{
"cell_type": "code",
2024-12-21 11:21:05 +04:00
"execution_count": 11,
2024-12-21 02:12:15 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Accuracy: 0.991495747873937\n",
"Classification Report:\n",
2024-12-21 11:21:05 +04:00
" precision recall f1-score support\n",
"\n",
2024-12-21 11:21:05 +04:00
" Beauty 0.99 0.90 0.94 131\n",
" Electronics 0.99 1.00 0.99 241\n",
" Fashion 1.00 1.00 1.00 307\n",
" Groceries 0.98 1.00 0.99 573\n",
"Home & Kitchen 1.00 1.00 1.00 747\n",
"\n",
2024-12-21 11:21:05 +04:00
" accuracy 0.99 1999\n",
" macro avg 0.99 0.98 0.98 1999\n",
" weighted avg 0.99 0.99 0.99 1999\n",
"\n"
]
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAxQAAAK9CAYAAAC95yoDAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAjKVJREFUeJzs3Xl8TOf7//H3JLJIyCIkoSX2JUQpPsRS1L6V0kWphupCLSWKatXaSutTpa2tK6p7FZ+W2ilF7PtaS0iVCLHEEgnJ+f3hZ76ZJshMJWcir2cf51Fzn+2aOZlJrrnu+9wWwzAMAQAAAIADXMwOAAAAAEDuRUIBAAAAwGEkFAAAAAAcRkIBAAAAwGEkFAAAAAAcRkIBAAAAwGEkFAAAAAAcRkIBAAAAwGEkFAAAAAAcRkIBAJk4dOiQmjdvLl9fX1ksFs2fP/+eHv/YsWOyWCyaOXPmPT1ubtaoUSM1atTI7DAAAHYioQDgtI4cOaKXX35ZpUuXlqenp3x8fFSvXj19+OGHSkpKytZzR0REaPfu3XrnnXc0e/Zs1axZM1vPl5O6d+8ui8UiHx+fTF/HQ4cOyWKxyGKx6P3337f7+CdPntSoUaO0Y8eOexAtAMDZ5TM7AADIzMKFC/Xkk0/Kw8NDzz33nKpUqaKUlBStXbtWgwcP1t69e/Xpp59my7mTkpIUHR2tN998U3379s2Wc4SEhCgpKUlubm7Zcvy7yZcvn65evapff/1VTz31lM26b775Rp6enrp27ZpDxz558qRGjx6tkiVLqlq1alneb+nSpQ6dDwBgLhIKAE4nJiZGnTt3VkhIiFauXKmiRYta1/Xp00eHDx/WwoULs+38Z86ckST5+fll2zksFos8PT2z7fh34+HhoXr16um7777LkFB8++23atOmjX7++eccieXq1avy8vKSu7t7jpwPAHBv0eUJgNMZP368Ll++rC+++MImmbilbNmyevXVV62Pb9y4obFjx6pMmTLy8PBQyZIl9cYbbyg5Odlmv5IlS6pt27Zau3at/vOf/8jT01OlS5fWV199Zd1m1KhRCgkJkSQNHjxYFotFJUuWlHSzq9Ctf6c3atQoWSwWm7Zly5apfv368vPzU4ECBVShQgW98cYb1vW3G0OxcuVKNWjQQN7e3vLz81P79u21f//+TM93+PBhde/eXX5+fvL19VWPHj109erV27+w/9ClSxctWrRIFy5csLZt3rxZhw4dUpcuXTJsf+7cOb322msKCwtTgQIF5OPjo1atWmnnzp3WbX7//XfVqlVLktSjRw9r16lbz7NRo0aqUqWKtm7dqkceeUReXl7W1+WfYygiIiLk6emZ4fm3aNFC/v7+OnnyZJafKwAg+5BQAHA6v/76q0qXLq26detmafsXXnhBI0aM0MMPP6yJEyeqYcOGioqKUufOnTNse/jwYT3xxBNq1qyZJkyYIH9/f3Xv3l179+6VJHXs2FETJ06UJD3zzDOaPXu2Jk2aZFf8e/fuVdu2bZWcnKwxY8ZowoQJeuyxx7Ru3bo77rd8+XK1aNFC8fHxGjVqlCIjI7V+/XrVq1dPx44dy7D9U089pUuXLikqKkpPPfWUZs6cqdGjR2c5zo4dO8pisWju3LnWtm+//VYVK1bUww8/nGH7o0ePav78+Wrbtq0++OADDR48WLt371bDhg2tf9xXqlRJY8aMkSS99NJLmj17tmbPnq1HHnnEepyEhAS1atVK1apV06RJk9S4ceNM4/vwww9VpEgRRUREKDU1VZL0ySefaOnSpfr4449VrFixLD9XAEA2MgDAiVy8eNGQZLRv3z5L2+/YscOQZLzwwgs27a+99pohyVi5cqW1LSQkxJBkrFmzxtoWHx9veHh4GIMGDbK2xcTEGJKM//73vzbHjIiIMEJCQjLEMHLkSCP9x+nEiRMNScaZM2duG/etc8yYMcPaVq1aNSMwMNBISEiwtu3cudNwcXExnnvuuQ
zne/75522O+fjjjxsBAQG3PWf65+Ht7W0YhmE88cQTRpMmTQzDMIzU1FQjODjYGD16dKavwbVr14zU1NQMz8PDw8MYM2aMtW3z5s0ZntstDRs2NCQZ06dPz3Rdw4YNbdqWLFliSDLefvtt4+jRo0aBAgWMDh063PU5AgByDhUKAE4lMTFRklSwYMEsbf/bb79JkiIjI23aBw0aJEkZxlqEhoaqQYMG1sdFihRRhQoVdPToUYdj/qdbYy/+97//KS0tLUv7nDp1Sjt27FD37t1VqFAha3vVqlXVrFkz6/NMr1evXjaPGzRooISEBOtrmBVdunTR77//rri4OK1cuVJxcXGZdneSbo67cHG5+WsjNTVVCQkJ1u5c27Zty/I5PTw81KNHjyxt27x5c7388ssaM2aMOnbsKE9PT33yySdZPhcAIPuRUABwKj4+PpKkS5cuZWn748ePy8XFRWXLlrVpDw4Olp+fn44fP27TXqJEiQzH8Pf31/nz5x2MOKOnn35a9erV0wsvvKCgoCB17txZP/744x2Ti1txVqhQIcO6SpUq6ezZs7py5YpN+z+fi7+/vyTZ9Vxat26tggUL6ocfftA333yjWrVqZXgtb0lLS9PEiRNVrlw5eXh4qHDhwipSpIh27dqlixcvZvmcDzzwgF0DsN9//30VKlRIO3bs0EcffaTAwMAs7wsAyH4kFACcio+Pj4oVK6Y9e/bYtd8/B0Xfjqura6bthmE4fI5b/ftvyZ8/v9asWaPly5erW7du2rVrl55++mk1a9Ysw7b/xr95Lrd4eHioY8eOmjVrlubNm3fb6oQkjRs3TpGRkXrkkUf09ddfa8mSJVq2bJkqV66c5UqMdPP1scf27dsVHx8vSdq9e7dd+wIAsh8JBQCn07ZtWx05ckTR0dF33TYkJERpaWk6dOiQTfvp06d14cIF6x2b7gV/f3+bOyLd8s8qiCS5uLioSZMm+uCDD7Rv3z698847WrlypVatWpXpsW/FefDgwQzrDhw4oMKFC8vb2/vfPYHb6NKli7Zv365Lly5lOpD9ljlz5qhx48b64osv1LlzZzVv3lxNmzbN8JpkNbnLiitXrqhHjx4KDQ3VSy+9pPHjx2vz5s337PgAgH+PhAKA0xkyZIi8vb31wgsv6PTp0xnWHzlyRB9++KGkm112JGW4E9MHH3wgSWrTps09i6tMmTK6ePGidu3aZW07deqU5s2bZ7PduXPnMux7a4K3f97K9paiRYuqWrVqmjVrls0f6Hv27NHSpUutzzM7NG7cWGPHjtXkyZMVHBx82+1cXV0zVD9++ukn/f333zZttxKfzJIvew0dOlSxsbGaNWuWPvjgA5UsWVIRERG3fR0BADmPie0AOJ0yZcro22+/1dNPP61KlSrZzJS9fv16/fTTT+revbsk6aGHHlJERIQ+/fRTXbhwQQ0bNtSmTZs0a9YsdejQ4ba3JHVE586dNXToUD3++OPq37+/rl69qmnTpql8+fI2g5LHjBmjNWvWqE2bNgoJCVF8fLymTp2qBx98UPXr17/t8f/73/+qVatWCg8PV8+ePZWUlKSPP/5Yvr6+GjVq1D17Hv/k4uKi4cOH33W7tm3basyYMerRo4fq1q2r3bt365tvvlHp0qVttitTpoz8/Pw0ffp0FSxYUN7e3qpdu7ZKlSplV1wrV67U1KlTNXLkSOttbGfMmKFGjRrprbfe0vjx4+06HgAge1ChAOCUHnvsMe3atUtPPPGE/ve//6lPnz56/fXXdezYMU2YMEEfffSRddvPP/9co0eP1ubNmzVgwACtXLlSw4YN0/fff39PYwoICNC8efPk5eWlIUOGaNasWYqKilK7du0yxF6iRAl9+eWX6tOnj6ZMmaJHHnlEK1eulK+v722P37RpUy1evFgBAQEaMWKE3n//fdWpU0fr1q2z+4/x7PDGG29o0KBBWrJkiV599VVt27ZNCxcuVPHixW22c3Nz06xZs+Tq6qpevX
rpmWee0erVq+0616VLl/T888+revXqevPNN63tDRo00KuvvqoJEyZow4YN9+R5AQD+HYthz+g9AAAAAEiHCgUAAAA
"text/plain": [
"<Figure size 1000x800 with 2 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
2024-12-21 11:21:05 +04:00
"# Classification task: predict the top-level category from the sub-category.\n",
"# Separate encoders keep the feature mapping and the target mapping independent.\n",
"sub_category_encoder = LabelEncoder()\n",
"label_encoder = LabelEncoder()  # target encoder; classes_ holds the category names\n",
"\n",
"# Build the feature frame without adding columns to the shared `data` frame.\n",
"X = pd.DataFrame(\n",
"    {'sub_category_encoded': sub_category_encoder.fit_transform(data['sub_category'])},\n",
"    index=data.index,\n",
")\n",
"y = label_encoder.fit_transform(data['category'])\n",
"# Integer ids of all classes, in the same order as label_encoder.classes_.\n",
"class_ids = np.arange(len(label_encoder.classes_))\n",
"\n",
"# Stratified 80/20 train/test split.\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)\n",
"\n",
"# Classifier pipeline.\n",
"pipeline = Pipeline([\n",
"    ('scaler', StandardScaler()),  # scaling (not always needed for categorical features)\n",
"    ('classifier', RandomForestClassifier(random_state=42, n_estimators=100, max_depth=10))\n",
"])\n",
"\n",
"# Train the model.\n",
"pipeline.fit(X_train, y_train)\n",
"\n",
"# Predict on the held-out test data.\n",
"y_pred = pipeline.predict(X_test)\n",
"\n",
"print(\"Accuracy:\", accuracy_score(y_test, y_pred))\n",
"# Passing labels= explicitly keeps the rows aligned with classes_ even if a class\n",
"# happens to be absent from y_test / y_pred.\n",
"print(\n",
"    \"Classification Report:\\n\",\n",
"    classification_report(y_test, y_pred, labels=class_ids, target_names=label_encoder.classes_),\n",
")\n",
"\n",
"# Confusion matrix, with rows/columns in the same order as the tick labels.\n",
"cm = confusion_matrix(y_test, y_pred, labels=class_ids)\n",
"fig, ax = plt.subplots(figsize=(10, 8))\n",
"sns.heatmap(\n",
"    cm, annot=True, fmt='d', cmap='Blues',\n",
"    xticklabels=label_encoder.classes_, yticklabels=label_encoder.classes_, ax=ax,\n",
")\n",
"ax.set_xlabel('Predicted')\n",
"ax.set_ylabel('Actual')\n",
"ax.set_title('Confusion Matrix')\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Модель классификации показывает неплохие результаты, что логично, учитывая структуру датасета."
]
2024-12-21 02:12:15 +04:00
},
{
"cell_type": "code",
2024-12-21 11:21:05 +04:00
"execution_count": 8,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 9995 entries, 52893 to 146053\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 category 9995 non-null object \n",
" 1 sub_category 9995 non-null object \n",
" 2 href 9995 non-null object \n",
" 3 items 9995 non-null object \n",
" 4 price 9995 non-null float64\n",
"dtypes: float64(1), object(4)\n",
"memory usage: 468.5+ KB\n"
]
}
],
"source": [
"# Summarise the filtered working frame (row count, dtypes, non-null counts).\n",
"data.info()"
]
},
{
"cell_type": "code",
"execution_count": 9,
2024-12-21 02:12:15 +04:00
"metadata": {},
"outputs": [
2024-12-21 11:21:05 +04:00
{
"ename": "NameError",
"evalue": "name 'best_models_reg' is not defined",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[9], line 4\u001b[0m\n\u001b[0;32m 1\u001b[0m _, ax \u001b[38;5;241m=\u001b[39m plt\u001b[38;5;241m.\u001b[39msubplots(\u001b[38;5;241m3\u001b[39m, \u001b[38;5;241m2\u001b[39m, figsize\u001b[38;5;241m=\u001b[39m(\u001b[38;5;241m12\u001b[39m, \u001b[38;5;241m10\u001b[39m), sharex\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m, sharey\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m 2\u001b[0m ax \u001b[38;5;241m=\u001b[39m ax\u001b[38;5;241m.\u001b[39mflatten()\n\u001b[1;32m----> 4\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m index, (name, model) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28menumerate\u001b[39m(\u001b[43mbest_models_reg\u001b[49m\u001b[38;5;241m.\u001b[39mitems()):\n\u001b[0;32m 5\u001b[0m model_pipeline \u001b[38;5;241m=\u001b[39m model[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mpipeline\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 6\u001b[0m y_pred_reg \u001b[38;5;241m=\u001b[39m model_pipeline\u001b[38;5;241m.\u001b[39mpredict(X_test_reg)\n",
"\u001b[1;31mNameError\u001b[0m: name 'best_models_reg' is not defined"
]
},
{
"data": {
2024-12-21 11:21:05 +04:00
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA+AAAAMzCAYAAAAmjXj8AAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjAsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvlHJYcgAAAAlwSFlzAAAPYQAAD2EBqD+naQAAWLhJREFUeJzt3W9sneV9N/Cf7eBjULEJy+L8mWkGHaUtkNCEuIYixOTVEihdXqz1ACVZxJ9RAoJYW0kIxKW0cUYB5VEJjUhhVHrKkhYBT9VEZtRtVFE8RU1iiY4ERANNVtUmWYedhtYm9v28QLgzcWiOuc9lJ3w+0nnhu9fl8zuXTL/5+hyfU5ZlWRYAAABASZWP9wAAAADwYaCAAwAAQAIKOAAAACSggAMAAEACCjgAAAAkoIADAABAAgo4AAAAJKCAAwAAQAIKOAAAACSggAMAAEACRRfwn/70p7FgwYKYMWNGlJWVxTPPPPMn92zbti0+/elPR6FQiI997GPx+OOPj2FUACAFWQ8ApVF0AT98+HDMnj071q9ff1zrX3vttbjqqqviiiuuiK6urrj99tvj+uuvj2effbboYQGA0pP1AFAaZVmWZWPeXFYWTz/9dCxcuPCYa+64447YsmVL/OIXvxi+9vd///fx5ptvRnt7+1jvGgBIQNYDQH4mlfoOOjs7o7GxccS1pqamuP3224+5p7+/P/r7+4e/Hhoait/+9rfxZ3/2Z1FWVlaqUQHguGRZFocOHYoZM2ZEebm3U5H1AJyMSpH3JS/g3d3dUVtbO+JabW1t9PX1xe9///s49dRTj9rT1tYW99xzT6lHA4APZP/+/fEXf/EX4z3GuJP1AJzM8sz7khfwsVi5cmW0tLQMf93b2xtnnXVW7N+/P6qrq8dxMgCI6Ovri7q6ujj99NPHe5QTlqwHYKIrRd6XvIBPmzYtenp6Rlzr6emJ6urqUX8jHhFRKBSiUCgcdb26ulooAzBheKn0O2Q9ACezPPO+5H+41tDQEB0dHSOuPffcc9HQ0FDquwYAEpD1AHB8ii7gv/vd76Krqyu6uroi4p2PHunq6op9+/ZFxDsvKVu8ePHw+ptuuin27t0bX/7yl2PPnj3x8MMPx/e+971Yvnx5Po8AAMiVrAeA0ii6gP/85z+Piy66KC666KKIiGhpaYmLLrooVq9eHRERv/nNb4YDOiLiL//yL2PLli3x3HPPxezZs+OBBx6Ib3/729HU1JTTQwAA8iTrAaA0PtDngKfS19cXNTU10dvb6+/CABh3cil/zhSAiaYU2eTDSwEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAEhhTAV+/fn3MmjUrqqqqor6+PrZv3/6+69etWxcf//jH49RTT426urpYvnx5/OEPfxjTwABA6cl6AMhf0QV88+bN0dLSEq2trbFz586YPXt2NDU1xRtvvDHq+ieeeCJWrFgRra2tsXv37nj00Udj8+bNceedd37g4QGA/Ml6ACiNogv4gw8+GDfccEMsXbo0PvnJT8aGDRvitNNOi8cee2zU9S+88EJceumlcc0118SsWbPic5/7XFx99dV/8jfpAMD4kPUAUBpFFfCBgYHYsWNHNDY2/vEblJdHY2NjdHZ2jrrnkksuiR07dgyH8N69e2Pr1q1x5ZVXfoCxAYBSkPUAUDqTill88ODBGBwcjNra2hHXa2trY8+ePaPuueaaa+LgwYPx2c9+Nr
IsiyNHjsRNN930vi9L6+/vj/7+/uGv+/r6ihkTABgjWQ8ApVPyd0Hftm1brFmzJh5++OHYuXNnPPXUU7Fly5a49957j7mnra0tampqhm91dXWlHhMAGCNZDwDHpyzLsux4Fw8MDMRpp50WTz75ZCxcuHD4+pIlS+LNN9+M//f//t9Rey677LL4zGc+E9/4xjeGr/3f//t/48Ybb4zf/e53UV5+9O8ARvuteF1dXfT29kZ1dfXxjgsAJdHX1xc1NTUnZS7JegB4RynyvqhnwCsrK2Pu3LnR0dExfG1oaCg6OjqioaFh1D1vvfXWUcFbUVERERHH6v6FQiGqq6tH3ACA0pP1AFA6Rf0NeERES0tLLFmyJObNmxfz58+PdevWxeHDh2Pp0qUREbF48eKYOXNmtLW1RUTEggUL4sEHH4yLLroo6uvr49VXX4277747FixYMBzOAMDEIesBoDSKLuDNzc1x4MCBWL16dXR3d8ecOXOivb19+M1a9u3bN+K34HfddVeUlZXFXXfdFb/+9a/jz//8z2PBggXx9a9/Pb9HAQDkRtYDQGkU9Tfg4+Vk/ls7AE48cil/zhSAiWbc/wYcAAAAGBsFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAgAQUcAAAAElDAAQAAIAEFHAAAABJQwAEAACABBRwAAAASUMABAAAggTEV8PXr18esWbOiqqoq6uvrY/v27e+7/s0334xly5bF9OnTo1AoxLnnnhtbt24d08AAQOnJegDI36RiN2zevDlaWlpiw4YNUV9fH+vWrYumpqZ4+eWXY+rUqUetHxgYiL/5m7+JqVOnxpNPPhkzZ86MX/3qV3HGGWfkMT8AkDNZDwClUZZlWVbMhvr6+rj44ovjoYceioiIoaGhqKuri1tvvTVWrFhx1PoNGzbEN77xjdizZ0+ccsopYxqyr68vampqore3N6qrq8f0PQAgLyd7Lsl6AChNNhX1EvSBgYHYsWNHNDY2/vEblJdHY2NjdHZ2jrrnBz/4QTQ0NMSyZcuitrY2zj///FizZk0MDg4e8376+/ujr69vxA0AKD1ZDwClU1QBP3jwYAwODkZtbe2I67W1tdHd3T3qnr1798aTTz4Zg4ODsXXr1rj77rvjgQceiK997WvHvJ+2traoqakZvtXV1RUzJgAwRrIeAEqn5O+CPjQ0FFOnTo1HHnkk5s6dG83NzbFq1arYsGHDMfesXLkyent7h2/79+8v9ZgAwBjJegA4PkW9CduUKVOioqIienp6Rlzv6emJadOmjbpn+vTpccopp0RFRcXwtU984hPR3d0dAwMDUVlZedSeQqEQhUKhmNEAgBzIegAonaKeAa+srIy5c+dGR0fH8LWhoaHo6OiIhoaGUfdceuml8eqrr8bQ0NDwtVdeeSWmT58+aiADAONH1gNA6RT9EvSWlpbYuHFjfOc734ndu3fHl770pTh8+HAsXbo0IiIWL14cK1euHF7/pS99KX7729/GbbfdFq+88kps2bIl1qxZE8uWLcvvUQAAuZH1AFAaRX8OeHNzcxw4cCBWr14d3d3dMWfOnGhvbx9+s5Z9+/ZFefkfe31dXV08++yzsXz58rjwwgtj5syZcdttt8Udd9yR36MAAHIj6wGgNIr+HPDx4LNBAZhI5FL+nCkAE824fw44AAAAMDYKOAAAACSggAMAAEACCjgAAAAkoIADAABAAgo4AAAAJKCAAwAAQAIKOAAAACSggAMAAEACCjgAAAAkoIADAABAAgo4AAAAJKCAAwAAQAIKOAAAAC
SggAMAAEACCjgAAAAkoIADAABAAgo4AAAAJKCAAwAAQAIKOAAAACSggAMAAEACCjgAAAAkoIADAABAAgo4AAAAJKC
"text/plain": [
"<Figure size 1200x1000 with 6 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Diagnostic plots for every tuned regressor: one row per model, with\n",
"# actual-vs-predicted on the left and residuals-vs-predicted on the right.\n",
"# The grid is sized from the number of models instead of a hard-coded 3 rows.\n",
"n_models = len(best_models_reg)\n",
"_, axes = plt.subplots(\n",
"    n_models, 2, figsize=(12, 10 * n_models / 3), sharex=False, sharey=False, squeeze=False\n",
")\n",
"\n",
"# End points of the y = x reference line.\n",
"value_range = [y_test_reg.min(), y_test_reg.max()]\n",
"\n",
"for (name, model), (ax_fit, ax_resid) in zip(best_models_reg.items(), axes):\n",
"    y_pred_reg = model['pipeline'].predict(X_test_reg)\n",
"\n",
"    # Actual values against predicted values.\n",
"    ax_fit.scatter(y_test_reg, y_pred_reg, alpha=0.5)\n",
"    ax_fit.plot(value_range, value_range, color='red', linestyle='--')\n",
"    ax_fit.set_xlabel('Actual Values')\n",
"    ax_fit.set_ylabel('Predicted Values')\n",
"    ax_fit.set_title(f'{name}: Actual vs Predicted')\n",
"\n",
"    # Residuals (actual minus predicted) against predicted values.\n",
"    residuals = y_test_reg - y_pred_reg\n",
"    ax_resid.scatter(y_pred_reg, residuals, alpha=0.5)\n",
"    ax_resid.axhline(y=0, color='red', linestyle='--')\n",
"    ax_resid.set_xlabel('Predicted Values')\n",
"    ax_resid.set_ylabel('Residuals')\n",
"    ax_resid.set_title(f'{name}: Residuals vs Predicted')\n",
"\n",
"plt.subplots_adjust(top=1, bottom=0, hspace=0.4, wspace=0.1)\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Модель регрессии демонстрирует ужасные результаты ввиду недостаточной корреляции между целевой характеристикой и строковыми значениями."
]
2024-12-20 14:14:59 +04:00
}
],
"metadata": {
"kernelspec": {
2024-12-21 11:21:05 +04:00
"display_name": "Python 3",
2024-12-20 14:14:59 +04:00
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
2024-12-21 11:21:05 +04:00
"version": "3.10.8"
2024-12-20 14:14:59 +04:00
}
},
"nbformat": 4,
2024-12-21 02:12:15 +04:00
"nbformat_minor": 2
2024-12-20 14:14:59 +04:00
}