{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Загрузка набора данных" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameRatingSpec_scoreNo_of_simRamBatteryDisplayCameraExternal_MemoryAndroid_versionPricecompanyInbuilt_memoryfast_chargingScreen_resolutionProcessorProcessor_name
0Samsung Galaxy F14 5G4.6568Dual Sim, 3G, 4G, 5G, VoLTE,46000650.0Memory Card Supported, upto 1 TB139999.0Samsung128 GB inbuilt25W Fast Charging2408 x 1080 px Display with Water Drop NotchOcta Core ProcessorExynos 1330
1Samsung Galaxy A114.2063Dual Sim, 3G, 4G, VoLTE,24000613.0Memory Card Supported, upto 512 GB109990.0Samsung32 GB inbuilt15W Fast Charging720 x 1560 px Display with Punch Hole1.8 GHz ProcessorOcta Core
2Samsung Galaxy A134.3075Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 1 TB1211999.0Samsung64 GB inbuilt25W Fast Charging1080 x 2408 px Display with Water Drop Notch2 GHz ProcessorOcta Core
3Samsung Galaxy F234.1073Dual Sim, 3G, 4G, VoLTE,46000648.0Memory Card Supported, upto 1 TB1211999.0Samsung64 GB inbuiltNaN720 x 1600 pxOcta CoreHelio G88
4Samsung Galaxy A03s (4GB RAM + 64GB)4.1069Dual Sim, 3G, 4G, VoLTE,45000613.0Memory Card Supported, upto 1 TB1111999.0Samsung64 GB inbuilt15W Fast Charging720 x 1600 px Display with Water Drop NotchOcta CoreHelio P35
......................................................
1365TCL 40R4.0575Dual Sim, 3G, 4G, 5G, VoLTE,45000650.0Memory Card (Hybrid)1218999.0TCL64 GB inbuilt15W Fast Charging720 x 1612 pxOcta CoreDimensity 700 5G
1366TCL 50 XL NxtPaper 5G4.1080Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card (Hybrid)1424990.0TCL128 GB inbuilt33W Fast Charging1200 x 2400 pxOcta CoreDimensity 7050
1367TCL 50 XE NxtPaper 5G4.0080Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB1323990.0TCL256 GB inbuilt18W Fast Charging720 x 1612 pxOcta CoreDimensity 6080
1368TCL 40 NxtPaper 5G4.5079Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB1322499.0TCL256 GB inbuilt15W Fast Charging720 x 1612 pxOcta CoreDimensity 6020
1369TCL Trifold4.6593Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G,12460010NaN50 MP + 48 MP + 8 MP Triple Rear & 32 MP F...13119990.0TCL256 GB inbuilt67W Fast Charging1916 x 2160 pxOcta CoreSnapdragon 8 Gen2
\n", "

1370 rows × 17 columns

\n", "
" ], "text/plain": [ " Name Rating Spec_score \\\n", "0 Samsung Galaxy F14 5G 4.65 68 \n", "1 Samsung Galaxy A11 4.20 63 \n", "2 Samsung Galaxy A13 4.30 75 \n", "3 Samsung Galaxy F23 4.10 73 \n", "4 Samsung Galaxy A03s (4GB RAM + 64GB) 4.10 69 \n", "... ... ... ... \n", "1365 TCL 40R 4.05 75 \n", "1366 TCL 50 XL NxtPaper 5G 4.10 80 \n", "1367 TCL 50 XE NxtPaper 5G 4.00 80 \n", "1368 TCL 40 NxtPaper 5G 4.50 79 \n", "1369 TCL Trifold 4.65 93 \n", "\n", " No_of_sim Ram Battery Display Camera \\\n", "0 Dual Sim, 3G, 4G, 5G, VoLTE, 4 6000 6 50.0 \n", "1 Dual Sim, 3G, 4G, VoLTE, 2 4000 6 13.0 \n", "2 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "3 Dual Sim, 3G, 4G, VoLTE, 4 6000 6 48.0 \n", "4 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 13.0 \n", "... ... ... ... ... ... \n", "1365 Dual Sim, 3G, 4G, 5G, VoLTE, 4 5000 6 50.0 \n", "1366 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "1367 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1368 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1369 Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G, 12 4600 10 NaN \n", "\n", " External_Memory Android_version \\\n", "0 Memory Card Supported, upto 1 TB 13 \n", "1 Memory Card Supported, upto 512 GB 10 \n", "2 Memory Card Supported, upto 1 TB 12 \n", "3 Memory Card Supported, upto 1 TB 12 \n", "4 Memory Card Supported, upto 1 TB 11 \n", "... ... ... \n", "1365 Memory Card (Hybrid) 12 \n", "1366 Memory Card (Hybrid) 14 \n", "1367 Memory Card Supported, upto 1 TB 13 \n", "1368 Memory Card Supported, upto 1 TB 13 \n", "1369 50 MP + 48 MP + 8 MP Triple Rear & 32 MP F... 13 \n", "\n", " Price company Inbuilt_memory fast_charging \\\n", "0 9999.0 Samsung 128 GB inbuilt 25W Fast Charging \n", "1 9990.0 Samsung 32 GB inbuilt 15W Fast Charging \n", "2 11999.0 Samsung 64 GB inbuilt 25W Fast Charging \n", "3 11999.0 Samsung 64 GB inbuilt NaN \n", "4 11999.0 Samsung 64 GB inbuilt 15W Fast Charging \n", "... ... ... ... ... \n", "1365 18999.0 TCL 64 GB inbuilt 15W Fast Charging \n", "1366 24990.0 TCL 128 GB inbuilt 33W Fast Charging \n", "1367 23990.0 TCL 256 GB inbuilt 18W Fast Charging \n", "1368 22499.0 TCL 256 GB inbuilt 15W Fast Charging \n", "1369 119990.0 TCL 256 GB inbuilt 67W Fast Charging \n", "\n", " Screen_resolution Processor \\\n", "0 2408 x 1080 px Display with Water Drop Notch Octa Core Processor \n", "1 720 x 1560 px Display with Punch Hole 1.8 GHz Processor \n", "2 1080 x 2408 px Display with Water Drop Notch 2 GHz Processor \n", "3 720 x 1600 px Octa Core \n", "4 720 x 1600 px Display with Water Drop Notch Octa Core \n", "... ... ... \n", "1365 720 x 1612 px Octa Core \n", "1366 1200 x 2400 px Octa Core \n", "1367 720 x 1612 px Octa Core \n", "1368 720 x 1612 px Octa Core \n", "1369 1916 x 2160 px Octa Core \n", "\n", " Processor_name \n", "0 Exynos 1330 \n", "1 Octa Core \n", "2 Octa Core \n", "3 Helio G88 \n", "4 Helio P35 \n", "... ... \n", "1365 Dimensity 700 5G \n", "1366 Dimensity 7050 \n", "1367 Dimensity 6080 \n", "1368 Dimensity 6020 \n", "1369 Snapdragon 8 Gen2 \n", "\n", "[1370 rows x 17 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import featuretools as ft\n", "import re\n", "from sklearn.preprocessing import StandardScaler\n", "from imblearn.over_sampling import RandomOverSampler\n", "from sklearn.model_selection import train_test_split\n", "\n", "df = pd.read_csv(\"../data/mobile phone price prediction.csv\")\n", "\n", "df.drop([\"Unnamed: 0\"], axis=1, inplace=True)\n", "df[\"Price\"] = df[\"Price\"].str.replace(\",\", \"\").astype(float)\n", "\n", "numerical_features = [\n", " \"Ram\",\n", " \"Battery\",\n", " \"Display\",\n", " \"Camera\",\n", "]\n", "\n", "for feature in numerical_features:\n", " df[feature] = df[feature].apply(\n", " lambda x: int(re.search(r\"\\d+\", x).group()) if re.search(r\"\\d+\", x) else None # type: ignore\n", " )\n", "\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Бизнес-цели\n", "1. Классифицировать мобильные устройства по ценовым категориям (например, бюджетные, средний класс, флагманы).\n", "2. Определить, какие характеристики мобильных устройств наиболее сильно влияют на их рейтинг." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Проверка на пропущенные значения" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Пропущенные данные по каждому столбцу:\n", "Name 0\n", "Rating 0\n", "Spec_score 0\n", "No_of_sim 0\n", "Ram 0\n", "Battery 0\n", "Display 0\n", "Camera 79\n", "External_Memory 0\n", "Android_version 443\n", "Price 0\n", "company 0\n", "Inbuilt_memory 19\n", "fast_charging 89\n", "Screen_resolution 2\n", "Processor 28\n", "Processor_name 0\n", "dtype: int64\n" ] } ], "source": [ "print(\"Пропущенные данные по каждому столбцу:\")\n", "print(df.isnull().sum())" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Name 0\n", "Rating 0\n", "Spec_score 0\n", "No_of_sim 0\n", "Ram 0\n", "Battery 0\n", "Display 0\n", "Camera 0\n", "External_Memory 0\n", "Android_version 0\n", "Price 0\n", "company 0\n", "Inbuilt_memory 0\n", "fast_charging 0\n", "Screen_resolution 0\n", "Processor 0\n", "Processor_name 0\n", "dtype: int64\n" ] } ], "source": [ "df.dropna(inplace=True)\n", "print(df.isnull().sum())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Проверка на выбросы" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Количество строк до удаления выбросов: 785\n", "Количество строк после удаления выбросов: 721\n" ] } ], "source": [ "column1 = \"Spec_score\"\n", "column2 = \"Price\"\n", "\n", "plt.figure(figsize=(10, 6))\n", "plt.scatter(df[column1], df[column2], alpha=0.5)\n", "plt.xlabel(column1)\n", "plt.ylabel(column2)\n", "plt.title(f\"Scatter Plot of {column1} vs {column2} (Before Removing Outliers)\")\n", "plt.show()\n", "\n", "def remove_outliers(df, column):\n", " Q1 = df[column].quantile(0.25)\n", " Q3 = df[column].quantile(0.75)\n", " IQR = Q3 - Q1\n", " lower_bound = Q1 - 1.5 * IQR\n", " upper_bound = Q3 + 1.5 * IQR\n", " return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]\n", "\n", "df_cleaned = df.copy()\n", "for column in [column1, column2]:\n", " df_cleaned = remove_outliers(df_cleaned, column)\n", "\n", "plt.figure(figsize=(10, 6))\n", "plt.scatter(df_cleaned[column1], df_cleaned[column2], alpha=0.5)\n", "plt.xlabel(column1)\n", "plt.ylabel(column2)\n", "plt.title(f\"Scatter Plot of {column1} vs {column2} (After Removing Outliers)\")\n", "plt.show()\n", "\n", "print(f\"Количество строк до удаления выбросов: {len(df)}\")\n", "print(f\"Количество строк после удаления выбросов: {len(df_cleaned)}\")\n", "\n", "df = df_cleaned" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Разбиение данных на выборки." ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Размеры выборок:\n", "Обучающая выборка: 432 записей\n", "company\n", "Realme 86\n", "Samsung 70\n", "Motorola 50\n", "Vivo 48\n", "Xiaomi 45\n", "Poco 32\n", "OnePlus 15\n", "iQOO 14\n", "OPPO 12\n", "POCO 11\n", "Honor 11\n", "TCL 11\n", "Lava 9\n", "Oppo 5\n", "Huawei 5\n", "itel 3\n", "Google 2\n", "Gionee 1\n", "IQOO 1\n", "Lenovo 1\n", "Name: count, dtype: int64\n", "Контрольная выборка: 144 записей\n", "company\n", "Vivo 27\n", "Samsung 27\n", "Realme 21\n", "Xiaomi 12\n", "Poco 11\n", "Motorola 10\n", "OnePlus 7\n", "OPPO 6\n", "POCO 6\n", "Honor 3\n", "itel 3\n", "Lava 2\n", "LG 2\n", "iQOO 2\n", "Lenovo 2\n", "Oppo 1\n", "Itel 1\n", "Google 1\n", "Name: count, dtype: int64\n", "Тестовая выборка: 145 записей\n", "company\n", "Samsung 27\n", "Vivo 25\n", "Realme 16\n", "Xiaomi 12\n", "Motorola 11\n", "Poco 10\n", "OnePlus 7\n", "TCL 7\n", "iQOO 7\n", "Huawei 5\n", "Oppo 4\n", "Lenovo 2\n", "Honor 2\n", "Lava 2\n", "itel 2\n", "Tecno 1\n", "Google 1\n", "OPPO 1\n", "Coolpad 1\n", "POCO 1\n", "Itel 1\n", "Name: count, dtype: int64\n" ] } ], "source": [ "X = df\n", "y = df[\"company\"]\n", "\n", "train_df, X_temp, y_train, y_temp = train_test_split(\n", " X, y, test_size=0.4, random_state=42\n", ")\n", "val_df, test_df, y_val, y_test = train_test_split(\n", " X_temp, y_temp, test_size=0.5, random_state=42\n", ")\n", "\n", "print(\"Размеры выборок:\")\n", "print(f\"Обучающая выборка: {train_df.shape[0]} записей\")\n", "print(train_df.company.value_counts())\n", "print(f\"Контрольная выборка: {val_df.shape[0]} записей\")\n", "print(val_df.company.value_counts())\n", "print(f\"Тестовая выборка: {test_df.shape[0]} записей\")\n", "print(test_df.company.value_counts())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Oversampling" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Размеры выборок:\n", "Обучающая выборка: 1720 записей\n", "company\n", "Vivo 86\n", "Motorola 86\n", "Oppo 86\n", "POCO 86\n", "iQOO 86\n", "Xiaomi 86\n", "Realme 86\n", "OnePlus 86\n", "Poco 86\n", "Samsung 86\n", "TCL 86\n", "Gionee 86\n", "Honor 86\n", "OPPO 86\n", "Lava 86\n", "itel 86\n", "Huawei 86\n", "Google 86\n", "IQOO 86\n", "Lenovo 86\n", "Name: count, dtype: int64\n", "Контрольная выборка: 486 записей\n", "company\n", "Vivo 27\n", "Honor 27\n", "Motorola 27\n", "POCO 27\n", "Samsung 27\n", "itel 27\n", "Lava 27\n", "Xiaomi 27\n", "Realme 27\n", "OnePlus 27\n", "Poco 27\n", "iQOO 27\n", "LG 27\n", "Oppo 27\n", "Itel 27\n", "OPPO 27\n", "Google 27\n", "Lenovo 27\n", "Name: count, dtype: int64\n", "Тестовая выборка: 567 записей\n", "company\n", "Oppo 27\n", "Huawei 27\n", "Samsung 27\n", "Motorola 27\n", "TCL 27\n", "Realme 27\n", "Xiaomi 27\n", "Poco 27\n", "Google 27\n", "Vivo 27\n", "iQOO 27\n", "Tecno 27\n", "OnePlus 27\n", "Honor 27\n", "OPPO 27\n", "Lenovo 27\n", "Lava 27\n", "itel 27\n", "Coolpad 27\n", "POCO 27\n", "Itel 27\n", "Name: count, dtype: int64\n" ] } ], "source": [ "def oversample(df):\n", " X = df.drop(\"company\", axis=1)\n", " y = df[\"company\"]\n", "\n", " oversampler = RandomOverSampler(random_state=42)\n", " X_resampled, y_resampled = oversampler.fit_resample(X, y) # type: ignore\n", "\n", " resampled_df = pd.concat([X_resampled, y_resampled], axis=1)\n", " return resampled_df\n", "\n", "train_df_overs = oversample(train_df)\n", "val_df_overs = oversample(val_df)\n", "test_df_overs = oversample(test_df)\n", "\n", "print(\"Размеры выборок:\")\n", "print(f\"Обучающая выборка: {train_df_overs.shape[0]} записей\")\n", "print(train_df_overs.company.value_counts())\n", "print(f\"Контрольная выборка: {val_df_overs.shape[0]} записей\")\n", "print(val_df_overs.company.value_counts())\n", "print(f\"Тестовая выборка: {test_df_overs.shape[0]} записей\")\n", "print(test_df_overs.company.value_counts())" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Дискретизация числовых признаков" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameRatingSpec_scoreNo_of_simRamBatteryDisplayCameraExternal_MemoryAndroid_version...Inbuilt_memoryfast_chargingScreen_resolutionProcessorProcessor_namecompanySpec_score_binBattery_binRam_binCamera_bin
0Vivo Y21T3.9574Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 1 TB11...128 GB inbuilt18W Fast Charging1600 x 720 pxOcta CoreSnapdragon 680Vivo1000
1Motorola Moto G234.4077Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 512 GB13...128 GB inbuilt30W Fast Charging720 x 1600 pxOcta CoreHelio G85Motorola1000
2Oppo A78 4G4.2581Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card Supported, upto 1 TB13...128 GB inbuilt67W Fast Charging1080 x 2400 pxOcta CoreSnapdragon 680Oppo1010
3POCO M4 Pro 4G (8GB RAM + 128GB)4.4581Dual Sim, 3G, 4G, VoLTE,85000664.0Memory Card Supported, upto 1 TB11...128 GB inbuilt33W Fast Charging1080 x 2400 pxOcta CoreHelio G96POCO1010
4iQOO Z5 Pro 5G4.4084Dual Sim, 3G, 4G, 5G, VoLTE,84500664.0Memory Card (Hybrid)11...128 GB inbuilt65W Fast Charging1080 x 2460 pxOcta CoreSnapdragon 870iQOO2010
..................................................................
1715itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...128 GB inbuilt18W Fast Charging1600 x 720 pxOcta CoreDimensity 6080itel1010
1716itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...128 GB inbuilt18W Fast Charging1600 x 720 pxOcta CoreDimensity 6080itel1010
1717itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...128 GB inbuilt18W Fast Charging1600 x 720 pxOcta CoreDimensity 6080itel1010
1718itel P55 Plus4.1074Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card Supported, upto 1 TB13...256 GB inbuilt45W Fast Charging720 x 1640 pxOcta CoreUnisoc T606itel1010
1719itel S244.3575Dual Sim, 3G, 4G,850006108.0Memory Card Supported13...128 GB inbuilt18W Fast Charging720 x 1612 pxOcta CoreHelio G91itel1011
\n", "

1720 rows × 21 columns

\n", "
" ], "text/plain": [ " Name Rating Spec_score \\\n", "0 Vivo Y21T 3.95 74 \n", "1 Motorola Moto G23 4.40 77 \n", "2 Oppo A78 4G 4.25 81 \n", "3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n", "4 iQOO Z5 Pro 5G 4.40 84 \n", "... ... ... ... \n", "1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1718 itel P55 Plus 4.10 74 \n", "1719 itel S24 4.35 75 \n", "\n", " No_of_sim Ram Battery Display Camera \\\n", "0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n", "4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n", "... ... ... ... ... ... \n", "1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n", "\n", " External_Memory Android_version ... \\\n", "0 Memory Card Supported, upto 1 TB 11 ... \n", "1 Memory Card Supported, upto 512 GB 13 ... \n", "2 Memory Card Supported, upto 1 TB 13 ... \n", "3 Memory Card Supported, upto 1 TB 11 ... \n", "4 Memory Card (Hybrid) 11 ... \n", "... ... ... ... \n", "1715 Memory Card Supported, upto 1 TB 13 ... \n", "1716 Memory Card Supported, upto 1 TB 13 ... \n", "1717 Memory Card Supported, upto 1 TB 13 ... \n", "1718 Memory Card Supported, upto 1 TB 13 ... \n", "1719 Memory Card Supported 13 ... \n", "\n", " Inbuilt_memory fast_charging Screen_resolution Processor \\\n", "0 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n", "1 128 GB inbuilt 30W Fast Charging 720 x 1600 px Octa Core \n", "2 128 GB inbuilt 67W Fast Charging 1080 x 2400 px Octa Core \n", "3 128 GB inbuilt 33W Fast Charging 1080 x 2400 px Octa Core \n", "4 128 GB inbuilt 65W Fast Charging 1080 x 2460 px Octa Core \n", "... ... ... ... ... \n", "1715 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n", "1716 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n", "1717 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n", "1718 256 GB inbuilt 45W Fast Charging 720 x 1640 px Octa Core \n", "1719 128 GB inbuilt 18W Fast Charging 720 x 1612 px Octa Core \n", "\n", " Processor_name company Spec_score_bin Battery_bin Ram_bin \\\n", "0 Snapdragon 680 Vivo 1 0 0 \n", "1 Helio G85 Motorola 1 0 0 \n", "2 Snapdragon 680 Oppo 1 0 1 \n", "3 Helio G96 POCO 1 0 1 \n", "4 Snapdragon 870 iQOO 2 0 1 \n", "... ... ... ... ... ... \n", "1715 Dimensity 6080 itel 1 0 1 \n", "1716 Dimensity 6080 itel 1 0 1 \n", "1717 Dimensity 6080 itel 1 0 1 \n", "1718 Unisoc T606 itel 1 0 1 \n", "1719 Helio G91 itel 1 0 1 \n", "\n", " Camera_bin \n", "0 0 \n", "1 0 \n", "2 0 \n", "3 0 \n", "4 0 \n", "... ... \n", "1715 0 \n", "1716 0 \n", "1717 0 \n", "1718 0 \n", "1719 1 \n", "\n", "[1720 rows x 21 columns]" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "numerical_features = [\"Spec_score\", \"Battery\", \"Ram\", \"Camera\"]\n", "\n", "def discretize_features(df, features, bins=3, labels=False):\n", " for feature in features:\n", " try:\n", " df[f\"{feature}_bin\"] = pd.cut(df[feature], bins=bins, labels=labels) # type: ignore\n", " except Exception as e:\n", " print(f\"Ошибка при дискретизации признака {feature}: {e}\")\n", " return df\n", "\n", "train_df_disc = discretize_features(train_df_overs, numerical_features)\n", "val_df_disc = discretize_features(val_df_overs, numerical_features)\n", "test_df_disc = discretize_features(test_df_overs, numerical_features)\n", "\n", "train_df_disc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Унитарное кодирование категориальных признаков" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameRatingSpec_scoreNo_of_simRamBatteryDisplayCameraExternal_MemoryAndroid_version...Spec_score_bin_2Battery_bin_0Battery_bin_1Battery_bin_2Ram_bin_0Ram_bin_1Ram_bin_2Camera_bin_0Camera_bin_1Camera_bin_2
0Vivo Y21T3.9574Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 1 TB11...FalseTrueFalseFalseTrueFalseFalseTrueFalseFalse
1Motorola Moto G234.4077Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 512 GB13...FalseTrueFalseFalseTrueFalseFalseTrueFalseFalse
2Oppo A78 4G4.2581Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card Supported, upto 1 TB13...FalseTrueFalseFalseFalseTrueFalseTrueFalseFalse
3POCO M4 Pro 4G (8GB RAM + 128GB)4.4581Dual Sim, 3G, 4G, VoLTE,85000664.0Memory Card Supported, upto 1 TB11...FalseTrueFalseFalseFalseTrueFalseTrueFalseFalse
4iQOO Z5 Pro 5G4.4084Dual Sim, 3G, 4G, 5G, VoLTE,84500664.0Memory Card (Hybrid)11...TrueTrueFalseFalseFalseTrueFalseTrueFalseFalse
..................................................................
1715itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...FalseTrueFalseFalseFalseTrueFalseTrueFalseFalse
1716itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...FalseTrueFalseFalseFalseTrueFalseTrueFalseFalse
1717itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...FalseTrueFalseFalseFalseTrueFalseTrueFalseFalse
1718itel P55 Plus4.1074Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card Supported, upto 1 TB13...FalseTrueFalseFalseFalseTrueFalseTrueFalseFalse
1719itel S244.3575Dual Sim, 3G, 4G,850006108.0Memory Card Supported13...FalseTrueFalseFalseFalseTrueFalseFalseTrueFalse
\n", "

1720 rows × 29 columns

\n", "
" ], "text/plain": [ " Name Rating Spec_score \\\n", "0 Vivo Y21T 3.95 74 \n", "1 Motorola Moto G23 4.40 77 \n", "2 Oppo A78 4G 4.25 81 \n", "3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n", "4 iQOO Z5 Pro 5G 4.40 84 \n", "... ... ... ... \n", "1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1718 itel P55 Plus 4.10 74 \n", "1719 itel S24 4.35 75 \n", "\n", " No_of_sim Ram Battery Display Camera \\\n", "0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n", "4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n", "... ... ... ... ... ... \n", "1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n", "\n", " External_Memory Android_version ... \\\n", "0 Memory Card Supported, upto 1 TB 11 ... \n", "1 Memory Card Supported, upto 512 GB 13 ... \n", "2 Memory Card Supported, upto 1 TB 13 ... \n", "3 Memory Card Supported, upto 1 TB 11 ... \n", "4 Memory Card (Hybrid) 11 ... \n", "... ... ... ... \n", "1715 Memory Card Supported, upto 1 TB 13 ... \n", "1716 Memory Card Supported, upto 1 TB 13 ... \n", "1717 Memory Card Supported, upto 1 TB 13 ... \n", "1718 Memory Card Supported, upto 1 TB 13 ... \n", "1719 Memory Card Supported 13 ... \n", "\n", " Spec_score_bin_2 Battery_bin_0 Battery_bin_1 Battery_bin_2 Ram_bin_0 \\\n", "0 False True False False True \n", "1 False True False False True \n", "2 False True False False False \n", "3 False True False False False \n", "4 True True False False False \n", "... ... ... ... ... ... \n", "1715 False True False False False \n", "1716 False True False False False \n", "1717 False True False False False \n", "1718 False True False False False \n", "1719 False True False False False \n", "\n", " Ram_bin_1 Ram_bin_2 Camera_bin_0 Camera_bin_1 Camera_bin_2 \n", "0 False False True False False \n", "1 False False True False False \n", "2 True False True False False \n", "3 True False True False False \n", "4 True False True False False \n", "... ... ... ... ... ... \n", "1715 True False True False False \n", "1716 True False True False False \n", "1717 True False True False False \n", "1718 True False True False False \n", "1719 True False False True False \n", "\n", "[1720 rows x 29 columns]" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "categorical_features = [\"Spec_score_bin\", \"Battery_bin\", \"Ram_bin\", \"Camera_bin\"]\n", "\n", "train_df_enc = pd.get_dummies(train_df_disc, columns=categorical_features)\n", "val_df_enc = pd.get_dummies(val_df_disc, columns=categorical_features)\n", "test_df_enc = pd.get_dummies(test_df_disc, columns=categorical_features)\n", "\n", "train_df_enc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Ручной синтез признаков." ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameRatingSpec_scoreNo_of_simRamBatteryDisplayCameraExternal_MemoryAndroid_version...Battery_bin_0Battery_bin_1Battery_bin_2Ram_bin_0Ram_bin_1Ram_bin_2Camera_bin_0Camera_bin_1Camera_bin_2Camera_to_Display_Ratio
0Vivo Y21T3.9574Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 1 TB11...TrueFalseFalseTrueFalseFalseTrueFalseFalse8.333333
1Motorola Moto G234.4077Dual Sim, 3G, 4G, VoLTE,45000650.0Memory Card Supported, upto 512 GB13...TrueFalseFalseTrueFalseFalseTrueFalseFalse8.333333
2Oppo A78 4G4.2581Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
3POCO M4 Pro 4G (8GB RAM + 128GB)4.4581Dual Sim, 3G, 4G, VoLTE,85000664.0Memory Card Supported, upto 1 TB11...TrueFalseFalseFalseTrueFalseTrueFalseFalse10.666667
4iQOO Z5 Pro 5G4.4084Dual Sim, 3G, 4G, 5G, VoLTE,84500664.0Memory Card (Hybrid)11...TrueFalseFalseFalseTrueFalseTrueFalseFalse10.666667
..................................................................
1715itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1716itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1717itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,65000650.0Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1718itel P55 Plus4.1074Dual Sim, 3G, 4G, VoLTE,85000650.0Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1719itel S244.3575Dual Sim, 3G, 4G,850006108.0Memory Card Supported13...TrueFalseFalseFalseTrueFalseFalseTrueFalse18.000000
\n", "

1720 rows × 30 columns

\n", "
" ], "text/plain": [ " Name Rating Spec_score \\\n", "0 Vivo Y21T 3.95 74 \n", "1 Motorola Moto G23 4.40 77 \n", "2 Oppo A78 4G 4.25 81 \n", "3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n", "4 iQOO Z5 Pro 5G 4.40 84 \n", "... ... ... ... \n", "1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1718 itel P55 Plus 4.10 74 \n", "1719 itel S24 4.35 75 \n", "\n", " No_of_sim Ram Battery Display Camera \\\n", "0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n", "2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n", "4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n", "... ... ... ... ... ... \n", "1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n", "1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n", "1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n", "\n", " External_Memory Android_version ... Battery_bin_0 \\\n", "0 Memory Card Supported, upto 1 TB 11 ... True \n", "1 Memory Card Supported, upto 512 GB 13 ... True \n", "2 Memory Card Supported, upto 1 TB 13 ... True \n", "3 Memory Card Supported, upto 1 TB 11 ... True \n", "4 Memory Card (Hybrid) 11 ... True \n", "... ... ... ... ... \n", "1715 Memory Card Supported, upto 1 TB 13 ... True \n", "1716 Memory Card Supported, upto 1 TB 13 ... True \n", "1717 Memory Card Supported, upto 1 TB 13 ... True \n", "1718 Memory Card Supported, upto 1 TB 13 ... True \n", "1719 Memory Card Supported 13 ... True \n", "\n", " Battery_bin_1 Battery_bin_2 Ram_bin_0 Ram_bin_1 Ram_bin_2 Camera_bin_0 \\\n", "0 False False True False False True \n", "1 False False True False False True \n", "2 False False False True False True \n", "3 False False False True False True \n", "4 False False False True False True \n", "... ... ... ... ... ... ... \n", "1715 False False False True False True \n", "1716 False False False True False True \n", "1717 False False False True False True \n", "1718 False False False True False True \n", "1719 False False False True False False \n", "\n", " Camera_bin_1 Camera_bin_2 Camera_to_Display_Ratio \n", "0 False False 8.333333 \n", "1 False False 8.333333 \n", "2 False False 8.333333 \n", "3 False False 10.666667 \n", "4 False False 10.666667 \n", "... ... ... ... \n", "1715 False False 8.333333 \n", "1716 False False 8.333333 \n", "1717 False False 8.333333 \n", "1718 False False 8.333333 \n", "1719 True False 18.000000 \n", "\n", "[1720 rows x 30 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df_enc[\"Camera_to_Display_Ratio\"] = (train_df_enc[\"Camera\"] / train_df_enc[\"Display\"])\n", "val_df_enc[\"Camera_to_Display_Ratio\"] = val_df_enc[\"Camera\"] / val_df_enc[\"Display\"]\n", "test_df_enc[\"Camera_to_Display_Ratio\"] = test_df_enc[\"Camera\"] / test_df_enc[\"Display\"]\n", "\n", "train_df_enc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Масштабирование признаков" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameRatingSpec_scoreNo_of_simRamBatteryDisplayCameraExternal_MemoryAndroid_version...Battery_bin_0Battery_bin_1Battery_bin_2Ram_bin_0Ram_bin_1Ram_bin_2Camera_bin_0Camera_bin_1Camera_bin_2Camera_to_Display_Ratio
0Vivo Y21T3.9574Dual Sim, 3G, 4G, VoLTE,-1.3889630.2061740.096622-0.240789Memory Card Supported, upto 1 TB11...TrueFalseFalseTrueFalseFalseTrueFalseFalse8.333333
1Motorola Moto G234.4077Dual Sim, 3G, 4G, VoLTE,-1.3889630.2061740.096622-0.240789Memory Card Supported, upto 512 GB13...TrueFalseFalseTrueFalseFalseTrueFalseFalse8.333333
2Oppo A78 4G4.2581Dual Sim, 3G, 4G, VoLTE,0.7200780.2061740.096622-0.240789Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
3POCO M4 Pro 4G (8GB RAM + 128GB)4.4581Dual Sim, 3G, 4G, VoLTE,0.7200780.2061740.0966220.275662Memory Card Supported, upto 1 TB11...TrueFalseFalseFalseTrueFalseTrueFalseFalse10.666667
4iQOO Z5 Pro 5G4.4084Dual Sim, 3G, 4G, 5G, VoLTE,0.720078-0.6757890.0966220.275662Memory Card (Hybrid)11...TrueFalseFalseFalseTrueFalseTrueFalseFalse10.666667
..................................................................
1715itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,-0.3344420.2061740.096622-0.240789Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1716itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,-0.3344420.2061740.096622-0.240789Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1717itel P55 5G (6GB RAM + 128GB)4.0075Dual Sim, 3G, 4G, 5G, VoLTE,-0.3344420.2061740.096622-0.240789Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1718itel P55 Plus4.1074Dual Sim, 3G, 4G, VoLTE,0.7200780.2061740.096622-0.240789Memory Card Supported, upto 1 TB13...TrueFalseFalseFalseTrueFalseTrueFalseFalse8.333333
1719itel S244.3575Dual Sim, 3G, 4G,0.7200780.2061740.0966221.898794Memory Card Supported13...TrueFalseFalseFalseTrueFalseFalseTrueFalse18.000000
\n", "

1720 rows × 30 columns

\n", "
" ], "text/plain": [ " Name Rating Spec_score \\\n", "0 Vivo Y21T 3.95 74 \n", "1 Motorola Moto G23 4.40 77 \n", "2 Oppo A78 4G 4.25 81 \n", "3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n", "4 iQOO Z5 Pro 5G 4.40 84 \n", "... ... ... ... \n", "1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n", "1718 itel P55 Plus 4.10 74 \n", "1719 itel S24 4.35 75 \n", "\n", " No_of_sim Ram Battery Display Camera \\\n", "0 Dual Sim, 3G, 4G, VoLTE, -1.388963 0.206174 0.096622 -0.240789 \n", "1 Dual Sim, 3G, 4G, VoLTE, -1.388963 0.206174 0.096622 -0.240789 \n", "2 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 -0.240789 \n", "3 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 0.275662 \n", "4 Dual Sim, 3G, 4G, 5G, VoLTE, 0.720078 -0.675789 0.096622 0.275662 \n", "... ... ... ... ... ... \n", "1715 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n", "1716 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n", "1717 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n", "1718 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 -0.240789 \n", "1719 Dual Sim, 3G, 4G, 0.720078 0.206174 0.096622 1.898794 \n", "\n", " External_Memory Android_version ... Battery_bin_0 \\\n", "0 Memory Card Supported, upto 1 TB 11 ... True \n", "1 Memory Card Supported, upto 512 GB 13 ... True \n", "2 Memory Card Supported, upto 1 TB 13 ... True \n", "3 Memory Card Supported, upto 1 TB 11 ... True \n", "4 Memory Card (Hybrid) 11 ... True \n", "... ... ... ... ... \n", "1715 Memory Card Supported, upto 1 TB 13 ... True \n", "1716 Memory Card Supported, upto 1 TB 13 ... True \n", "1717 Memory Card Supported, upto 1 TB 13 ... True \n", "1718 Memory Card Supported, upto 1 TB 13 ... True \n", "1719 Memory Card Supported 13 ... True \n", "\n", " Battery_bin_1 Battery_bin_2 Ram_bin_0 Ram_bin_1 Ram_bin_2 Camera_bin_0 \\\n", "0 False False True False False True \n", "1 False False True False False True \n", "2 False False False True False True \n", "3 False False False True False True \n", "4 False False False True False True \n", "... ... ... ... ... ... ... \n", "1715 False False False True False True \n", "1716 False False False True False True \n", "1717 False False False True False True \n", "1718 False False False True False True \n", "1719 False False False True False False \n", "\n", " Camera_bin_1 Camera_bin_2 Camera_to_Display_Ratio \n", "0 False False 8.333333 \n", "1 False False 8.333333 \n", "2 False False 8.333333 \n", "3 False False 10.666667 \n", "4 False False 10.666667 \n", "... ... ... ... \n", "1715 False False 8.333333 \n", "1716 False False 8.333333 \n", "1717 False False 8.333333 \n", "1718 False False 8.333333 \n", "1719 True False 18.000000 \n", "\n", "[1720 rows x 30 columns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scaler = StandardScaler()\n", "\n", "numerical_features = [\n", " \"Ram\",\n", " \"Battery\",\n", " \"Display\",\n", " \"Camera\",\n", "]\n", "\n", "train_df_enc[numerical_features] = scaler.fit_transform(\n", " train_df_enc[numerical_features]\n", ")\n", "val_df_enc[numerical_features] = scaler.transform(val_df_enc[numerical_features])\n", "test_df_enc[numerical_features] = scaler.transform(test_df_enc[numerical_features])\n", "\n", "train_df_enc" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Конструирование признаков с помощью Featuretools" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index id not found in dataframe, creating new integer column\n", " warnings.warn(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n", " pd.to_datetime(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n", " warnings.warn(\n" ] }, { "data": { "text/plain": [ "[,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ,\n", " ]" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "es = ft.EntitySet(id=\"mobile_data\")\n", "es = es.add_dataframe(dataframe_name=\"train\", dataframe=train_df_enc, index=\"id\")\n", "feature_matrix, feature_defs = ft.dfs(\n", " entityset=es, target_dataframe_name=\"train\", max_depth=2\n", ")\n", "\n", "feature_defs" ] } ], "metadata": { "kernelspec": { "display_name": ".venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.8" } }, "nbformat": 4, "nbformat_minor": 2 }