2585 lines
263 KiB
Plaintext
Raw Normal View History

2024-12-06 18:43:41 +04:00
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Загрузка набора данных"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 30,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Rating</th>\n",
" <th>Spec_score</th>\n",
" <th>No_of_sim</th>\n",
" <th>Ram</th>\n",
" <th>Battery</th>\n",
" <th>Display</th>\n",
" <th>Camera</th>\n",
" <th>External_Memory</th>\n",
" <th>Android_version</th>\n",
" <th>Price</th>\n",
" <th>company</th>\n",
" <th>Inbuilt_memory</th>\n",
" <th>fast_charging</th>\n",
" <th>Screen_resolution</th>\n",
" <th>Processor</th>\n",
" <th>Processor_name</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Samsung Galaxy F14 5G</td>\n",
" <td>4.65</td>\n",
" <td>68</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>6000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>9999.0</td>\n",
" <td>Samsung</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>25W Fast Charging</td>\n",
" <td>2408 x 1080 px Display with Water Drop Notch</td>\n",
" <td>Octa Core Processor</td>\n",
" <td>Exynos 1330</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Samsung Galaxy A11</td>\n",
" <td>4.20</td>\n",
" <td>63</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>2</td>\n",
" <td>4000</td>\n",
" <td>6</td>\n",
" <td>13.0</td>\n",
" <td>Memory Card Supported, upto 512 GB</td>\n",
" <td>10</td>\n",
" <td>9990.0</td>\n",
" <td>Samsung</td>\n",
" <td>32 GB inbuilt</td>\n",
" <td>15W Fast Charging</td>\n",
" <td>720 x 1560 px Display with Punch Hole</td>\n",
" <td>1.8 GHz Processor</td>\n",
" <td>Octa Core</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Samsung Galaxy A13</td>\n",
" <td>4.30</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>12</td>\n",
" <td>11999.0</td>\n",
" <td>Samsung</td>\n",
" <td>64 GB inbuilt</td>\n",
" <td>25W Fast Charging</td>\n",
" <td>1080 x 2408 px Display with Water Drop Notch</td>\n",
" <td>2 GHz Processor</td>\n",
" <td>Octa Core</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>Samsung Galaxy F23</td>\n",
" <td>4.10</td>\n",
" <td>73</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>6000</td>\n",
" <td>6</td>\n",
" <td>48.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>12</td>\n",
" <td>11999.0</td>\n",
" <td>Samsung</td>\n",
" <td>64 GB inbuilt</td>\n",
" <td>NaN</td>\n",
" <td>720 x 1600 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Helio G88</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Samsung Galaxy A03s (4GB RAM + 64GB)</td>\n",
" <td>4.10</td>\n",
" <td>69</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>13.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>11999.0</td>\n",
" <td>Samsung</td>\n",
" <td>64 GB inbuilt</td>\n",
" <td>15W Fast Charging</td>\n",
" <td>720 x 1600 px Display with Water Drop Notch</td>\n",
" <td>Octa Core</td>\n",
" <td>Helio P35</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1365</th>\n",
" <td>TCL 40R</td>\n",
" <td>4.05</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card (Hybrid)</td>\n",
" <td>12</td>\n",
" <td>18999.0</td>\n",
" <td>TCL</td>\n",
" <td>64 GB inbuilt</td>\n",
" <td>15W Fast Charging</td>\n",
" <td>720 x 1612 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 700 5G</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1366</th>\n",
" <td>TCL 50 XL NxtPaper 5G</td>\n",
" <td>4.10</td>\n",
" <td>80</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card (Hybrid)</td>\n",
" <td>14</td>\n",
" <td>24990.0</td>\n",
" <td>TCL</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>33W Fast Charging</td>\n",
" <td>1200 x 2400 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 7050</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1367</th>\n",
" <td>TCL 50 XE NxtPaper 5G</td>\n",
" <td>4.00</td>\n",
" <td>80</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>23990.0</td>\n",
" <td>TCL</td>\n",
" <td>256 GB inbuilt</td>\n",
" <td>18W Fast Charging</td>\n",
" <td>720 x 1612 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 6080</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1368</th>\n",
" <td>TCL 40 NxtPaper 5G</td>\n",
" <td>4.50</td>\n",
" <td>79</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>22499.0</td>\n",
" <td>TCL</td>\n",
" <td>256 GB inbuilt</td>\n",
" <td>15W Fast Charging</td>\n",
" <td>720 x 1612 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 6020</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1369</th>\n",
" <td>TCL Trifold</td>\n",
" <td>4.65</td>\n",
" <td>93</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G,</td>\n",
" <td>12</td>\n",
" <td>4600</td>\n",
" <td>10</td>\n",
" <td>NaN</td>\n",
" <td>50 MP + 48 MP + 8 MP Triple Rear &amp;amp; 32 MP F...</td>\n",
" <td>13</td>\n",
" <td>119990.0</td>\n",
" <td>TCL</td>\n",
" <td>256 GB inbuilt</td>\n",
" <td>67W Fast Charging</td>\n",
" <td>1916 x 2160 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Snapdragon 8 Gen2</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1370 rows × 17 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Rating Spec_score \\\n",
"0 Samsung Galaxy F14 5G 4.65 68 \n",
"1 Samsung Galaxy A11 4.20 63 \n",
"2 Samsung Galaxy A13 4.30 75 \n",
"3 Samsung Galaxy F23 4.10 73 \n",
"4 Samsung Galaxy A03s (4GB RAM + 64GB) 4.10 69 \n",
"... ... ... ... \n",
"1365 TCL 40R 4.05 75 \n",
"1366 TCL 50 XL NxtPaper 5G 4.10 80 \n",
"1367 TCL 50 XE NxtPaper 5G 4.00 80 \n",
"1368 TCL 40 NxtPaper 5G 4.50 79 \n",
"1369 TCL Trifold 4.65 93 \n",
"\n",
" No_of_sim Ram Battery Display Camera \\\n",
"0 Dual Sim, 3G, 4G, 5G, VoLTE, 4 6000 6 50.0 \n",
"1 Dual Sim, 3G, 4G, VoLTE, 2 4000 6 13.0 \n",
"2 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"3 Dual Sim, 3G, 4G, VoLTE, 4 6000 6 48.0 \n",
"4 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 13.0 \n",
"... ... ... ... ... ... \n",
"1365 Dual Sim, 3G, 4G, 5G, VoLTE, 4 5000 6 50.0 \n",
"1366 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"1367 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1368 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1369 Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G, 12 4600 10 NaN \n",
"\n",
" External_Memory Android_version \\\n",
"0 Memory Card Supported, upto 1 TB 13 \n",
"1 Memory Card Supported, upto 512 GB 10 \n",
"2 Memory Card Supported, upto 1 TB 12 \n",
"3 Memory Card Supported, upto 1 TB 12 \n",
"4 Memory Card Supported, upto 1 TB 11 \n",
"... ... ... \n",
"1365 Memory Card (Hybrid) 12 \n",
"1366 Memory Card (Hybrid) 14 \n",
"1367 Memory Card Supported, upto 1 TB 13 \n",
"1368 Memory Card Supported, upto 1 TB 13 \n",
"1369 50 MP + 48 MP + 8 MP Triple Rear &amp; 32 MP F... 13 \n",
"\n",
" Price company Inbuilt_memory fast_charging \\\n",
"0 9999.0 Samsung 128 GB inbuilt 25W Fast Charging \n",
"1 9990.0 Samsung 32 GB inbuilt 15W Fast Charging \n",
"2 11999.0 Samsung 64 GB inbuilt 25W Fast Charging \n",
"3 11999.0 Samsung 64 GB inbuilt NaN \n",
"4 11999.0 Samsung 64 GB inbuilt 15W Fast Charging \n",
"... ... ... ... ... \n",
"1365 18999.0 TCL 64 GB inbuilt 15W Fast Charging \n",
"1366 24990.0 TCL 128 GB inbuilt 33W Fast Charging \n",
"1367 23990.0 TCL 256 GB inbuilt 18W Fast Charging \n",
"1368 22499.0 TCL 256 GB inbuilt 15W Fast Charging \n",
"1369 119990.0 TCL 256 GB inbuilt 67W Fast Charging \n",
"\n",
" Screen_resolution Processor \\\n",
"0 2408 x 1080 px Display with Water Drop Notch Octa Core Processor \n",
"1 720 x 1560 px Display with Punch Hole 1.8 GHz Processor \n",
"2 1080 x 2408 px Display with Water Drop Notch 2 GHz Processor \n",
"3 720 x 1600 px Octa Core \n",
"4 720 x 1600 px Display with Water Drop Notch Octa Core \n",
"... ... ... \n",
"1365 720 x 1612 px Octa Core \n",
"1366 1200 x 2400 px Octa Core \n",
"1367 720 x 1612 px Octa Core \n",
"1368 720 x 1612 px Octa Core \n",
"1369 1916 x 2160 px Octa Core \n",
"\n",
" Processor_name \n",
"0 Exynos 1330 \n",
"1 Octa Core \n",
"2 Octa Core \n",
"3 Helio G88 \n",
"4 Helio P35 \n",
"... ... \n",
"1365 Dimensity 700 5G \n",
"1366 Dimensity 7050 \n",
"1367 Dimensity 6080 \n",
"1368 Dimensity 6020 \n",
"1369 Snapdragon 8 Gen2 \n",
"\n",
"[1370 rows x 17 columns]"
]
},
2024-12-07 00:08:27 +04:00
"execution_count": 30,
2024-12-06 18:43:41 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"import numpy as np\n",
"import featuretools as ft\n",
"import re\n",
"from sklearn.preprocessing import StandardScaler\n",
"from imblearn.over_sampling import RandomOverSampler\n",
2024-12-07 00:08:27 +04:00
"from sklearn.model_selection import train_test_split\n",
2024-12-06 18:43:41 +04:00
"\n",
"# Load the phone catalogue; the path is relative to the notebook's folder.\n",
"df = pd.read_csv(\"../data/mobile phone price prediction.csv\")\n",
"\n",
"# \"Unnamed: 0\" is dropped (presumably a saved row index - confirm against the CSV).\n",
"# Price is read as text, so the commas are removed before casting to float.\n",
"df = df.drop(columns=[\"Unnamed: 0\"])\n",
"df[\"Price\"] = df[\"Price\"].str.replace(\",\", \"\").astype(float)\n",
"\n",
"# Text columns from which only the first run of digits is kept as a number.\n",
"numerical_features = [\n",
"    \"Ram\",\n",
"    \"Battery\",\n",
"    \"Display\",\n",
"    \"Camera\",\n",
"]\n",
"\n",
"for feature in numerical_features:\n",
"    # Vectorised and NaN-safe: a cell that is missing or has no digits becomes\n",
"    # NaN instead of raising inside a row-wise lambda, and the regex runs once.\n",
"    first_number = df[feature].str.extract(r\"(\\d+)\", expand=False)\n",
"    df[feature] = pd.to_numeric(first_number)\n",
"\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Бизнес-цели\n",
"1. Классифицировать мобильные устройства по ценовым категориям (например, бюджетные, средний класс, флагманы).\n",
"2. Определить, какие характеристики мобильных устройств наиболее сильно влияют на их рейтинг."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Проверка на пропущенные значения"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 31,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Пропущенные данные по каждому столбцу:\n",
"Name 0\n",
"Rating 0\n",
"Spec_score 0\n",
"No_of_sim 0\n",
"Ram 0\n",
"Battery 0\n",
"Display 0\n",
"Camera 79\n",
"External_Memory 0\n",
"Android_version 443\n",
"Price 0\n",
"company 0\n",
"Inbuilt_memory 19\n",
"fast_charging 89\n",
"Screen_resolution 2\n",
"Processor 28\n",
"Processor_name 0\n",
"dtype: int64\n"
]
}
],
"source": [
"# Count the missing cells in every column and print them under a heading.\n",
"missing_per_column = df.isnull().sum()\n",
"print(\"Пропущенные данные по каждому столбцу:\")\n",
"print(missing_per_column)"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 32,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Name 0\n",
"Rating 0\n",
"Spec_score 0\n",
"No_of_sim 0\n",
"Ram 0\n",
"Battery 0\n",
"Display 0\n",
"Camera 0\n",
"External_Memory 0\n",
"Android_version 0\n",
"Price 0\n",
"company 0\n",
"Inbuilt_memory 0\n",
"fast_charging 0\n",
"Screen_resolution 0\n",
"Processor 0\n",
"Processor_name 0\n",
"dtype: int64\n"
]
}
],
"source": [
"# Keep only the rows without any missing value, then print the per-column\n",
"# null counts again to confirm that nothing is missing any more.\n",
"df = df.dropna()\n",
"print(df.isnull().sum())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Проверка на выбросы"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 33,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2wAAAIjCAYAAAB/FZhcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADOWUlEQVR4nOzde3wU1dkH8N/M7P2SzYUkhBAIJMhdrYhIEdGKxGulahW1FZB6e0Gr2Fp9bRVqLa22td6ttYK1tUXbqvXGRRB9BbwiahAwIBFMQhJy2ex9d3bO+8eya5bsJiHZsJvw+34++bTMnJ05OzOJ++xzznMkIYQAERERERERZRw53R0gIiIiIiKixBiwERERERERZSgGbERERERERBmKARsREREREVGGYsBGRERERESUoRiwERERERERZSgGbERERERERBmKARsREREREVGGYsBGRERERESUoRiwEVFGq66uhiRJWLFiRbq7EmfVqlU4/vjjYTKZIEkSWltb090lGgAkScKSJUuO+Hk1TcOECRNwzz33HJHz3XfffRg5ciQURcHxxx9/RM5JwIoVKyBJEqqrq9PdlSNmyZIlkCQpbltpaSnmzZt3xPvy+OOPY9iwYQgEAkf83NS/MWAjSpPPPvsMF198MYYPHw6TyYTi4mKceeaZeOihh/rsnM8++yz++Mc/dtheW1uLJUuWYOvWrX127kNt2LABkiTFfvR6PUaOHIkrr7wSX375ZUrOsWnTJixZsiTlwVRTUxMuueQSmM1mPPLII3jmmWdgtVqTtk/HvaYj77TTTot7pnNzczF58mQ89dRT0DQt3d3r1D/+8Q/s27cPixYtim2Lfrhv/1NQUIDTTz8dr7/+eo/PtWbNGtx6662YNm0ali9fjl//+tepeAu9duj9M5vNOPbYY/HHP/4x4+9ff7Jx40Z873vfQ2FhIYxGI0pLS3Httddi7969PT6m1+vFkiVLsGHDhtR1tA/MmzcPwWAQf/rTn9LdFepndOnuANHRaNOmTTj99NMxbNgwXH311Rg8eDD27duHd999Fw888ABuuOGGPjnvs88+i8rKStx0001x22tra7F06VKUlpYe8W+7b7zxRkyePBmhUAhbtmzBE088gVdffRWfffYZhgwZ0qtjb9q0CUuXLsW8efOQnZ2dmg4D+OCDD+ByuXD33Xdj5syZXfYhHfea0mPo0KFYtmwZAKCxsRF//etfsWDBAnzxxRf4zW9+0+XrfT4fdLoj/5/m++67D3PmzIHD4eiw75e//CVGjBgBIQTq6+uxYsUKnHPOOXj55Zdx3nnnHfa51q9fD1mW8Ze//AUGgyEV3U+Z9vfvwIEDePbZZ3HzzTejsbHxiGUf+9IPf/hDzJkzB0ajMS3nf+ihh/DjH/8YI0eOxA033ICioiJs374dTz75JFauXInXXnsN3/72tw/7uF6vF0uXLgUQCby7snPnTsjykc9ZmEwmzJ07F3/4wx9www03dMj8ESXDgI0oDe655x44HA588MEHHQKJhoaG9HSqD3g8nk4zTwAwffp0XHzxxQCA+fPn45hjjsGNN96Ip59+GrfffvuR6OZhi96j7gSBR8u97mterxcWiyXd3eiSw+HAD37wg9i/r732WowePRoPP/ww7r77buj1+g6v0TQNwWAQJpMJJpPpSHYXAPDxxx/jk08+we9///uE+88++2yceOKJsX8vWLAAhYWF+Mc//tGjgK2hoQFmszllwZoQAn6/H2azudfHOvT+XXfddRgzZgweeugh/PKXv4SiKL0+RzopipK297Bx40bcdNNNOOWUU7Bq1aq43+frr78e06ZNw8UXX4xt27YhJyenT/uSyoBVVVVomtbt5/mSSy7BvffeizfffBPf+c53UtYPGtg4JJIoDXbv3o3x48cn/MBfUFDQYdvf/vY3nHTSSbBYLMjJycGpp56KNWvWxPa/9NJLOPfcczFkyBAYjUaUlZXh7rvvRj
gcjrU57bTT8Oqrr+Krr76KDfkpLS3Fhg0bMHnyZACRgCm6r/2csffeew9nnXUWHA4HLBYLZsyYgY0bN8b1MTpP4PPPP8fll1+OnJwcnHLKKYd9baL/AduzZ0+n7davX4/p06fDarUiOzsbF1xwAbZv3x7Xn5/+9KcAgBEjRsTeV1dzN55//nlMmjQJZrMZgwYNwg9+8APU1NTE9p922mmYO3cuAGDy5MmQJKnTuRCHc68lScKiRYvw97//HaNHj4bJZMKkSZPw9ttvd3htTU0NrrrqqtiwovHjx+Opp57q0M7v92PJkiU45phjYDKZUFRUhAsvvBC7d+/u9Dq0V1VVhYsuugiDBw+GyWTC0KFDMWfOHDidzrh2XT2nAPDoo49i/PjxMBqNGDJkCBYuXNhhyOppp52GCRMm4KOPPsKpp54Ki8WC//3f/wUABAIB3HXXXSgvL4fRaERJSQluvfXWLueELFq0CDabDV6vt8O+yy67DIMHD479vnz44YeoqKjAoEGDYDabMWLECFx11VXdvl7tWSwWnHzyyfB4PGhsbAQQf5+j12LVqlWxfYfOYaupqcGCBQtiv98jRozA9ddfj2AwGGvT2tqKm266CSUlJTAajSgvL8dvf/vbbg3le/HFF2EwGHDqqad26z1lZ2fDbDZ3yARqmoY//vGPGD9+PEwmEwoLC3HttdeipaUl1kaSJCxfvhwej6fD3xpVVXH33XejrKwsNlTuf//3fzvc29LSUpx33nlYvXo1TjzxRJjN5tgQs95ch0RMJhMmT54Ml8vV4QuWv/3tb7G/Fbm5uZgzZw727dsX1yb6LH/66aeYMWMGLBYLysvL8a9//QsA8NZbb2HKlCkwm80YPXo03njjjQ59+Pjjj3H22WcjKysLNpsNZ5xxBt59993Y/g8//BCSJOHpp5/u8NrVq1dDkiS88sorABLPYYtez3feeQcnnXQSTCYTRo4cib/+9a8djhd9H2azGUOHDsWvfvUrLF++vFt/W+++++5YPw/98qWsrAz33nsv6urq4oYLnnbaaQkzZvPmzUNpaSmAyDzn/Px8AMDSpUtjz1Vnc0ETzWHrzrMTnVP9u9/9Dn/84x9jz+rnn38OIJJBHD9+fOxv4Iknnohnn3027jyTJk1Cbm4uXnrppU6vF1F7zLARpcHw4cOxefNmVFZWYsKECZ22Xbp0KZYsWYJvf/vb+OUvfwmDwYD33nsP69evx6xZswBE/iNss9mwePFi2Gw2rF+/HnfeeSfa2tpw3333AQDuuOMOOJ1OfP3117j//vsBADabDWPHjsUvf/lL3Hnnnbjmmmswffp0AIgNS1m/fj3OPvtsTJo0CXfddRdkWcby5cvxne98B//3f/+Hk046Ka6/3//+9zFq1Cj8+te/hhDisK9NNJDIy8tL2uaNN97A2WefjZEjR2LJkiXw+Xx46KGHMG3aNGzZsgWlpaW48MIL8cUXX+Af//gH7r//fgwaNAgAYv9hT2TFihWYP38+Jk+ejGXLlqG+vh4PPPAANm7ciI8//hjZ2dm44447MHr0aDzxxBOxoWJlZWVJj3k49xqIfIBbuXIlbrzxRhiNRjz66KM466yz8P7778deX19fj5NPPjn2wT8/Px+vv/46FixYgLa2ttiQ13A4jPPOOw/r1q3DnDlz8OMf/xgulwtr165FZWVlp/2OCgaDqKioQCAQwA033IDBgwejpqYGr7zyClpbW2ND6LrznC5ZsgRLly7FzJkzcf3112Pnzp147LHH8MEHH2Djxo1x2aempiacffbZmDNnDn7wgx+gsLAQmqbhu9/9Lt555x1cc801GDt2LD777DPcf//9+OKLL/Diiy8mfR+XXnopHnnkEbz66qv4/ve/H9vu9Xrx8ssvY968eVAUBQ0NDZg1axby8/Nx2223ITs7G9XV1fjPf/7T5bVK5ssvv4SiKHFB+/r16/Hcc89h0aJFGDRoUOzD56Fqa2tx0kknobW1Fddccw
3GjBmDmpoa/Otf/4LX64XBYIDX68WMGTNQU1ODa6+9FsOGDcOmTZtw++23o66uLuG81fY2bdqECRMmJMz+AYDT6cS
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2QAAAIjCAYAAABswtioAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU1d0/8M+9M3Nnz2RPIAQCCcqqFIqKSHElpaht3ZdWwKUtFa1Ln5/1qXV9Wmx9XOpKaytYq3Vp1dZaBAW1LWAflWoB2YJEItkgy2Qy252Ze35/hBkzyQSSydzMTPJ5v15pzczl5MyZM8l855zz/UpCCAEiIiIiIiIacnK6O0BERERERDRSMSAjIiIiIiJKEwZkREREREREacKAjIiIiIiIKE0YkBEREREREaUJAzIiIiIiIqI0YUBGRERERESUJgzIiIiIiIiI0oQBGRERERERUZowICOijFRbWwtJkrB69ep0dyXOG2+8gRkzZsBisUCSJLS3t6e7SzQMSJKEO++8c8h/rqZpmDZtGn76058m3cYzzzyDSZMmwWQyITc3N3Wdo15OPfVUnHrqqenuxpCqqKjAkiVLYt+/8847kCQJ77zzzpD35aSTTsL/+3//b8h/Lg1/DMiIhtjWrVtxwQUXYNy4cbBYLCgrK8NZZ52FRx55RLef+dxzz+Ghhx7qdXt9fT3uvPNOfPTRR7r97J6if0yjXyaTCRMmTMAVV1yBTz/9NCU/Y9OmTbjzzjtTHiy1tLTgoosugtVqxWOPPYZnnnkGdru9z+vT8VzT0Dv11FPj5nR+fj5mz56Np556Cpqmpbt7R/SHP/wBdXV1WL58ecL7H3/8cUiShBNPPDHh/Tt37sSSJUtQWVmJJ598Er/+9a/h8/lw5513Dukb5ugHONEvWZaRn5+PhQsXYvPmzUPWj+EuFArh4YcfxuzZs+F0OuFwODB79mw8/PDDCIVCSber1+/sVLvlllvw2GOPobGxMd1doeFGENGQ2bhxo1AURVRVVYl77rlHPPnkk+L2228XCxYsEJWVlbr93EWLFolx48b1uv39998XAMSqVat0+9k9vf322wKAuP7668UzzzwjnnrqKbF8+XKhKIrIz88XBw4cEEIIsW/fvqT7dt999wkAYt++fSnt+5o1awQA8eabbx712nQ91zT05s+fL8aMGSOeeeYZ8cwzz4gHHnhAzJgxQwAQt9xyS7/a8Pv9IhQK6dzT3o4//njxne98p8/7Tz75ZFFRUSEAiD179vS6/4knnuh138GDBwUAcccdd+jR5YSivy8uvfRS8cwzz4jVq1eL//7v/xa5ubnCbDaL//znP0PWFz0Fg0ERDAbT8rM7OzvF/PnzBQBx9tlni0cffVQ8/vjj4txzzxUAxPz580VnZ2dSbR/pd/a4cePE4sWLY99HIhHh9/tFJBJJ8pEkLxKJiNLSUvGTn/xkyH82DW/GtESBRCPUT3/6U7hcLrz//vu9tvY0Nzenp1M68Hq9R1w5AoB58+bhggsuAAAsXboUxxxzDK6//no8/fTTuPXWW4eimwMWfY76sy1rpDzXevP5fLDZbOnuxlG5XC5861vfin3/3e9+F8ceeyweffRR3HPPPTCZTL3+jaZpUFUVFosFFotlKLsLAPj3v/+Njz/+GPfff3/C+/ft24dNmzbh5Zdfxne/+108++yzuOOOO+KuGchrYrD683tl5syZcc/DvHnzsHDhQjzxxBN4/PHH9e6i7hRFSdvPvummm/Duu+/ikUceiVtRXbZsGR577DEsX74cP/zhD/HEE0/o2g9ZllP6eunPvOr+sy+44AL87ne/w1133QVJklLWDxrh0h0REo0kxx57rDj11FP7ff0zzzwjZs+eLaxWq8jNzRXz5s0Ta9eujd3/6quviq997Wti1KhRQlEUMWHCBHH33XeLcDgcuyb6iWb3r3HjxsVWqnp+dV+Reu+990R1dbXIyckRVqtVfO
UrXxH//Oc/4/p4xx13CABi+/bt4tJLLxW5ublixowZfT6m6M996aWX4m7ftm2bACCuueYaIUTfK2Tr168Xp5xyirDZbMLlcolzzz1XfPLJJ7360/PraKtlL774opg5c6awWCyioKBAXH755eLzzz8/4jh2/9S2p4E81wDEtddeK37/+9+LY445RpjNZjFz5kzx7rvv9rr2888/F0uXLhXFxcVCURQxZcoU8dvf/rbXdX6/X9xxxx1i4sSJwmw2i9LSUvHNb35T1NTU9KtPQgixe/ducd5554mSkhJhNptFWVmZuPjii0V7e3vcdUebp0II8dhjj4kpU6YIRVHEqFGjxPe//33R1tYWd838+fPF1KlTxQcffCDmzZsnrFar+MEPfiCEECIQCIjbb79dVFZWCkVRxJgxY8R//dd/iUAgcMTHcO211wq73S68Xm+v+y655BJRUlISe728//77YsGCBaKgoEBYLBZRUVEhli5detRxiva7pwsuuEAAiK36dn+ep0yZIoxGo3jllVdi9/VcUfr888/FlVdeGXt9V1RUiO9973txKyRtbW3iBz/4gRgzZoxQFEVUVlaKe++9t1+rB7fffrtQFEWoqprw/nvuuUfk5eWJYDAoli1bJiZOnBh3/7hx4xK+JhK9/ro/th07dojzzz9f5OXlCbPZLGbNmiX+/Oc/x7W9atUqAUC88847YtmyZaKoqEjk5ub2+Viivy/uu+++uNs7OzsFALFgwYK42/szbt3bfPTRR8X48eOF1WoVZ511lti/f7/QNE3cfffdoqysTFgsFnHuueeKlpaWXn072twfyBydP3++mD9/fuz+6O/TF154QfzP//yPKCsrE2azWZx++ukJVzSjj8NisYjZs2eLv//9773aTKSurk4YDAZx+umn93nNaaedJoxGo6irq4sbv0S7HLrPiaP9zu65QhZ9zG+//XZcm4P9e9XQ0CCWLFkiysrKhKIoorS0VJx77rm9/nb8+c9/FgDEli1bjjhmRAPBFTKiITRu3Dhs3rwZ27Ztw7Rp04547V133YU777wTJ598Mu6++24oioJ//etf2LBhAxYsWAAAWL16NRwOB2666SY4HA5s2LABt99+Ozo6OnDfffcBAH784x/D7Xbj888/x4MPPggAcDgcmDx5Mu6++27cfvvt+M53voN58+YBAE4++WQAwIYNG7Bw4ULMmjULd9xxB2RZxqpVq3D66afjH//4B0444YS4/l544YWYOHEifvazn0EIMeCx2bt3LwCgoKCgz2veeustLFy4EBMmTMCdd94Jv9+PRx55BHPnzsWWLVtQUVGB8847D7t378Yf/vAHPPjggygsLAQAFBUV9dnu6tWrsXTpUsyePRsrVqxAU1MTfvnLX2Ljxo3497//jdzcXPz4xz/Gsccei1//+te4++67MX78eFRWVvbZ5kCeawB499138cILL+D666+H2WzG448/jq9+9av4v//7v9i/b2pqwkknnQRJkrB8+XIUFRVhzZo1uOqqq9DR0YEbbrgBABCJRHD22Wdj/fr1uOSSS/CDH/wAHo8Hb775JrZt23bEfkepqorq6moEg0Fcd911KC0txYEDB/DXv/4V7e3tcLlcAPo3T++8807cddddOPPMM7Fs2TLs2rULTzzxBN5//31s3LgxbvWopaUFCxcuxCWXXIJvfetbKCkpgaZpOPfcc/HPf/4T3/nOdzB58mRs3boVDz74IHbv3o1XX321z8dx8cUX47HHHsPrr7+OCy+8MHa7z+fDa6+9hiVLlsBgMKC5uRkLFixAUVERfvSjHyE3Nxe1tbV4+eWXjzpWffn0009hMBjiVo82bNiAF198EcuXL0dhYSEqKioS/tv6+nqccMIJaG9vx3e+8x1MmjQJBw4cwB//+Ef4fD4oigKfz4f58+fjwIED+O53v4uxY8di06ZNuPXWW9HQ0JDw3Gh3mzZtwrRp0xKu3gHAs88+i/POOw+KouDSSy+NPW
ezZ88GADz00EP43e9+h1deeQVPPPEEHA4Hpk+fjpNOOgnLli3DN7/5TZx33nkAgOOOOw4AsH37dsydOxdlZWX40Y9
"text/plain": [
"<Figure size 1000x600 with 1 Axes>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Количество строк до удаления выбросов: 785\n",
"Количество строк после удаления выбросов: 721\n"
]
}
],
"source": [
"# The two numeric columns that are plotted and screened for outliers.\n",
"column1, column2 = \"Spec_score\", \"Price\"\n",
"\n",
"# Scatter of the data as it stands before the outlier filter is applied.\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.scatter(df[column1], df[column2], alpha=0.5)\n",
"ax.set_xlabel(column1)\n",
"ax.set_ylabel(column2)\n",
"ax.set_title(f\"Scatter Plot of {column1} vs {column2} (Before Removing Outliers)\")\n",
"plt.show()\n",
"\n",
"def remove_outliers(df, column, k=1.5):\n",
"    \"\"\"Return the rows of ``df`` whose ``column`` value lies inside the IQR fences.\n",
"\n",
"    The fences are ``Q1 - k * IQR`` and ``Q3 + k * IQR`` (both inclusive), where\n",
"    Q1 and Q3 are the 25th and 75th percentiles of ``column``. Rows with a\n",
"    missing value in ``column`` are dropped because they fail the range check.\n",
"\n",
"    Args:\n",
"        df: Source DataFrame; it is not modified.\n",
"        column: Name of the numeric column to screen.\n",
"        k: IQR multiplier for the fences; defaults to 1.5.\n",
"\n",
"    Returns:\n",
"        The filtered DataFrame, keeping the original index labels.\n",
"    \"\"\"\n",
"    q1, q3 = df[column].quantile([0.25, 0.75])\n",
"    iqr = q3 - q1\n",
"    return df[df[column].between(q1 - k * iqr, q3 + k * iqr)]\n",
"\n",
"# Filter one column after the other: the fences for the second column are\n",
"# computed on the rows that survived the filter on the first column.\n",
"df_cleaned = df.copy()\n",
"for column in (column1, column2):\n",
"    df_cleaned = remove_outliers(df_cleaned, column)\n",
"\n",
"# Same scatter as before the filter, drawn from the filtered rows.\n",
"fig, ax = plt.subplots(figsize=(10, 6))\n",
"ax.scatter(df_cleaned[column1], df_cleaned[column2], alpha=0.5)\n",
"ax.set_xlabel(column1)\n",
"ax.set_ylabel(column2)\n",
"ax.set_title(f\"Scatter Plot of {column1} vs {column2} (After Removing Outliers)\")\n",
"plt.show()\n",
"\n",
"print(f\"Количество строк до удаления выбросов: {len(df)}\")\n",
"print(f\"Количество строк после удаления выбросов: {len(df_cleaned)}\")\n",
"\n",
"# NOTE(review): rebinding df means that re-running this cell filters the\n",
"# already filtered data again - restart the kernel before a re-run.\n",
"df = df_cleaned"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Разбиение данных на выборки."
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 34,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размеры выборок:\n",
"Обучающая выборка: 432 записей\n",
"company\n",
"Realme 86\n",
"Samsung 70\n",
"Motorola 50\n",
"Vivo 48\n",
"Xiaomi 45\n",
"Poco 32\n",
"OnePlus 15\n",
"iQOO 14\n",
"OPPO 12\n",
"POCO 11\n",
"Honor 11\n",
"TCL 11\n",
"Lava 9\n",
"Oppo 5\n",
"Huawei 5\n",
"itel 3\n",
"Google 2\n",
"Gionee 1\n",
"IQOO 1\n",
"Lenovo 1\n",
"Name: count, dtype: int64\n",
"Контрольная выборка: 144 записей\n",
"company\n",
"Vivo 27\n",
"Samsung 27\n",
"Realme 21\n",
"Xiaomi 12\n",
"Poco 11\n",
"Motorola 10\n",
"OnePlus 7\n",
"OPPO 6\n",
"POCO 6\n",
"Honor 3\n",
"itel 3\n",
"Lava 2\n",
"LG 2\n",
"iQOO 2\n",
"Lenovo 2\n",
"Oppo 1\n",
"Itel 1\n",
"Google 1\n",
"Name: count, dtype: int64\n",
"Тестовая выборка: 145 записей\n",
"company\n",
"Samsung 27\n",
"Vivo 25\n",
"Realme 16\n",
"Xiaomi 12\n",
"Motorola 11\n",
"Poco 10\n",
"OnePlus 7\n",
"TCL 7\n",
"iQOO 7\n",
"Huawei 5\n",
"Oppo 4\n",
"Lenovo 2\n",
"Honor 2\n",
"Lava 2\n",
"itel 2\n",
"Tecno 1\n",
"Google 1\n",
"OPPO 1\n",
"Coolpad 1\n",
"POCO 1\n",
"Itel 1\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"# X is the whole frame (label column included), so every split part below is\n",
"# a complete DataFrame; y carries the company label alongside it.\n",
"X = df\n",
"y = df[\"company\"]\n",
"\n",
"# First hold out 40 % of the rows, then halve that hold-out into the\n",
"# validation and test parts. Both calls use the same fixed seed.\n",
"train_df, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n",
"val_df, test_df, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n",
"\n",
"# Report the size and the per-company row counts of each part.\n",
"print(\"Размеры выборок:\")\n",
"print(f\"Обучающая выборка: {train_df.shape[0]} записей\")\n",
"print(train_df[\"company\"].value_counts())\n",
"print(f\"Контрольная выборка: {val_df.shape[0]} записей\")\n",
"print(val_df[\"company\"].value_counts())\n",
"print(f\"Тестовая выборка: {test_df.shape[0]} записей\")\n",
"print(test_df[\"company\"].value_counts())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Oversampling"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 35,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Размеры выборок:\n",
"Обучающая выборка: 1720 записей\n",
"company\n",
"Vivo 86\n",
"Motorola 86\n",
"Oppo 86\n",
"POCO 86\n",
"iQOO 86\n",
"Xiaomi 86\n",
"Realme 86\n",
"OnePlus 86\n",
"Poco 86\n",
"Samsung 86\n",
"TCL 86\n",
"Gionee 86\n",
"Honor 86\n",
"OPPO 86\n",
"Lava 86\n",
"itel 86\n",
"Huawei 86\n",
"Google 86\n",
"IQOO 86\n",
"Lenovo 86\n",
"Name: count, dtype: int64\n",
"Контрольная выборка: 486 записей\n",
"company\n",
"Vivo 27\n",
"Honor 27\n",
"Motorola 27\n",
"POCO 27\n",
"Samsung 27\n",
"itel 27\n",
"Lava 27\n",
"Xiaomi 27\n",
"Realme 27\n",
"OnePlus 27\n",
"Poco 27\n",
"iQOO 27\n",
"LG 27\n",
"Oppo 27\n",
"Itel 27\n",
"OPPO 27\n",
"Google 27\n",
"Lenovo 27\n",
"Name: count, dtype: int64\n",
"Тестовая выборка: 567 записей\n",
"company\n",
"Oppo 27\n",
"Huawei 27\n",
"Samsung 27\n",
"Motorola 27\n",
"TCL 27\n",
"Realme 27\n",
"Xiaomi 27\n",
"Poco 27\n",
"Google 27\n",
"Vivo 27\n",
"iQOO 27\n",
"Tecno 27\n",
"OnePlus 27\n",
"Honor 27\n",
"OPPO 27\n",
"Lenovo 27\n",
"Lava 27\n",
"itel 27\n",
"Coolpad 27\n",
"POCO 27\n",
"Itel 27\n",
"Name: count, dtype: int64\n"
]
}
],
"source": [
"def oversample(df, target=\"company\", random_state=42):\n",
"    \"\"\"Balance the classes of ``target`` with random oversampling.\n",
"\n",
"    The frame is split into the feature columns and the ``target`` column, both\n",
"    are passed through ``RandomOverSampler.fit_resample``, and the results are\n",
"    joined back together with ``target`` as the last column.\n",
"\n",
"    Args:\n",
"        df: DataFrame that contains the ``target`` column; it is not modified.\n",
"        target: Name of the class-label column to balance on.\n",
"        random_state: Seed handed to ``RandomOverSampler`` for reproducibility.\n",
"\n",
"    Returns:\n",
"        A new DataFrame with the resampled rows.\n",
"    \"\"\"\n",
"    features = df.drop(columns=target)\n",
"    labels = df[target]\n",
"\n",
"    sampler = RandomOverSampler(random_state=random_state)\n",
"    features_resampled, labels_resampled = sampler.fit_resample(features, labels)  # type: ignore\n",
"\n",
"    return pd.concat([features_resampled, labels_resampled], axis=1)\n",
"\n",
"# Resample each split on its own.\n",
"# NOTE(review): the validation and test splits are resampled as well, so their\n",
"# class balance no longer matches the original data - confirm this is intended.\n",
"train_df_overs, val_df_overs, test_df_overs = map(\n",
"    oversample, (train_df, val_df, test_df)\n",
")\n",
"\n",
"# Report the size and the per-company row counts of each resampled split.\n",
"print(\"Размеры выборок:\")\n",
"print(f\"Обучающая выборка: {train_df_overs.shape[0]} записей\")\n",
"print(train_df_overs[\"company\"].value_counts())\n",
"print(f\"Контрольная выборка: {val_df_overs.shape[0]} записей\")\n",
"print(val_df_overs[\"company\"].value_counts())\n",
"print(f\"Тестовая выборка: {test_df_overs.shape[0]} записей\")\n",
"print(test_df_overs[\"company\"].value_counts())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Дискретизация числовых признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 36,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Rating</th>\n",
" <th>Spec_score</th>\n",
" <th>No_of_sim</th>\n",
" <th>Ram</th>\n",
" <th>Battery</th>\n",
" <th>Display</th>\n",
" <th>Camera</th>\n",
" <th>External_Memory</th>\n",
" <th>Android_version</th>\n",
" <th>...</th>\n",
" <th>Inbuilt_memory</th>\n",
" <th>fast_charging</th>\n",
" <th>Screen_resolution</th>\n",
" <th>Processor</th>\n",
" <th>Processor_name</th>\n",
" <th>company</th>\n",
" <th>Spec_score_bin</th>\n",
" <th>Battery_bin</th>\n",
" <th>Ram_bin</th>\n",
" <th>Camera_bin</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Vivo Y21T</td>\n",
" <td>3.95</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>18W Fast Charging</td>\n",
" <td>1600 x 720 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Snapdragon 680</td>\n",
" <td>Vivo</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Motorola Moto G23</td>\n",
" <td>4.40</td>\n",
" <td>77</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 512 GB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>30W Fast Charging</td>\n",
" <td>720 x 1600 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Helio G85</td>\n",
" <td>Motorola</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Oppo A78 4G</td>\n",
" <td>4.25</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>67W Fast Charging</td>\n",
" <td>1080 x 2400 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Snapdragon 680</td>\n",
" <td>Oppo</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
" <td>4.45</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>64.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>33W Fast Charging</td>\n",
" <td>1080 x 2400 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Helio G96</td>\n",
" <td>POCO</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>iQOO Z5 Pro 5G</td>\n",
" <td>4.40</td>\n",
" <td>84</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>4500</td>\n",
" <td>6</td>\n",
" <td>64.0</td>\n",
" <td>Memory Card (Hybrid)</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>65W Fast Charging</td>\n",
" <td>1080 x 2460 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Snapdragon 870</td>\n",
" <td>iQOO</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1715</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>18W Fast Charging</td>\n",
" <td>1600 x 720 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 6080</td>\n",
" <td>itel</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1716</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>18W Fast Charging</td>\n",
" <td>1600 x 720 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 6080</td>\n",
" <td>itel</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1717</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>18W Fast Charging</td>\n",
" <td>1600 x 720 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Dimensity 6080</td>\n",
" <td>itel</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1718</th>\n",
" <td>itel P55 Plus</td>\n",
" <td>4.10</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>256 GB inbuilt</td>\n",
" <td>45W Fast Charging</td>\n",
" <td>720 x 1640 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Unisoc T606</td>\n",
" <td>itel</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1719</th>\n",
" <td>itel S24</td>\n",
" <td>4.35</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>108.0</td>\n",
" <td>Memory Card Supported</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>128 GB inbuilt</td>\n",
" <td>18W Fast Charging</td>\n",
" <td>720 x 1612 px</td>\n",
" <td>Octa Core</td>\n",
" <td>Helio G91</td>\n",
" <td>itel</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1720 rows × 21 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Rating Spec_score \\\n",
"0 Vivo Y21T 3.95 74 \n",
"1 Motorola Moto G23 4.40 77 \n",
"2 Oppo A78 4G 4.25 81 \n",
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
"4 iQOO Z5 Pro 5G 4.40 84 \n",
"... ... ... ... \n",
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1718 itel P55 Plus 4.10 74 \n",
"1719 itel S24 4.35 75 \n",
"\n",
" No_of_sim Ram Battery Display Camera \\\n",
"0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n",
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n",
"... ... ... ... ... ... \n",
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n",
"\n",
" External_Memory Android_version ... \\\n",
"0 Memory Card Supported, upto 1 TB 11 ... \n",
"1 Memory Card Supported, upto 512 GB 13 ... \n",
"2 Memory Card Supported, upto 1 TB 13 ... \n",
"3 Memory Card Supported, upto 1 TB 11 ... \n",
"4 Memory Card (Hybrid) 11 ... \n",
"... ... ... ... \n",
"1715 Memory Card Supported, upto 1 TB 13 ... \n",
"1716 Memory Card Supported, upto 1 TB 13 ... \n",
"1717 Memory Card Supported, upto 1 TB 13 ... \n",
"1718 Memory Card Supported, upto 1 TB 13 ... \n",
"1719 Memory Card Supported 13 ... \n",
"\n",
" Inbuilt_memory fast_charging Screen_resolution Processor \\\n",
"0 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
"1 128 GB inbuilt 30W Fast Charging 720 x 1600 px Octa Core \n",
"2 128 GB inbuilt 67W Fast Charging 1080 x 2400 px Octa Core \n",
"3 128 GB inbuilt 33W Fast Charging 1080 x 2400 px Octa Core \n",
"4 128 GB inbuilt 65W Fast Charging 1080 x 2460 px Octa Core \n",
"... ... ... ... ... \n",
"1715 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
"1716 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
"1717 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
"1718 256 GB inbuilt 45W Fast Charging 720 x 1640 px Octa Core \n",
"1719 128 GB inbuilt 18W Fast Charging 720 x 1612 px Octa Core \n",
"\n",
" Processor_name company Spec_score_bin Battery_bin Ram_bin \\\n",
"0 Snapdragon 680 Vivo 1 0 0 \n",
"1 Helio G85 Motorola 1 0 0 \n",
"2 Snapdragon 680 Oppo 1 0 1 \n",
"3 Helio G96 POCO 1 0 1 \n",
"4 Snapdragon 870 iQOO 2 0 1 \n",
"... ... ... ... ... ... \n",
"1715 Dimensity 6080 itel 1 0 1 \n",
"1716 Dimensity 6080 itel 1 0 1 \n",
"1717 Dimensity 6080 itel 1 0 1 \n",
"1718 Unisoc T606 itel 1 0 1 \n",
"1719 Helio G91 itel 1 0 1 \n",
"\n",
" Camera_bin \n",
"0 0 \n",
"1 0 \n",
"2 0 \n",
"3 0 \n",
"4 0 \n",
"... ... \n",
"1715 0 \n",
"1716 0 \n",
"1717 0 \n",
"1718 0 \n",
"1719 1 \n",
"\n",
"[1720 rows x 21 columns]"
]
},
2024-12-07 00:08:27 +04:00
"execution_count": 36,
2024-12-06 18:43:41 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"numerical_features = [\"Spec_score\", \"Battery\", \"Ram\", \"Camera\"]\n",
"\n",
"def discretize_features(df, features, bins=3, labels=False):\n",
" for feature in features:\n",
" try:\n",
" df[f\"{feature}_bin\"] = pd.cut(df[feature], bins=bins, labels=labels) # type: ignore\n",
" except Exception as e:\n",
" print(f\"Ошибка при дискретизации признака {feature}: {e}\")\n",
" return df\n",
"\n",
"train_df_disc = discretize_features(train_df_overs, numerical_features)\n",
"val_df_disc = discretize_features(val_df_overs, numerical_features)\n",
"test_df_disc = discretize_features(test_df_overs, numerical_features)\n",
"\n",
"train_df_disc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Унитарное кодирование категориальных признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 37,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Rating</th>\n",
" <th>Spec_score</th>\n",
" <th>No_of_sim</th>\n",
" <th>Ram</th>\n",
" <th>Battery</th>\n",
" <th>Display</th>\n",
" <th>Camera</th>\n",
" <th>External_Memory</th>\n",
" <th>Android_version</th>\n",
" <th>...</th>\n",
" <th>Spec_score_bin_2</th>\n",
" <th>Battery_bin_0</th>\n",
" <th>Battery_bin_1</th>\n",
" <th>Battery_bin_2</th>\n",
" <th>Ram_bin_0</th>\n",
" <th>Ram_bin_1</th>\n",
" <th>Ram_bin_2</th>\n",
" <th>Camera_bin_0</th>\n",
" <th>Camera_bin_1</th>\n",
" <th>Camera_bin_2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Vivo Y21T</td>\n",
" <td>3.95</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Motorola Moto G23</td>\n",
" <td>4.40</td>\n",
" <td>77</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 512 GB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Oppo A78 4G</td>\n",
" <td>4.25</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
" <td>4.45</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>64.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>iQOO Z5 Pro 5G</td>\n",
" <td>4.40</td>\n",
" <td>84</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>4500</td>\n",
" <td>6</td>\n",
" <td>64.0</td>\n",
" <td>Memory Card (Hybrid)</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1715</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1716</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1717</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1718</th>\n",
" <td>itel P55 Plus</td>\n",
" <td>4.10</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1719</th>\n",
" <td>itel S24</td>\n",
" <td>4.35</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>108.0</td>\n",
" <td>Memory Card Supported</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1720 rows × 29 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Rating Spec_score \\\n",
"0 Vivo Y21T 3.95 74 \n",
"1 Motorola Moto G23 4.40 77 \n",
"2 Oppo A78 4G 4.25 81 \n",
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
"4 iQOO Z5 Pro 5G 4.40 84 \n",
"... ... ... ... \n",
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1718 itel P55 Plus 4.10 74 \n",
"1719 itel S24 4.35 75 \n",
"\n",
" No_of_sim Ram Battery Display Camera \\\n",
"0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n",
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n",
"... ... ... ... ... ... \n",
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n",
"\n",
" External_Memory Android_version ... \\\n",
"0 Memory Card Supported, upto 1 TB 11 ... \n",
"1 Memory Card Supported, upto 512 GB 13 ... \n",
"2 Memory Card Supported, upto 1 TB 13 ... \n",
"3 Memory Card Supported, upto 1 TB 11 ... \n",
"4 Memory Card (Hybrid) 11 ... \n",
"... ... ... ... \n",
"1715 Memory Card Supported, upto 1 TB 13 ... \n",
"1716 Memory Card Supported, upto 1 TB 13 ... \n",
"1717 Memory Card Supported, upto 1 TB 13 ... \n",
"1718 Memory Card Supported, upto 1 TB 13 ... \n",
"1719 Memory Card Supported 13 ... \n",
"\n",
" Spec_score_bin_2 Battery_bin_0 Battery_bin_1 Battery_bin_2 Ram_bin_0 \\\n",
"0 False True False False True \n",
"1 False True False False True \n",
"2 False True False False False \n",
"3 False True False False False \n",
"4 True True False False False \n",
"... ... ... ... ... ... \n",
"1715 False True False False False \n",
"1716 False True False False False \n",
"1717 False True False False False \n",
"1718 False True False False False \n",
"1719 False True False False False \n",
"\n",
" Ram_bin_1 Ram_bin_2 Camera_bin_0 Camera_bin_1 Camera_bin_2 \n",
"0 False False True False False \n",
"1 False False True False False \n",
"2 True False True False False \n",
"3 True False True False False \n",
"4 True False True False False \n",
"... ... ... ... ... ... \n",
"1715 True False True False False \n",
"1716 True False True False False \n",
"1717 True False True False False \n",
"1718 True False True False False \n",
"1719 True False False True False \n",
"\n",
"[1720 rows x 29 columns]"
]
},
2024-12-07 00:08:27 +04:00
"execution_count": 37,
2024-12-06 18:43:41 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"categorical_features = [\"Spec_score_bin\", \"Battery_bin\", \"Ram_bin\", \"Camera_bin\"]\n",
"\n",
"train_df_enc = pd.get_dummies(train_df_disc, columns=categorical_features)\n",
"val_df_enc = pd.get_dummies(val_df_disc, columns=categorical_features)\n",
"test_df_enc = pd.get_dummies(test_df_disc, columns=categorical_features)\n",
"\n",
"train_df_enc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Ручной синтез признаков."
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 38,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Rating</th>\n",
" <th>Spec_score</th>\n",
" <th>No_of_sim</th>\n",
" <th>Ram</th>\n",
" <th>Battery</th>\n",
" <th>Display</th>\n",
" <th>Camera</th>\n",
" <th>External_Memory</th>\n",
" <th>Android_version</th>\n",
" <th>...</th>\n",
" <th>Battery_bin_0</th>\n",
" <th>Battery_bin_1</th>\n",
" <th>Battery_bin_2</th>\n",
" <th>Ram_bin_0</th>\n",
" <th>Ram_bin_1</th>\n",
" <th>Ram_bin_2</th>\n",
" <th>Camera_bin_0</th>\n",
" <th>Camera_bin_1</th>\n",
" <th>Camera_bin_2</th>\n",
" <th>Camera_to_Display_Ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Vivo Y21T</td>\n",
" <td>3.95</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Motorola Moto G23</td>\n",
" <td>4.40</td>\n",
" <td>77</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>4</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 512 GB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Oppo A78 4G</td>\n",
" <td>4.25</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
" <td>4.45</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>64.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>10.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>iQOO Z5 Pro 5G</td>\n",
" <td>4.40</td>\n",
" <td>84</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>4500</td>\n",
" <td>6</td>\n",
" <td>64.0</td>\n",
" <td>Memory Card (Hybrid)</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>10.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1715</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1716</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1717</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>6</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1718</th>\n",
" <td>itel P55 Plus</td>\n",
" <td>4.10</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>50.0</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1719</th>\n",
" <td>itel S24</td>\n",
" <td>4.35</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G,</td>\n",
" <td>8</td>\n",
" <td>5000</td>\n",
" <td>6</td>\n",
" <td>108.0</td>\n",
" <td>Memory Card Supported</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>18.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1720 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Rating Spec_score \\\n",
"0 Vivo Y21T 3.95 74 \n",
"1 Motorola Moto G23 4.40 77 \n",
"2 Oppo A78 4G 4.25 81 \n",
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
"4 iQOO Z5 Pro 5G 4.40 84 \n",
"... ... ... ... \n",
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1718 itel P55 Plus 4.10 74 \n",
"1719 itel S24 4.35 75 \n",
"\n",
" No_of_sim Ram Battery Display Camera \\\n",
"0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
"2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n",
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n",
"... ... ... ... ... ... \n",
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
"1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
"1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n",
"\n",
" External_Memory Android_version ... Battery_bin_0 \\\n",
"0 Memory Card Supported, upto 1 TB 11 ... True \n",
"1 Memory Card Supported, upto 512 GB 13 ... True \n",
"2 Memory Card Supported, upto 1 TB 13 ... True \n",
"3 Memory Card Supported, upto 1 TB 11 ... True \n",
"4 Memory Card (Hybrid) 11 ... True \n",
"... ... ... ... ... \n",
"1715 Memory Card Supported, upto 1 TB 13 ... True \n",
"1716 Memory Card Supported, upto 1 TB 13 ... True \n",
"1717 Memory Card Supported, upto 1 TB 13 ... True \n",
"1718 Memory Card Supported, upto 1 TB 13 ... True \n",
"1719 Memory Card Supported 13 ... True \n",
"\n",
" Battery_bin_1 Battery_bin_2 Ram_bin_0 Ram_bin_1 Ram_bin_2 Camera_bin_0 \\\n",
"0 False False True False False True \n",
"1 False False True False False True \n",
"2 False False False True False True \n",
"3 False False False True False True \n",
"4 False False False True False True \n",
"... ... ... ... ... ... ... \n",
"1715 False False False True False True \n",
"1716 False False False True False True \n",
"1717 False False False True False True \n",
"1718 False False False True False True \n",
"1719 False False False True False False \n",
"\n",
" Camera_bin_1 Camera_bin_2 Camera_to_Display_Ratio \n",
"0 False False 8.333333 \n",
"1 False False 8.333333 \n",
"2 False False 8.333333 \n",
"3 False False 10.666667 \n",
"4 False False 10.666667 \n",
"... ... ... ... \n",
"1715 False False 8.333333 \n",
"1716 False False 8.333333 \n",
"1717 False False 8.333333 \n",
"1718 False False 8.333333 \n",
"1719 True False 18.000000 \n",
"\n",
"[1720 rows x 30 columns]"
]
},
2024-12-07 00:08:27 +04:00
"execution_count": 38,
2024-12-06 18:43:41 +04:00
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"train_df_enc[\"Camera_to_Display_Ratio\"] = (train_df_enc[\"Camera\"] / train_df_enc[\"Display\"])\n",
"val_df_enc[\"Camera_to_Display_Ratio\"] = val_df_enc[\"Camera\"] / val_df_enc[\"Display\"]\n",
"test_df_enc[\"Camera_to_Display_Ratio\"] = test_df_enc[\"Camera\"] / test_df_enc[\"Display\"]\n",
"\n",
"train_df_enc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Масштабирование признаков"
]
},
{
"cell_type": "code",
2024-12-07 00:08:27 +04:00
"execution_count": 39,
2024-12-06 18:43:41 +04:00
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Name</th>\n",
" <th>Rating</th>\n",
" <th>Spec_score</th>\n",
" <th>No_of_sim</th>\n",
" <th>Ram</th>\n",
" <th>Battery</th>\n",
" <th>Display</th>\n",
" <th>Camera</th>\n",
" <th>External_Memory</th>\n",
" <th>Android_version</th>\n",
" <th>...</th>\n",
" <th>Battery_bin_0</th>\n",
" <th>Battery_bin_1</th>\n",
" <th>Battery_bin_2</th>\n",
" <th>Ram_bin_0</th>\n",
" <th>Ram_bin_1</th>\n",
" <th>Ram_bin_2</th>\n",
" <th>Camera_bin_0</th>\n",
" <th>Camera_bin_1</th>\n",
" <th>Camera_bin_2</th>\n",
" <th>Camera_to_Display_Ratio</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Vivo Y21T</td>\n",
" <td>3.95</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>-1.388963</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Motorola Moto G23</td>\n",
" <td>4.40</td>\n",
" <td>77</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>-1.388963</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 512 GB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>Oppo A78 4G</td>\n",
" <td>4.25</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>0.720078</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
" <td>4.45</td>\n",
" <td>81</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>0.720078</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>0.275662</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>10.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>iQOO Z5 Pro 5G</td>\n",
" <td>4.40</td>\n",
" <td>84</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>0.720078</td>\n",
" <td>-0.675789</td>\n",
" <td>0.096622</td>\n",
" <td>0.275662</td>\n",
" <td>Memory Card (Hybrid)</td>\n",
" <td>11</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>10.666667</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1715</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>-0.334442</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1716</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>-0.334442</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1717</th>\n",
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
" <td>4.00</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
" <td>-0.334442</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1718</th>\n",
" <td>itel P55 Plus</td>\n",
" <td>4.10</td>\n",
" <td>74</td>\n",
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
" <td>0.720078</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>-0.240789</td>\n",
" <td>Memory Card Supported, upto 1 TB</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>8.333333</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1719</th>\n",
" <td>itel S24</td>\n",
" <td>4.35</td>\n",
" <td>75</td>\n",
" <td>Dual Sim, 3G, 4G,</td>\n",
" <td>0.720078</td>\n",
" <td>0.206174</td>\n",
" <td>0.096622</td>\n",
" <td>1.898794</td>\n",
" <td>Memory Card Supported</td>\n",
" <td>13</td>\n",
" <td>...</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>False</td>\n",
" <td>True</td>\n",
" <td>False</td>\n",
" <td>18.000000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>1720 rows × 30 columns</p>\n",
"</div>"
],
"text/plain": [
" Name Rating Spec_score \\\n",
"0 Vivo Y21T 3.95 74 \n",
"1 Motorola Moto G23 4.40 77 \n",
"2 Oppo A78 4G 4.25 81 \n",
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
"4 iQOO Z5 Pro 5G 4.40 84 \n",
"... ... ... ... \n",
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
"1718 itel P55 Plus 4.10 74 \n",
"1719 itel S24 4.35 75 \n",
"\n",
" No_of_sim Ram Battery Display Camera \\\n",
"0 Dual Sim, 3G, 4G, VoLTE, -1.388963 0.206174 0.096622 -0.240789 \n",
"1 Dual Sim, 3G, 4G, VoLTE, -1.388963 0.206174 0.096622 -0.240789 \n",
"2 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 -0.240789 \n",
"3 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 0.275662 \n",
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 0.720078 -0.675789 0.096622 0.275662 \n",
"... ... ... ... ... ... \n",
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n",
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n",
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n",
"1718 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 -0.240789 \n",
"1719 Dual Sim, 3G, 4G, 0.720078 0.206174 0.096622 1.898794 \n",
"\n",
" External_Memory Android_version ... Battery_bin_0 \\\n",
"0 Memory Card Supported, upto 1 TB 11 ... True \n",
"1 Memory Card Supported, upto 512 GB 13 ... True \n",
"2 Memory Card Supported, upto 1 TB 13 ... True \n",
"3 Memory Card Supported, upto 1 TB 11 ... True \n",
"4 Memory Card (Hybrid) 11 ... True \n",
"... ... ... ... ... \n",
"1715 Memory Card Supported, upto 1 TB 13 ... True \n",
"1716 Memory Card Supported, upto 1 TB 13 ... True \n",
"1717 Memory Card Supported, upto 1 TB 13 ... True \n",
"1718 Memory Card Supported, upto 1 TB 13 ... True \n",
"1719 Memory Card Supported 13 ... True \n",
"\n",
" Battery_bin_1 Battery_bin_2 Ram_bin_0 Ram_bin_1 Ram_bin_2 Camera_bin_0 \\\n",
"0 False False True False False True \n",
"1 False False True False False True \n",
"2 False False False True False True \n",
"3 False False False True False True \n",
"4 False False False True False True \n",
"... ... ... ... ... ... ... \n",
"1715 False False False True False True \n",
"1716 False False False True False True \n",
"1717 False False False True False True \n",
"1718 False False False True False True \n",
"1719 False False False True False False \n",
"\n",
" Camera_bin_1 Camera_bin_2 Camera_to_Display_Ratio \n",
"0 False False 8.333333 \n",
"1 False False 8.333333 \n",
"2 False False 8.333333 \n",
"3 False False 10.666667 \n",
"4 False False 10.666667 \n",
"... ... ... ... \n",
"1715 False False 8.333333 \n",
"1716 False False 8.333333 \n",
"1717 False False 8.333333 \n",
"1718 False False 8.333333 \n",
"1719 True False 18.000000 \n",
"\n",
"[1720 rows x 30 columns]"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Numeric columns to standardise (zero mean, unit variance).\n",
"numerical_features = [\"Ram\", \"Battery\", \"Display\", \"Camera\"]\n",
"\n",
"# Learn the scaling statistics from the training split only, so that nothing\n",
"# from the validation/test splits leaks into the preprocessing step.\n",
"scaler = StandardScaler().fit(train_df_enc[numerical_features])\n",
"\n",
"# Apply the train-fitted scaler to every split.\n",
"train_df_enc[numerical_features] = scaler.transform(train_df_enc[numerical_features])\n",
"val_df_enc[numerical_features] = scaler.transform(val_df_enc[numerical_features])\n",
"test_df_enc[numerical_features] = scaler.transform(test_df_enc[numerical_features])\n",
"\n",
"train_df_enc"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Конструирование признаков с помощью Featuretools"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index id not found in dataframe, creating new integer column\n",
" warnings.warn(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
" pd.to_datetime(\n",
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
" warnings.warn(\n"
]
},
{
"data": {
"text/plain": [
"[<Feature: Rating>,\n",
" <Feature: Spec_score>,\n",
" <Feature: No_of_sim>,\n",
" <Feature: Ram>,\n",
" <Feature: Battery>,\n",
" <Feature: Display>,\n",
" <Feature: Camera>,\n",
" <Feature: External_Memory>,\n",
" <Feature: Android_version>,\n",
" <Feature: Price>,\n",
" <Feature: Inbuilt_memory>,\n",
" <Feature: fast_charging>,\n",
" <Feature: Screen_resolution>,\n",
" <Feature: Processor>,\n",
" <Feature: Processor_name>,\n",
" <Feature: company>,\n",
" <Feature: Spec_score_bin_0>,\n",
" <Feature: Spec_score_bin_1>,\n",
" <Feature: Spec_score_bin_2>,\n",
" <Feature: Battery_bin_0>,\n",
" <Feature: Battery_bin_1>,\n",
" <Feature: Battery_bin_2>,\n",
" <Feature: Ram_bin_0>,\n",
" <Feature: Ram_bin_1>,\n",
" <Feature: Ram_bin_2>,\n",
" <Feature: Camera_bin_0>,\n",
" <Feature: Camera_bin_1>,\n",
" <Feature: Camera_bin_2>,\n",
" <Feature: Camera_to_Display_Ratio>]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Build a single-dataframe EntitySet from the training split and run\n",
"# Deep Feature Synthesis on it.\n",
"es = ft.EntitySet(id=\"mobile_data\")\n",
"es = es.add_dataframe(\n",
"    dataframe_name=\"train\",\n",
"    # Pass a copy: featuretools adds the generated index column to the\n",
"    # dataframe it is given, which would otherwise modify train_df_enc and\n",
"    # make this cell behave differently when it is re-run.\n",
"    dataframe=train_df_enc.copy(),\n",
"    index=\"id\",\n",
"    # train_df_enc has no \"id\" column, so request a new integer index\n",
"    # explicitly instead of relying on the fallback that emits a UserWarning.\n",
"    make_index=True,\n",
")\n",
"# With a single dataframe there are no relationships to traverse, so\n",
"# featuretools caps the depth at 1 anyway (and warns when asked for more).\n",
"feature_matrix, feature_defs = ft.dfs(\n",
"    entityset=es, target_dataframe_name=\"train\", max_depth=1\n",
")\n",
"\n",
"feature_defs"
]
}
],
"metadata": {
"kernelspec": {
"display_name": ".venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}