2585 lines
263 KiB
Plaintext
2585 lines
263 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Загрузка набора данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 193,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Rating</th>\n",
|
|||
|
" <th>Spec_score</th>\n",
|
|||
|
" <th>No_of_sim</th>\n",
|
|||
|
" <th>Ram</th>\n",
|
|||
|
" <th>Battery</th>\n",
|
|||
|
" <th>Display</th>\n",
|
|||
|
" <th>Camera</th>\n",
|
|||
|
" <th>External_Memory</th>\n",
|
|||
|
" <th>Android_version</th>\n",
|
|||
|
" <th>Price</th>\n",
|
|||
|
" <th>company</th>\n",
|
|||
|
" <th>Inbuilt_memory</th>\n",
|
|||
|
" <th>fast_charging</th>\n",
|
|||
|
" <th>Screen_resolution</th>\n",
|
|||
|
" <th>Processor</th>\n",
|
|||
|
" <th>Processor_name</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>Samsung Galaxy F14 5G</td>\n",
|
|||
|
" <td>4.65</td>\n",
|
|||
|
" <td>68</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>6000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>9999.0</td>\n",
|
|||
|
" <td>Samsung</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>25W Fast Charging</td>\n",
|
|||
|
" <td>2408 x 1080 px Display with Water Drop Notch</td>\n",
|
|||
|
" <td>Octa Core Processor</td>\n",
|
|||
|
" <td>Exynos 1330</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>Samsung Galaxy A11</td>\n",
|
|||
|
" <td>4.20</td>\n",
|
|||
|
" <td>63</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>4000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>13.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 512 GB</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>9990.0</td>\n",
|
|||
|
" <td>Samsung</td>\n",
|
|||
|
" <td>32 GB inbuilt</td>\n",
|
|||
|
" <td>15W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1560 px Display with Punch Hole</td>\n",
|
|||
|
" <td>1.8 GHz Processor</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>Samsung Galaxy A13</td>\n",
|
|||
|
" <td>4.30</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>11999.0</td>\n",
|
|||
|
" <td>Samsung</td>\n",
|
|||
|
" <td>64 GB inbuilt</td>\n",
|
|||
|
" <td>25W Fast Charging</td>\n",
|
|||
|
" <td>1080 x 2408 px Display with Water Drop Notch</td>\n",
|
|||
|
" <td>2 GHz Processor</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>Samsung Galaxy F23</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>73</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>6000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>48.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>11999.0</td>\n",
|
|||
|
" <td>Samsung</td>\n",
|
|||
|
" <td>64 GB inbuilt</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>720 x 1600 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Helio G88</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>Samsung Galaxy A03s (4GB RAM + 64GB)</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>69</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>13.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>11999.0</td>\n",
|
|||
|
" <td>Samsung</td>\n",
|
|||
|
" <td>64 GB inbuilt</td>\n",
|
|||
|
" <td>15W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1600 px Display with Water Drop Notch</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Helio P35</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1365</th>\n",
|
|||
|
" <td>TCL 40R</td>\n",
|
|||
|
" <td>4.05</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card (Hybrid)</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>18999.0</td>\n",
|
|||
|
" <td>TCL</td>\n",
|
|||
|
" <td>64 GB inbuilt</td>\n",
|
|||
|
" <td>15W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1612 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 700 5G</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1366</th>\n",
|
|||
|
" <td>TCL 50 XL NxtPaper 5G</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>80</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card (Hybrid)</td>\n",
|
|||
|
" <td>14</td>\n",
|
|||
|
" <td>24990.0</td>\n",
|
|||
|
" <td>TCL</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>33W Fast Charging</td>\n",
|
|||
|
" <td>1200 x 2400 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 7050</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1367</th>\n",
|
|||
|
" <td>TCL 50 XE NxtPaper 5G</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>80</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>23990.0</td>\n",
|
|||
|
" <td>TCL</td>\n",
|
|||
|
" <td>256 GB inbuilt</td>\n",
|
|||
|
" <td>18W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1612 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 6080</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1368</th>\n",
|
|||
|
" <td>TCL 40 NxtPaper 5G</td>\n",
|
|||
|
" <td>4.50</td>\n",
|
|||
|
" <td>79</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>22499.0</td>\n",
|
|||
|
" <td>TCL</td>\n",
|
|||
|
" <td>256 GB inbuilt</td>\n",
|
|||
|
" <td>15W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1612 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 6020</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1369</th>\n",
|
|||
|
" <td>TCL Trifold</td>\n",
|
|||
|
" <td>4.65</td>\n",
|
|||
|
" <td>93</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G,</td>\n",
|
|||
|
" <td>12</td>\n",
|
|||
|
" <td>4600</td>\n",
|
|||
|
" <td>10</td>\n",
|
|||
|
" <td>NaN</td>\n",
|
|||
|
" <td>50 MP + 48 MP + 8 MP Triple Rear &amp; 32 MP F...</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>119990.0</td>\n",
|
|||
|
" <td>TCL</td>\n",
|
|||
|
" <td>256 GB inbuilt</td>\n",
|
|||
|
" <td>67W Fast Charging</td>\n",
|
|||
|
" <td>1916 x 2160 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Snapdragon 8 Gen2</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1370 rows × 17 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Name Rating Spec_score \\\n",
|
|||
|
"0 Samsung Galaxy F14 5G 4.65 68 \n",
|
|||
|
"1 Samsung Galaxy A11 4.20 63 \n",
|
|||
|
"2 Samsung Galaxy A13 4.30 75 \n",
|
|||
|
"3 Samsung Galaxy F23 4.10 73 \n",
|
|||
|
"4 Samsung Galaxy A03s (4GB RAM + 64GB) 4.10 69 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1365 TCL 40R 4.05 75 \n",
|
|||
|
"1366 TCL 50 XL NxtPaper 5G 4.10 80 \n",
|
|||
|
"1367 TCL 50 XE NxtPaper 5G 4.00 80 \n",
|
|||
|
"1368 TCL 40 NxtPaper 5G 4.50 79 \n",
|
|||
|
"1369 TCL Trifold 4.65 93 \n",
|
|||
|
"\n",
|
|||
|
" No_of_sim Ram Battery Display Camera \\\n",
|
|||
|
"0 Dual Sim, 3G, 4G, 5G, VoLTE, 4 6000 6 50.0 \n",
|
|||
|
"1 Dual Sim, 3G, 4G, VoLTE, 2 4000 6 13.0 \n",
|
|||
|
"2 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"3 Dual Sim, 3G, 4G, VoLTE, 4 6000 6 48.0 \n",
|
|||
|
"4 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 13.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1365 Dual Sim, 3G, 4G, 5G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"1366 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"1367 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1368 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1369 Dual Sim, 3G, 4G, 5G, VoLTE, Vo5G, 12 4600 10 NaN \n",
|
|||
|
"\n",
|
|||
|
" External_Memory Android_version \\\n",
|
|||
|
"0 Memory Card Supported, upto 1 TB 13 \n",
|
|||
|
"1 Memory Card Supported, upto 512 GB 10 \n",
|
|||
|
"2 Memory Card Supported, upto 1 TB 12 \n",
|
|||
|
"3 Memory Card Supported, upto 1 TB 12 \n",
|
|||
|
"4 Memory Card Supported, upto 1 TB 11 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"1365 Memory Card (Hybrid) 12 \n",
|
|||
|
"1366 Memory Card (Hybrid) 14 \n",
|
|||
|
"1367 Memory Card Supported, upto 1 TB 13 \n",
|
|||
|
"1368 Memory Card Supported, upto 1 TB 13 \n",
|
|||
|
"1369 50 MP + 48 MP + 8 MP Triple Rear & 32 MP F... 13 \n",
|
|||
|
"\n",
|
|||
|
" Price company Inbuilt_memory fast_charging \\\n",
|
|||
|
"0 9999.0 Samsung 128 GB inbuilt 25W Fast Charging \n",
|
|||
|
"1 9990.0 Samsung 32 GB inbuilt 15W Fast Charging \n",
|
|||
|
"2 11999.0 Samsung 64 GB inbuilt 25W Fast Charging \n",
|
|||
|
"3 11999.0 Samsung 64 GB inbuilt NaN \n",
|
|||
|
"4 11999.0 Samsung 64 GB inbuilt 15W Fast Charging \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"1365 18999.0 TCL 64 GB inbuilt 15W Fast Charging \n",
|
|||
|
"1366 24990.0 TCL 128 GB inbuilt 33W Fast Charging \n",
|
|||
|
"1367 23990.0 TCL 256 GB inbuilt 18W Fast Charging \n",
|
|||
|
"1368 22499.0 TCL 256 GB inbuilt 15W Fast Charging \n",
|
|||
|
"1369 119990.0 TCL 256 GB inbuilt 67W Fast Charging \n",
|
|||
|
"\n",
|
|||
|
" Screen_resolution Processor \\\n",
|
|||
|
"0 2408 x 1080 px Display with Water Drop Notch Octa Core Processor \n",
|
|||
|
"1 720 x 1560 px Display with Punch Hole 1.8 GHz Processor \n",
|
|||
|
"2 1080 x 2408 px Display with Water Drop Notch 2 GHz Processor \n",
|
|||
|
"3 720 x 1600 px Octa Core \n",
|
|||
|
"4 720 x 1600 px Display with Water Drop Notch Octa Core \n",
|
|||
|
"... ... ... \n",
|
|||
|
"1365 720 x 1612 px Octa Core \n",
|
|||
|
"1366 1200 x 2400 px Octa Core \n",
|
|||
|
"1367 720 x 1612 px Octa Core \n",
|
|||
|
"1368 720 x 1612 px Octa Core \n",
|
|||
|
"1369 1916 x 2160 px Octa Core \n",
|
|||
|
"\n",
|
|||
|
" Processor_name \n",
|
|||
|
"0 Exynos 1330 \n",
|
|||
|
"1 Octa Core \n",
|
|||
|
"2 Octa Core \n",
|
|||
|
"3 Helio G88 \n",
|
|||
|
"4 Helio P35 \n",
|
|||
|
"... ... \n",
|
|||
|
"1365 Dimensity 700 5G \n",
|
|||
|
"1366 Dimensity 7050 \n",
|
|||
|
"1367 Dimensity 6080 \n",
|
|||
|
"1368 Dimensity 6020 \n",
|
|||
|
"1369 Snapdragon 8 Gen2 \n",
|
|||
|
"\n",
|
|||
|
"[1370 rows x 17 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 193,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"import featuretools as ft\n",
|
|||
|
"import re\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.model_selection import train_test_split\n",
|
|||
|
"from imblearn.over_sampling import RandomOverSampler\n",
|
|||
|
"\n",
|
|||
|
"df = pd.read_csv(\"../data/mobile phone price prediction.csv\")\n",
|
|||
|
"\n",
|
|||
|
"df.drop([\"Unnamed: 0\"], axis=1, inplace=True)\n",
|
|||
|
"df[\"Price\"] = df[\"Price\"].str.replace(\",\", \"\").astype(float)\n",
|
|||
|
"\n",
|
|||
|
"numerical_features = [\n",
|
|||
|
" \"Ram\",\n",
|
|||
|
" \"Battery\",\n",
|
|||
|
" \"Display\",\n",
|
|||
|
" \"Camera\",\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"for feature in numerical_features:\n",
|
|||
|
" df[feature] = df[feature].apply(\n",
|
|||
|
" lambda x: int(re.search(r\"\\d+\", x).group()) if re.search(r\"\\d+\", x) else None # type: ignore\n",
|
|||
|
" )\n",
|
|||
|
"\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Бизнес-цели\n",
|
|||
|
"1. Классифицировать мобильные устройства по ценовым категориям (например, бюджетные, средний класс, флагманы).\n",
|
|||
|
"2. Определить, какие характеристики мобильных устройств наиболее сильно влияют на их рейтинг."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Проверка на пропущенные значения"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 194,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Пропущенные данные по каждому столбцу:\n",
|
|||
|
"Name 0\n",
|
|||
|
"Rating 0\n",
|
|||
|
"Spec_score 0\n",
|
|||
|
"No_of_sim 0\n",
|
|||
|
"Ram 0\n",
|
|||
|
"Battery 0\n",
|
|||
|
"Display 0\n",
|
|||
|
"Camera 79\n",
|
|||
|
"External_Memory 0\n",
|
|||
|
"Android_version 443\n",
|
|||
|
"Price 0\n",
|
|||
|
"company 0\n",
|
|||
|
"Inbuilt_memory 19\n",
|
|||
|
"fast_charging 89\n",
|
|||
|
"Screen_resolution 2\n",
|
|||
|
"Processor 28\n",
|
|||
|
"Processor_name 0\n",
|
|||
|
"dtype: int64\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"print(\"Пропущенные данные по каждому столбцу:\")\n",
|
|||
|
"print(df.isnull().sum())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 195,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Name 0\n",
|
|||
|
"Rating 0\n",
|
|||
|
"Spec_score 0\n",
|
|||
|
"No_of_sim 0\n",
|
|||
|
"Ram 0\n",
|
|||
|
"Battery 0\n",
|
|||
|
"Display 0\n",
|
|||
|
"Camera 0\n",
|
|||
|
"External_Memory 0\n",
|
|||
|
"Android_version 0\n",
|
|||
|
"Price 0\n",
|
|||
|
"company 0\n",
|
|||
|
"Inbuilt_memory 0\n",
|
|||
|
"fast_charging 0\n",
|
|||
|
"Screen_resolution 0\n",
|
|||
|
"Processor 0\n",
|
|||
|
"Processor_name 0\n",
|
|||
|
"dtype: int64\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"df.dropna(inplace=True)\n",
|
|||
|
"print(df.isnull().sum())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Проверка на выбросы"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 196,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2wAAAIjCAYAAAB/FZhcAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAADOWUlEQVR4nOzde3wU1dkH8N/M7P2SzYUkhBAIJMhdrYhIEdGKxGulahW1FZB6e0Gr2Fp9bRVqLa22td6ttYK1tUXbqvXGRRB9BbwiahAwIBFMQhJy2ex9d3bO+8eya5bsJiHZsJvw+34++bTMnJ05OzOJ++xzznMkIYQAERERERERZRw53R0gIiIiIiKixBiwERERERERZSgGbERERERERBmKARsREREREVGGYsBGRERERESUoRiwERERERERZSgGbERERERERBmKARsREREREVGGYsBGRERERESUoRiwEVFGq66uhiRJWLFiRbq7EmfVqlU4/vjjYTKZIEkSWltb090lGgAkScKSJUuO+Hk1TcOECRNwzz33HJHz3XfffRg5ciQURcHxxx9/RM5JwIoVKyBJEqqrq9PdlSNmyZIlkCQpbltpaSnmzZt3xPvy+OOPY9iwYQgEAkf83NS/MWAjSpPPPvsMF198MYYPHw6TyYTi4mKceeaZeOihh/rsnM8++yz++Mc/dtheW1uLJUuWYOvWrX127kNt2LABkiTFfvR6PUaOHIkrr7wSX375ZUrOsWnTJixZsiTlwVRTUxMuueQSmM1mPPLII3jmmWdgtVqTtk/HvaYj77TTTot7pnNzczF58mQ89dRT0DQt3d3r1D/+8Q/s27cPixYtim2Lfrhv/1NQUIDTTz8dr7/+eo/PtWbNGtx6662YNm0ali9fjl//+tepeAu9duj9M5vNOPbYY/HHP/4x4+9ff7Jx40Z873vfQ2FhIYxGI0pLS3Httddi7969PT6m1+vFkiVLsGHDhtR1tA/MmzcPwWAQf/rTn9LdFepndOnuANHRaNOmTTj99NMxbNgwXH311Rg8eDD27duHd999Fw888ABuuOGGPjnvs88+i8rKStx0001x22tra7F06VKUlpYe8W+7b7zxRkyePBmhUAhbtmzBE088gVdffRWfffYZhgwZ0qtjb9q0CUuXLsW8efOQnZ2dmg4D+OCDD+ByuXD33Xdj5syZXfYhHfea0mPo0KFYtmwZAKCxsRF//etfsWDBAnzxxRf4zW9+0+XrfT4fdLoj/5/m++67D3PmzIHD4eiw75e//CVGjBgBIQTq6+uxYsUKnHPOOXj55Zdx3nnnHfa51q9fD1mW8Ze//AUGgyEV3U+Z9vfvwIEDePbZZ3HzzTejsbHxiGUf+9IPf/hDzJkzB0ajMS3nf+ihh/DjH/8YI0eOxA033ICioiJs374dTz75JFauXInXXnsN3/72tw/7uF6vF0uXLgUQCby7snPnTsjykc9ZmEwmzJ07F3/4wx9www03dMj8ESXDgI0oDe655x44HA588MEHHQKJhoaG9HSqD3g8nk4zTwAwffp0XHzxxQCA+fPn45hjjsGNN96Ip59+GrfffvuR6OZhi96j7gSBR8u97mterxcWiyXd3eiSw+HAD37wg9i/r732WowePRoPP/ww7r77buj1+g6v0TQNwWAQJpMJJpPpSHYXAPDxxx/jk08+we9///uE+88++2yceOKJsX8vWLAAhYWF+Mc//tGjgK2hoQFmszllwZoQAn6/H2azudfHOvT+XXfddRgzZgweeugh/PKXv4SiKL0+RzopipK297Bx40bcdNNNOOWUU7Bq1aq43+frr78e06ZNw8UXX4xt27YhJyenT/uSyoBVVVVomtbt5/mSSy7BvffeizfffBPf+c53UtYPGtg4JJIoDXbv3o3x48cn/MBfUFDQYdvf/vY3nHTSSbBYLMjJycGpp56KNWvWxPa/9NJLOPfcczFkyBAYjUaUlZXh7rvvRjgcjrU57bTT8Oqrr+Krr76KDfkpLS3Fhg0bMHnyZACRgCm6r/2csffeew9nnXUWHA4HLBYLZsyYgY0bN8b1MTpP4PPPP8fll1+OnJwcnHLKKYd9baL/AduzZ0+n7davX4/p06fDarUiOzsbF1xwAbZv3x7Xn5/+9KcAgBEjRsTeV1dzN55//nlMmjQJZrMZgwYNwg9+8APU1NTE9p922mmYO3cuAGDy5MmQJKnTuRCHc68lScKiRYvw97//HaNHj4bJZMKkSZPw9ttvd3htTU0NrrrqqtiwovHjx+Opp57q0M7v92PJkiU45phjYDKZUFRUhAsvvBC7d+/u9Dq0V1VVhYsuugiDBw+GyWTC0KFDMWfOHDidzrh2XT2nAPDoo49i/PjxMBqNGDJkCBYuXNhhyOppp52GCRMm4KOPPsKpp54Ki8WC//3f/wUABAIB3HXXXSgvL4fRaERJSQluvfXWLueELFq0CDabDV6vt8O+yy67DIMHD479vnz44YeoqKjAoEGDYDabMWLECFx11VXdvl7tWSwWnHzyyfB4PGhsbAQQf5+j12LVqlWxfYfOYaupqcGCBQtiv98jRozA9ddfj2AwGGvT2tqKm266CSUlJTAajSgvL8dvf/vbbg3le/HFF2EwGHDqqad26z1lZ2fDbDZ3yARqmoY//vGPGD9+PEwmEwoLC3HttdeipaUl1kaSJCxfvhwej6fD3xpVVXH33XejrKwsNlTuf//3fzvc29LSUpx33nlYvXo1TjzxRJjN5tgQs95ch0RMJhMmT54Ml8vV4QuWv/3tb7G/Fbm5uZgzZw727dsX1yb6LH/66aeYMWMGLBYLysvL8a9//QsA8NZbb2HKlCkwm80YPXo03njjjQ59+Pjjj3H22WcjKysLNpsNZ5xxBt59993Y/g8//BCSJOHpp5/u8NrVq1dDkiS88sorABLPYYtez3feeQcnnXQSTCYTRo4cib/+9a8djhd9H2azGUOHDsWvfvUrLF++vFt/W+++++5YPw/98qWsrAz33nsv6urq4oYLnnbaaQkzZvPmzUNpaSmAyDzn/Px8AMDSpUtjz1Vnc0ETzWHrzrMTnVP9u9/9Dn/84x9jz+rnn38OIJJBHD9+fOxv4Iknnohnn3027jyTJk1Cbm4uXnrppU6vF1F7zLARpcHw4cOxefNmVFZWYsKECZ22Xbp0KZYsWYJvf/vb+OUvfwmDwYD33nsP69evx6xZswBE/iNss9mwePFi2Gw2rF+/HnfeeSfa2tpw3333AQDuuOMOOJ1OfP3117j//vsBADabDWPHjsUvf/lL3Hnnnbjmmmswffp0AIgNS1m/fj3OPvtsTJo0CXfddRdkWcby5cvxne98B//3f/+Hk046Ka6/3//+9zFq1Cj8+te/hhDisK9NNJDIy8tL2uaNN97A2WefjZEjR2LJkiXw+Xx46KGHMG3aNGzZsgWlpaW48MIL8cUXX+Af//gH7r//fgwaNAgAYv9hT2TFihWYP38+Jk+ejGXLlqG+vh4PPPAANm7ciI8//hjZ2dm44447MHr0aDzxxBOxoWJlZWVJj3k49xqIfIBbuXIlbrzxRhiNRjz66KM466yz8P7778deX19fj5NPPjn2wT8/Px+vv/46FixYgLa2ttiQ13A4jPPOOw/r1q3DnDlz8OMf/xgulwtr165FZWVlp/2OCgaDqKioQCAQwA033IDBgwejpqYGr7zyClpbW2ND6LrznC5ZsgRLly7FzJkzcf3112Pnzp147LHH8MEHH2Djxo1x2aempiacffbZmDNnDn7wgx+gsLAQmqbhu9/9Lt555x1cc801GDt2LD777DPcf//9+OKLL/Diiy8mfR+XXnopHnnkEbz66qv4/ve/H9vu9Xrx8ssvY968eVAUBQ0NDZg1axby8/Nx2223ITs7G9XV1fjPf/7T5bVK5ssvv4SiKHFB+/r16/Hcc89h0aJFGDRoUOzD56Fqa2tx0kknobW1Fddccw3GjBmDmpoa/Otf/4LX64XBYIDX68WMGTNQU1ODa6+9FsOGDcOmTZtw++23o66uLuG81fY2bdqECRMmJMz+AYDT6cS
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAA2QAAAIjCAYAAABswtioAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXxU1d0/8M+9M3Nnz2RPIAQCCcqqFIqKSHElpaht3ZdWwKUtFa1Ln5/1qXV9Wmx9XOpKaytYq3Vp1dZaBAW1LWAflWoB2YJEItkgy2Qy252Ze35/hBkzyQSSydzMTPJ5v15pzczl5MyZM8l855zz/UpCCAEiIiIiIiIacnK6O0BERERERDRSMSAjIiIiIiJKEwZkREREREREacKAjIiIiIiIKE0YkBEREREREaUJAzIiIiIiIqI0YUBGRERERESUJgzIiIiIiIiI0oQBGRERERERUZowICOijFRbWwtJkrB69ep0dyXOG2+8gRkzZsBisUCSJLS3t6e7SzQMSJKEO++8c8h/rqZpmDZtGn76058m3cYzzzyDSZMmwWQyITc3N3Wdo15OPfVUnHrqqenuxpCqqKjAkiVLYt+/8847kCQJ77zzzpD35aSTTsL/+3//b8h/Lg1/DMiIhtjWrVtxwQUXYNy4cbBYLCgrK8NZZ52FRx55RLef+dxzz+Ghhx7qdXt9fT3uvPNOfPTRR7r97J6if0yjXyaTCRMmTMAVV1yBTz/9NCU/Y9OmTbjzzjtTHiy1tLTgoosugtVqxWOPPYZnnnkGdru9z+vT8VzT0Dv11FPj5nR+fj5mz56Np556Cpqmpbt7R/SHP/wBdXV1WL58ecL7H3/8cUiShBNPPDHh/Tt37sSSJUtQWVmJJ598Er/+9a/h8/lw5513Dukb5ugHONEvWZaRn5+PhQsXYvPmzUPWj+EuFArh4YcfxuzZs+F0OuFwODB79mw8/PDDCIVCSber1+/sVLvlllvw2GOPobGxMd1doeFGENGQ2bhxo1AURVRVVYl77rlHPPnkk+L2228XCxYsEJWVlbr93EWLFolx48b1uv39998XAMSqVat0+9k9vf322wKAuP7668UzzzwjnnrqKbF8+XKhKIrIz88XBw4cEEIIsW/fvqT7dt999wkAYt++fSnt+5o1awQA8eabbx712nQ91zT05s+fL8aMGSOeeeYZ8cwzz4gHHnhAzJgxQwAQt9xyS7/a8Pv9IhQK6dzT3o4//njxne98p8/7Tz75ZFFRUSEAiD179vS6/4knnuh138GDBwUAcccdd+jR5YSivy8uvfRS8cwzz4jVq1eL//7v/xa5ubnCbDaL//znP0PWFz0Fg0ERDAbT8rM7OzvF/PnzBQBx9tlni0cffVQ8/vjj4txzzxUAxPz580VnZ2dSbR/pd/a4cePE4sWLY99HIhHh9/tFJBJJ8pEkLxKJiNLSUvGTn/xkyH82DW/GtESBRCPUT3/6U7hcLrz//vu9tvY0Nzenp1M68Hq9R1w5AoB58+bhggsuAAAsXboUxxxzDK6//no8/fTTuPXWW4eimwMWfY76sy1rpDzXevP5fLDZbOnuxlG5XC5861vfin3/3e9+F8ceeyweffRR3HPPPTCZTL3+jaZpUFUVFosFFotlKLsLAPj3v/+Njz/+GPfff3/C+/ft24dNmzbh5Zdfxne/+108++yzuOOOO+KuGchrYrD683tl5syZcc/DvHnzsHDhQjzxxBN4/PHH9e6i7hRFSdvPvummm/Duu+/ikUceiVtRXbZsGR577DEsX74cP/zhD/HEE0/o2g9ZllP6eunPvOr+sy+44AL87ne/w1133QVJklLWDxrh0h0REo0kxx57rDj11FP7ff0zzzwjZs+eLaxWq8jNzRXz5s0Ta9eujd3/6quviq997Wti1KhRQlEUMWHCBHH33XeLcDgcuyb6iWb3r3HjxsVWqnp+dV+Reu+990R1dbXIyckRVqtVfOUrXxH//Oc/4/p4xx13CABi+/bt4tJLLxW5ublixowZfT6m6M996aWX4m7ftm2bACCuueYaIUTfK2Tr168Xp5xyirDZbMLlcolzzz1XfPLJJ7360/PraKtlL774opg5c6awWCyioKBAXH755eLzzz8/4jh2/9S2p4E81wDEtddeK37/+9+LY445RpjNZjFz5kzx7rvv9rr2888/F0uXLhXFxcVCURQxZcoU8dvf/rbXdX6/X9xxxx1i4sSJwmw2i9LSUvHNb35T1NTU9KtPQgixe/ducd5554mSkhJhNptFWVmZuPjii0V7e3vcdUebp0II8dhjj4kpU6YIRVHEqFGjxPe//33R1tYWd838+fPF1KlTxQcffCDmzZsnrFar+MEPfiCEECIQCIjbb79dVFZWCkVRxJgxY8R//dd/iUAgcMTHcO211wq73S68Xm+v+y655BJRUlISe728//77YsGCBaKgoEBYLBZRUVEhli5detRxiva7pwsuuEAAiK36dn+ep0yZIoxGo3jllVdi9/VcUfr888/FlVdeGXt9V1RUiO9973txKyRtbW3iBz/4gRgzZoxQFEVUVlaKe++9t1+rB7fffrtQFEWoqprw/nvuuUfk5eWJYDAoli1bJiZOnBh3/7hx4xK+JhK9/ro/th07dojzzz9f5OXlCbPZLGbNmiX+/Oc/x7W9atUqAUC88847YtmyZaKoqEjk5ub2+Viivy/uu+++uNs7OzsFALFgwYK42/szbt3bfPTRR8X48eOF1WoVZ511lti/f7/QNE3cfffdoqysTFgsFnHuueeKlpaWXn072twfyBydP3++mD9/fuz+6O/TF154QfzP//yPKCsrE2azWZx++ukJVzSjj8NisYjZs2eLv//9773aTKSurk4YDAZx+umn93nNaaedJoxGo6irq4sbv0S7HLrPiaP9zu65QhZ9zG+//XZcm4P9e9XQ0CCWLFkiysrKhKIoorS0VJx77rm9/nb8+c9/FgDEli1bjjhmRAPBFTKiITRu3Dhs3rwZ27Ztw7Rp04547V133YU777wTJ598Mu6++24oioJ//etf2LBhAxYsWAAAWL16NRwOB2666SY4HA5s2LABt99+Ozo6OnDfffcBAH784x/D7Xbj888/x4MPPggAcDgcmDx5Mu6++27cfvvt+M53voN58+YBAE4++WQAwIYNG7Bw4ULMmjULd9xxB2RZxqpVq3D66afjH//4B0444YS4/l544YWYOHEifvazn0EIMeCx2bt3LwCgoKCgz2veeustLFy4EBMmTMCdd94Jv9+PRx55BHPnzsWWLVtQUVGB8847D7t378Yf/vAHPPjggygsLAQAFBUV9dnu6tWrsXTpUsyePRsrVqxAU1MTfvnLX2Ljxo3497//jdzcXPz4xz/Gsccei1//+te4++67MX78eFRWVvbZ5kCeawB499138cILL+D666+H2WzG448/jq9+9av4v//7v9i/b2pqwkknnQRJkrB8+XIUFRVhzZo1uOqqq9DR0YEbbrgBABCJRHD22Wdj/fr1uOSSS/CDH/wAHo8Hb775JrZt23bEfkepqorq6moEg0Fcd911KC0txYEDB/DXv/4V7e3tcLlcAPo3T++8807cddddOPPMM7Fs2TLs2rULTzzxBN5//31s3LgxbvWopaUFCxcuxCWXXIJvfetbKCkpgaZpOPfcc/HPf/4T3/nOdzB58mRs3boVDz74IHbv3o1XX321z8dx8cUX47HHHsPrr7+OCy+8MHa7z+fDa6+9hiVLlsBgMKC5uRkLFixAUVERfvSjHyE3Nxe1tbV4+eWXjzpWffn0009hMBjiVo82bNiAF198EcuXL0dhYSEqKioS/tv6+nqccMIJaG9vx3e+8x1MmjQJBw4cwB//+Ef4fD4oigKfz4f58+fjwIED+O53v4uxY8di06ZNuPXWW9HQ0JDw3Gh3mzZtwrRp0xKu3gHAs88+i/POOw+KouDSSy+NPWezZ88GADz00EP43e9+h1deeQVPPPEEHA4Hpk+fjpNOOgnLli3DN7/5TZx33nkAgOOOOw4AsH37dsydOxdlZWX40Y9
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1000x600 with 1 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Количество строк до удаления выбросов: 785\n",
|
|||
|
"Количество строк после удаления выбросов: 721\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"column1 = \"Spec_score\"\n",
|
|||
|
"column2 = \"Price\"\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.scatter(df[column1], df[column2], alpha=0.5)\n",
|
|||
|
"plt.xlabel(column1)\n",
|
|||
|
"plt.ylabel(column2)\n",
|
|||
|
"plt.title(f\"Scatter Plot of {column1} vs {column2} (Before Removing Outliers)\")\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"def remove_outliers(df, column):\n",
|
|||
|
" Q1 = df[column].quantile(0.25)\n",
|
|||
|
" Q3 = df[column].quantile(0.75)\n",
|
|||
|
" IQR = Q3 - Q1\n",
|
|||
|
" lower_bound = Q1 - 1.5 * IQR\n",
|
|||
|
" upper_bound = Q3 + 1.5 * IQR\n",
|
|||
|
" return df[(df[column] >= lower_bound) & (df[column] <= upper_bound)]\n",
|
|||
|
"\n",
|
|||
|
"df_cleaned = df.copy()\n",
|
|||
|
"for column in [column1, column2]:\n",
|
|||
|
" df_cleaned = remove_outliers(df_cleaned, column)\n",
|
|||
|
"\n",
|
|||
|
"plt.figure(figsize=(10, 6))\n",
|
|||
|
"plt.scatter(df_cleaned[column1], df_cleaned[column2], alpha=0.5)\n",
|
|||
|
"plt.xlabel(column1)\n",
|
|||
|
"plt.ylabel(column2)\n",
|
|||
|
"plt.title(f\"Scatter Plot of {column1} vs {column2} (After Removing Outliers)\")\n",
|
|||
|
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"print(f\"Количество строк до удаления выбросов: {len(df)}\")\n",
|
|||
|
"print(f\"Количество строк после удаления выбросов: {len(df_cleaned)}\")\n",
|
|||
|
"\n",
|
|||
|
"df = df_cleaned"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Разбиение данных на выборки."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 197,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Размеры выборок:\n",
|
|||
|
"Обучающая выборка: 432 записей\n",
|
|||
|
"company\n",
|
|||
|
"Realme 86\n",
|
|||
|
"Samsung 70\n",
|
|||
|
"Motorola 50\n",
|
|||
|
"Vivo 48\n",
|
|||
|
"Xiaomi 45\n",
|
|||
|
"Poco 32\n",
|
|||
|
"OnePlus 15\n",
|
|||
|
"iQOO 14\n",
|
|||
|
"OPPO 12\n",
|
|||
|
"POCO 11\n",
|
|||
|
"Honor 11\n",
|
|||
|
"TCL 11\n",
|
|||
|
"Lava 9\n",
|
|||
|
"Oppo 5\n",
|
|||
|
"Huawei 5\n",
|
|||
|
"itel 3\n",
|
|||
|
"Google 2\n",
|
|||
|
"Gionee 1\n",
|
|||
|
"IQOO 1\n",
|
|||
|
"Lenovo 1\n",
|
|||
|
"Name: count, dtype: int64\n",
|
|||
|
"Контрольная выборка: 144 записей\n",
|
|||
|
"company\n",
|
|||
|
"Vivo 27\n",
|
|||
|
"Samsung 27\n",
|
|||
|
"Realme 21\n",
|
|||
|
"Xiaomi 12\n",
|
|||
|
"Poco 11\n",
|
|||
|
"Motorola 10\n",
|
|||
|
"OnePlus 7\n",
|
|||
|
"OPPO 6\n",
|
|||
|
"POCO 6\n",
|
|||
|
"Honor 3\n",
|
|||
|
"itel 3\n",
|
|||
|
"Lava 2\n",
|
|||
|
"LG 2\n",
|
|||
|
"iQOO 2\n",
|
|||
|
"Lenovo 2\n",
|
|||
|
"Oppo 1\n",
|
|||
|
"Itel 1\n",
|
|||
|
"Google 1\n",
|
|||
|
"Name: count, dtype: int64\n",
|
|||
|
"Тестовая выборка: 145 записей\n",
|
|||
|
"company\n",
|
|||
|
"Samsung 27\n",
|
|||
|
"Vivo 25\n",
|
|||
|
"Realme 16\n",
|
|||
|
"Xiaomi 12\n",
|
|||
|
"Motorola 11\n",
|
|||
|
"Poco 10\n",
|
|||
|
"OnePlus 7\n",
|
|||
|
"TCL 7\n",
|
|||
|
"iQOO 7\n",
|
|||
|
"Huawei 5\n",
|
|||
|
"Oppo 4\n",
|
|||
|
"Lenovo 2\n",
|
|||
|
"Honor 2\n",
|
|||
|
"Lava 2\n",
|
|||
|
"itel 2\n",
|
|||
|
"Tecno 1\n",
|
|||
|
"Google 1\n",
|
|||
|
"OPPO 1\n",
|
|||
|
"Coolpad 1\n",
|
|||
|
"POCO 1\n",
|
|||
|
"Itel 1\n",
|
|||
|
"Name: count, dtype: int64\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"X = df\n",
|
|||
|
"y = df[\"company\"]\n",
|
|||
|
"\n",
|
|||
|
"train_df, X_temp, y_train, y_temp = train_test_split(\n",
|
|||
|
" X, y, test_size=0.4, random_state=42\n",
|
|||
|
")\n",
|
|||
|
"val_df, test_df, y_val, y_test = train_test_split(\n",
|
|||
|
" X_temp, y_temp, test_size=0.5, random_state=42\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"print(\"Размеры выборок:\")\n",
|
|||
|
"print(f\"Обучающая выборка: {train_df.shape[0]} записей\")\n",
|
|||
|
"print(train_df.company.value_counts())\n",
|
|||
|
"print(f\"Контрольная выборка: {val_df.shape[0]} записей\")\n",
|
|||
|
"print(val_df.company.value_counts())\n",
|
|||
|
"print(f\"Тестовая выборка: {test_df.shape[0]} записей\")\n",
|
|||
|
"print(test_df.company.value_counts())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Oversampling"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 198,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Размеры выборок:\n",
|
|||
|
"Обучающая выборка: 1720 записей\n",
|
|||
|
"company\n",
|
|||
|
"Vivo 86\n",
|
|||
|
"Motorola 86\n",
|
|||
|
"Oppo 86\n",
|
|||
|
"POCO 86\n",
|
|||
|
"iQOO 86\n",
|
|||
|
"Xiaomi 86\n",
|
|||
|
"Realme 86\n",
|
|||
|
"OnePlus 86\n",
|
|||
|
"Poco 86\n",
|
|||
|
"Samsung 86\n",
|
|||
|
"TCL 86\n",
|
|||
|
"Gionee 86\n",
|
|||
|
"Honor 86\n",
|
|||
|
"OPPO 86\n",
|
|||
|
"Lava 86\n",
|
|||
|
"itel 86\n",
|
|||
|
"Huawei 86\n",
|
|||
|
"Google 86\n",
|
|||
|
"IQOO 86\n",
|
|||
|
"Lenovo 86\n",
|
|||
|
"Name: count, dtype: int64\n",
|
|||
|
"Контрольная выборка: 486 записей\n",
|
|||
|
"company\n",
|
|||
|
"Vivo 27\n",
|
|||
|
"Honor 27\n",
|
|||
|
"Motorola 27\n",
|
|||
|
"POCO 27\n",
|
|||
|
"Samsung 27\n",
|
|||
|
"itel 27\n",
|
|||
|
"Lava 27\n",
|
|||
|
"Xiaomi 27\n",
|
|||
|
"Realme 27\n",
|
|||
|
"OnePlus 27\n",
|
|||
|
"Poco 27\n",
|
|||
|
"iQOO 27\n",
|
|||
|
"LG 27\n",
|
|||
|
"Oppo 27\n",
|
|||
|
"Itel 27\n",
|
|||
|
"OPPO 27\n",
|
|||
|
"Google 27\n",
|
|||
|
"Lenovo 27\n",
|
|||
|
"Name: count, dtype: int64\n",
|
|||
|
"Тестовая выборка: 567 записей\n",
|
|||
|
"company\n",
|
|||
|
"Oppo 27\n",
|
|||
|
"Huawei 27\n",
|
|||
|
"Samsung 27\n",
|
|||
|
"Motorola 27\n",
|
|||
|
"TCL 27\n",
|
|||
|
"Realme 27\n",
|
|||
|
"Xiaomi 27\n",
|
|||
|
"Poco 27\n",
|
|||
|
"Google 27\n",
|
|||
|
"Vivo 27\n",
|
|||
|
"iQOO 27\n",
|
|||
|
"Tecno 27\n",
|
|||
|
"OnePlus 27\n",
|
|||
|
"Honor 27\n",
|
|||
|
"OPPO 27\n",
|
|||
|
"Lenovo 27\n",
|
|||
|
"Lava 27\n",
|
|||
|
"itel 27\n",
|
|||
|
"Coolpad 27\n",
|
|||
|
"POCO 27\n",
|
|||
|
"Itel 27\n",
|
|||
|
"Name: count, dtype: int64\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"def oversample(df):\n",
|
|||
|
" X = df.drop(\"company\", axis=1)\n",
|
|||
|
" y = df[\"company\"]\n",
|
|||
|
"\n",
|
|||
|
" oversampler = RandomOverSampler(random_state=42)\n",
|
|||
|
" X_resampled, y_resampled = oversampler.fit_resample(X, y) # type: ignore\n",
|
|||
|
"\n",
|
|||
|
" resampled_df = pd.concat([X_resampled, y_resampled], axis=1)\n",
|
|||
|
" return resampled_df\n",
|
|||
|
"\n",
|
|||
|
"train_df_overs = oversample(train_df)\n",
|
|||
|
"val_df_overs = oversample(val_df)\n",
|
|||
|
"test_df_overs = oversample(test_df)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Размеры выборок:\")\n",
|
|||
|
"print(f\"Обучающая выборка: {train_df_overs.shape[0]} записей\")\n",
|
|||
|
"print(train_df_overs.company.value_counts())\n",
|
|||
|
"print(f\"Контрольная выборка: {val_df_overs.shape[0]} записей\")\n",
|
|||
|
"print(val_df_overs.company.value_counts())\n",
|
|||
|
"print(f\"Тестовая выборка: {test_df_overs.shape[0]} записей\")\n",
|
|||
|
"print(test_df_overs.company.value_counts())"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Дискретизация числовых признаков"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 199,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Rating</th>\n",
|
|||
|
" <th>Spec_score</th>\n",
|
|||
|
" <th>No_of_sim</th>\n",
|
|||
|
" <th>Ram</th>\n",
|
|||
|
" <th>Battery</th>\n",
|
|||
|
" <th>Display</th>\n",
|
|||
|
" <th>Camera</th>\n",
|
|||
|
" <th>External_Memory</th>\n",
|
|||
|
" <th>Android_version</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Inbuilt_memory</th>\n",
|
|||
|
" <th>fast_charging</th>\n",
|
|||
|
" <th>Screen_resolution</th>\n",
|
|||
|
" <th>Processor</th>\n",
|
|||
|
" <th>Processor_name</th>\n",
|
|||
|
" <th>company</th>\n",
|
|||
|
" <th>Spec_score_bin</th>\n",
|
|||
|
" <th>Battery_bin</th>\n",
|
|||
|
" <th>Ram_bin</th>\n",
|
|||
|
" <th>Camera_bin</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>Vivo Y21T</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>18W Fast Charging</td>\n",
|
|||
|
" <td>1600 x 720 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Snapdragon 680</td>\n",
|
|||
|
" <td>Vivo</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>Motorola Moto G23</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>77</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 512 GB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>30W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1600 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Helio G85</td>\n",
|
|||
|
" <td>Motorola</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>Oppo A78 4G</td>\n",
|
|||
|
" <td>4.25</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>67W Fast Charging</td>\n",
|
|||
|
" <td>1080 x 2400 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Snapdragon 680</td>\n",
|
|||
|
" <td>Oppo</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.45</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>64.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>33W Fast Charging</td>\n",
|
|||
|
" <td>1080 x 2400 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Helio G96</td>\n",
|
|||
|
" <td>POCO</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>iQOO Z5 Pro 5G</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>84</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>4500</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>64.0</td>\n",
|
|||
|
" <td>Memory Card (Hybrid)</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>65W Fast Charging</td>\n",
|
|||
|
" <td>1080 x 2460 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Snapdragon 870</td>\n",
|
|||
|
" <td>iQOO</td>\n",
|
|||
|
" <td>2</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1715</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>18W Fast Charging</td>\n",
|
|||
|
" <td>1600 x 720 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 6080</td>\n",
|
|||
|
" <td>itel</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1716</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>18W Fast Charging</td>\n",
|
|||
|
" <td>1600 x 720 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 6080</td>\n",
|
|||
|
" <td>itel</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1717</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>18W Fast Charging</td>\n",
|
|||
|
" <td>1600 x 720 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Dimensity 6080</td>\n",
|
|||
|
" <td>itel</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1718</th>\n",
|
|||
|
" <td>itel P55 Plus</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>256 GB inbuilt</td>\n",
|
|||
|
" <td>45W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1640 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Unisoc T606</td>\n",
|
|||
|
" <td>itel</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1719</th>\n",
|
|||
|
" <td>itel S24</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>108.0</td>\n",
|
|||
|
" <td>Memory Card Supported</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>128 GB inbuilt</td>\n",
|
|||
|
" <td>18W Fast Charging</td>\n",
|
|||
|
" <td>720 x 1612 px</td>\n",
|
|||
|
" <td>Octa Core</td>\n",
|
|||
|
" <td>Helio G91</td>\n",
|
|||
|
" <td>itel</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>0</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" <td>1</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1720 rows × 21 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Name Rating Spec_score \\\n",
|
|||
|
"0 Vivo Y21T 3.95 74 \n",
|
|||
|
"1 Motorola Moto G23 4.40 77 \n",
|
|||
|
"2 Oppo A78 4G 4.25 81 \n",
|
|||
|
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
|
|||
|
"4 iQOO Z5 Pro 5G 4.40 84 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1718 itel P55 Plus 4.10 74 \n",
|
|||
|
"1719 itel S24 4.35 75 \n",
|
|||
|
"\n",
|
|||
|
" No_of_sim Ram Battery Display Camera \\\n",
|
|||
|
"0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n",
|
|||
|
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n",
|
|||
|
"\n",
|
|||
|
" External_Memory Android_version ... \\\n",
|
|||
|
"0 Memory Card Supported, upto 1 TB 11 ... \n",
|
|||
|
"1 Memory Card Supported, upto 512 GB 13 ... \n",
|
|||
|
"2 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"3 Memory Card Supported, upto 1 TB 11 ... \n",
|
|||
|
"4 Memory Card (Hybrid) 11 ... \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1716 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1717 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1718 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1719 Memory Card Supported 13 ... \n",
|
|||
|
"\n",
|
|||
|
" Inbuilt_memory fast_charging Screen_resolution Processor \\\n",
|
|||
|
"0 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
|
|||
|
"1 128 GB inbuilt 30W Fast Charging 720 x 1600 px Octa Core \n",
|
|||
|
"2 128 GB inbuilt 67W Fast Charging 1080 x 2400 px Octa Core \n",
|
|||
|
"3 128 GB inbuilt 33W Fast Charging 1080 x 2400 px Octa Core \n",
|
|||
|
"4 128 GB inbuilt 65W Fast Charging 1080 x 2460 px Octa Core \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"1715 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
|
|||
|
"1716 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
|
|||
|
"1717 128 GB inbuilt 18W Fast Charging 1600 x 720 px Octa Core \n",
|
|||
|
"1718 256 GB inbuilt 45W Fast Charging 720 x 1640 px Octa Core \n",
|
|||
|
"1719 128 GB inbuilt 18W Fast Charging 720 x 1612 px Octa Core \n",
|
|||
|
"\n",
|
|||
|
" Processor_name company Spec_score_bin Battery_bin Ram_bin \\\n",
|
|||
|
"0 Snapdragon 680 Vivo 1 0 0 \n",
|
|||
|
"1 Helio G85 Motorola 1 0 0 \n",
|
|||
|
"2 Snapdragon 680 Oppo 1 0 1 \n",
|
|||
|
"3 Helio G96 POCO 1 0 1 \n",
|
|||
|
"4 Snapdragon 870 iQOO 2 0 1 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 Dimensity 6080 itel 1 0 1 \n",
|
|||
|
"1716 Dimensity 6080 itel 1 0 1 \n",
|
|||
|
"1717 Dimensity 6080 itel 1 0 1 \n",
|
|||
|
"1718 Unisoc T606 itel 1 0 1 \n",
|
|||
|
"1719 Helio G91 itel 1 0 1 \n",
|
|||
|
"\n",
|
|||
|
" Camera_bin \n",
|
|||
|
"0 0 \n",
|
|||
|
"1 0 \n",
|
|||
|
"2 0 \n",
|
|||
|
"3 0 \n",
|
|||
|
"4 0 \n",
|
|||
|
"... ... \n",
|
|||
|
"1715 0 \n",
|
|||
|
"1716 0 \n",
|
|||
|
"1717 0 \n",
|
|||
|
"1718 0 \n",
|
|||
|
"1719 1 \n",
|
|||
|
"\n",
|
|||
|
"[1720 rows x 21 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 199,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"numerical_features = [\"Spec_score\", \"Battery\", \"Ram\", \"Camera\"]\n",
|
|||
|
"\n",
|
|||
|
"def discretize_features(df, features, bins=3, labels=False):\n",
|
|||
|
" for feature in features:\n",
|
|||
|
" try:\n",
|
|||
|
" df[f\"{feature}_bin\"] = pd.cut(df[feature], bins=bins, labels=labels) # type: ignore\n",
|
|||
|
" except Exception as e:\n",
|
|||
|
" print(f\"Ошибка при дискретизации признака {feature}: {e}\")\n",
|
|||
|
" return df\n",
|
|||
|
"\n",
|
|||
|
"train_df_disc = discretize_features(train_df_overs, numerical_features)\n",
|
|||
|
"val_df_disc = discretize_features(val_df_overs, numerical_features)\n",
|
|||
|
"test_df_disc = discretize_features(test_df_overs, numerical_features)\n",
|
|||
|
"\n",
|
|||
|
"train_df_disc"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Унитарное кодирование категориальных признаков"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 200,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Rating</th>\n",
|
|||
|
" <th>Spec_score</th>\n",
|
|||
|
" <th>No_of_sim</th>\n",
|
|||
|
" <th>Ram</th>\n",
|
|||
|
" <th>Battery</th>\n",
|
|||
|
" <th>Display</th>\n",
|
|||
|
" <th>Camera</th>\n",
|
|||
|
" <th>External_Memory</th>\n",
|
|||
|
" <th>Android_version</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Spec_score_bin_2</th>\n",
|
|||
|
" <th>Battery_bin_0</th>\n",
|
|||
|
" <th>Battery_bin_1</th>\n",
|
|||
|
" <th>Battery_bin_2</th>\n",
|
|||
|
" <th>Ram_bin_0</th>\n",
|
|||
|
" <th>Ram_bin_1</th>\n",
|
|||
|
" <th>Ram_bin_2</th>\n",
|
|||
|
" <th>Camera_bin_0</th>\n",
|
|||
|
" <th>Camera_bin_1</th>\n",
|
|||
|
" <th>Camera_bin_2</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>Vivo Y21T</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>Motorola Moto G23</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>77</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 512 GB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>Oppo A78 4G</td>\n",
|
|||
|
" <td>4.25</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.45</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>64.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>iQOO Z5 Pro 5G</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>84</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>4500</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>64.0</td>\n",
|
|||
|
" <td>Memory Card (Hybrid)</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1715</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1716</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1717</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1718</th>\n",
|
|||
|
" <td>itel P55 Plus</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1719</th>\n",
|
|||
|
" <td>itel S24</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>108.0</td>\n",
|
|||
|
" <td>Memory Card Supported</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1720 rows × 29 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Name Rating Spec_score \\\n",
|
|||
|
"0 Vivo Y21T 3.95 74 \n",
|
|||
|
"1 Motorola Moto G23 4.40 77 \n",
|
|||
|
"2 Oppo A78 4G 4.25 81 \n",
|
|||
|
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
|
|||
|
"4 iQOO Z5 Pro 5G 4.40 84 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1718 itel P55 Plus 4.10 74 \n",
|
|||
|
"1719 itel S24 4.35 75 \n",
|
|||
|
"\n",
|
|||
|
" No_of_sim Ram Battery Display Camera \\\n",
|
|||
|
"0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n",
|
|||
|
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n",
|
|||
|
"\n",
|
|||
|
" External_Memory Android_version ... \\\n",
|
|||
|
"0 Memory Card Supported, upto 1 TB 11 ... \n",
|
|||
|
"1 Memory Card Supported, upto 512 GB 13 ... \n",
|
|||
|
"2 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"3 Memory Card Supported, upto 1 TB 11 ... \n",
|
|||
|
"4 Memory Card (Hybrid) 11 ... \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1716 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1717 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1718 Memory Card Supported, upto 1 TB 13 ... \n",
|
|||
|
"1719 Memory Card Supported 13 ... \n",
|
|||
|
"\n",
|
|||
|
" Spec_score_bin_2 Battery_bin_0 Battery_bin_1 Battery_bin_2 Ram_bin_0 \\\n",
|
|||
|
"0 False True False False True \n",
|
|||
|
"1 False True False False True \n",
|
|||
|
"2 False True False False False \n",
|
|||
|
"3 False True False False False \n",
|
|||
|
"4 True True False False False \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 False True False False False \n",
|
|||
|
"1716 False True False False False \n",
|
|||
|
"1717 False True False False False \n",
|
|||
|
"1718 False True False False False \n",
|
|||
|
"1719 False True False False False \n",
|
|||
|
"\n",
|
|||
|
" Ram_bin_1 Ram_bin_2 Camera_bin_0 Camera_bin_1 Camera_bin_2 \n",
|
|||
|
"0 False False True False False \n",
|
|||
|
"1 False False True False False \n",
|
|||
|
"2 True False True False False \n",
|
|||
|
"3 True False True False False \n",
|
|||
|
"4 True False True False False \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 True False True False False \n",
|
|||
|
"1716 True False True False False \n",
|
|||
|
"1717 True False True False False \n",
|
|||
|
"1718 True False True False False \n",
|
|||
|
"1719 True False False True False \n",
|
|||
|
"\n",
|
|||
|
"[1720 rows x 29 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 200,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"categorical_features = [\"Spec_score_bin\", \"Battery_bin\", \"Ram_bin\", \"Camera_bin\"]\n",
|
|||
|
"\n",
|
|||
|
"train_df_enc = pd.get_dummies(train_df_disc, columns=categorical_features)\n",
|
|||
|
"val_df_enc = pd.get_dummies(val_df_disc, columns=categorical_features)\n",
|
|||
|
"test_df_enc = pd.get_dummies(test_df_disc, columns=categorical_features)\n",
|
|||
|
"\n",
|
|||
|
"train_df_enc"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Ручной синтез признаков."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 201,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Rating</th>\n",
|
|||
|
" <th>Spec_score</th>\n",
|
|||
|
" <th>No_of_sim</th>\n",
|
|||
|
" <th>Ram</th>\n",
|
|||
|
" <th>Battery</th>\n",
|
|||
|
" <th>Display</th>\n",
|
|||
|
" <th>Camera</th>\n",
|
|||
|
" <th>External_Memory</th>\n",
|
|||
|
" <th>Android_version</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Battery_bin_0</th>\n",
|
|||
|
" <th>Battery_bin_1</th>\n",
|
|||
|
" <th>Battery_bin_2</th>\n",
|
|||
|
" <th>Ram_bin_0</th>\n",
|
|||
|
" <th>Ram_bin_1</th>\n",
|
|||
|
" <th>Ram_bin_2</th>\n",
|
|||
|
" <th>Camera_bin_0</th>\n",
|
|||
|
" <th>Camera_bin_1</th>\n",
|
|||
|
" <th>Camera_bin_2</th>\n",
|
|||
|
" <th>Camera_to_Display_Ratio</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>Vivo Y21T</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>Motorola Moto G23</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>77</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>4</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 512 GB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>Oppo A78 4G</td>\n",
|
|||
|
" <td>4.25</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.45</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>64.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>10.666667</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>iQOO Z5 Pro 5G</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>84</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>4500</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>64.0</td>\n",
|
|||
|
" <td>Memory Card (Hybrid)</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>10.666667</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1715</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1716</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1717</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1718</th>\n",
|
|||
|
" <td>itel P55 Plus</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>50.0</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1719</th>\n",
|
|||
|
" <td>itel S24</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G,</td>\n",
|
|||
|
" <td>8</td>\n",
|
|||
|
" <td>5000</td>\n",
|
|||
|
" <td>6</td>\n",
|
|||
|
" <td>108.0</td>\n",
|
|||
|
" <td>Memory Card Supported</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>18.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1720 rows × 30 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Name Rating Spec_score \\\n",
|
|||
|
"0 Vivo Y21T 3.95 74 \n",
|
|||
|
"1 Motorola Moto G23 4.40 77 \n",
|
|||
|
"2 Oppo A78 4G 4.25 81 \n",
|
|||
|
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
|
|||
|
"4 iQOO Z5 Pro 5G 4.40 84 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1718 itel P55 Plus 4.10 74 \n",
|
|||
|
"1719 itel S24 4.35 75 \n",
|
|||
|
"\n",
|
|||
|
" No_of_sim Ram Battery Display Camera \\\n",
|
|||
|
"0 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"1 Dual Sim, 3G, 4G, VoLTE, 4 5000 6 50.0 \n",
|
|||
|
"2 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"3 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 64.0 \n",
|
|||
|
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 8 4500 6 64.0 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, 6 5000 6 50.0 \n",
|
|||
|
"1718 Dual Sim, 3G, 4G, VoLTE, 8 5000 6 50.0 \n",
|
|||
|
"1719 Dual Sim, 3G, 4G, 8 5000 6 108.0 \n",
|
|||
|
"\n",
|
|||
|
" External_Memory Android_version ... Battery_bin_0 \\\n",
|
|||
|
"0 Memory Card Supported, upto 1 TB 11 ... True \n",
|
|||
|
"1 Memory Card Supported, upto 512 GB 13 ... True \n",
|
|||
|
"2 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"3 Memory Card Supported, upto 1 TB 11 ... True \n",
|
|||
|
"4 Memory Card (Hybrid) 11 ... True \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"1715 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1716 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1717 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1718 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1719 Memory Card Supported 13 ... True \n",
|
|||
|
"\n",
|
|||
|
" Battery_bin_1 Battery_bin_2 Ram_bin_0 Ram_bin_1 Ram_bin_2 Camera_bin_0 \\\n",
|
|||
|
"0 False False True False False True \n",
|
|||
|
"1 False False True False False True \n",
|
|||
|
"2 False False False True False True \n",
|
|||
|
"3 False False False True False True \n",
|
|||
|
"4 False False False True False True \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"1715 False False False True False True \n",
|
|||
|
"1716 False False False True False True \n",
|
|||
|
"1717 False False False True False True \n",
|
|||
|
"1718 False False False True False True \n",
|
|||
|
"1719 False False False True False False \n",
|
|||
|
"\n",
|
|||
|
" Camera_bin_1 Camera_bin_2 Camera_to_Display_Ratio \n",
|
|||
|
"0 False False 8.333333 \n",
|
|||
|
"1 False False 8.333333 \n",
|
|||
|
"2 False False 8.333333 \n",
|
|||
|
"3 False False 10.666667 \n",
|
|||
|
"4 False False 10.666667 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 False False 8.333333 \n",
|
|||
|
"1716 False False 8.333333 \n",
|
|||
|
"1717 False False 8.333333 \n",
|
|||
|
"1718 False False 8.333333 \n",
|
|||
|
"1719 True False 18.000000 \n",
|
|||
|
"\n",
|
|||
|
"[1720 rows x 30 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 201,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"train_df_enc[\"Camera_to_Display_Ratio\"] = (train_df_enc[\"Camera\"] / train_df_enc[\"Display\"])\n",
|
|||
|
"val_df_enc[\"Camera_to_Display_Ratio\"] = val_df_enc[\"Camera\"] / val_df_enc[\"Display\"]\n",
|
|||
|
"test_df_enc[\"Camera_to_Display_Ratio\"] = test_df_enc[\"Camera\"] / test_df_enc[\"Display\"]\n",
|
|||
|
"\n",
|
|||
|
"train_df_enc"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Масштабирование признаков"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 202,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Name</th>\n",
|
|||
|
" <th>Rating</th>\n",
|
|||
|
" <th>Spec_score</th>\n",
|
|||
|
" <th>No_of_sim</th>\n",
|
|||
|
" <th>Ram</th>\n",
|
|||
|
" <th>Battery</th>\n",
|
|||
|
" <th>Display</th>\n",
|
|||
|
" <th>Camera</th>\n",
|
|||
|
" <th>External_Memory</th>\n",
|
|||
|
" <th>Android_version</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>Battery_bin_0</th>\n",
|
|||
|
" <th>Battery_bin_1</th>\n",
|
|||
|
" <th>Battery_bin_2</th>\n",
|
|||
|
" <th>Ram_bin_0</th>\n",
|
|||
|
" <th>Ram_bin_1</th>\n",
|
|||
|
" <th>Ram_bin_2</th>\n",
|
|||
|
" <th>Camera_bin_0</th>\n",
|
|||
|
" <th>Camera_bin_1</th>\n",
|
|||
|
" <th>Camera_bin_2</th>\n",
|
|||
|
" <th>Camera_to_Display_Ratio</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>Vivo Y21T</td>\n",
|
|||
|
" <td>3.95</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>-1.388963</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>Motorola Moto G23</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>77</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>-1.388963</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 512 GB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>Oppo A78 4G</td>\n",
|
|||
|
" <td>4.25</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>0.720078</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>POCO M4 Pro 4G (8GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.45</td>\n",
|
|||
|
" <td>81</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>0.720078</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>0.275662</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>10.666667</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>iQOO Z5 Pro 5G</td>\n",
|
|||
|
" <td>4.40</td>\n",
|
|||
|
" <td>84</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>0.720078</td>\n",
|
|||
|
" <td>-0.675789</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>0.275662</td>\n",
|
|||
|
" <td>Memory Card (Hybrid)</td>\n",
|
|||
|
" <td>11</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>10.666667</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1715</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>-0.334442</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1716</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>-0.334442</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1717</th>\n",
|
|||
|
" <td>itel P55 5G (6GB RAM + 128GB)</td>\n",
|
|||
|
" <td>4.00</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, 5G, VoLTE,</td>\n",
|
|||
|
" <td>-0.334442</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1718</th>\n",
|
|||
|
" <td>itel P55 Plus</td>\n",
|
|||
|
" <td>4.10</td>\n",
|
|||
|
" <td>74</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G, VoLTE,</td>\n",
|
|||
|
" <td>0.720078</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>-0.240789</td>\n",
|
|||
|
" <td>Memory Card Supported, upto 1 TB</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>8.333333</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1719</th>\n",
|
|||
|
" <td>itel S24</td>\n",
|
|||
|
" <td>4.35</td>\n",
|
|||
|
" <td>75</td>\n",
|
|||
|
" <td>Dual Sim, 3G, 4G,</td>\n",
|
|||
|
" <td>0.720078</td>\n",
|
|||
|
" <td>0.206174</td>\n",
|
|||
|
" <td>0.096622</td>\n",
|
|||
|
" <td>1.898794</td>\n",
|
|||
|
" <td>Memory Card Supported</td>\n",
|
|||
|
" <td>13</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>True</td>\n",
|
|||
|
" <td>False</td>\n",
|
|||
|
" <td>18.000000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1720 rows × 30 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Name Rating Spec_score \\\n",
|
|||
|
"0 Vivo Y21T 3.95 74 \n",
|
|||
|
"1 Motorola Moto G23 4.40 77 \n",
|
|||
|
"2 Oppo A78 4G 4.25 81 \n",
|
|||
|
"3 POCO M4 Pro 4G (8GB RAM + 128GB) 4.45 81 \n",
|
|||
|
"4 iQOO Z5 Pro 5G 4.40 84 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1716 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1717 itel P55 5G (6GB RAM + 128GB) 4.00 75 \n",
|
|||
|
"1718 itel P55 Plus 4.10 74 \n",
|
|||
|
"1719 itel S24 4.35 75 \n",
|
|||
|
"\n",
|
|||
|
" No_of_sim Ram Battery Display Camera \\\n",
|
|||
|
"0 Dual Sim, 3G, 4G, VoLTE, -1.388963 0.206174 0.096622 -0.240789 \n",
|
|||
|
"1 Dual Sim, 3G, 4G, VoLTE, -1.388963 0.206174 0.096622 -0.240789 \n",
|
|||
|
"2 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 -0.240789 \n",
|
|||
|
"3 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 0.275662 \n",
|
|||
|
"4 Dual Sim, 3G, 4G, 5G, VoLTE, 0.720078 -0.675789 0.096622 0.275662 \n",
|
|||
|
"... ... ... ... ... ... \n",
|
|||
|
"1715 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n",
|
|||
|
"1716 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n",
|
|||
|
"1717 Dual Sim, 3G, 4G, 5G, VoLTE, -0.334442 0.206174 0.096622 -0.240789 \n",
|
|||
|
"1718 Dual Sim, 3G, 4G, VoLTE, 0.720078 0.206174 0.096622 -0.240789 \n",
|
|||
|
"1719 Dual Sim, 3G, 4G, 0.720078 0.206174 0.096622 1.898794 \n",
|
|||
|
"\n",
|
|||
|
" External_Memory Android_version ... Battery_bin_0 \\\n",
|
|||
|
"0 Memory Card Supported, upto 1 TB 11 ... True \n",
|
|||
|
"1 Memory Card Supported, upto 512 GB 13 ... True \n",
|
|||
|
"2 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"3 Memory Card Supported, upto 1 TB 11 ... True \n",
|
|||
|
"4 Memory Card (Hybrid) 11 ... True \n",
|
|||
|
"... ... ... ... ... \n",
|
|||
|
"1715 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1716 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1717 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1718 Memory Card Supported, upto 1 TB 13 ... True \n",
|
|||
|
"1719 Memory Card Supported 13 ... True \n",
|
|||
|
"\n",
|
|||
|
" Battery_bin_1 Battery_bin_2 Ram_bin_0 Ram_bin_1 Ram_bin_2 Camera_bin_0 \\\n",
|
|||
|
"0 False False True False False True \n",
|
|||
|
"1 False False True False False True \n",
|
|||
|
"2 False False False True False True \n",
|
|||
|
"3 False False False True False True \n",
|
|||
|
"4 False False False True False True \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"1715 False False False True False True \n",
|
|||
|
"1716 False False False True False True \n",
|
|||
|
"1717 False False False True False True \n",
|
|||
|
"1718 False False False True False True \n",
|
|||
|
"1719 False False False True False False \n",
|
|||
|
"\n",
|
|||
|
" Camera_bin_1 Camera_bin_2 Camera_to_Display_Ratio \n",
|
|||
|
"0 False False 8.333333 \n",
|
|||
|
"1 False False 8.333333 \n",
|
|||
|
"2 False False 8.333333 \n",
|
|||
|
"3 False False 10.666667 \n",
|
|||
|
"4 False False 10.666667 \n",
|
|||
|
"... ... ... ... \n",
|
|||
|
"1715 False False 8.333333 \n",
|
|||
|
"1716 False False 8.333333 \n",
|
|||
|
"1717 False False 8.333333 \n",
|
|||
|
"1718 False False 8.333333 \n",
|
|||
|
"1719 True False 18.000000 \n",
|
|||
|
"\n",
|
|||
|
"[1720 rows x 30 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 202,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"scaler = StandardScaler()\n",
|
|||
|
"\n",
|
|||
|
"numerical_features = [\n",
|
|||
|
" \"Ram\",\n",
|
|||
|
" \"Battery\",\n",
|
|||
|
" \"Display\",\n",
|
|||
|
" \"Camera\",\n",
|
|||
|
"]\n",
|
|||
|
"\n",
|
|||
|
"train_df_enc[numerical_features] = scaler.fit_transform(\n",
|
|||
|
" train_df_enc[numerical_features]\n",
|
|||
|
")\n",
|
|||
|
"val_df_enc[numerical_features] = scaler.transform(val_df_enc[numerical_features])\n",
|
|||
|
"test_df_enc[numerical_features] = scaler.transform(test_df_enc[numerical_features])\n",
|
|||
|
"\n",
|
|||
|
"train_df_enc"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Конструирование признаков с помощью Featuretools"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 203,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stderr",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\entityset\\entityset.py:1733: UserWarning: index id not found in dataframe, creating new integer column\n",
|
|||
|
" warnings.warn(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\woodwork\\type_sys\\utils.py:33: UserWarning: Could not infer format, so each element will be parsed individually, falling back to `dateutil`. To ensure parsing is consistent and as-expected, please specify a format.\n",
|
|||
|
" pd.to_datetime(\n",
|
|||
|
"c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
|
|||
|
" warnings.warn(\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/plain": [
|
|||
|
"[<Feature: Rating>,\n",
|
|||
|
" <Feature: Spec_score>,\n",
|
|||
|
" <Feature: No_of_sim>,\n",
|
|||
|
" <Feature: Ram>,\n",
|
|||
|
" <Feature: Battery>,\n",
|
|||
|
" <Feature: Display>,\n",
|
|||
|
" <Feature: Camera>,\n",
|
|||
|
" <Feature: External_Memory>,\n",
|
|||
|
" <Feature: Android_version>,\n",
|
|||
|
" <Feature: Price>,\n",
|
|||
|
" <Feature: Inbuilt_memory>,\n",
|
|||
|
" <Feature: fast_charging>,\n",
|
|||
|
" <Feature: Screen_resolution>,\n",
|
|||
|
" <Feature: Processor>,\n",
|
|||
|
" <Feature: Processor_name>,\n",
|
|||
|
" <Feature: company>,\n",
|
|||
|
" <Feature: Spec_score_bin_0>,\n",
|
|||
|
" <Feature: Spec_score_bin_1>,\n",
|
|||
|
" <Feature: Spec_score_bin_2>,\n",
|
|||
|
" <Feature: Battery_bin_0>,\n",
|
|||
|
" <Feature: Battery_bin_1>,\n",
|
|||
|
" <Feature: Battery_bin_2>,\n",
|
|||
|
" <Feature: Ram_bin_0>,\n",
|
|||
|
" <Feature: Ram_bin_1>,\n",
|
|||
|
" <Feature: Ram_bin_2>,\n",
|
|||
|
" <Feature: Camera_bin_0>,\n",
|
|||
|
" <Feature: Camera_bin_1>,\n",
|
|||
|
" <Feature: Camera_bin_2>,\n",
|
|||
|
" <Feature: Camera_to_Display_Ratio>]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 203,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"es = ft.EntitySet(id=\"mobile_data\")\n",
|
|||
|
"es = es.add_dataframe(dataframe_name=\"train\", dataframe=train_df_enc, index=\"id\")\n",
|
|||
|
"feature_matrix, feature_defs = ft.dfs(\n",
|
|||
|
" entityset=es, target_dataframe_name=\"train\", max_depth=2\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"feature_defs"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": ".venv",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.7"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|