From daa238663b02f1025d9bee341edf97b1370fd2f7 Mon Sep 17 00:00:00 2001 From: Zakharov_Rostislav Date: Sat, 7 Dec 2024 01:40:37 +0400 Subject: [PATCH] feat(lab3): add oversampling --- notebooks/lab3_2.ipynb | 1095 ++++++++++++++++++++++------------------ 1 file changed, 610 insertions(+), 485 deletions(-) diff --git a/notebooks/lab3_2.ipynb b/notebooks/lab3_2.ipynb index 54f34e1..0c6b54c 100644 --- a/notebooks/lab3_2.ipynb +++ b/notebooks/lab3_2.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 1058, + "execution_count": 1329, "metadata": {}, "outputs": [ { @@ -325,7 +325,7 @@ "[19237 rows x 17 columns]" ] }, - "execution_count": 1058, + "execution_count": 1329, "metadata": {}, "output_type": "execute_result" } @@ -358,7 +358,7 @@ }, { "cell_type": "code", - "execution_count": 1059, + "execution_count": 1330, "metadata": {}, "outputs": [ { @@ -384,7 +384,7 @@ "dtype: object" ] }, - "execution_count": 1059, + "execution_count": 1330, "metadata": {}, "output_type": "execute_result" } @@ -395,7 +395,7 @@ }, { "cell_type": "code", - "execution_count": 1060, + "execution_count": 1331, "metadata": {}, "outputs": [ { @@ -409,7 +409,7 @@ " 6.8, 4.5, 7.3, 0.1, 3.1, 6.4, 3.9, 0.9, 5.2, 5.8])" ] }, - "execution_count": 1060, + "execution_count": 1331, "metadata": {}, "output_type": "execute_result" } @@ -422,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 1061, + "execution_count": 1332, "metadata": {}, "outputs": [ { @@ -431,7 +431,7 @@ "array([186005, 192000, 200000, ..., 140607, 307325, 186923])" ] }, - "execution_count": 1061, + "execution_count": 1332, "metadata": {}, "output_type": "execute_result" } @@ -444,7 +444,7 @@ }, { "cell_type": "code", - "execution_count": 1062, + "execution_count": 1333, "metadata": {}, "outputs": [ { @@ -515,7 +515,7 @@ " 1901])" ] }, - "execution_count": 1062, + "execution_count": 1333, "metadata": {}, "output_type": "execute_result" } @@ -528,7 +528,7 @@ }, { "cell_type": "code", - "execution_count": 1063, + "execution_count": 1334, "metadata": {}, "outputs": [ { @@ -537,7 +537,7 @@ "array([ 6, 4, 8, 1, 12, 3, 2, 16, 5, 7, 9, 10, 14])" ] }, - "execution_count": 1063, + "execution_count": 1334, "metadata": {}, "output_type": "execute_result" } @@ -549,7 +549,7 @@ }, { "cell_type": "code", - "execution_count": 1064, + "execution_count": 1335, "metadata": {}, "outputs": [ { @@ -558,7 +558,7 @@ "array(['04-May', '02-Mar', '>5'], dtype=object)" ] }, - "execution_count": 1064, + "execution_count": 1335, "metadata": {}, "output_type": "execute_result" } @@ -569,7 +569,7 @@ }, { "cell_type": "code", - "execution_count": 1065, + "execution_count": 1336, "metadata": {}, "outputs": [ { @@ -578,7 +578,7 @@ "array(['Четырехдверный', 'Двухдверный', 'Многодверный'], dtype=object)" ] }, - "execution_count": 1065, + "execution_count": 1336, "metadata": {}, "output_type": "execute_result" } @@ -592,7 +592,7 @@ }, { "cell_type": "code", - "execution_count": 1066, + "execution_count": 1337, "metadata": {}, "outputs": [ { @@ -601,7 +601,7 @@ "array([ 1, 3, 6, ..., 627220, 872946, 26307500])" ] }, - "execution_count": 1066, + "execution_count": 1337, "metadata": {}, "output_type": "execute_result" } @@ -613,7 +613,7 @@ }, { "cell_type": "code", - "execution_count": 1067, + "execution_count": 1338, "metadata": {}, "outputs": [ { @@ -633,7 +633,7 @@ }, { "cell_type": "code", - "execution_count": 1068, + "execution_count": 1339, "metadata": {}, "outputs": [ { @@ -642,7 +642,7 @@ "array([ 500, 549, 600, ..., 627220, 872946, 26307500])" ] }, - "execution_count": 1068, + "execution_count": 1339, "metadata": {}, "output_type": "execute_result" } @@ -654,7 +654,7 @@ }, { "cell_type": "code", - "execution_count": 1069, + "execution_count": 1340, "metadata": {}, "outputs": [ { @@ -667,7 +667,7 @@ " 2014, 2015, 2016, 2017, 2018, 2019, 2020])" ] }, - "execution_count": 1069, + "execution_count": 1340, "metadata": {}, "output_type": "execute_result" } @@ -686,7 +686,7 @@ }, { "cell_type": "code", - "execution_count": 1070, + "execution_count": 1341, "metadata": {}, "outputs": [ { @@ -697,7 +697,7 @@ " 34, 35, 36, 37, 38, 39, 40, 42, 43, 46, 47, 52, 55, 56, 63, 67, 77])" ] }, - "execution_count": 1070, + "execution_count": 1341, "metadata": {}, "output_type": "execute_result" } @@ -711,7 +711,7 @@ }, { "cell_type": "code", - "execution_count": 1071, + "execution_count": 1342, "metadata": {}, "outputs": [ { @@ -1023,7 +1023,7 @@ "[17574 rows x 17 columns]" ] }, - "execution_count": 1071, + "execution_count": 1342, "metadata": {}, "output_type": "execute_result" } @@ -1041,7 +1041,7 @@ }, { "cell_type": "code", - "execution_count": 1072, + "execution_count": 1343, "metadata": {}, "outputs": [ { @@ -1050,7 +1050,7 @@ "np.int64(2773)" ] }, - "execution_count": 1072, + "execution_count": 1343, "metadata": {}, "output_type": "execute_result" } @@ -1061,7 +1061,7 @@ }, { "cell_type": "code", - "execution_count": 1073, + "execution_count": 1344, "metadata": {}, "outputs": [], "source": [ @@ -1070,7 +1070,7 @@ }, { "cell_type": "code", - "execution_count": 1074, + "execution_count": 1345, "metadata": {}, "outputs": [ { @@ -1096,7 +1096,7 @@ "dtype: int64" ] }, - "execution_count": 1074, + "execution_count": 1345, "metadata": {}, "output_type": "execute_result" } @@ -1114,7 +1114,7 @@ }, { "cell_type": "code", - "execution_count": 1075, + "execution_count": 1346, "metadata": {}, "outputs": [ { @@ -1140,7 +1140,7 @@ "dtype: object" ] }, - "execution_count": 1075, + "execution_count": 1346, "metadata": {}, "output_type": "execute_result" } @@ -1151,7 +1151,7 @@ }, { "cell_type": "code", - "execution_count": 1076, + "execution_count": 1347, "metadata": {}, "outputs": [ { @@ -1213,7 +1213,7 @@ }, { "cell_type": "code", - "execution_count": 1077, + "execution_count": 1348, "metadata": {}, "outputs": [ { @@ -1244,7 +1244,7 @@ }, { "cell_type": "code", - "execution_count": 1078, + "execution_count": 1349, "metadata": {}, "outputs": [ { @@ -1306,7 +1306,7 @@ }, { "cell_type": "code", - "execution_count": 1079, + "execution_count": 1350, "metadata": {}, "outputs": [ { @@ -1314,17 +1314,120 @@ "output_type": "stream", "text": [ "Размеры выборок:\n", - "Обучающая выборка: 10077 записей\n", - "Тестовая выборка: 2520 записей\n" + "Обучающая выборка: 8817 записей\n", + "Category\n", + "Sedan 3954\n", + "Jeep 2263\n", + "Hatchback 1554\n", + "Minivan 312\n", + "Coupe 251\n", + "Universal 180\n", + "Microbus 143\n", + "Goods wagon 120\n", + "Pickup 22\n", + "Cabriolet 16\n", + "Limousine 2\n", + "Name: count, dtype: int64\n", + "Тестовая выборка: 3780 записей\n", + "Category\n", + "Sedan 1692\n", + "Jeep 990\n", + "Hatchback 636\n", + "Minivan 151\n", + "Coupe 117\n", + "Universal 82\n", + "Goods wagon 52\n", + "Microbus 46\n", + "Pickup 8\n", + "Cabriolet 5\n", + "Limousine 1\n", + "Name: count, dtype: int64\n" ] } ], "source": [ - "train_df, test_df = train_test_split(df, test_size=0.2, random_state=42)\n", + "X = df\n", + "y = df[\"Category\"]\n", + "\n", + "train_df, test_df, y_train, y_test = train_test_split(\n", + " X, y, test_size=0.3, random_state=42\n", + ")\n", "\n", "print(\"Размеры выборок:\")\n", "print(f\"Обучающая выборка: {train_df.shape[0]} записей\")\n", - "print(f\"Тестовая выборка: {test_df.shape[0]} записей\")" + "print(train_df.Category.value_counts())\n", + "print(f\"Тестовая выборка: {test_df.shape[0]} записей\")\n", + "print(test_df.Category.value_counts())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Oversampling" + ] + }, + { + "cell_type": "code", + "execution_count": 1351, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Размеры выборок:\n", + "Обучающая выборка: 43494 записей\n", + "Category\n", + "Sedan 3954\n", + "Jeep 3954\n", + "Universal 3954\n", + "Hatchback 3954\n", + "Coupe 3954\n", + "Goods wagon 3954\n", + "Minivan 3954\n", + "Microbus 3954\n", + "Pickup 3954\n", + "Limousine 3954\n", + "Cabriolet 3954\n", + "Name: count, dtype: int64\n", + "Тестовая выборка: 18612 записей\n", + "Category\n", + "Hatchback 1692\n", + "Sedan 1692\n", + "Universal 1692\n", + "Jeep 1692\n", + "Coupe 1692\n", + "Minivan 1692\n", + "Goods wagon 1692\n", + "Microbus 1692\n", + "Pickup 1692\n", + "Cabriolet 1692\n", + "Limousine 1692\n", + "Name: count, dtype: int64\n" + ] + } + ], + "source": [ + "def oversample(df):\n", + " X = df.drop(\"Category\", axis=1)\n", + " y = df[\"Category\"]\n", + "\n", + " oversampler = RandomOverSampler(random_state=42)\n", + " X_resampled, y_resampled = oversampler.fit_resample(X, y) # type: ignore\n", + "\n", + " resampled_df = pd.concat([X_resampled, y_resampled], axis=1)\n", + " return resampled_df\n", + "\n", + "\n", + "train_df_overs = oversample(train_df)\n", + "test_df_overs = oversample(test_df)\n", + "\n", + "print(\"Размеры выборок:\")\n", + "print(f\"Обучающая выборка: {train_df_overs.shape[0]} записей\")\n", + "print(train_df_overs.Category.value_counts())\n", + "print(f\"Тестовая выборка: {test_df_overs.shape[0]} записей\")\n", + "print(test_df_overs.Category.value_counts())" ] }, { @@ -1336,7 +1439,7 @@ }, { "cell_type": "code", - "execution_count": 1080, + "execution_count": 1352, "metadata": {}, "outputs": [ { @@ -1362,7 +1465,7 @@ "dtype: object" ] }, - "execution_count": 1080, + "execution_count": 1352, "metadata": {}, "output_type": "execute_result" } @@ -1373,7 +1476,7 @@ }, { "cell_type": "code", - "execution_count": 1081, + "execution_count": 1353, "metadata": {}, "outputs": [ { @@ -1419,16 +1522,16 @@ " \n", " \n", " \n", - " 14829\n", - " 6743\n", - " 966\n", - " DAEWOO\n", - " Lacetti\n", + " 15146\n", + " 18503\n", + " 0\n", + " TOYOTA\n", + " Prius\n", " Sedan\n", - " Yes\n", - " Diesel\n", - " 2.0\n", - " 62227\n", + " No\n", + " Petrol\n", + " 1.8\n", + " 13000\n", " 4\n", " Automatic\n", " Front\n", @@ -1436,91 +1539,91 @@ " Left wheel\n", " White\n", " 4\n", - " 11\n", + " 0\n", + " Новый\n", + " \n", + " \n", + " 14145\n", + " 9722\n", + " 0\n", + " TOYOTA\n", + " Ractis\n", + " Sedan\n", + " No\n", + " Petrol\n", + " 1.5\n", + " 116800\n", + " 4\n", + " Tiptronic\n", + " Front\n", + " Четырехдверный\n", + " Right-hand drive\n", + " Brown\n", + " 2\n", + " 13\n", " Старый\n", " \n", " \n", - " 3632\n", - " 20005\n", - " 583\n", - " HYUNDAI\n", - " Elantra\n", - " Sedan\n", - " Yes\n", - " Petrol\n", - " 1.6\n", - " 94479\n", - " 4\n", - " Automatic\n", - " Front\n", - " Четырехдверный\n", - " Left wheel\n", - " Red\n", - " 4\n", - " 9\n", - " Средний\n", - " \n", - " \n", - " 4982\n", - " 13172\n", - " 836\n", - " DODGE\n", - " Caliber\n", - " Hatchback\n", - " No\n", - " Petrol\n", - " 2.0\n", - " 114000\n", - " 4\n", - " Variator\n", - " Front\n", - " Четырехдверный\n", - " Left wheel\n", - " Silver\n", - " 8\n", - " 10\n", - " Средний\n", - " \n", - " \n", - " 16758\n", - " 8781\n", + " 8943\n", + " 15367\n", " 584\n", " HYUNDAI\n", " Elantra\n", " Sedan\n", - " Yes\n", + " No\n", " Petrol\n", " 1.8\n", - " 60000\n", + " 78222\n", " 4\n", " Tiptronic\n", " Front\n", " Четырехдверный\n", " Left wheel\n", - " Grey\n", + " Beige\n", " 10\n", " 6\n", " Средний\n", " \n", " \n", - " 6875\n", - " 25086\n", - " 0\n", - " TOYOTA\n", - " Prius\n", - " Hatchback\n", - " No\n", - " Hybrid\n", - " 1.8\n", + " 17889\n", + " 11917\n", " 0\n", + " SUBARU\n", + " Forester L.L.BEAN\n", + " Jeep\n", + " Yes\n", + " CNG\n", + " 2.5\n", + " 220000\n", + " 4\n", + " Automatic\n", + " 4x4\n", + " Четырехдверный\n", + " Left wheel\n", + " Green\n", + " 5\n", + " 16\n", + " Очень старый\n", + " \n", + " \n", + " 9515\n", + " 46919\n", + " 1327\n", + " HYUNDAI\n", + " H1\n", + " Universal\n", + " Yes\n", + " Diesel\n", + " 2.5\n", + " 71689\n", " 4\n", " Automatic\n", " Front\n", " Четырехдверный\n", " Left wheel\n", - " Silver\n", - " 12\n", - " 5\n", + " Grey\n", + " 4\n", + " 2\n", " Новый\n", " \n", " \n", @@ -1651,53 +1754,66 @@ " \n", " \n", "\n", - "

10077 rows × 18 columns

\n", + "

8817 rows × 18 columns

\n", "" ], "text/plain": [ - " Price Levy Manufacturer Model Category Leather interior \\\n", - "14829 6743 966 DAEWOO Lacetti Sedan Yes \n", - "3632 20005 583 HYUNDAI Elantra Sedan Yes \n", - "4982 13172 836 DODGE Caliber Hatchback No \n", - "16758 8781 584 HYUNDAI Elantra Sedan Yes \n", - "6875 25086 0 TOYOTA Prius Hatchback No \n", - "... ... ... ... ... ... ... \n", - "18201 10349 0 AUDI A4 Sedan Yes \n", - "7436 2038 765 KIA Avella Sedan Yes \n", - "7728 13485 843 TOYOTA Prius Hatchback No \n", - "1136 15677 0 FORD Fiesta Sedan No \n", - "10640 16308 751 KIA Optima EX Sedan Yes \n", + " Price Levy Manufacturer Model Category \\\n", + "15146 18503 0 TOYOTA Prius Sedan \n", + "14145 9722 0 TOYOTA Ractis Sedan \n", + "8943 15367 584 HYUNDAI Elantra Sedan \n", + "17889 11917 0 SUBARU Forester L.L.BEAN Jeep \n", + "9515 46919 1327 HYUNDAI H1 Universal \n", + "... ... ... ... ... ... \n", + "18201 10349 0 AUDI A4 Sedan \n", + "7436 2038 765 KIA Avella Sedan \n", + "7728 13485 843 TOYOTA Prius Hatchback \n", + "1136 15677 0 FORD Fiesta Sedan \n", + "10640 16308 751 KIA Optima EX Sedan \n", "\n", - " Fuel type Engine volume Mileage Cylinders Gear box type Drive wheels \\\n", - "14829 Diesel 2.0 62227 4 Automatic Front \n", - "3632 Petrol 1.6 94479 4 Automatic Front \n", - "4982 Petrol 2.0 114000 4 Variator Front \n", - "16758 Petrol 1.8 60000 4 Tiptronic Front \n", - "6875 Hybrid 1.8 0 4 Automatic Front \n", - "... ... ... ... ... ... ... \n", - "18201 Petrol 2.4 150000 6 Manual 4x4 \n", - "7436 Petrol 2.0 125621 4 Automatic Front \n", - "7728 Hybrid 1.5 212000 4 Variator Front \n", - "1136 Petrol 1.6 74800 4 Automatic Front \n", - "10640 Petrol 2.4 92000 12 Tiptronic Front \n", + " Leather interior Fuel type Engine volume Mileage Cylinders \\\n", + "15146 No Petrol 1.8 13000 4 \n", + "14145 No Petrol 1.5 116800 4 \n", + "8943 No Petrol 1.8 78222 4 \n", + "17889 Yes CNG 2.5 220000 4 \n", + "9515 Yes Diesel 2.5 71689 4 \n", + "... ... ... ... ... ... \n", + "18201 Yes Petrol 2.4 150000 6 \n", + "7436 Yes Petrol 2.0 125621 4 \n", + "7728 No Hybrid 1.5 212000 4 \n", + "1136 No Petrol 1.6 74800 4 \n", + "10640 Yes Petrol 2.4 92000 12 \n", "\n", - " Doors Wheel Color Airbags Age Age_bin \n", - "14829 Четырехдверный Left wheel White 4 11 Старый \n", - "3632 Четырехдверный Left wheel Red 4 9 Средний \n", - "4982 Четырехдверный Left wheel Silver 8 10 Средний \n", - "16758 Четырехдверный Left wheel Grey 10 6 Средний \n", - "6875 Четырехдверный Left wheel Silver 12 5 Новый \n", - "... ... ... ... ... ... ... \n", - "18201 Четырехдверный Left wheel Grey 4 13 Старый \n", - "7436 Четырехдверный Left wheel Silver 12 5 Новый \n", - "7728 Четырехдверный Left wheel Silver 8 12 Старый \n", - "1136 Четырехдверный Left wheel Silver 8 4 Новый \n", - "10640 Четырехдверный Left wheel Silver 8 7 Средний \n", + " Gear box type Drive wheels Doors Wheel Color \\\n", + "15146 Automatic Front Четырехдверный Left wheel White \n", + "14145 Tiptronic Front Четырехдверный Right-hand drive Brown \n", + "8943 Tiptronic Front Четырехдверный Left wheel Beige \n", + "17889 Automatic 4x4 Четырехдверный Left wheel Green \n", + "9515 Automatic Front Четырехдверный Left wheel Grey \n", + "... ... ... ... ... ... \n", + "18201 Manual 4x4 Четырехдверный Left wheel Grey \n", + "7436 Automatic Front Четырехдверный Left wheel Silver \n", + "7728 Variator Front Четырехдверный Left wheel Silver \n", + "1136 Automatic Front Четырехдверный Left wheel Silver \n", + "10640 Tiptronic Front Четырехдверный Left wheel Silver \n", "\n", - "[10077 rows x 18 columns]" + " Airbags Age Age_bin \n", + "15146 4 0 Новый \n", + "14145 2 13 Старый \n", + "8943 10 6 Средний \n", + "17889 5 16 Очень старый \n", + "9515 4 2 Новый \n", + "... ... ... ... \n", + "18201 4 13 Старый \n", + "7436 12 5 Новый \n", + "7728 8 12 Старый \n", + "1136 8 4 Новый \n", + "10640 8 7 Средний \n", + "\n", + "[8817 rows x 18 columns]" ] }, - "execution_count": 1081, + "execution_count": 1353, "metadata": {}, "output_type": "execute_result" } @@ -1729,9 +1845,18 @@ }, { "cell_type": "code", - "execution_count": 1082, + "execution_count": 1354, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\IPython\\core\\displayhook.py:281: UserWarning: Output cache limit (currently 1000 entries) hit.\n", + "Flushing oldest 200 entries.\n", + " warn('Output cache limit (currently {sz} entries) hit.\\n'\n" + ] + }, { "data": { "text/plain": [ @@ -1756,7 +1881,7 @@ "dtype: object" ] }, - "execution_count": 1082, + "execution_count": 1354, "metadata": {}, "output_type": "execute_result" } @@ -1767,7 +1892,7 @@ }, { "cell_type": "code", - "execution_count": 1083, + "execution_count": 1355, "metadata": {}, "outputs": [ { @@ -1816,17 +1941,17 @@ " \n", " \n", " \n", - " 14829\n", - " 6743\n", - " 966\n", - " DAEWOO\n", - " Lacetti\n", - " 2.0\n", - " 62227\n", + " 15146\n", + " 18503\n", + " 0\n", + " TOYOTA\n", + " Prius\n", + " 1.8\n", + " 13000\n", " 4\n", " White\n", " 4\n", - " 11\n", + " 0\n", " ...\n", " False\n", " False\n", @@ -1834,69 +1959,45 @@ " True\n", " True\n", " False\n", - " False\n", - " False\n", " True\n", " False\n", + " False\n", + " False\n", " \n", " \n", - " 3632\n", - " 20005\n", - " 583\n", - " HYUNDAI\n", - " Elantra\n", - " 1.6\n", - " 94479\n", + " 14145\n", + " 9722\n", + " 0\n", + " TOYOTA\n", + " Ractis\n", + " 1.5\n", + " 116800\n", " 4\n", - " Red\n", - " 4\n", - " 9\n", + " Brown\n", + " 2\n", + " 13\n", " ...\n", " False\n", " False\n", " False\n", " True\n", + " False\n", " True\n", " False\n", " False\n", " True\n", " False\n", - " False\n", " \n", " \n", - " 4982\n", - " 13172\n", - " 836\n", - " DODGE\n", - " Caliber\n", - " 2.0\n", - " 114000\n", - " 4\n", - " Silver\n", - " 8\n", - " 10\n", - " ...\n", - " False\n", - " False\n", - " False\n", - " True\n", - " True\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " \n", - " \n", - " 16758\n", - " 8781\n", + " 8943\n", + " 15367\n", " 584\n", " HYUNDAI\n", " Elantra\n", " 1.8\n", - " 60000\n", + " 78222\n", " 4\n", - " Grey\n", + " Beige\n", " 10\n", " 6\n", " ...\n", @@ -1912,17 +2013,41 @@ " False\n", " \n", " \n", - " 6875\n", - " 25086\n", - " 0\n", - " TOYOTA\n", - " Prius\n", - " 1.8\n", + " 17889\n", + " 11917\n", " 0\n", + " SUBARU\n", + " Forester L.L.BEAN\n", + " 2.5\n", + " 220000\n", " 4\n", - " Silver\n", - " 12\n", + " Green\n", " 5\n", + " 16\n", + " ...\n", + " False\n", + " False\n", + " False\n", + " True\n", + " True\n", + " False\n", + " False\n", + " False\n", + " False\n", + " True\n", + " \n", + " \n", + " 9515\n", + " 46919\n", + " 1327\n", + " HYUNDAI\n", + " H1\n", + " 2.5\n", + " 71689\n", + " 4\n", + " Grey\n", + " 4\n", + " 2\n", " ...\n", " False\n", " False\n", @@ -2081,79 +2206,79 @@ " \n", " \n", "\n", - "

10077 rows × 46 columns

\n", + "

8817 rows × 46 columns

\n", "" ], "text/plain": [ - " Price Levy Manufacturer Model Engine volume Mileage Cylinders \\\n", - "14829 6743 966 DAEWOO Lacetti 2.0 62227 4 \n", - "3632 20005 583 HYUNDAI Elantra 1.6 94479 4 \n", - "4982 13172 836 DODGE Caliber 2.0 114000 4 \n", - "16758 8781 584 HYUNDAI Elantra 1.8 60000 4 \n", - "6875 25086 0 TOYOTA Prius 1.8 0 4 \n", - "... ... ... ... ... ... ... ... \n", - "18201 10349 0 AUDI A4 2.4 150000 6 \n", - "7436 2038 765 KIA Avella 2.0 125621 4 \n", - "7728 13485 843 TOYOTA Prius 1.5 212000 4 \n", - "1136 15677 0 FORD Fiesta 1.6 74800 4 \n", - "10640 16308 751 KIA Optima EX 2.4 92000 12 \n", + " Price Levy Manufacturer Model Engine volume Mileage \\\n", + "15146 18503 0 TOYOTA Prius 1.8 13000 \n", + "14145 9722 0 TOYOTA Ractis 1.5 116800 \n", + "8943 15367 584 HYUNDAI Elantra 1.8 78222 \n", + "17889 11917 0 SUBARU Forester L.L.BEAN 2.5 220000 \n", + "9515 46919 1327 HYUNDAI H1 2.5 71689 \n", + "... ... ... ... ... ... ... \n", + "18201 10349 0 AUDI A4 2.4 150000 \n", + "7436 2038 765 KIA Avella 2.0 125621 \n", + "7728 13485 843 TOYOTA Prius 1.5 212000 \n", + "1136 15677 0 FORD Fiesta 1.6 74800 \n", + "10640 16308 751 KIA Optima EX 2.4 92000 \n", "\n", - " Color Airbags Age ... Drive wheels_Rear Doors_Двухдверный \\\n", - "14829 White 4 11 ... False False \n", - "3632 Red 4 9 ... False False \n", - "4982 Silver 8 10 ... False False \n", - "16758 Grey 10 6 ... False False \n", - "6875 Silver 12 5 ... False False \n", - "... ... ... ... ... ... ... \n", - "18201 Grey 4 13 ... False False \n", - "7436 Silver 12 5 ... False False \n", - "7728 Silver 8 12 ... False False \n", - "1136 Silver 8 4 ... False False \n", - "10640 Silver 8 7 ... False False \n", + " Cylinders Color Airbags Age ... Drive wheels_Rear \\\n", + "15146 4 White 4 0 ... False \n", + "14145 4 Brown 2 13 ... False \n", + "8943 4 Beige 10 6 ... False \n", + "17889 4 Green 5 16 ... False \n", + "9515 4 Grey 4 2 ... False \n", + "... ... ... ... ... ... ... \n", + "18201 6 Grey 4 13 ... False \n", + "7436 4 Silver 12 5 ... False \n", + "7728 4 Silver 8 12 ... False \n", + "1136 4 Silver 8 4 ... False \n", + "10640 12 Silver 8 7 ... False \n", "\n", - " Doors_Многодверный Doors_Четырехдверный Wheel_Left wheel \\\n", - "14829 False True True \n", - "3632 False True True \n", - "4982 False True True \n", - "16758 False True True \n", - "6875 False True True \n", - "... ... ... ... \n", - "18201 False True True \n", - "7436 False True True \n", - "7728 False True True \n", - "1136 False True True \n", - "10640 False True True \n", + " Doors_Двухдверный Doors_Многодверный Doors_Четырехдверный \\\n", + "15146 False False True \n", + "14145 False False True \n", + "8943 False False True \n", + "17889 False False True \n", + "9515 False False True \n", + "... ... ... ... \n", + "18201 False False True \n", + "7436 False False True \n", + "7728 False False True \n", + "1136 False False True \n", + "10640 False False True \n", "\n", - " Wheel_Right-hand drive Age_bin_Новый Age_bin_Средний Age_bin_Старый \\\n", - "14829 False False False True \n", - "3632 False False True False \n", - "4982 False False True False \n", - "16758 False False True False \n", - "6875 False True False False \n", - "... ... ... ... ... \n", - "18201 False False False True \n", - "7436 False True False False \n", - "7728 False False False True \n", - "1136 False True False False \n", - "10640 False False True False \n", + " Wheel_Left wheel Wheel_Right-hand drive Age_bin_Новый \\\n", + "15146 True False True \n", + "14145 False True False \n", + "8943 True False False \n", + "17889 True False False \n", + "9515 True False True \n", + "... ... ... ... \n", + "18201 True False False \n", + "7436 True False True \n", + "7728 True False False \n", + "1136 True False True \n", + "10640 True False False \n", "\n", - " Age_bin_Очень старый \n", - "14829 False \n", - "3632 False \n", - "4982 False \n", - "16758 False \n", - "6875 False \n", - "... ... \n", - "18201 False \n", - "7436 False \n", - "7728 False \n", - "1136 False \n", - "10640 False \n", + " Age_bin_Средний Age_bin_Старый Age_bin_Очень старый \n", + "15146 False False False \n", + "14145 False True False \n", + "8943 True False False \n", + "17889 False False True \n", + "9515 False False False \n", + "... ... ... ... \n", + "18201 False True False \n", + "7436 False False False \n", + "7728 False True False \n", + "1136 False False False \n", + "10640 True False False \n", "\n", - "[10077 rows x 46 columns]" + "[8817 rows x 46 columns]" ] }, - "execution_count": 1083, + "execution_count": 1355, "metadata": {}, "output_type": "execute_result" } @@ -2185,7 +2310,7 @@ }, { "cell_type": "code", - "execution_count": 1084, + "execution_count": 1356, "metadata": {}, "outputs": [ { @@ -2240,7 +2365,7 @@ "dtype: object" ] }, - "execution_count": 1084, + "execution_count": 1356, "metadata": {}, "output_type": "execute_result" } @@ -2251,7 +2376,7 @@ }, { "cell_type": "code", - "execution_count": 1085, + "execution_count": 1357, "metadata": {}, "outputs": [ { @@ -2300,113 +2425,113 @@ " \n", " \n", " \n", - " 14829\n", - " -0.936428\n", - " 0.909873\n", - " DAEWOO\n", - " Lacetti\n", - " -0.212078\n", - " -0.855905\n", - " -0.399820\n", - " White\n", - " -0.681491\n", - " 0.446831\n", - " ...\n", - " False\n", - " False\n", - " False\n", - " True\n", - " True\n", - " False\n", - " False\n", - " False\n", - " True\n", - " False\n", - " \n", - " \n", - " 3632\n", - " 0.288147\n", - " 0.076376\n", - " HYUNDAI\n", - " Elantra\n", - " -0.757467\n", - " -0.422001\n", - " -0.399820\n", - " Red\n", - " -0.681491\n", - " 0.013523\n", - " ...\n", - " False\n", - " False\n", - " False\n", - " True\n", - " True\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " \n", - " \n", - " 4982\n", - " -0.342793\n", - " 0.626963\n", - " DODGE\n", - " Caliber\n", - " -0.212078\n", - " -0.159374\n", - " -0.399820\n", - " Silver\n", - " 0.330763\n", - " 0.230177\n", - " ...\n", - " False\n", - " False\n", - " False\n", - " True\n", - " True\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " \n", - " \n", - " 16758\n", - " -0.748245\n", - " 0.078552\n", - " HYUNDAI\n", - " Elantra\n", - " -0.484772\n", - " -0.885866\n", - " -0.399820\n", - " Grey\n", - " 0.836890\n", - " -0.636438\n", - " ...\n", - " False\n", - " False\n", - " False\n", - " True\n", - " True\n", - " False\n", - " False\n", - " True\n", - " False\n", - " False\n", - " \n", - " \n", - " 6875\n", - " 0.757313\n", - " -1.192368\n", + " 15146\n", + " 0.153774\n", + " -1.192982\n", " TOYOTA\n", " Prius\n", - " -0.484772\n", - " -1.693079\n", - " -0.399820\n", - " Silver\n", - " 1.343017\n", - " -0.853091\n", + " -0.479341\n", + " -1.531744\n", + " -0.403213\n", + " White\n", + " -0.683755\n", + " -1.946936\n", + " ...\n", + " False\n", + " False\n", + " False\n", + " True\n", + " True\n", + " False\n", + " True\n", + " False\n", + " False\n", + " False\n", + " \n", + " \n", + " 14145\n", + " -0.658018\n", + " -1.192982\n", + " TOYOTA\n", + " Ractis\n", + " -0.887855\n", + " -0.130245\n", + " -0.403213\n", + " Brown\n", + " -1.190217\n", + " 0.879266\n", + " ...\n", + " False\n", + " False\n", + " False\n", + " True\n", + " False\n", + " True\n", + " False\n", + " False\n", + " True\n", + " False\n", + " \n", + " \n", + " 8943\n", + " -0.136145\n", + " 0.081576\n", + " HYUNDAI\n", + " Elantra\n", + " -0.479341\n", + " -0.651122\n", + " -0.403213\n", + " Beige\n", + " 0.835631\n", + " -0.642535\n", + " ...\n", + " False\n", + " False\n", + " False\n", + " True\n", + " True\n", + " False\n", + " False\n", + " True\n", + " False\n", + " False\n", + " \n", + " \n", + " 17889\n", + " -0.455093\n", + " -1.192982\n", + " SUBARU\n", + " Forester L.L.BEAN\n", + " 0.473858\n", + " 1.263152\n", + " -0.403213\n", + " Green\n", + " -0.430524\n", + " 1.531466\n", + " ...\n", + " False\n", + " False\n", + " False\n", + " True\n", + " True\n", + " False\n", + " False\n", + " False\n", + " False\n", + " True\n", + " \n", + " \n", + " 9515\n", + " 2.780795\n", + " 1.703146\n", + " HYUNDAI\n", + " H1\n", + " 0.473858\n", + " -0.739330\n", + " -0.403213\n", + " Grey\n", + " -0.683755\n", + " -1.512135\n", " ...\n", " False\n", " False\n", @@ -2445,16 +2570,16 @@ " \n", " \n", " 18201\n", - " -0.603461\n", - " -1.192368\n", + " -0.600053\n", + " -1.192982\n", " AUDI\n", " A4\n", - " 0.333312\n", - " 0.324954\n", - " 1.520116\n", + " 0.337687\n", + " 0.318018\n", + " 1.538421\n", " Grey\n", - " -0.681491\n", - " 0.880138\n", + " -0.683755\n", + " 0.879266\n", " ...\n", " False\n", " False\n", @@ -2469,16 +2594,16 @@ " \n", " \n", " 7436\n", - " -1.370875\n", - " 0.472450\n", + " -1.368394\n", + " 0.476602\n", " KIA\n", " Avella\n", - " -0.212078\n", - " -0.003030\n", - " -0.399820\n", + " -0.206998\n", + " -0.011145\n", + " -0.403213\n", " Silver\n", - " 1.343017\n", - " -0.853091\n", + " 1.342092\n", + " -0.859935\n", " ...\n", " False\n", " False\n", @@ -2493,16 +2618,16 @@ " \n", " \n", " 7728\n", - " -0.313891\n", - " 0.642196\n", + " -0.310134\n", + " 0.646834\n", " TOYOTA\n", " Prius\n", - " -0.893814\n", - " 1.159074\n", - " -0.399820\n", + " -0.887855\n", + " 1.155137\n", + " -0.403213\n", " Silver\n", - " 0.330763\n", - " 0.663484\n", + " 0.329169\n", + " 0.661866\n", " ...\n", " False\n", " False\n", @@ -2517,16 +2642,16 @@ " \n", " \n", " 1136\n", - " -0.111488\n", - " -1.192368\n", + " -0.107486\n", + " -1.192982\n", " FORD\n", " Fiesta\n", - " -0.757467\n", - " -0.686753\n", - " -0.399820\n", + " -0.751684\n", + " -0.697325\n", + " -0.403213\n", " Silver\n", - " 0.330763\n", - " -1.069745\n", + " 0.329169\n", + " -1.077335\n", " ...\n", " False\n", " False\n", @@ -2541,16 +2666,16 @@ " \n", " \n", " 10640\n", - " -0.053223\n", - " 0.441983\n", + " -0.049151\n", + " 0.446048\n", " KIA\n", " Optima EX\n", - " 0.333312\n", - " -0.455352\n", - " 7.279922\n", + " 0.337687\n", + " -0.465093\n", + " 7.363324\n", " Silver\n", - " 0.330763\n", - " -0.419784\n", + " 0.329169\n", + " -0.425135\n", " ...\n", " False\n", " False\n", @@ -2565,79 +2690,79 @@ " \n", " \n", "\n", - "

10077 rows × 46 columns

\n", + "

8817 rows × 46 columns

\n", "" ], "text/plain": [ - " Price Levy Manufacturer Model Engine volume Mileage \\\n", - "14829 -0.936428 0.909873 DAEWOO Lacetti -0.212078 -0.855905 \n", - "3632 0.288147 0.076376 HYUNDAI Elantra -0.757467 -0.422001 \n", - "4982 -0.342793 0.626963 DODGE Caliber -0.212078 -0.159374 \n", - "16758 -0.748245 0.078552 HYUNDAI Elantra -0.484772 -0.885866 \n", - "6875 0.757313 -1.192368 TOYOTA Prius -0.484772 -1.693079 \n", - "... ... ... ... ... ... ... \n", - "18201 -0.603461 -1.192368 AUDI A4 0.333312 0.324954 \n", - "7436 -1.370875 0.472450 KIA Avella -0.212078 -0.003030 \n", - "7728 -0.313891 0.642196 TOYOTA Prius -0.893814 1.159074 \n", - "1136 -0.111488 -1.192368 FORD Fiesta -0.757467 -0.686753 \n", - "10640 -0.053223 0.441983 KIA Optima EX 0.333312 -0.455352 \n", + " Price Levy Manufacturer Model Engine volume \\\n", + "15146 0.153774 -1.192982 TOYOTA Prius -0.479341 \n", + "14145 -0.658018 -1.192982 TOYOTA Ractis -0.887855 \n", + "8943 -0.136145 0.081576 HYUNDAI Elantra -0.479341 \n", + "17889 -0.455093 -1.192982 SUBARU Forester L.L.BEAN 0.473858 \n", + "9515 2.780795 1.703146 HYUNDAI H1 0.473858 \n", + "... ... ... ... ... ... \n", + "18201 -0.600053 -1.192982 AUDI A4 0.337687 \n", + "7436 -1.368394 0.476602 KIA Avella -0.206998 \n", + "7728 -0.310134 0.646834 TOYOTA Prius -0.887855 \n", + "1136 -0.107486 -1.192982 FORD Fiesta -0.751684 \n", + "10640 -0.049151 0.446048 KIA Optima EX 0.337687 \n", "\n", - " Cylinders Color Airbags Age ... Drive wheels_Rear \\\n", - "14829 -0.399820 White -0.681491 0.446831 ... False \n", - "3632 -0.399820 Red -0.681491 0.013523 ... False \n", - "4982 -0.399820 Silver 0.330763 0.230177 ... False \n", - "16758 -0.399820 Grey 0.836890 -0.636438 ... False \n", - "6875 -0.399820 Silver 1.343017 -0.853091 ... False \n", - "... ... ... ... ... ... ... \n", - "18201 1.520116 Grey -0.681491 0.880138 ... False \n", - "7436 -0.399820 Silver 1.343017 -0.853091 ... False \n", - "7728 -0.399820 Silver 0.330763 0.663484 ... False \n", - "1136 -0.399820 Silver 0.330763 -1.069745 ... False \n", - "10640 7.279922 Silver 0.330763 -0.419784 ... False \n", + " Mileage Cylinders Color Airbags Age ... \\\n", + "15146 -1.531744 -0.403213 White -0.683755 -1.946936 ... \n", + "14145 -0.130245 -0.403213 Brown -1.190217 0.879266 ... \n", + "8943 -0.651122 -0.403213 Beige 0.835631 -0.642535 ... \n", + "17889 1.263152 -0.403213 Green -0.430524 1.531466 ... \n", + "9515 -0.739330 -0.403213 Grey -0.683755 -1.512135 ... \n", + "... ... ... ... ... ... ... \n", + "18201 0.318018 1.538421 Grey -0.683755 0.879266 ... \n", + "7436 -0.011145 -0.403213 Silver 1.342092 -0.859935 ... \n", + "7728 1.155137 -0.403213 Silver 0.329169 0.661866 ... \n", + "1136 -0.697325 -0.403213 Silver 0.329169 -1.077335 ... \n", + "10640 -0.465093 7.363324 Silver 0.329169 -0.425135 ... \n", "\n", - " Doors_Двухдверный Doors_Многодверный Doors_Четырехдверный \\\n", - "14829 False False True \n", - "3632 False False True \n", - "4982 False False True \n", - "16758 False False True \n", - "6875 False False True \n", - "... ... ... ... \n", - "18201 False False True \n", - "7436 False False True \n", - "7728 False False True \n", - "1136 False False True \n", - "10640 False False True \n", + " Drive wheels_Rear Doors_Двухдверный Doors_Многодверный \\\n", + "15146 False False False \n", + "14145 False False False \n", + "8943 False False False \n", + "17889 False False False \n", + "9515 False False False \n", + "... ... ... ... \n", + "18201 False False False \n", + "7436 False False False \n", + "7728 False False False \n", + "1136 False False False \n", + "10640 False False False \n", "\n", - " Wheel_Left wheel Wheel_Right-hand drive Age_bin_Новый \\\n", - "14829 True False False \n", - "3632 True False False \n", - "4982 True False False \n", - "16758 True False False \n", - "6875 True False True \n", - "... ... ... ... \n", - "18201 True False False \n", - "7436 True False True \n", - "7728 True False False \n", - "1136 True False True \n", - "10640 True False False \n", + " Doors_Четырехдверный Wheel_Left wheel Wheel_Right-hand drive \\\n", + "15146 True True False \n", + "14145 True False True \n", + "8943 True True False \n", + "17889 True True False \n", + "9515 True True False \n", + "... ... ... ... \n", + "18201 True True False \n", + "7436 True True False \n", + "7728 True True False \n", + "1136 True True False \n", + "10640 True True False \n", "\n", - " Age_bin_Средний Age_bin_Старый Age_bin_Очень старый \n", - "14829 False True False \n", - "3632 True False False \n", - "4982 True False False \n", - "16758 True False False \n", - "6875 False False False \n", - "... ... ... ... \n", - "18201 False True False \n", - "7436 False False False \n", - "7728 False True False \n", - "1136 False False False \n", - "10640 True False False \n", + " Age_bin_Новый Age_bin_Средний Age_bin_Старый Age_bin_Очень старый \n", + "15146 True False False False \n", + "14145 False False True False \n", + "8943 False True False False \n", + "17889 False False False True \n", + "9515 True False False False \n", + "... ... ... ... ... \n", + "18201 False False True False \n", + "7436 True False False False \n", + "7728 False False True False \n", + "1136 True False False False \n", + "10640 False True False False \n", "\n", - "[10077 rows x 46 columns]" + "[8817 rows x 46 columns]" ] }, - "execution_count": 1085, + "execution_count": 1357, "metadata": {}, "output_type": "execute_result" } @@ -2674,7 +2799,7 @@ }, { "cell_type": "code", - "execution_count": 1086, + "execution_count": 1358, "metadata": {}, "outputs": [ { @@ -2710,7 +2835,7 @@ }, { "cell_type": "code", - "execution_count": 1087, + "execution_count": 1359, "metadata": {}, "outputs": [ { @@ -2764,7 +2889,7 @@ " ]" ] }, - "execution_count": 1087, + "execution_count": 1359, "metadata": {}, "output_type": "execute_result" }