diff --git a/notebooks/lab4.ipynb b/notebooks/lab4.ipynb index 8dead69..2bea38a 100644 --- a/notebooks/lab4.ipynb +++ b/notebooks/lab4.ipynb @@ -13,7 +13,7 @@ }, { "cell_type": "code", - "execution_count": 533, + "execution_count": 833, "metadata": {}, "outputs": [ { @@ -325,7 +325,7 @@ "[19237 rows x 17 columns]" ] }, - "execution_count": 533, + "execution_count": 833, "metadata": {}, "output_type": "execute_result" } @@ -375,7 +375,7 @@ }, { "cell_type": "code", - "execution_count": 534, + "execution_count": 834, "metadata": {}, "outputs": [ { @@ -401,7 +401,7 @@ "dtype: object" ] }, - "execution_count": 534, + "execution_count": 834, "metadata": {}, "output_type": "execute_result" } @@ -412,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": 535, + "execution_count": 835, "metadata": {}, "outputs": [ { @@ -426,7 +426,7 @@ " 6.8, 4.5, 7.3, 0.1, 3.1, 6.4, 3.9, 0.9, 5.2, 5.8])" ] }, - "execution_count": 535, + "execution_count": 835, "metadata": {}, "output_type": "execute_result" } @@ -439,7 +439,7 @@ }, { "cell_type": "code", - "execution_count": 536, + "execution_count": 836, "metadata": {}, "outputs": [ { @@ -448,7 +448,7 @@ "array([186005, 192000, 200000, ..., 140607, 307325, 186923])" ] }, - "execution_count": 536, + "execution_count": 836, "metadata": {}, "output_type": "execute_result" } @@ -461,7 +461,7 @@ }, { "cell_type": "code", - "execution_count": 537, + "execution_count": 837, "metadata": {}, "outputs": [ { @@ -532,7 +532,7 @@ " 1901])" ] }, - "execution_count": 537, + "execution_count": 837, "metadata": {}, "output_type": "execute_result" } @@ -545,7 +545,7 @@ }, { "cell_type": "code", - "execution_count": 538, + "execution_count": 838, "metadata": {}, "outputs": [ { @@ -554,7 +554,7 @@ "array([ 6, 4, 8, 1, 12, 3, 2, 16, 5, 7, 9, 10, 14])" ] }, - "execution_count": 538, + "execution_count": 838, "metadata": {}, "output_type": "execute_result" } @@ -566,7 +566,7 @@ }, { "cell_type": "code", - "execution_count": 539, + "execution_count": 839, "metadata": {}, "outputs": [ { @@ -575,7 +575,7 @@ "array(['04-May', '02-Mar', '>5'], dtype=object)" ] }, - "execution_count": 539, + "execution_count": 839, "metadata": {}, "output_type": "execute_result" } @@ -586,7 +586,7 @@ }, { "cell_type": "code", - "execution_count": 540, + "execution_count": 840, "metadata": {}, "outputs": [ { @@ -595,7 +595,7 @@ "array(['Четырехдверный', 'Двухдверный', 'Многодверный'], dtype=object)" ] }, - "execution_count": 540, + "execution_count": 840, "metadata": {}, "output_type": "execute_result" } @@ -609,7 +609,7 @@ }, { "cell_type": "code", - "execution_count": 541, + "execution_count": 841, "metadata": {}, "outputs": [ { @@ -618,7 +618,7 @@ "array([ 1, 3, 6, ..., 627220, 872946, 26307500])" ] }, - "execution_count": 541, + "execution_count": 841, "metadata": {}, "output_type": "execute_result" } @@ -630,7 +630,7 @@ }, { "cell_type": "code", - "execution_count": 542, + "execution_count": 842, "metadata": {}, "outputs": [ { @@ -650,7 +650,7 @@ }, { "cell_type": "code", - "execution_count": 543, + "execution_count": 843, "metadata": {}, "outputs": [ { @@ -659,7 +659,7 @@ "array([ 500, 549, 600, ..., 627220, 872946, 26307500])" ] }, - "execution_count": 543, + "execution_count": 843, "metadata": {}, "output_type": "execute_result" } @@ -671,7 +671,7 @@ }, { "cell_type": "code", - "execution_count": 544, + "execution_count": 844, "metadata": {}, "outputs": [ { @@ -684,7 +684,7 @@ " 2014, 2015, 2016, 2017, 2018, 2019, 2020])" ] }, - "execution_count": 544, + "execution_count": 844, "metadata": {}, "output_type": "execute_result" } @@ -696,7 +696,7 @@ }, { "cell_type": "code", - "execution_count": 545, + "execution_count": 845, "metadata": {}, "outputs": [ { @@ -1021,7 +1021,7 @@ "[17574 rows x 17 columns]" ] }, - "execution_count": 545, + "execution_count": 845, "metadata": {}, "output_type": "execute_result" } @@ -1039,7 +1039,7 @@ }, { "cell_type": "code", - "execution_count": 546, + "execution_count": 846, "metadata": {}, "outputs": [ { @@ -1048,7 +1048,7 @@ "np.int64(2773)" ] }, - "execution_count": 546, + "execution_count": 846, "metadata": {}, "output_type": "execute_result" } @@ -1059,7 +1059,7 @@ }, { "cell_type": "code", - "execution_count": 547, + "execution_count": 847, "metadata": {}, "outputs": [ { @@ -1081,7 +1081,7 @@ }, { "cell_type": "code", - "execution_count": 548, + "execution_count": 848, "metadata": {}, "outputs": [ { @@ -1107,7 +1107,7 @@ "dtype: int64" ] }, - "execution_count": 548, + "execution_count": 848, "metadata": {}, "output_type": "execute_result" } @@ -1125,7 +1125,7 @@ }, { "cell_type": "code", - "execution_count": 549, + "execution_count": 849, "metadata": {}, "outputs": [ { @@ -1151,7 +1151,7 @@ "dtype: object" ] }, - "execution_count": 549, + "execution_count": 849, "metadata": {}, "output_type": "execute_result" } @@ -1162,7 +1162,7 @@ }, { "cell_type": "code", - "execution_count": 550, + "execution_count": 850, "metadata": {}, "outputs": [ { @@ -1224,7 +1224,7 @@ }, { "cell_type": "code", - "execution_count": 551, + "execution_count": 851, "metadata": {}, "outputs": [ { @@ -1255,7 +1255,7 @@ }, { "cell_type": "code", - "execution_count": 552, + "execution_count": 852, "metadata": {}, "outputs": [ { @@ -1310,7 +1310,7 @@ }, { "cell_type": "code", - "execution_count": 553, + "execution_count": 853, "metadata": {}, "outputs": [], "source": [ @@ -1326,7 +1326,7 @@ }, { "cell_type": "code", - "execution_count": 554, + "execution_count": 854, "metadata": {}, "outputs": [ { @@ -1352,7 +1352,7 @@ "dtype: object" ] }, - "execution_count": 554, + "execution_count": 854, "metadata": {}, "output_type": "execute_result" } @@ -1363,7 +1363,7 @@ }, { "cell_type": "code", - "execution_count": 555, + "execution_count": 855, "metadata": {}, "outputs": [], "source": [ @@ -1467,7 +1467,7 @@ }, { "cell_type": "code", - "execution_count": 556, + "execution_count": 856, "metadata": {}, "outputs": [ { @@ -1866,7 +1866,7 @@ "[12597 rows x 83 columns]" ] }, - "execution_count": 556, + "execution_count": 856, "metadata": {}, "output_type": "execute_result" } @@ -1890,7 +1890,7 @@ }, { "cell_type": "code", - "execution_count": 557, + "execution_count": 857, "metadata": {}, "outputs": [ { @@ -1915,7 +1915,7 @@ }, { "cell_type": "code", - "execution_count": 558, + "execution_count": 858, "metadata": {}, "outputs": [ { @@ -2314,7 +2314,7 @@ "[8817 rows x 82 columns]" ] }, - "execution_count": 558, + "execution_count": 858, "metadata": {}, "output_type": "execute_result" } @@ -2337,7 +2337,7 @@ }, { "cell_type": "code", - "execution_count": 559, + "execution_count": 859, "metadata": {}, "outputs": [], "source": [ @@ -2377,7 +2377,7 @@ }, { "cell_type": "code", - "execution_count": 560, + "execution_count": 860, "metadata": {}, "outputs": [ { @@ -2417,21 +2417,7 @@ "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but DecisionTreeRegressor was fitted without feature names\n", " warnings.warn(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but KNeighborsRegressor was fitted without feature names\n", - " warnings.warn(\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: decision_tree\n", - "Model: knn\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ + " warnings.warn(\n", "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but KNeighborsRegressor was fitted without feature names\n", " warnings.warn(\n" ] @@ -2440,6 +2426,8 @@ "name": "stdout", "output_type": "stream", "text": [ + "Model: decision_tree\n", + "Model: knn\n", "Model: random_forest\n" ] }, @@ -2490,112 +2478,112 @@ }, { "cell_type": "code", - "execution_count": 561, + "execution_count": 861, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 RMSE_trainRMSE_testRMAE_testR2_testRMSE_trainRMSE_testRMAE_testR2_test
random_forest0.3245210.5481050.6027030.702688random_forest0.3245210.5481050.6027030.702688
knn0.5293590.5943540.6351910.650397knn0.5293590.5943540.6351910.650397
decision_tree0.2811120.6592400.6542940.569896decision_tree0.2811120.6592400.6542940.569896
ridge0.7536370.7591540.7588690.429644ridge0.7536370.7591540.7588690.429644
linear0.7525690.7593410.7587840.429364linear0.7525690.7593410.7587840.429364
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 561, + "execution_count": 861, "metadata": {}, "output_type": "execute_result" } @@ -2611,7 +2599,7 @@ }, { "cell_type": "code", - "execution_count": 562, + "execution_count": 862, "metadata": {}, "outputs": [ { @@ -2639,7 +2627,7 @@ }, { "cell_type": "code", - "execution_count": 563, + "execution_count": 863, "metadata": {}, "outputs": [ { @@ -2663,25 +2651,6 @@ " \n", " \n", " \n", - " Leather interior_Yes\n", - " Category_Coupe\n", - " Category_Goods wagon\n", - " Category_Hatchback\n", - " Category_Jeep\n", - " Category_Limousine\n", - " Category_Microbus\n", - " Category_Minivan\n", - " Category_Pickup\n", - " Category_Sedan\n", - " ...\n", - " Manufacturer_VAZ\n", - " Manufacturer_VOLKSWAGEN\n", - " Manufacturer_VOLVO\n", - " Manufacturer_სხვა\n", - " Levy\n", - " Prod. year\n", - " Engine volume\n", - " Airbags\n", " Price\n", " PricePred\n", " \n", @@ -2689,176 +2658,43 @@ " \n", " \n", " 15146\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " -1.187596\n", - " 1.943625\n", - " -0.487669\n", - " -0.673150\n", " 0.144553\n", " 0.603431\n", " \n", " \n", " 14145\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " -1.187596\n", - " -0.874572\n", - " -0.900100\n", - " -1.179883\n", " -0.665312\n", " -0.619034\n", " \n", " \n", " 8943\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.081225\n", - " 0.642919\n", - " -0.487669\n", - " 0.847049\n", " -0.144678\n", " -0.179776\n", " \n", " \n", " 17889\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " -1.187596\n", - " -1.524925\n", - " 0.474671\n", - " -0.419784\n", " -0.462869\n", " -0.566201\n", " \n", " \n", " 9515\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.695495\n", - " 1.510056\n", - " 0.474671\n", - " -0.673150\n", " 2.765339\n", " 2.675833\n", " \n", " \n", "\n", - "

5 rows × 84 columns

\n", "" ], "text/plain": [ - " Leather interior_Yes Category_Coupe Category_Goods wagon \\\n", - "15146 0.0 0.0 0.0 \n", - "14145 0.0 0.0 0.0 \n", - "8943 0.0 0.0 0.0 \n", - "17889 1.0 0.0 0.0 \n", - "9515 1.0 0.0 0.0 \n", - "\n", - " Category_Hatchback Category_Jeep Category_Limousine \\\n", - "15146 0.0 0.0 0.0 \n", - "14145 0.0 0.0 0.0 \n", - "8943 0.0 0.0 0.0 \n", - "17889 0.0 1.0 0.0 \n", - "9515 0.0 0.0 0.0 \n", - "\n", - " Category_Microbus Category_Minivan Category_Pickup Category_Sedan \\\n", - "15146 0.0 0.0 0.0 1.0 \n", - "14145 0.0 0.0 0.0 1.0 \n", - "8943 0.0 0.0 0.0 1.0 \n", - "17889 0.0 0.0 0.0 0.0 \n", - "9515 0.0 0.0 0.0 0.0 \n", - "\n", - " ... Manufacturer_VAZ Manufacturer_VOLKSWAGEN Manufacturer_VOLVO \\\n", - "15146 ... 0.0 0.0 0.0 \n", - "14145 ... 0.0 0.0 0.0 \n", - "8943 ... 0.0 0.0 0.0 \n", - "17889 ... 0.0 0.0 0.0 \n", - "9515 ... 0.0 0.0 0.0 \n", - "\n", - " Manufacturer_სხვა Levy Prod. year Engine volume Airbags \\\n", - "15146 0.0 -1.187596 1.943625 -0.487669 -0.673150 \n", - "14145 0.0 -1.187596 -0.874572 -0.900100 -1.179883 \n", - "8943 0.0 0.081225 0.642919 -0.487669 0.847049 \n", - "17889 0.0 -1.187596 -1.524925 0.474671 -0.419784 \n", - "9515 0.0 1.695495 1.510056 0.474671 -0.673150 \n", - "\n", - " Price PricePred \n", - "15146 0.144553 0.603431 \n", - "14145 -0.665312 -0.619034 \n", - "8943 -0.144678 -0.179776 \n", - "17889 -0.462869 -0.566201 \n", - "9515 2.765339 2.675833 \n", - "\n", - "[5 rows x 84 columns]" + " Price PricePred\n", + "15146 0.144553 0.603431\n", + "14145 -0.665312 -0.619034\n", + "8943 -0.144678 -0.179776\n", + "17889 -0.462869 -0.566201\n", + "9515 2.765339 2.675833" ] }, - "execution_count": 563, + "execution_count": 863, "metadata": {}, "output_type": "execute_result" } @@ -2866,7 +2702,6 @@ "source": [ "pd.concat(\n", " [\n", - " train_df,\n", " price_y_train,\n", " pd.Series(\n", " models[best_model][\"train_preds\"],\n", @@ -2880,7 +2715,7 @@ }, { "cell_type": "code", - "execution_count": 564, + "execution_count": 864, "metadata": {}, "outputs": [ { @@ -2904,25 +2739,6 @@ " \n", " \n", " \n", - " Leather interior_Yes\n", - " Category_Coupe\n", - " Category_Goods wagon\n", - " Category_Hatchback\n", - " Category_Jeep\n", - " Category_Limousine\n", - " Category_Microbus\n", - " Category_Minivan\n", - " Category_Pickup\n", - " Category_Sedan\n", - " ...\n", - " Manufacturer_VAZ\n", - " Manufacturer_VOLKSWAGEN\n", - " Manufacturer_VOLVO\n", - " Manufacturer_სხვა\n", - " Levy\n", - " Prod. year\n", - " Engine volume\n", - " Airbags\n", " Price\n", " PricePred\n", " \n", @@ -2930,176 +2746,43 @@ " \n", " \n", " 10968\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " -0.055651\n", - " -0.007434\n", - " -1.037578\n", - " 1.353782\n", " -1.316082\n", " -1.213541\n", " \n", " \n", " 1121\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " -0.331577\n", - " 0.642919\n", - " 0.474671\n", - " 1.353782\n", " -1.489657\n", " -0.753720\n", " \n", " \n", " 4355\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.398430\n", - " 1.076487\n", - " -0.762623\n", - " -0.673150\n", " 0.771343\n", " 0.798361\n", " \n", " \n", " 17702\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.528788\n", - " -0.441003\n", - " -0.762623\n", - " -0.673150\n", " -0.679792\n", " -0.531793\n", " \n", " \n", " 6167\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 1.0\n", - " ...\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.0\n", - " 0.719981\n", - " -0.441003\n", - " -0.487669\n", - " 0.340316\n", " -0.173638\n", " -0.322582\n", " \n", " \n", "\n", - "

5 rows × 84 columns

\n", "" ], "text/plain": [ - " Leather interior_Yes Category_Coupe Category_Goods wagon \\\n", - "10968 1.0 0.0 0.0 \n", - "1121 1.0 0.0 0.0 \n", - "4355 1.0 0.0 0.0 \n", - "17702 1.0 0.0 0.0 \n", - "6167 0.0 0.0 0.0 \n", - "\n", - " Category_Hatchback Category_Jeep Category_Limousine \\\n", - "10968 1.0 0.0 0.0 \n", - "1121 0.0 0.0 0.0 \n", - "4355 0.0 0.0 0.0 \n", - "17702 1.0 0.0 0.0 \n", - "6167 0.0 0.0 0.0 \n", - "\n", - " Category_Microbus Category_Minivan Category_Pickup Category_Sedan \\\n", - "10968 0.0 0.0 0.0 0.0 \n", - "1121 0.0 0.0 0.0 1.0 \n", - "4355 0.0 0.0 0.0 1.0 \n", - "17702 0.0 0.0 0.0 0.0 \n", - "6167 0.0 0.0 0.0 1.0 \n", - "\n", - " ... Manufacturer_VAZ Manufacturer_VOLKSWAGEN Manufacturer_VOLVO \\\n", - "10968 ... 0.0 0.0 0.0 \n", - "1121 ... 0.0 0.0 0.0 \n", - "4355 ... 0.0 0.0 0.0 \n", - "17702 ... 0.0 0.0 0.0 \n", - "6167 ... 0.0 0.0 0.0 \n", - "\n", - " Manufacturer_სხვა Levy Prod. year Engine volume Airbags \\\n", - "10968 0.0 -0.055651 -0.007434 -1.037578 1.353782 \n", - "1121 0.0 -0.331577 0.642919 0.474671 1.353782 \n", - "4355 0.0 0.398430 1.076487 -0.762623 -0.673150 \n", - "17702 0.0 0.528788 -0.441003 -0.762623 -0.673150 \n", - "6167 0.0 0.719981 -0.441003 -0.487669 0.340316 \n", - "\n", - " Price PricePred \n", - "10968 -1.316082 -1.213541 \n", - "1121 -1.489657 -0.753720 \n", - "4355 0.771343 0.798361 \n", - "17702 -0.679792 -0.531793 \n", - "6167 -0.173638 -0.322582 \n", - "\n", - "[5 rows x 84 columns]" + " Price PricePred\n", + "10968 -1.316082 -1.213541\n", + "1121 -1.489657 -0.753720\n", + "4355 0.771343 0.798361\n", + "17702 -0.679792 -0.531793\n", + "6167 -0.173638 -0.322582" ] }, - "execution_count": 564, + "execution_count": 864, "metadata": {}, "output_type": "execute_result" } @@ -3107,7 +2790,6 @@ "source": [ "pd.concat(\n", " [\n", - " test_df,\n", " price_y_test,\n", " pd.Series(\n", " models[best_model][\"preds\"],\n", @@ -3118,6 +2800,142 @@ " axis=1,\n", ").head(5)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Подбор гиперпараметров методом поиска по сетке" + ] + }, + { + "cell_type": "code", + "execution_count": 865, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 20, 'n_estimators': 70}" + ] + }, + "execution_count": 865, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "optimized_model_type = \"random_forest\"\n", + "\n", + "random_forest_model = models[optimized_model_type][\"fitted\"]\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [\n", + " 40,\n", + " 50,\n", + " 60,\n", + " 70,\n", + " 80,\n", + " ],\n", + " \"max_depth\": [\n", + " 10,\n", + " 20,\n", + " 30,\n", + " 40,\n", + " 50,\n", + " ],\n", + "}\n", + "\n", + "gs_optomizer = GridSearchCV(\n", + " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n", + ")\n", + "gs_optomizer.fit(train_df, price_y_train.values.ravel())\n", + "gs_optomizer.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 866, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'n_estimators': 75}" + ] + }, + "execution_count": 866, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "optimized_model_type = \"random_forest\"\n", + "\n", + "random_forest_model = models[optimized_model_type][\"fitted\"]\n", + "\n", + "param_grid = {\n", + " \"n_estimators\": [\n", + " 70,\n", + " 71,\n", + " 72,\n", + " 73,\n", + " 74,\n", + " 75,\n", + " 76,\n", + " ],\n", + "}\n", + "\n", + "gs_optomizer = GridSearchCV(\n", + " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n", + ")\n", + "gs_optomizer.fit(train_df, price_y_train.values.ravel())\n", + "gs_optomizer.best_params_" + ] + }, + { + "cell_type": "code", + "execution_count": 868, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'max_depth': 17}" + ] + }, + "execution_count": 868, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "from sklearn.model_selection import GridSearchCV\n", + "\n", + "optimized_model_type = \"random_forest\"\n", + "\n", + "random_forest_model = models[optimized_model_type][\"fitted\"]\n", + "\n", + "param_grid = {\n", + " \"max_depth\": [\n", + " 16,\n", + " 17,\n", + " 18,\n", + " 19,\n", + " 20,\n", + " ],\n", + "}\n", + "\n", + "gs_optomizer = GridSearchCV(\n", + " estimator=random_forest_model, param_grid=param_grid, n_jobs=-1\n", + ")\n", + "gs_optomizer.fit(train_df, price_y_train.values.ravel())\n", + "gs_optomizer.best_params_" + ] } ], "metadata": { diff --git a/notebooks/lab4_pipeline.ipynb b/notebooks/lab4_pipeline.ipynb index b091041..df7229d 100644 --- a/notebooks/lab4_pipeline.ipynb +++ b/notebooks/lab4_pipeline.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 37, + "execution_count": 577, "metadata": {}, "outputs": [ { @@ -334,7 +334,7 @@ "[12597 rows x 17 columns]" ] }, - "execution_count": 37, + "execution_count": 577, "metadata": {}, "output_type": "execute_result" } @@ -376,7 +376,7 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 578, "metadata": {}, "outputs": [ { @@ -402,7 +402,7 @@ "dtype: object" ] }, - "execution_count": 38, + "execution_count": 578, "metadata": {}, "output_type": "execute_result" } @@ -420,13 +420,12 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 579, "metadata": {}, "outputs": [], "source": [ "columns_to_drop = [\n", " \"Model\",\n", - " # \"Manufacturer\",\n", " \"Color\",\n", " \"Doors\",\n", " \"Cylinders\",\n", @@ -512,7 +511,7 @@ " (\"features_preprocessing\", features_preprocessing),\n", " (\"drop_columns\", drop_columns),\n", " ]\n", - ")" + ", memory=None)" ] }, { @@ -524,7 +523,7 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 580, "metadata": {}, "outputs": [ { @@ -923,7 +922,7 @@ "[12597 rows x 83 columns]" ] }, - "execution_count": 40, + "execution_count": 580, "metadata": {}, "output_type": "execute_result" } @@ -947,7 +946,7 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 581, "metadata": {}, "outputs": [ { @@ -970,7 +969,7 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 582, "metadata": {}, "outputs": [ { @@ -1369,7 +1368,7 @@ "[8817 rows x 82 columns]" ] }, - "execution_count": 42, + "execution_count": 582, "metadata": {}, "output_type": "execute_result" } @@ -1392,17 +1391,26 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 583, "metadata": {}, "outputs": [], "source": [ "models = {\n", " \"linear\": {\"model\": linear_model.LinearRegression(n_jobs=-1)},\n", " \"ridge\": {\"model\": linear_model.RidgeCV()},\n", - " \"decision_tree\": {\"model\": tree.DecisionTreeRegressor(random_state=random_state)},\n", - " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=7, n_jobs=-1)},\n", - " \"random_forest\": {\n", - " \"model\": ensemble.RandomForestRegressor(random_state=random_state, n_jobs=-1)\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeRegressor(max_depth=35, random_state=random_state)\n", + " },\n", + " \"knn\": {\"model\": neighbors.KNeighborsRegressor(n_neighbors=6, n_jobs=-1)},\n", + " \"random_forest1\": {\n", + " \"model\": ensemble.RandomForestRegressor(\n", + " random_state=random_state, n_jobs=-1,\n", + " )\n", + " },\n", + " \"random_forest2\": {\n", + " \"model\": ensemble.RandomForestRegressor(\n", + " random_state=random_state, n_jobs=-1, max_depth=17, n_estimators=75\n", + " )\n", " },\n", "}" ] @@ -1416,7 +1424,7 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 584, "metadata": {}, "outputs": [ { @@ -1467,7 +1475,24 @@ "text": [ "Model: decision_tree\n", "Model: knn\n", - "Model: random_forest\n" + "Model: random_forest1\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but RandomForestRegressor was fitted without feature names\n", + " warnings.warn(\n", + "c:\\Users\\user\\source\\repos\\mai_pi-33_zakharov\\.venv\\Lib\\site-packages\\sklearn\\base.py:486: UserWarning: X has feature names, but RandomForestRegressor was fitted without feature names\n", + " warnings.warn(\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: random_forest2\n" ] }, { @@ -1517,112 +1542,135 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 585, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 RMSE_trainRMSE_testRMAE_testR2_testRMSE_trainRMSE_testRMAE_testR2_test
random_forest0.3245210.5481050.6027030.702688random_forest20.3555560.5449280.6033620.706124
knn0.5293590.5943540.6351910.650397random_forest10.3245210.5481050.6027030.702688
decision_tree0.2811120.6592400.6542940.569896knn0.5200510.5917740.6326280.653426
ridge0.7536370.7591540.7588690.429644decision_tree0.2811130.6590130.6552310.570193
linear0.7525690.7593410.7587840.429364ridge0.7536370.7591540.7588690.429644
linear0.7525690.7593410.7587840.429364
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 45, + "execution_count": 585, "metadata": {}, "output_type": "execute_result" } @@ -1638,13 +1686,13 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 586, "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "'random_forest'" + "'random_forest2'" ] }, "metadata": {}, @@ -1666,7 +1714,7 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 587, "metadata": {}, "outputs": [ { @@ -1736,7 +1784,7 @@ " -0.487669\n", " -0.673150\n", " 0.144553\n", - " 0.603431\n", + " 0.496276\n", " \n", " \n", " 9482\n", @@ -1760,7 +1808,7 @@ " -0.900100\n", " -1.179883\n", " -0.665312\n", - " -0.619034\n", + " -0.598765\n", " \n", " \n", " 6177\n", @@ -1784,7 +1832,7 @@ " -0.487669\n", " 0.847049\n", " -0.144678\n", - " -0.179776\n", + " -0.075680\n", " \n", " \n", " 11756\n", @@ -1808,7 +1856,7 @@ " 0.474671\n", " -0.419784\n", " -0.462869\n", - " -0.566201\n", + " -0.564690\n", " \n", " \n", " 6557\n", @@ -1832,7 +1880,7 @@ " 0.474671\n", " -0.673150\n", " 2.765339\n", - " 2.675833\n", + " 2.638053\n", " \n", " \n", "\n", @@ -1876,16 +1924,16 @@ "6557 0.0 1.695495 1.510056 0.474671 -0.673150 \n", "\n", " Price PricePred \n", - "10083 0.144553 0.603431 \n", - "9482 -0.665312 -0.619034 \n", - "6177 -0.144678 -0.179776 \n", - "11756 -0.462869 -0.566201 \n", - "6557 2.765339 2.675833 \n", + "10083 0.144553 0.496276 \n", + "9482 -0.665312 -0.598765 \n", + "6177 -0.144678 -0.075680 \n", + "11756 -0.462869 -0.564690 \n", + "6557 2.765339 2.638053 \n", "\n", "[5 rows x 84 columns]" ] }, - "execution_count": 47, + "execution_count": 587, "metadata": {}, "output_type": "execute_result" } @@ -1907,7 +1955,7 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 588, "metadata": {}, "outputs": [ { @@ -1977,7 +2025,7 @@ " -1.037578\n", " 1.353782\n", " -1.316082\n", - " -1.213541\n", + " -1.121364\n", " \n", " \n", " 850\n", @@ -2001,7 +2049,7 @@ " 0.474671\n", " 1.353782\n", " -1.489657\n", - " -0.753720\n", + " -0.675715\n", " \n", " \n", " 3126\n", @@ -2025,7 +2073,7 @@ " -0.762623\n", " -0.673150\n", " 0.771343\n", - " 0.798361\n", + " 0.795271\n", " \n", " \n", " 11638\n", @@ -2049,7 +2097,7 @@ " -0.762623\n", " -0.673150\n", " -0.679792\n", - " -0.531793\n", + " -0.537321\n", " \n", " \n", " 4367\n", @@ -2073,7 +2121,7 @@ " -0.487669\n", " 0.340316\n", " -0.173638\n", - " -0.322582\n", + " -0.317791\n", " \n", " \n", "\n", @@ -2117,16 +2165,16 @@ "4367 0.0 0.719981 -0.441003 -0.487669 0.340316 \n", "\n", " Price PricePred \n", - "7472 -1.316082 -1.213541 \n", - "850 -1.489657 -0.753720 \n", - "3126 0.771343 0.798361 \n", - "11638 -0.679792 -0.531793 \n", - "4367 -0.173638 -0.322582 \n", + "7472 -1.316082 -1.121364 \n", + "850 -1.489657 -0.675715 \n", + "3126 0.771343 0.795271 \n", + "11638 -0.679792 -0.537321 \n", + "4367 -0.173638 -0.317791 \n", "\n", "[5 rows x 84 columns]" ] }, - "execution_count": 48, + "execution_count": 588, "metadata": {}, "output_type": "execute_result" }