diff --git a/lab_4/lab_4.ipynb b/lab_4/lab_4.ipynb index 6838ed8..b1c86bc 100644 --- a/lab_4/lab_4.ipynb +++ b/lab_4/lab_4.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 112, + "execution_count": 313, "metadata": {}, "outputs": [ { @@ -375,7 +375,7 @@ "[10000 rows x 21 columns]" ] }, - "execution_count": 112, + "execution_count": 313, "metadata": {}, "output_type": "execute_result" } @@ -401,7 +401,7 @@ }, { "cell_type": "code", - "execution_count": 113, + "execution_count": 314, "metadata": {}, "outputs": [ { @@ -1026,7 +1026,7 @@ "[20 rows x 22 columns]" ] }, - "execution_count": 113, + "execution_count": 314, "metadata": {}, "output_type": "execute_result" } @@ -1062,7 +1062,7 @@ }, { "cell_type": "code", - "execution_count": 114, + "execution_count": 315, "metadata": {}, "outputs": [ { @@ -2103,8 +2103,6 @@ "\n", "features_preprocessing -- трансформер для предобработки признаков\n", "\n", - "features_engineering -- трансформер для конструирования признаков\n", - "\n", "drop_columns -- трансформер для удаления колонок\n", "\n", "pipeline_end -- основной конвейер предобработки данных и конструирования признаков" @@ -2112,7 +2110,7 @@ }, { "cell_type": "code", - "execution_count": 191, + "execution_count": 316, "metadata": {}, "outputs": [ { @@ -2146,309 +2144,576 @@ " condition\n", " grade\n", " sqft_above\n", - " sqft_basement\n", - " yr_built\n", + " ...\n", " yr_renovated\n", " zipcode\n", " lat\n", " long\n", " sqft_living15\n", " sqft_lot15\n", - " price_category\n", + " price_h\n", + " price_l\n", + " price_m\n", + " price_vh\n", " \n", " \n", " \n", " \n", " 0\n", - " 3\n", - " 3260000340\n", - " 732600.0\n", - " 4\n", - " 2.5\n", - " 2130\n", - " 7300\n", + " -0.451103\n", + " 0.916381\n", + " 0.700559\n", + " 0.573416\n", + " 0.081706\n", + " -0.187493\n", + " -0.838739\n", + " 0.839159\n", + " -0.512647\n", + " -0.638064\n", + " ...\n", + " -0.2158\n", + " -1.349962\n", + " 0.32254\n", + " 0.340593\n", + " 0.223199\n", + " -0.210584\n", " 1.0\n", - " 4\n", - " 7\n", - " 1230\n", - " 900\n", - " 1963\n", - " 0\n", - " 98005\n", - " 47.605\n", - " -122.167\n", - " 2130\n", - " 7560\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", " \n", " \n", " 1\n", - " 2\n", - " 9828702055\n", - " 358000.0\n", - " 2\n", - " 1.5\n", - " 960\n", - " 1808\n", - " 2.0\n", - " 3\n", - " 7\n", - " 960\n", - " 0\n", - " 1993\n", - " 0\n", - " 98122\n", - " 47.6183\n", - " -122.298\n", - " 1290\n", - " 1668\n", + " 1.845014\n", + " -0.589326\n", + " -1.49426\n", + " -0.72971\n", + " -1.191326\n", + " -0.302999\n", + " 1.120073\n", + " -0.666734\n", + " -0.512647\n", + " -0.969739\n", + " ...\n", + " -0.2158\n", + " 0.820656\n", + " 0.417588\n", + " -0.601419\n", + " -1.022503\n", + " -0.421966\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", " \n", " \n", " 2\n", - " 1\n", - " 3438500625\n", - " 210000.0\n", - " 3\n", + " -0.388708\n", + " -1.184213\n", + " -0.396851\n", + " -1.381273\n", + " -1.060759\n", + " 0.101544\n", + " -0.838739\n", + " -0.666734\n", + " -1.369558\n", + " -0.822328\n", + " ...\n", + " -0.2158\n", + " 0.523819\n", + " -0.059795\n", + " -1.025683\n", + " -0.889035\n", + " -0.208431\n", + " 0.0\n", " 1.0\n", - " 1080\n", - " 21043\n", - " 1.0\n", - " 3\n", - " 6\n", - " 1080\n", - " 0\n", - " 1942\n", - " 0\n", - " 98106\n", - " 47.5515\n", - " -122.357\n", - " 1380\n", - " 7620\n", + " 0.0\n", + " 0.0\n", " \n", " \n", " 3\n", - " 2\n", - " 2422029094\n", - " 517534.0\n", - " 2\n", + " -0.74402\n", + " 0.051922\n", + " -1.49426\n", + " -1.381273\n", + " -1.32951\n", + " 2.686416\n", + " -0.838739\n", + " -0.666734\n", + " -2.22647\n", + " -1.125749\n", + " ...\n", + " -0.2158\n", + " -0.144063\n", + " -1.221808\n", + " -1.924549\n", + " -0.889035\n", + " 4.682444\n", + " 0.0\n", + " 0.0\n", " 1.0\n", - " 833\n", - " 143947\n", - " 1.0\n", - " 3\n", - " 5\n", - " 833\n", - " 0\n", - " 2006\n", - " 0\n", - " 98070\n", - " 47.3889\n", - " -122.482\n", - " 1380\n", - " 143947\n", + " 0.0\n", " \n", " \n", " 4\n", - " 2\n", - " 7462900015\n", - " 387000.0\n", - " 3\n", - " 2.25\n", - " 1760\n", - " 45133\n", - " 2.0\n", - " 3\n", - " 7\n", - " 1760\n", - " 0\n", - " 1984\n", - " 0\n", - " 98065\n", - " 47.5124\n", - " -121.866\n", - " 1910\n", - " 51773\n", - " \n", - " \n", - " ...\n", + " 1.018038\n", + " -0.47276\n", + " -0.396851\n", + " 0.247635\n", + " -0.320877\n", + " 0.608196\n", + " 1.120073\n", + " -0.666734\n", + " -0.512647\n", + " 0.013003\n", " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " ...\n", - " \n", - " \n", - " 7995\n", - " 2\n", - " 2787720140\n", - " 416000.0\n", - " 3\n", - " 2.5\n", - " 1790\n", - " 11542\n", + " -0.2158\n", + " -0.236825\n", + " -0.339221\n", + " 2.505062\n", + " -0.103056\n", + " 1.375604\n", + " 0.0\n", + " 0.0\n", " 1.0\n", - " 5\n", - " 7\n", - " 1190\n", - " 600\n", - " 1969\n", - " 0\n", - " 98059\n", - " 47.5124\n", - " -122.16\n", - " 1790\n", - " 9131\n", + " 0.0\n", " \n", " \n", - " 7996\n", - " 3\n", - " 6192400400\n", - " 775000.0\n", - " 4\n", - " 2.5\n", - " 3090\n", - " 7112\n", - " 2.0\n", - " 3\n", - " 9\n", - " 3090\n", - " 0\n", - " 2001\n", - " 0\n", - " 98052\n", - " 47.705\n", - " -122.118\n", - " 3050\n", - " 6000\n", - " \n", - " \n", - " 7997\n", - " 2\n", - " 2296500036\n", - " 450000.0\n", - " 4\n", - " 2.75\n", - " 2980\n", - " 13260\n", + " 5\n", + " -0.083826\n", + " -0.492858\n", + " -0.396851\n", + " 1.550761\n", + " -0.701698\n", + " -0.314672\n", + " 3.078884\n", + " -0.666734\n", + " 0.344264\n", + " -0.416947\n", + " ...\n", + " -0.2158\n", + " 0.468162\n", + " 0.987875\n", + " -0.903438\n", + " -0.844546\n", + " -0.436854\n", + " 0.0\n", + " 0.0\n", " 1.0\n", - " 4\n", - " 8\n", - " 1800\n", - " 1180\n", - " 1979\n", - " 0\n", - " 98056\n", - " 47.5152\n", - " -122.197\n", - " 1920\n", - " 10731\n", + " 0.0\n", " \n", " \n", - " 7998\n", - " 1\n", - " 2787310130\n", - " 289950.0\n", - " 4\n", - " 1.75\n", - " 2090\n", - " 7416\n", + " 6\n", + " 0.301277\n", + " -0.953091\n", + " -0.396851\n", + " 0.573416\n", + " -0.712579\n", + " -0.180574\n", + " -0.838739\n", + " -0.666734\n", + " -0.512647\n", + " -0.773191\n", + " ...\n", + " -0.2158\n", + " -0.886155\n", + " -1.293987\n", + " 0.254302\n", + " -0.666588\n", + " -0.205992\n", + " 0.0\n", " 1.0\n", - " 4\n", - " 7\n", - " 1050\n", - " 1040\n", - " 1970\n", - " 0\n", - " 98031\n", - " 47.4107\n", - " -122.179\n", - " 1710\n", - " 7527\n", + " 0.0\n", + " 0.0\n", " \n", " \n", - " 7999\n", - " 2\n", - " 8567300110\n", - " 485000.0\n", - " 3\n", - " 2.5\n", - " 2340\n", - " 59058\n", + " 7\n", + " -0.086798\n", + " -1.148038\n", + " -1.49426\n", + " -1.381273\n", + " -1.25661\n", + " -0.232501\n", + " -0.838739\n", + " -0.666734\n", + " -1.369558\n", + " -1.043445\n", + " ...\n", + " -0.2158\n", + " 0.523819\n", + " -0.249176\n", + " -1.018493\n", + " -1.600865\n", + " -0.296686\n", + " 0.0\n", " 1.0\n", - " 3\n", - " 8\n", - " 2340\n", - " 0\n", - " 1985\n", - " 0\n", - " 98038\n", - " 47.4052\n", - " -122.028\n", - " 2700\n", - " 37263\n", + " 0.0\n", + " 0.0\n", + " \n", + " \n", + " 8\n", + " -0.824567\n", + " -1.148038\n", + " -1.49426\n", + " -1.381273\n", + " -1.0934\n", + " -0.15174\n", + " 0.140667\n", + " 0.839159\n", + " -0.512647\n", + " -0.859181\n", + " ...\n", + " -0.2158\n", + " -1.387066\n", + " -1.937882\n", + " -0.60861\n", + " -0.636929\n", + " -0.137397\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " \n", + " \n", + " 9\n", + " 1.647935\n", + " -0.762165\n", + " 2.895378\n", + " 0.899198\n", + " 0.963036\n", + " -0.186442\n", + " -0.838739\n", + " -0.666734\n", + " 0.344264\n", + " 0.037571\n", + " ...\n", + " -0.2158\n", + " -1.016021\n", + " -1.783519\n", + " -0.896247\n", + " 0.208369\n", + " -0.186332\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " \n", + " \n", + " 10\n", + " -1.159614\n", + " -0.581287\n", + " -1.49426\n", + " -1.381273\n", + " -1.321893\n", + " -0.185096\n", + " -0.838739\n", + " 0.839159\n", + " -1.369558\n", + " -1.11715\n", + " ...\n", + " -0.2158\n", + " -0.830498\n", + " 0.837799\n", + " 0.304638\n", + " -0.355163\n", + " -0.130796\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " \n", + " \n", + " 11\n", + " -1.329183\n", + " -0.681775\n", + " -1.49426\n", + " -1.381273\n", + " -1.071639\n", + " -0.200575\n", + " -0.838739\n", + " 0.839159\n", + " -0.512647\n", + " -0.834612\n", + " ...\n", + " -0.2158\n", + " 1.024731\n", + " 1.226566\n", + " -1.025683\n", + " -0.444141\n", + " -0.202404\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " \n", + " \n", + " 12\n", + " 0.377864\n", + " 0.286926\n", + " 0.700559\n", + " 0.573416\n", + " 0.419005\n", + " 0.256379\n", + " 1.120073\n", + " -0.666734\n", + " 0.344264\n", + " 0.848334\n", + " ...\n", + " -0.2158\n", + " -0.923259\n", + " 1.277306\n", + " -0.169963\n", + " 0.742242\n", + " -0.071779\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " \n", + " \n", + " 13\n", + " 0.289882\n", + " -0.88677\n", + " -0.396851\n", + " 0.573416\n", + " 0.103467\n", + " -0.143853\n", + " -0.838739\n", + " -0.666734\n", + " 0.344264\n", + " -0.244967\n", + " ...\n", + " -0.2158\n", + " 2.045107\n", + " -0.729417\n", + " -0.428836\n", + " -0.043737\n", + " -0.155335\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " \n", + " \n", + " 14\n", + " 1.613049\n", + " 0.282907\n", + " -0.396851\n", + " -0.078147\n", + " 0.103467\n", + " -0.259422\n", + " -0.838739\n", + " -0.666734\n", + " 0.344264\n", + " -0.822328\n", + " ...\n", + " -0.2158\n", + " 0.727894\n", + " 0.868529\n", + " -1.277366\n", + " 0.223199\n", + " -0.338303\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " \n", + " \n", + " 15\n", + " -0.962885\n", + " 0.285118\n", + " 0.700559\n", + " 0.573416\n", + " 0.005542\n", + " -0.183813\n", + " -0.838739\n", + " -0.666734\n", + " 0.344264\n", + " -0.380094\n", + " ...\n", + " -0.2158\n", + " -0.478004\n", + " 1.195837\n", + " 0.78643\n", + " 0.445646\n", + " -0.180592\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " \n", + " \n", + " 16\n", + " 1.722145\n", + " -0.259726\n", + " -0.396851\n", + " -0.403928\n", + " -0.571131\n", + " -0.18865\n", + " -0.838739\n", + " 0.839159\n", + " -0.512647\n", + " -0.269535\n", + " ...\n", + " -0.2158\n", + " -0.811945\n", + " 1.222993\n", + " 0.168011\n", + " -0.666588\n", + " -0.213095\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " \n", + " \n", + " 17\n", + " 0.740562\n", + " 1.589247\n", + " 0.700559\n", + " 1.550761\n", + " 2.878025\n", + " 0.466843\n", + " 1.120073\n", + " -0.666734\n", + " 2.058087\n", + " 2.052192\n", + " ...\n", + " -0.2158\n", + " -1.349962\n", + " 0.604825\n", + " 0.340593\n", + " 2.462498\n", + " 0.79434\n", + " 0.0\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " \n", + " \n", + " 18\n", + " -1.555659\n", + " -0.922945\n", + " -0.396851\n", + " -1.381273\n", + " -0.799624\n", + " -0.107784\n", + " -0.838739\n", + " -0.666734\n", + " -0.512647\n", + " -0.527505\n", + " ...\n", + " -0.2158\n", + " 1.432881\n", + " 1.536008\n", + " -0.644564\n", + " -0.978014\n", + " -0.183354\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", + " 0.0\n", + " \n", + " \n", + " 19\n", + " -0.953738\n", + " 0.142224\n", + " 2.895378\n", + " 1.224979\n", + " 0.886872\n", + " 4.00146\n", + " 1.120073\n", + " -0.666734\n", + " -0.512647\n", + " 0.713207\n", + " ...\n", + " 4.605736\n", + " -0.663527\n", + " -1.135335\n", + " 0.85834\n", + " 0.593944\n", + " 1.659169\n", + " 0.0\n", + " 0.0\n", + " 1.0\n", + " 0.0\n", " \n", " \n", "\n", - "

8000 rows × 19 columns

\n", + "

20 rows × 22 columns

\n", "" ], "text/plain": [ - " id price bedrooms bathrooms sqft_living sqft_lot floors \\\n", - "0 3 3260000340 732600.0 4 2.5 2130 7300 \n", - "1 2 9828702055 358000.0 2 1.5 960 1808 \n", - "2 1 3438500625 210000.0 3 1.0 1080 21043 \n", - "3 2 2422029094 517534.0 2 1.0 833 143947 \n", - "4 2 7462900015 387000.0 3 2.25 1760 45133 \n", - "... .. ... ... ... ... ... ... \n", - "7995 2 2787720140 416000.0 3 2.5 1790 11542 \n", - "7996 3 6192400400 775000.0 4 2.5 3090 7112 \n", - "7997 2 2296500036 450000.0 4 2.75 2980 13260 \n", - "7998 1 2787310130 289950.0 4 1.75 2090 7416 \n", - "7999 2 8567300110 485000.0 3 2.5 2340 59058 \n", + " id price bedrooms bathrooms sqft_living sqft_lot floors \\\n", + "0 -0.451103 0.916381 0.700559 0.573416 0.081706 -0.187493 -0.838739 \n", + "1 1.845014 -0.589326 -1.49426 -0.72971 -1.191326 -0.302999 1.120073 \n", + "2 -0.388708 -1.184213 -0.396851 -1.381273 -1.060759 0.101544 -0.838739 \n", + "3 -0.74402 0.051922 -1.49426 -1.381273 -1.32951 2.686416 -0.838739 \n", + "4 1.018038 -0.47276 -0.396851 0.247635 -0.320877 0.608196 1.120073 \n", + "5 -0.083826 -0.492858 -0.396851 1.550761 -0.701698 -0.314672 3.078884 \n", + "6 0.301277 -0.953091 -0.396851 0.573416 -0.712579 -0.180574 -0.838739 \n", + "7 -0.086798 -1.148038 -1.49426 -1.381273 -1.25661 -0.232501 -0.838739 \n", + "8 -0.824567 -1.148038 -1.49426 -1.381273 -1.0934 -0.15174 0.140667 \n", + "9 1.647935 -0.762165 2.895378 0.899198 0.963036 -0.186442 -0.838739 \n", + "10 -1.159614 -0.581287 -1.49426 -1.381273 -1.321893 -0.185096 -0.838739 \n", + "11 -1.329183 -0.681775 -1.49426 -1.381273 -1.071639 -0.200575 -0.838739 \n", + "12 0.377864 0.286926 0.700559 0.573416 0.419005 0.256379 1.120073 \n", + "13 0.289882 -0.88677 -0.396851 0.573416 0.103467 -0.143853 -0.838739 \n", + "14 1.613049 0.282907 -0.396851 -0.078147 0.103467 -0.259422 -0.838739 \n", + "15 -0.962885 0.285118 0.700559 0.573416 0.005542 -0.183813 -0.838739 \n", + "16 1.722145 -0.259726 -0.396851 -0.403928 -0.571131 -0.18865 -0.838739 \n", + "17 0.740562 1.589247 0.700559 1.550761 2.878025 0.466843 1.120073 \n", + "18 -1.555659 -0.922945 -0.396851 -1.381273 -0.799624 -0.107784 -0.838739 \n", + "19 -0.953738 0.142224 2.895378 1.224979 0.886872 4.00146 1.120073 \n", "\n", - " condition grade sqft_above sqft_basement yr_built yr_renovated zipcode \\\n", - "0 1.0 4 7 1230 900 1963 0 \n", - "1 2.0 3 7 960 0 1993 0 \n", - "2 1.0 3 6 1080 0 1942 0 \n", - "3 1.0 3 5 833 0 2006 0 \n", - "4 2.0 3 7 1760 0 1984 0 \n", - "... ... ... ... ... ... ... ... \n", - "7995 1.0 5 7 1190 600 1969 0 \n", - "7996 2.0 3 9 3090 0 2001 0 \n", - "7997 1.0 4 8 1800 1180 1979 0 \n", - "7998 1.0 4 7 1050 1040 1970 0 \n", - "7999 1.0 3 8 2340 0 1985 0 \n", + " condition grade sqft_above ... yr_renovated zipcode lat \\\n", + "0 0.839159 -0.512647 -0.638064 ... -0.2158 -1.349962 0.32254 \n", + "1 -0.666734 -0.512647 -0.969739 ... -0.2158 0.820656 0.417588 \n", + "2 -0.666734 -1.369558 -0.822328 ... -0.2158 0.523819 -0.059795 \n", + "3 -0.666734 -2.22647 -1.125749 ... -0.2158 -0.144063 -1.221808 \n", + "4 -0.666734 -0.512647 0.013003 ... -0.2158 -0.236825 -0.339221 \n", + "5 -0.666734 0.344264 -0.416947 ... -0.2158 0.468162 0.987875 \n", + "6 -0.666734 -0.512647 -0.773191 ... -0.2158 -0.886155 -1.293987 \n", + "7 -0.666734 -1.369558 -1.043445 ... -0.2158 0.523819 -0.249176 \n", + "8 0.839159 -0.512647 -0.859181 ... -0.2158 -1.387066 -1.937882 \n", + "9 -0.666734 0.344264 0.037571 ... -0.2158 -1.016021 -1.783519 \n", + "10 0.839159 -1.369558 -1.11715 ... -0.2158 -0.830498 0.837799 \n", + "11 0.839159 -0.512647 -0.834612 ... -0.2158 1.024731 1.226566 \n", + "12 -0.666734 0.344264 0.848334 ... -0.2158 -0.923259 1.277306 \n", + "13 -0.666734 0.344264 -0.244967 ... -0.2158 2.045107 -0.729417 \n", + "14 -0.666734 0.344264 -0.822328 ... -0.2158 0.727894 0.868529 \n", + "15 -0.666734 0.344264 -0.380094 ... -0.2158 -0.478004 1.195837 \n", + "16 0.839159 -0.512647 -0.269535 ... -0.2158 -0.811945 1.222993 \n", + "17 -0.666734 2.058087 2.052192 ... -0.2158 -1.349962 0.604825 \n", + "18 -0.666734 -0.512647 -0.527505 ... -0.2158 1.432881 1.536008 \n", + "19 -0.666734 -0.512647 0.713207 ... 4.605736 -0.663527 -1.135335 \n", "\n", - " lat long sqft_living15 sqft_lot15 price_category \n", - "0 98005 47.605 -122.167 2130 7560 \n", - "1 98122 47.6183 -122.298 1290 1668 \n", - "2 98106 47.5515 -122.357 1380 7620 \n", - "3 98070 47.3889 -122.482 1380 143947 \n", - "4 98065 47.5124 -121.866 1910 51773 \n", - "... ... ... ... ... ... \n", - "7995 98059 47.5124 -122.16 1790 9131 \n", - "7996 98052 47.705 -122.118 3050 6000 \n", - "7997 98056 47.5152 -122.197 1920 10731 \n", - "7998 98031 47.4107 -122.179 1710 7527 \n", - "7999 98038 47.4052 -122.028 2700 37263 \n", + " long sqft_living15 sqft_lot15 price_h price_l price_m price_vh \n", + "0 0.340593 0.223199 -0.210584 1.0 0.0 0.0 0.0 \n", + "1 -0.601419 -1.022503 -0.421966 0.0 0.0 1.0 0.0 \n", + "2 -1.025683 -0.889035 -0.208431 0.0 1.0 0.0 0.0 \n", + "3 -1.924549 -0.889035 4.682444 0.0 0.0 1.0 0.0 \n", + "4 2.505062 -0.103056 1.375604 0.0 0.0 1.0 0.0 \n", + "5 -0.903438 -0.844546 -0.436854 0.0 0.0 1.0 0.0 \n", + "6 0.254302 -0.666588 -0.205992 0.0 1.0 0.0 0.0 \n", + "7 -1.018493 -1.600865 -0.296686 0.0 1.0 0.0 0.0 \n", + "8 -0.60861 -0.636929 -0.137397 0.0 1.0 0.0 0.0 \n", + "9 -0.896247 0.208369 -0.186332 0.0 1.0 0.0 0.0 \n", + "10 0.304638 -0.355163 -0.130796 0.0 0.0 1.0 0.0 \n", + "11 -1.025683 -0.444141 -0.202404 0.0 1.0 0.0 0.0 \n", + "12 -0.169963 0.742242 -0.071779 0.0 0.0 1.0 0.0 \n", + "13 -0.428836 -0.043737 -0.155335 0.0 1.0 0.0 0.0 \n", + "14 -1.277366 0.223199 -0.338303 0.0 0.0 1.0 0.0 \n", + "15 0.78643 0.445646 -0.180592 0.0 0.0 1.0 0.0 \n", + "16 0.168011 -0.666588 -0.213095 0.0 0.0 1.0 0.0 \n", + "17 0.340593 2.462498 0.79434 0.0 0.0 0.0 1.0 \n", + "18 -0.644564 -0.978014 -0.183354 0.0 1.0 0.0 0.0 \n", + "19 0.85834 0.593944 1.659169 0.0 0.0 1.0 0.0 \n", "\n", - "[8000 rows x 19 columns]" + "[20 rows x 22 columns]" ] }, - "execution_count": 191, + "execution_count": 316, "metadata": {}, "output_type": "execute_result" } @@ -2465,34 +2730,7 @@ "from sklearn.model_selection import train_test_split\n", "from sklearn.pipeline import make_pipeline\n", "\n", - "class HousesFeatures(BaseEstimator, TransformerMixin):\n", - " def __init__(self):\n", - " pass\n", - "\n", - " def fit(self, X, y=None):\n", - " return self\n", - "\n", - "\n", - " def transform(self, X, y=None):\n", - "\n", - " def get_price_type(category) -> int:\n", - " if pd.isna(category):\n", - " return \"unknown\"\n", - " if category == 'low':\n", - " return 1\n", - " elif category == 'middle':\n", - " return 2\n", - " elif category == 'high':\n", - " return 3\n", - " elif category == 'very_high':\n", - " return 4\n", - "\n", - " # Преобразование категориальных столбцов в числовые 1/0\n", - " X[\"price_category\"] = [get_price_type(category) for category in X[\"price_category\"]]\n", - " return X\n", - "\n", - " def get_feature_names_out(self, features_in):\n", - " return np.append(features_in, [\"price_type\"], axis=0)\n", + "random_state = 42\n", "\n", "# Указываем столбцы, которые нужно удалить и обрабатывать\n", "columns_to_drop = [\"date\", \"view\", \"waterfront\"]\n", @@ -2518,8 +2756,8 @@ ")\n", "\n", "# Определяем предобработку для категориальных данных\n", - "cat_imputer = SimpleImputer(strategy=\"constant\", fill_value=\"unknown\")\n", - "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False, drop=\"first\")\n", + "cat_imputer = SimpleImputer(strategy=\"constant\")\n", + "cat_encoder = OneHotEncoder(handle_unknown=\"ignore\", sparse_output=False)\n", "preprocessing_cat = Pipeline(\n", " [\n", " (\"imputer\", cat_imputer),\n", @@ -2532,19 +2770,11 @@ " transformers=[\n", " (\"prepocessing_num\", preprocessing_num, num_columns),\n", " (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n", - " # (\"prepocessing_features\", cat_imputer, [\"price_category\"]),\n", + " (\"prepocessing_features\", cat_imputer, [\"price_category\"]),\n", " ],\n", " remainder=\"passthrough\"\n", ")\n", "\n", - "features_engineering = ColumnTransformer(\n", - " verbose_feature_names_out=False,\n", - " transformers=[\n", - " (\"add_features\", HousesFeatures(), [\"price_category\"]),\n", - " ],\n", - " remainder=\"passthrough\",\n", - ")\n", - "\n", "drop_columns = ColumnTransformer(\n", " verbose_feature_names_out=False,\n", " transformers=[\n", @@ -2564,29 +2794,181 @@ "pipeline_end = Pipeline(\n", " [\n", " (\"features_preprocessing\", features_preprocessing),\n", - " (\"features_engineering\", features_engineering),\n", " (\"drop_columns\", drop_columns),\n", " (\"features_postprocessing\", features_postprocessing),\n", " ]\n", "\n", ")\n", - "cols = ['a', 'b']\n", - "preprocessing_result = drop_columns.fit_transform(X_train)\n", - "preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns)\n", - "preprocessing_result = features_engineering.fit_transform(preprocessing_result)\n", - "preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns)\n", - "preprocessing_result\n", - "# # preprocessing_result = features_preprocessing.fit_transform(preprocessing_result)\n", - "# # preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns)\n", + "cols = ['price_h', 'price_l', 'price_m', 'price_vh']\n", + "preprocessing_result = features_preprocessing.fit_transform(X_train)\n", + "preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns + cols + columns_to_drop)\n", "\n", - "# preprocessing_result = features_postprocessing.fit_transform(preprocessing_result)\n", + "preprocessing_result = drop_columns.fit_transform(preprocessing_result)\n", + "preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cols + cat_columns)\n", "\n", - "# preprocessing_result = pipeline_end.fit_transform(X_train)\n", - "# preprocessed_df = pd.DataFrame(\n", - "# preprocessing_result,\n", - "# columns=pipeline_end.get_feature_names_out(),\n", - "# )\n", - "# preprocessed_df" + "preprocessing_result = preprocessing_result.drop(columns=[\"price_category\"])\n", + "preprocessing_result.head(20)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Формирование набора моделей для классификации¶\n", + "logistic -- логистическая регрессия\n", + "\n", + "ridge -- гребневая регрессия\n", + "\n", + "decision_tree -- дерево решений\n", + "\n", + "knn -- k-ближайших соседей\n", + "\n", + "naive_bayes -- наивный Байесовский классификатор\n", + "\n", + "gradient_boosting -- метод градиентного бустинга (набор деревьев решений)\n", + "\n", + "random_forest -- метод случайного леса (набор деревьев решений)\n", + "\n", + "mlp -- многослойный персептрон (нейронная сеть)" + ] + }, + { + "cell_type": "code", + "execution_count": 317, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn import ensemble, linear_model, naive_bayes, neighbors, neural_network, tree\n", + "\n", + "class_models = {\n", + " \"logistic\": {\"model\": linear_model.LogisticRegression()},\n", + " # \"ridge\": {\"model\": linear_model.RidgeClassifierCV(cv=5, class_weight=\"balanced\")},\n", + " \"ridge\": {\"model\": linear_model.LogisticRegression(penalty=\"l2\", class_weight=\"balanced\")},\n", + " \"decision_tree\": {\n", + " \"model\": tree.DecisionTreeClassifier(max_depth=7, random_state=random_state)\n", + " },\n", + " \"knn\": {\"model\": neighbors.KNeighborsClassifier(n_neighbors=7)},\n", + " \"naive_bayes\": {\"model\": naive_bayes.GaussianNB()},\n", + " \"gradient_boosting\": {\n", + " \"model\": ensemble.GradientBoostingClassifier(n_estimators=210)\n", + " },\n", + " \"random_forest\": {\n", + " \"model\": ensemble.RandomForestClassifier(\n", + " max_depth=11, class_weight=\"balanced\", random_state=random_state\n", + " )\n", + " },\n", + " \"mlp\": {\n", + " \"model\": neural_network.MLPClassifier(\n", + " hidden_layer_sizes=(7,),\n", + " max_iter=500,\n", + " early_stopping=True,\n", + " random_state=random_state,\n", + " )\n", + " },\n", + "}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Обучение моделей на обучающем наборе данных и оценка на тестовом" + ] + }, + { + "cell_type": "code", + "execution_count": 320, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n" + ] + }, + { + "ename": "ValueError", + "evalue": "Specifying the columns using strings is only supported for dataframes.", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mAttributeError\u001b[0m Traceback (most recent call last)", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\utils\\_indexing.py:338\u001b[0m, in \u001b[0;36m_get_column_indices\u001b[1;34m(X, key)\u001b[0m\n\u001b[0;32m 337\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 338\u001b[0m all_columns \u001b[38;5;241m=\u001b[39m \u001b[43mX\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcolumns\u001b[49m\n\u001b[0;32m 339\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n", + "\u001b[1;31mAttributeError\u001b[0m: 'numpy.ndarray' object has no attribute 'columns'", + "\nDuring handling of the above exception, another exception occurred:\n", + "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[320], line 9\u001b[0m\n\u001b[0;32m 6\u001b[0m model \u001b[38;5;241m=\u001b[39m class_models[model_name][\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m 8\u001b[0m model_pipeline \u001b[38;5;241m=\u001b[39m Pipeline([(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpipeline\u001b[39m\u001b[38;5;124m\"\u001b[39m, pipeline_end), (\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmodel\u001b[39m\u001b[38;5;124m\"\u001b[39m, model)])\n\u001b[1;32m----> 9\u001b[0m model_pipeline \u001b[38;5;241m=\u001b[39m \u001b[43mmodel_pipeline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX_train\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my_train\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mvalues\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mravel\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 11\u001b[0m y_train_predict \u001b[38;5;241m=\u001b[39m model_pipeline\u001b[38;5;241m.\u001b[39mpredict(X_train)\n\u001b[0;32m 12\u001b[0m y_test_probs \u001b[38;5;241m=\u001b[39m model_pipeline\u001b[38;5;241m.\u001b[39mpredict_proba(X_test)[:, \u001b[38;5;241m1\u001b[39m]\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\pipeline.py:469\u001b[0m, in \u001b[0;36mPipeline.fit\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m 426\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Fit the model.\u001b[39;00m\n\u001b[0;32m 427\u001b[0m \n\u001b[0;32m 428\u001b[0m \u001b[38;5;124;03mFit all the transformers one after the other and sequentially transform the\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 466\u001b[0m \u001b[38;5;124;03m Pipeline with fitted steps.\u001b[39;00m\n\u001b[0;32m 467\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 468\u001b[0m routed_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_method_params(method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit\u001b[39m\u001b[38;5;124m\"\u001b[39m, props\u001b[38;5;241m=\u001b[39mparams)\n\u001b[1;32m--> 469\u001b[0m Xt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrouted_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 470\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _print_elapsed_time(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPipeline\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_message(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps) \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m)):\n\u001b[0;32m 471\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_final_estimator \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpassthrough\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\pipeline.py:406\u001b[0m, in \u001b[0;36mPipeline._fit\u001b[1;34m(self, X, y, routed_params)\u001b[0m\n\u001b[0;32m 404\u001b[0m cloned_transformer \u001b[38;5;241m=\u001b[39m clone(transformer)\n\u001b[0;32m 405\u001b[0m \u001b[38;5;66;03m# Fit or load from cache the current transformer\u001b[39;00m\n\u001b[1;32m--> 406\u001b[0m X, fitted_transformer \u001b[38;5;241m=\u001b[39m \u001b[43mfit_transform_one_cached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mcloned_transformer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage_clsname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPipeline\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_log_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep_idx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouted_params\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 414\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 415\u001b[0m \u001b[38;5;66;03m# Replace the transformer of the step with the fitted\u001b[39;00m\n\u001b[0;32m 416\u001b[0m \u001b[38;5;66;03m# transformer. This is necessary when loading the transformer\u001b[39;00m\n\u001b[0;32m 417\u001b[0m \u001b[38;5;66;03m# from the cache.\u001b[39;00m\n\u001b[0;32m 418\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps[step_idx] \u001b[38;5;241m=\u001b[39m (name, fitted_transformer)\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\joblib\\memory.py:312\u001b[0m, in \u001b[0;36mNotMemorizedFunc.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 312\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\pipeline.py:1310\u001b[0m, in \u001b[0;36m_fit_transform_one\u001b[1;34m(transformer, X, y, weight, message_clsname, message, params)\u001b[0m\n\u001b[0;32m 1308\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _print_elapsed_time(message_clsname, message):\n\u001b[0;32m 1309\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(transformer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit_transform\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 1310\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfit_transform\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1311\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1312\u001b[0m res \u001b[38;5;241m=\u001b[39m transformer\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit\u001b[39m\u001b[38;5;124m\"\u001b[39m, {}))\u001b[38;5;241m.\u001b[39mtransform(\n\u001b[0;32m 1313\u001b[0m X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtransform\u001b[39m\u001b[38;5;124m\"\u001b[39m, {})\n\u001b[0;32m 1314\u001b[0m )\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\pipeline.py:533\u001b[0m, in \u001b[0;36mPipeline.fit_transform\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m 490\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Fit the model and transform with the final estimator.\u001b[39;00m\n\u001b[0;32m 491\u001b[0m \n\u001b[0;32m 492\u001b[0m \u001b[38;5;124;03mFit all the transformers one after the other and sequentially transform\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 530\u001b[0m \u001b[38;5;124;03m Transformed samples.\u001b[39;00m\n\u001b[0;32m 531\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 532\u001b[0m routed_params \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_method_params(method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit_transform\u001b[39m\u001b[38;5;124m\"\u001b[39m, props\u001b[38;5;241m=\u001b[39mparams)\n\u001b[1;32m--> 533\u001b[0m Xt \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrouted_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 535\u001b[0m last_step \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_final_estimator\n\u001b[0;32m 536\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _print_elapsed_time(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPipeline\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_log_message(\u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps) \u001b[38;5;241m-\u001b[39m \u001b[38;5;241m1\u001b[39m)):\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\pipeline.py:406\u001b[0m, in \u001b[0;36mPipeline._fit\u001b[1;34m(self, X, y, routed_params)\u001b[0m\n\u001b[0;32m 404\u001b[0m cloned_transformer \u001b[38;5;241m=\u001b[39m clone(transformer)\n\u001b[0;32m 405\u001b[0m \u001b[38;5;66;03m# Fit or load from cache the current transformer\u001b[39;00m\n\u001b[1;32m--> 406\u001b[0m X, fitted_transformer \u001b[38;5;241m=\u001b[39m \u001b[43mfit_transform_one_cached\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 407\u001b[0m \u001b[43m \u001b[49m\u001b[43mcloned_transformer\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 408\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 409\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 410\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;28;43;01mNone\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[0;32m 411\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage_clsname\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mPipeline\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 412\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessage\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_log_message\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstep_idx\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 413\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mrouted_params\u001b[49m\u001b[43m[\u001b[49m\u001b[43mname\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 414\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 415\u001b[0m \u001b[38;5;66;03m# Replace the transformer of the step with the fitted\u001b[39;00m\n\u001b[0;32m 416\u001b[0m \u001b[38;5;66;03m# transformer. This is necessary when loading the transformer\u001b[39;00m\n\u001b[0;32m 417\u001b[0m \u001b[38;5;66;03m# from the cache.\u001b[39;00m\n\u001b[0;32m 418\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39msteps[step_idx] \u001b[38;5;241m=\u001b[39m (name, fitted_transformer)\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\joblib\\memory.py:312\u001b[0m, in \u001b[0;36mNotMemorizedFunc.__call__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 311\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__call__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 312\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\pipeline.py:1310\u001b[0m, in \u001b[0;36m_fit_transform_one\u001b[1;34m(transformer, X, y, weight, message_clsname, message, params)\u001b[0m\n\u001b[0;32m 1308\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _print_elapsed_time(message_clsname, message):\n\u001b[0;32m 1309\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(transformer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit_transform\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m-> 1310\u001b[0m res \u001b[38;5;241m=\u001b[39m \u001b[43mtransformer\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mfit_transform\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43m{\u001b[49m\u001b[43m}\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1311\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 1312\u001b[0m res \u001b[38;5;241m=\u001b[39m transformer\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mfit\u001b[39m\u001b[38;5;124m\"\u001b[39m, {}))\u001b[38;5;241m.\u001b[39mtransform(\n\u001b[0;32m 1313\u001b[0m X, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mparams\u001b[38;5;241m.\u001b[39mget(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtransform\u001b[39m\u001b[38;5;124m\"\u001b[39m, {})\n\u001b[0;32m 1314\u001b[0m )\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\utils\\_set_output.py:316\u001b[0m, in \u001b[0;36m_wrap_method_output..wrapped\u001b[1;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[0;32m 314\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(f)\n\u001b[0;32m 315\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 316\u001b[0m data_to_wrap \u001b[38;5;241m=\u001b[39m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 317\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_to_wrap, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[0;32m 318\u001b[0m \u001b[38;5;66;03m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[0;32m 319\u001b[0m return_tuple \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 320\u001b[0m _wrap_data_with_container(method, data_to_wrap[\u001b[38;5;241m0\u001b[39m], X, \u001b[38;5;28mself\u001b[39m),\n\u001b[0;32m 321\u001b[0m \u001b[38;5;241m*\u001b[39mdata_to_wrap[\u001b[38;5;241m1\u001b[39m:],\n\u001b[0;32m 322\u001b[0m )\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\base.py:1473\u001b[0m, in \u001b[0;36m_fit_context..decorator..wrapper\u001b[1;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[0;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[0;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 1471\u001b[0m )\n\u001b[0;32m 1472\u001b[0m ):\n\u001b[1;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:968\u001b[0m, in \u001b[0;36mColumnTransformer.fit_transform\u001b[1;34m(self, X, y, **params)\u001b[0m\n\u001b[0;32m 965\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_transformers()\n\u001b[0;32m 966\u001b[0m n_samples \u001b[38;5;241m=\u001b[39m _num_samples(X)\n\u001b[1;32m--> 968\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_column_callables\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 969\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_validate_remainder(X)\n\u001b[0;32m 971\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m _routing_enabled():\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\compose\\_column_transformer.py:536\u001b[0m, in \u001b[0;36mColumnTransformer._validate_column_callables\u001b[1;34m(self, X)\u001b[0m\n\u001b[0;32m 534\u001b[0m columns \u001b[38;5;241m=\u001b[39m columns(X)\n\u001b[0;32m 535\u001b[0m all_columns\u001b[38;5;241m.\u001b[39mappend(columns)\n\u001b[1;32m--> 536\u001b[0m transformer_to_input_indices[name] \u001b[38;5;241m=\u001b[39m \u001b[43m_get_column_indices\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 538\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_columns \u001b[38;5;241m=\u001b[39m all_columns\n\u001b[0;32m 539\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_transformer_to_input_indices \u001b[38;5;241m=\u001b[39m transformer_to_input_indices\n", + "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\sklearn\\utils\\_indexing.py:340\u001b[0m, in \u001b[0;36m_get_column_indices\u001b[1;34m(X, key)\u001b[0m\n\u001b[0;32m 338\u001b[0m all_columns \u001b[38;5;241m=\u001b[39m X\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[0;32m 339\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mAttributeError\u001b[39;00m:\n\u001b[1;32m--> 340\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 341\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSpecifying the columns using strings is only supported for dataframes.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 342\u001b[0m )\n\u001b[0;32m 343\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(key, \u001b[38;5;28mstr\u001b[39m):\n\u001b[0;32m 344\u001b[0m columns \u001b[38;5;241m=\u001b[39m [key]\n", + "\u001b[1;31mValueError\u001b[0m: Specifying the columns using strings is only supported for dataframes." + ] + } + ], + "source": [ + "import numpy as np\n", + "from sklearn import metrics\n", + "\n", + "for model_name in class_models.keys():\n", + " print(f\"Model: {model_name}\")\n", + " model = class_models[model_name][\"model\"]\n", + "\n", + " model_pipeline = Pipeline([(\"pipeline\", pipeline_end), (\"model\", model)])\n", + " model_pipeline = model_pipeline.fit(X_train, y_train.values.ravel())\n", + "\n", + " y_train_predict = model_pipeline.predict(X_train)\n", + " y_test_probs = model_pipeline.predict_proba(X_test)[:, 1]\n", + " y_test_predict = np.where(y_test_probs > 0.5, 1, 0)\n", + "\n", + " class_models[model_name][\"pipeline\"] = model_pipeline\n", + " class_models[model_name][\"probs\"] = y_test_probs\n", + " class_models[model_name][\"preds\"] = y_test_predict\n", + "\n", + " class_models[model_name][\"Precision_train\"] = metrics.precision_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Precision_test\"] = metrics.precision_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Recall_train\"] = metrics.recall_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Recall_test\"] = metrics.recall_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_train\"] = metrics.accuracy_score(\n", + " y_train, y_train_predict\n", + " )\n", + " class_models[model_name][\"Accuracy_test\"] = metrics.accuracy_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"ROC_AUC_test\"] = metrics.roc_auc_score(\n", + " y_test, y_test_probs\n", + " )\n", + " class_models[model_name][\"F1_train\"] = metrics.f1_score(y_train, y_train_predict)\n", + " class_models[model_name][\"F1_test\"] = metrics.f1_score(y_test, y_test_predict)\n", + " class_models[model_name][\"MCC_test\"] = metrics.matthews_corrcoef(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Cohen_kappa_test\"] = metrics.cohen_kappa_score(\n", + " y_test, y_test_predict\n", + " )\n", + " class_models[model_name][\"Confusion_matrix\"] = metrics.confusion_matrix(\n", + " y_test, y_test_predict\n", + " )" ] } ],