diff --git a/lab_4/lab_4.ipynb b/lab_4/lab_4.ipynb index e7d76c9..6838ed8 100644 --- a/lab_4/lab_4.ipynb +++ b/lab_4/lab_4.ipynb @@ -2112,22 +2112,345 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 191, "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": "Shape of passed values is (8000, 21), indices imply (8000, 19)", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[184], line 123\u001b[0m\n\u001b[0;32m 121\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(preprocessing_result, columns\u001b[38;5;241m=\u001b[39mnum_columns \u001b[38;5;241m+\u001b[39m cat_columns \u001b[38;5;241m+\u001b[39m cols)\n\u001b[0;32m 122\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m features_engineering\u001b[38;5;241m.\u001b[39mfit_transform(preprocessing_result)\n\u001b[1;32m--> 123\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpreprocessing_result\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_columns\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mcat_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 125\u001b[0m \u001b[38;5;66;03m# preprocessing_result = features_postprocessing.fit_transform(preprocessing_result)\u001b[39;00m\n\u001b[0;32m 126\u001b[0m \n\u001b[0;32m 127\u001b[0m \u001b[38;5;66;03m# preprocessing_result = pipeline_end.fit_transform(X_train)\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[0;32m 132\u001b[0m \u001b[38;5;66;03m# preprocessed_df\u001b[39;00m\n", - "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\pandas\\core\\frame.py:827\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[1;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[0;32m 816\u001b[0m mgr \u001b[38;5;241m=\u001b[39m dict_to_mgr(\n\u001b[0;32m 817\u001b[0m \u001b[38;5;66;03m# error: Item \"ndarray\" of \"Union[ndarray, Series, Index]\" has no\u001b[39;00m\n\u001b[0;32m 818\u001b[0m \u001b[38;5;66;03m# attribute \"name\"\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 824\u001b[0m copy\u001b[38;5;241m=\u001b[39m_copy,\n\u001b[0;32m 825\u001b[0m )\n\u001b[0;32m 826\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 827\u001b[0m mgr \u001b[38;5;241m=\u001b[39m \u001b[43mndarray_to_mgr\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 828\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 829\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 830\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 831\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 832\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 833\u001b[0m \u001b[43m \u001b[49m\u001b[43mtyp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmanager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 834\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 836\u001b[0m \u001b[38;5;66;03m# For data is list-like, or Iterable (will consume into list)\u001b[39;00m\n\u001b[0;32m 837\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_list_like(data):\n", - "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:336\u001b[0m, in \u001b[0;36mndarray_to_mgr\u001b[1;34m(values, index, columns, dtype, copy, typ)\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[38;5;66;03m# _prep_ndarraylike ensures that values.ndim == 2 at this point\u001b[39;00m\n\u001b[0;32m 332\u001b[0m index, columns \u001b[38;5;241m=\u001b[39m _get_axes(\n\u001b[0;32m 333\u001b[0m values\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], values\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m], index\u001b[38;5;241m=\u001b[39mindex, columns\u001b[38;5;241m=\u001b[39mcolumns\n\u001b[0;32m 334\u001b[0m )\n\u001b[1;32m--> 336\u001b[0m \u001b[43m_check_values_indices_shape_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 338\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m typ \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marray\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m 339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n", - "File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:420\u001b[0m, in \u001b[0;36m_check_values_indices_shape_match\u001b[1;34m(values, index, columns)\u001b[0m\n\u001b[0;32m 418\u001b[0m passed \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mshape\n\u001b[0;32m 419\u001b[0m implied \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mlen\u001b[39m(index), \u001b[38;5;28mlen\u001b[39m(columns))\n\u001b[1;32m--> 420\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mShape of passed values is \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpassed\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, indices imply \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mimplied\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n", - "\u001b[1;31mValueError\u001b[0m: Shape of passed values is (8000, 21), indices imply (8000, 19)" - ] + "data": { + "text/html": [ + "
\n", + " | id | \n", + "price | \n", + "bedrooms | \n", + "bathrooms | \n", + "sqft_living | \n", + "sqft_lot | \n", + "floors | \n", + "condition | \n", + "grade | \n", + "sqft_above | \n", + "sqft_basement | \n", + "yr_built | \n", + "yr_renovated | \n", + "zipcode | \n", + "lat | \n", + "long | \n", + "sqft_living15 | \n", + "sqft_lot15 | \n", + "price_category | \n", + "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", + "3 | \n", + "3260000340 | \n", + "732600.0 | \n", + "4 | \n", + "2.5 | \n", + "2130 | \n", + "7300 | \n", + "1.0 | \n", + "4 | \n", + "7 | \n", + "1230 | \n", + "900 | \n", + "1963 | \n", + "0 | \n", + "98005 | \n", + "47.605 | \n", + "-122.167 | \n", + "2130 | \n", + "7560 | \n", + "
1 | \n", + "2 | \n", + "9828702055 | \n", + "358000.0 | \n", + "2 | \n", + "1.5 | \n", + "960 | \n", + "1808 | \n", + "2.0 | \n", + "3 | \n", + "7 | \n", + "960 | \n", + "0 | \n", + "1993 | \n", + "0 | \n", + "98122 | \n", + "47.6183 | \n", + "-122.298 | \n", + "1290 | \n", + "1668 | \n", + "
2 | \n", + "1 | \n", + "3438500625 | \n", + "210000.0 | \n", + "3 | \n", + "1.0 | \n", + "1080 | \n", + "21043 | \n", + "1.0 | \n", + "3 | \n", + "6 | \n", + "1080 | \n", + "0 | \n", + "1942 | \n", + "0 | \n", + "98106 | \n", + "47.5515 | \n", + "-122.357 | \n", + "1380 | \n", + "7620 | \n", + "
3 | \n", + "2 | \n", + "2422029094 | \n", + "517534.0 | \n", + "2 | \n", + "1.0 | \n", + "833 | \n", + "143947 | \n", + "1.0 | \n", + "3 | \n", + "5 | \n", + "833 | \n", + "0 | \n", + "2006 | \n", + "0 | \n", + "98070 | \n", + "47.3889 | \n", + "-122.482 | \n", + "1380 | \n", + "143947 | \n", + "
4 | \n", + "2 | \n", + "7462900015 | \n", + "387000.0 | \n", + "3 | \n", + "2.25 | \n", + "1760 | \n", + "45133 | \n", + "2.0 | \n", + "3 | \n", + "7 | \n", + "1760 | \n", + "0 | \n", + "1984 | \n", + "0 | \n", + "98065 | \n", + "47.5124 | \n", + "-121.866 | \n", + "1910 | \n", + "51773 | \n", + "
... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
7995 | \n", + "2 | \n", + "2787720140 | \n", + "416000.0 | \n", + "3 | \n", + "2.5 | \n", + "1790 | \n", + "11542 | \n", + "1.0 | \n", + "5 | \n", + "7 | \n", + "1190 | \n", + "600 | \n", + "1969 | \n", + "0 | \n", + "98059 | \n", + "47.5124 | \n", + "-122.16 | \n", + "1790 | \n", + "9131 | \n", + "
7996 | \n", + "3 | \n", + "6192400400 | \n", + "775000.0 | \n", + "4 | \n", + "2.5 | \n", + "3090 | \n", + "7112 | \n", + "2.0 | \n", + "3 | \n", + "9 | \n", + "3090 | \n", + "0 | \n", + "2001 | \n", + "0 | \n", + "98052 | \n", + "47.705 | \n", + "-122.118 | \n", + "3050 | \n", + "6000 | \n", + "
7997 | \n", + "2 | \n", + "2296500036 | \n", + "450000.0 | \n", + "4 | \n", + "2.75 | \n", + "2980 | \n", + "13260 | \n", + "1.0 | \n", + "4 | \n", + "8 | \n", + "1800 | \n", + "1180 | \n", + "1979 | \n", + "0 | \n", + "98056 | \n", + "47.5152 | \n", + "-122.197 | \n", + "1920 | \n", + "10731 | \n", + "
7998 | \n", + "1 | \n", + "2787310130 | \n", + "289950.0 | \n", + "4 | \n", + "1.75 | \n", + "2090 | \n", + "7416 | \n", + "1.0 | \n", + "4 | \n", + "7 | \n", + "1050 | \n", + "1040 | \n", + "1970 | \n", + "0 | \n", + "98031 | \n", + "47.4107 | \n", + "-122.179 | \n", + "1710 | \n", + "7527 | \n", + "
7999 | \n", + "2 | \n", + "8567300110 | \n", + "485000.0 | \n", + "3 | \n", + "2.5 | \n", + "2340 | \n", + "59058 | \n", + "1.0 | \n", + "3 | \n", + "8 | \n", + "2340 | \n", + "0 | \n", + "1985 | \n", + "0 | \n", + "98038 | \n", + "47.4052 | \n", + "-122.028 | \n", + "2700 | \n", + "37263 | \n", + "
8000 rows × 19 columns
\n", + "