ниче не работает Т.Т

This commit is contained in:
Максим Яковлев 2024-11-15 16:44:46 +04:00
parent da729ef74e
commit c5dfaec3e6

View File

@ -2112,22 +2112,345 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 191,
"metadata": {},
"outputs": [
{
"ename": "ValueError",
"evalue": "Shape of passed values is (8000, 21), indices imply (8000, 19)",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[184], line 123\u001b[0m\n\u001b[0;32m 121\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mDataFrame(preprocessing_result, columns\u001b[38;5;241m=\u001b[39mnum_columns \u001b[38;5;241m+\u001b[39m cat_columns \u001b[38;5;241m+\u001b[39m cols)\n\u001b[0;32m 122\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m features_engineering\u001b[38;5;241m.\u001b[39mfit_transform(preprocessing_result)\n\u001b[1;32m--> 123\u001b[0m preprocessing_result \u001b[38;5;241m=\u001b[39m \u001b[43mpd\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mDataFrame\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpreprocessing_result\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mnum_columns\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mcat_columns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 125\u001b[0m \u001b[38;5;66;03m# preprocessing_result = features_postprocessing.fit_transform(preprocessing_result)\u001b[39;00m\n\u001b[0;32m 126\u001b[0m \n\u001b[0;32m 127\u001b[0m \u001b[38;5;66;03m# preprocessing_result = pipeline_end.fit_transform(X_train)\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 131\u001b[0m \u001b[38;5;66;03m# )\u001b[39;00m\n\u001b[0;32m 132\u001b[0m \u001b[38;5;66;03m# preprocessed_df\u001b[39;00m\n",
"File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\pandas\\core\\frame.py:827\u001b[0m, in \u001b[0;36mDataFrame.__init__\u001b[1;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[0;32m 816\u001b[0m mgr \u001b[38;5;241m=\u001b[39m dict_to_mgr(\n\u001b[0;32m 817\u001b[0m \u001b[38;5;66;03m# error: Item \"ndarray\" of \"Union[ndarray, Series, Index]\" has no\u001b[39;00m\n\u001b[0;32m 818\u001b[0m \u001b[38;5;66;03m# attribute \"name\"\u001b[39;00m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 824\u001b[0m copy\u001b[38;5;241m=\u001b[39m_copy,\n\u001b[0;32m 825\u001b[0m )\n\u001b[0;32m 826\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 827\u001b[0m mgr \u001b[38;5;241m=\u001b[39m \u001b[43mndarray_to_mgr\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 828\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 829\u001b[0m \u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 830\u001b[0m \u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 831\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdtype\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 832\u001b[0m \u001b[43m \u001b[49m\u001b[43mcopy\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcopy\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 833\u001b[0m \u001b[43m \u001b[49m\u001b[43mtyp\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmanager\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 834\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 836\u001b[0m \u001b[38;5;66;03m# For data is list-like, or Iterable (will consume into list)\u001b[39;00m\n\u001b[0;32m 837\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m is_list_like(data):\n",
"File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:336\u001b[0m, in \u001b[0;36mndarray_to_mgr\u001b[1;34m(values, index, columns, dtype, copy, typ)\u001b[0m\n\u001b[0;32m 331\u001b[0m \u001b[38;5;66;03m# _prep_ndarraylike ensures that values.ndim == 2 at this point\u001b[39;00m\n\u001b[0;32m 332\u001b[0m index, columns \u001b[38;5;241m=\u001b[39m _get_axes(\n\u001b[0;32m 333\u001b[0m values\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m0\u001b[39m], values\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m], index\u001b[38;5;241m=\u001b[39mindex, columns\u001b[38;5;241m=\u001b[39mcolumns\n\u001b[0;32m 334\u001b[0m )\n\u001b[1;32m--> 336\u001b[0m \u001b[43m_check_values_indices_shape_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalues\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mindex\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcolumns\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 338\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m typ \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124marray\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m 339\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28missubclass\u001b[39m(values\u001b[38;5;241m.\u001b[39mdtype\u001b[38;5;241m.\u001b[39mtype, \u001b[38;5;28mstr\u001b[39m):\n",
"File \u001b[1;32md:\\Study\\3 курс 5 семестр\\AIM\\AIM-PIbd-31-Yakovlev-M-G\\kernel\\Lib\\site-packages\\pandas\\core\\internals\\construction.py:420\u001b[0m, in \u001b[0;36m_check_values_indices_shape_match\u001b[1;34m(values, index, columns)\u001b[0m\n\u001b[0;32m 418\u001b[0m passed \u001b[38;5;241m=\u001b[39m values\u001b[38;5;241m.\u001b[39mshape\n\u001b[0;32m 419\u001b[0m implied \u001b[38;5;241m=\u001b[39m (\u001b[38;5;28mlen\u001b[39m(index), \u001b[38;5;28mlen\u001b[39m(columns))\n\u001b[1;32m--> 420\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mShape of passed values is \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mpassed\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m, indices imply \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mimplied\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n",
"\u001b[1;31mValueError\u001b[0m: Shape of passed values is (8000, 21), indices imply (8000, 19)"
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>id</th>\n",
" <th>price</th>\n",
" <th>bedrooms</th>\n",
" <th>bathrooms</th>\n",
" <th>sqft_living</th>\n",
" <th>sqft_lot</th>\n",
" <th>floors</th>\n",
" <th>condition</th>\n",
" <th>grade</th>\n",
" <th>sqft_above</th>\n",
" <th>sqft_basement</th>\n",
" <th>yr_built</th>\n",
" <th>yr_renovated</th>\n",
" <th>zipcode</th>\n",
" <th>lat</th>\n",
" <th>long</th>\n",
" <th>sqft_living15</th>\n",
" <th>sqft_lot15</th>\n",
" <th>price_category</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>3</td>\n",
" <td>3260000340</td>\n",
" <td>732600.0</td>\n",
" <td>4</td>\n",
" <td>2.5</td>\n",
" <td>2130</td>\n",
" <td>7300</td>\n",
" <td>1.0</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>1230</td>\n",
" <td>900</td>\n",
" <td>1963</td>\n",
" <td>0</td>\n",
" <td>98005</td>\n",
" <td>47.605</td>\n",
" <td>-122.167</td>\n",
" <td>2130</td>\n",
" <td>7560</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>9828702055</td>\n",
" <td>358000.0</td>\n",
" <td>2</td>\n",
" <td>1.5</td>\n",
" <td>960</td>\n",
" <td>1808</td>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>960</td>\n",
" <td>0</td>\n",
" <td>1993</td>\n",
" <td>0</td>\n",
" <td>98122</td>\n",
" <td>47.6183</td>\n",
" <td>-122.298</td>\n",
" <td>1290</td>\n",
" <td>1668</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>1</td>\n",
" <td>3438500625</td>\n",
" <td>210000.0</td>\n",
" <td>3</td>\n",
" <td>1.0</td>\n",
" <td>1080</td>\n",
" <td>21043</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>6</td>\n",
" <td>1080</td>\n",
" <td>0</td>\n",
" <td>1942</td>\n",
" <td>0</td>\n",
" <td>98106</td>\n",
" <td>47.5515</td>\n",
" <td>-122.357</td>\n",
" <td>1380</td>\n",
" <td>7620</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>2</td>\n",
" <td>2422029094</td>\n",
" <td>517534.0</td>\n",
" <td>2</td>\n",
" <td>1.0</td>\n",
" <td>833</td>\n",
" <td>143947</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>5</td>\n",
" <td>833</td>\n",
" <td>0</td>\n",
" <td>2006</td>\n",
" <td>0</td>\n",
" <td>98070</td>\n",
" <td>47.3889</td>\n",
" <td>-122.482</td>\n",
" <td>1380</td>\n",
" <td>143947</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>2</td>\n",
" <td>7462900015</td>\n",
" <td>387000.0</td>\n",
" <td>3</td>\n",
" <td>2.25</td>\n",
" <td>1760</td>\n",
" <td>45133</td>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" <td>7</td>\n",
" <td>1760</td>\n",
" <td>0</td>\n",
" <td>1984</td>\n",
" <td>0</td>\n",
" <td>98065</td>\n",
" <td>47.5124</td>\n",
" <td>-121.866</td>\n",
" <td>1910</td>\n",
" <td>51773</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7995</th>\n",
" <td>2</td>\n",
" <td>2787720140</td>\n",
" <td>416000.0</td>\n",
" <td>3</td>\n",
" <td>2.5</td>\n",
" <td>1790</td>\n",
" <td>11542</td>\n",
" <td>1.0</td>\n",
" <td>5</td>\n",
" <td>7</td>\n",
" <td>1190</td>\n",
" <td>600</td>\n",
" <td>1969</td>\n",
" <td>0</td>\n",
" <td>98059</td>\n",
" <td>47.5124</td>\n",
" <td>-122.16</td>\n",
" <td>1790</td>\n",
" <td>9131</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7996</th>\n",
" <td>3</td>\n",
" <td>6192400400</td>\n",
" <td>775000.0</td>\n",
" <td>4</td>\n",
" <td>2.5</td>\n",
" <td>3090</td>\n",
" <td>7112</td>\n",
" <td>2.0</td>\n",
" <td>3</td>\n",
" <td>9</td>\n",
" <td>3090</td>\n",
" <td>0</td>\n",
" <td>2001</td>\n",
" <td>0</td>\n",
" <td>98052</td>\n",
" <td>47.705</td>\n",
" <td>-122.118</td>\n",
" <td>3050</td>\n",
" <td>6000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7997</th>\n",
" <td>2</td>\n",
" <td>2296500036</td>\n",
" <td>450000.0</td>\n",
" <td>4</td>\n",
" <td>2.75</td>\n",
" <td>2980</td>\n",
" <td>13260</td>\n",
" <td>1.0</td>\n",
" <td>4</td>\n",
" <td>8</td>\n",
" <td>1800</td>\n",
" <td>1180</td>\n",
" <td>1979</td>\n",
" <td>0</td>\n",
" <td>98056</td>\n",
" <td>47.5152</td>\n",
" <td>-122.197</td>\n",
" <td>1920</td>\n",
" <td>10731</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7998</th>\n",
" <td>1</td>\n",
" <td>2787310130</td>\n",
" <td>289950.0</td>\n",
" <td>4</td>\n",
" <td>1.75</td>\n",
" <td>2090</td>\n",
" <td>7416</td>\n",
" <td>1.0</td>\n",
" <td>4</td>\n",
" <td>7</td>\n",
" <td>1050</td>\n",
" <td>1040</td>\n",
" <td>1970</td>\n",
" <td>0</td>\n",
" <td>98031</td>\n",
" <td>47.4107</td>\n",
" <td>-122.179</td>\n",
" <td>1710</td>\n",
" <td>7527</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7999</th>\n",
" <td>2</td>\n",
" <td>8567300110</td>\n",
" <td>485000.0</td>\n",
" <td>3</td>\n",
" <td>2.5</td>\n",
" <td>2340</td>\n",
" <td>59058</td>\n",
" <td>1.0</td>\n",
" <td>3</td>\n",
" <td>8</td>\n",
" <td>2340</td>\n",
" <td>0</td>\n",
" <td>1985</td>\n",
" <td>0</td>\n",
" <td>98038</td>\n",
" <td>47.4052</td>\n",
" <td>-122.028</td>\n",
" <td>2700</td>\n",
" <td>37263</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>8000 rows × 19 columns</p>\n",
"</div>"
],
"text/plain": [
" id price bedrooms bathrooms sqft_living sqft_lot floors \\\n",
"0 3 3260000340 732600.0 4 2.5 2130 7300 \n",
"1 2 9828702055 358000.0 2 1.5 960 1808 \n",
"2 1 3438500625 210000.0 3 1.0 1080 21043 \n",
"3 2 2422029094 517534.0 2 1.0 833 143947 \n",
"4 2 7462900015 387000.0 3 2.25 1760 45133 \n",
"... .. ... ... ... ... ... ... \n",
"7995 2 2787720140 416000.0 3 2.5 1790 11542 \n",
"7996 3 6192400400 775000.0 4 2.5 3090 7112 \n",
"7997 2 2296500036 450000.0 4 2.75 2980 13260 \n",
"7998 1 2787310130 289950.0 4 1.75 2090 7416 \n",
"7999 2 8567300110 485000.0 3 2.5 2340 59058 \n",
"\n",
" condition grade sqft_above sqft_basement yr_built yr_renovated zipcode \\\n",
"0 1.0 4 7 1230 900 1963 0 \n",
"1 2.0 3 7 960 0 1993 0 \n",
"2 1.0 3 6 1080 0 1942 0 \n",
"3 1.0 3 5 833 0 2006 0 \n",
"4 2.0 3 7 1760 0 1984 0 \n",
"... ... ... ... ... ... ... ... \n",
"7995 1.0 5 7 1190 600 1969 0 \n",
"7996 2.0 3 9 3090 0 2001 0 \n",
"7997 1.0 4 8 1800 1180 1979 0 \n",
"7998 1.0 4 7 1050 1040 1970 0 \n",
"7999 1.0 3 8 2340 0 1985 0 \n",
"\n",
" lat long sqft_living15 sqft_lot15 price_category \n",
"0 98005 47.605 -122.167 2130 7560 \n",
"1 98122 47.6183 -122.298 1290 1668 \n",
"2 98106 47.5515 -122.357 1380 7620 \n",
"3 98070 47.3889 -122.482 1380 143947 \n",
"4 98065 47.5124 -121.866 1910 51773 \n",
"... ... ... ... ... ... \n",
"7995 98059 47.5124 -122.16 1790 9131 \n",
"7996 98052 47.705 -122.118 3050 6000 \n",
"7997 98056 47.5152 -122.197 1920 10731 \n",
"7998 98031 47.4107 -122.179 1710 7527 \n",
"7999 98038 47.4052 -122.028 2700 37263 \n",
"\n",
"[8000 rows x 19 columns]"
]
},
"execution_count": 191,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
@ -2209,17 +2532,18 @@
" transformers=[\n",
" (\"prepocessing_num\", preprocessing_num, num_columns),\n",
" (\"prepocessing_cat\", preprocessing_cat, cat_columns),\n",
" # (\"prepocessing_features\", cat_imputer, [\"price_category\"]),\n",
" ],\n",
" remainder=\"passthrough\"\n",
")\n",
"\n",
"# features_engineering = ColumnTransformer(\n",
"# verbose_feature_names_out=False,\n",
"# transformers=[\n",
"# (\"add_features\", HousesFeatures(), [\"price_category\"]),\n",
"# ],\n",
"# remainder=\"passthrough\",\n",
"# )\n",
"features_engineering = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
" transformers=[\n",
" (\"add_features\", HousesFeatures(), [\"price_category\"]),\n",
" ],\n",
" remainder=\"passthrough\",\n",
")\n",
"\n",
"drop_columns = ColumnTransformer(\n",
" verbose_feature_names_out=False,\n",
@ -2249,10 +2573,11 @@
"cols = ['a', 'b']\n",
"preprocessing_result = drop_columns.fit_transform(X_train)\n",
"preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns)\n",
"preprocessing_result = features_preprocessing.fit_transform(preprocessing_result)\n",
"preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns + cols)\n",
"preprocessing_result = features_engineering.fit_transform(preprocessing_result)\n",
"preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns)\n",
"preprocessing_result\n",
"# # preprocessing_result = features_preprocessing.fit_transform(preprocessing_result)\n",
"# # preprocessing_result = pd.DataFrame(preprocessing_result, columns=num_columns + cat_columns)\n",
"\n",
"# preprocessing_result = features_postprocessing.fit_transform(preprocessing_result)\n",
"\n",