diff --git a/lab_3/laba3.ipynb b/lab_3/laba3.ipynb index ad95c7a..de4b115 100644 --- a/lab_3/laba3.ipynb +++ b/lab_3/laba3.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 85, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -79,7 +79,7 @@ }, { "cell_type": "code", - "execution_count": 86, + "execution_count": 2, "metadata": {}, "outputs": [ { @@ -163,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 87, + "execution_count": 3, "metadata": {}, "outputs": [ { @@ -214,7 +214,7 @@ }, { "cell_type": "code", - "execution_count": 88, + "execution_count": 4, "metadata": {}, "outputs": [ { @@ -258,7 +258,7 @@ }, { "cell_type": "code", - "execution_count": 89, + "execution_count": 5, "metadata": {}, "outputs": [ { @@ -285,7 +285,7 @@ }, { "cell_type": "code", - "execution_count": 90, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -308,7 +308,7 @@ }, { "cell_type": "code", - "execution_count": 91, + "execution_count": 7, "metadata": {}, "outputs": [ { @@ -383,7 +383,7 @@ }, { "cell_type": "code", - "execution_count": 92, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -392,7 +392,7 @@ "(37760, 8091, 8092)" ] }, - "execution_count": 92, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -417,7 +417,7 @@ }, { "cell_type": "code", - "execution_count": 93, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -493,7 +493,7 @@ }, { "cell_type": "code", - "execution_count": 94, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -550,7 +550,7 @@ }, { "cell_type": "code", - "execution_count": 111, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -603,7 +603,7 @@ }, { "cell_type": "code", - "execution_count": 131, + "execution_count": 12, "metadata": {}, "outputs": [ { @@ -656,7 +656,7 @@ }, { "cell_type": "code", - "execution_count": 138, + "execution_count": 13, "metadata": {}, "outputs": [ { @@ -704,7 +704,7 @@ }, { "cell_type": "code", - "execution_count": 139, + 
"execution_count": 14, "metadata": {}, "outputs": [ { @@ -748,7 +748,7 @@ }, { "cell_type": "code", - "execution_count": 140, + "execution_count": 15, "metadata": {}, "outputs": [], "source": [ @@ -774,7 +774,7 @@ }, { "cell_type": "code", - "execution_count": 141, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -831,7 +831,7 @@ }, { "cell_type": "code", - "execution_count": 145, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -906,25 +906,303 @@ "print(feature_matrix.head())" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Оцениваем качество каждого набора
\n", + "В коде есть комментарии указывающие что мы сейчас оцениваем." + ] + }, { "cell_type": "code", - "execution_count": 146, + "execution_count": 18, "metadata": {}, "outputs": [ { - "ename": "ValueError", - "evalue": "\nAll the 5 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n1 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 
курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to float: 'E'\n\n--------------------------------------------------------------------------------\n4 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, 
dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to float: 'G'\n", - "output_type": "error", - "traceback": [ - "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[1;32mIn[146], line 11\u001b[0m\n\u001b[0;32m 7\u001b[0m y \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprice\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m LinearRegression()\n\u001b[1;32m---> 11\u001b[0m scores \u001b[38;5;241m=\u001b[39m \u001b[43mcross_val_score\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mneg_mean_squared_error\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 13\u001b[0m mse_scores \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39mscores\n\u001b[0;32m 14\u001b[0m mean_mse \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mmean(mse_scores)\n", - "File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:213\u001b[0m, in \u001b[0;36mvalidate_params..decorator..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 209\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 210\u001b[0m 
prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 211\u001b[0m )\n\u001b[0;32m 212\u001b[0m ):\n\u001b[1;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidParameterError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;66;03m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[38;5;66;03m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;66;03m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[0;32m 218\u001b[0m \u001b[38;5;66;03m# message to avoid confusion.\u001b[39;00m\n\u001b[0;32m 219\u001b[0m msg \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msub(\n\u001b[0;32m 220\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mw+ must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 221\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 222\u001b[0m \u001b[38;5;28mstr\u001b[39m(e),\n\u001b[0;32m 223\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:712\u001b[0m, in 
\u001b[0;36mcross_val_score\u001b[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, params, pre_dispatch, error_score)\u001b[0m\n\u001b[0;32m 709\u001b[0m \u001b[38;5;66;03m# To ensure multimetric format is not supported\u001b[39;00m\n\u001b[0;32m 710\u001b[0m scorer \u001b[38;5;241m=\u001b[39m check_scoring(estimator, scoring\u001b[38;5;241m=\u001b[39mscoring)\n\u001b[1;32m--> 712\u001b[0m cv_results \u001b[38;5;241m=\u001b[39m \u001b[43mcross_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 713\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 714\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 715\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 716\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 717\u001b[0m \u001b[43m \u001b[49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mscore\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mscorer\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 718\u001b[0m \u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcv\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 719\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_jobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 720\u001b[0m \u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 721\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mfit_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfit_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 722\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 723\u001b[0m \u001b[43m \u001b[49m\u001b[43mpre_dispatch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpre_dispatch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 724\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_score\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_score\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 725\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 726\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cv_results[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_score\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", - "File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:213\u001b[0m, in \u001b[0;36mvalidate_params..decorator..wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 209\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 210\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 211\u001b[0m )\n\u001b[0;32m 212\u001b[0m ):\n\u001b[1;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidParameterError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 
215\u001b[0m \u001b[38;5;66;03m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[38;5;66;03m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;66;03m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[0;32m 218\u001b[0m \u001b[38;5;66;03m# message to avoid confusion.\u001b[39;00m\n\u001b[0;32m 219\u001b[0m msg \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msub(\n\u001b[0;32m 220\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mw+ must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 221\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 222\u001b[0m \u001b[38;5;28mstr\u001b[39m(e),\n\u001b[0;32m 223\u001b[0m )\n", - "File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:443\u001b[0m, in \u001b[0;36mcross_validate\u001b[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, params, pre_dispatch, return_train_score, return_estimator, return_indices, error_score)\u001b[0m\n\u001b[0;32m 422\u001b[0m parallel \u001b[38;5;241m=\u001b[39m Parallel(n_jobs\u001b[38;5;241m=\u001b[39mn_jobs, verbose\u001b[38;5;241m=\u001b[39mverbose, pre_dispatch\u001b[38;5;241m=\u001b[39mpre_dispatch)\n\u001b[0;32m 423\u001b[0m results \u001b[38;5;241m=\u001b[39m parallel(\n\u001b[0;32m 424\u001b[0m delayed(_fit_and_score)(\n\u001b[0;32m 425\u001b[0m clone(estimator),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 
440\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m train, test \u001b[38;5;129;01min\u001b[39;00m indices\n\u001b[0;32m 441\u001b[0m )\n\u001b[1;32m--> 443\u001b[0m \u001b[43m_warn_or_raise_about_fit_failures\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_score\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 445\u001b[0m \u001b[38;5;66;03m# For callable scoring, the return type is only know after calling. If the\u001b[39;00m\n\u001b[0;32m 446\u001b[0m \u001b[38;5;66;03m# return type is a dictionary, the error scores can now be inserted with\u001b[39;00m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;66;03m# the correct key.\u001b[39;00m\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(scoring):\n", - "File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:529\u001b[0m, in \u001b[0;36m_warn_or_raise_about_fit_failures\u001b[1;34m(results, error_score)\u001b[0m\n\u001b[0;32m 522\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_failed_fits \u001b[38;5;241m==\u001b[39m num_fits:\n\u001b[0;32m 523\u001b[0m all_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 524\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mAll the \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIt is very likely that your model is misconfigured.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 526\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can try to debug the error by setting 
error_score=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 527\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 528\u001b[0m )\n\u001b[1;32m--> 529\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(all_fits_failed_message)\n\u001b[0;32m 531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 532\u001b[0m some_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 533\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mnum_failed_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed out of a total of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 534\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe score on these train-test partitions for these parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 538\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 539\u001b[0m )\n", - "\u001b[1;31mValueError\u001b[0m: \nAll the 5 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details 
about the failures:\n--------------------------------------------------------------------------------\n1 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to float: 'E'\n\n--------------------------------------------------------------------------------\n4 fits failed with the following error:\nTraceback (most recent call last):\n File 
\"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to float: 'G'\n" + "name": "stdout", + "output_type": "stream", + "text": [ + "Предсказательная способность (MSE): 769447.425412744\n", + "Скорость обучения: 0.9312052726745605 секунд\n", + "Скорость предсказания: 0.009320497512817383 секунд\n", + "Надежность (стабильность MSE): 9415.05041335384\n", + "Корреляционная матрица признаков:\n", + " index carat depth table x 
y \\\n", + "index 1.000000 0.918976 0.256560 -0.012994 0.881745 0.881997 \n", + "carat 0.918976 1.000000 0.317029 0.011338 0.965007 0.964633 \n", + "depth 0.256560 0.317029 1.000000 -0.523388 0.214227 0.212123 \n", + "table -0.012994 0.011338 -0.523388 1.000000 0.068165 0.061429 \n", + "x 0.881745 0.965007 0.214227 0.068165 1.000000 0.998661 \n", + "y 0.881997 0.964633 0.212123 0.061429 0.998661 1.000000 \n", + "z 0.874093 0.962961 0.422732 -0.067003 0.966991 0.966578 \n", + "carat_binned 0.936744 0.980069 0.299586 -0.006563 0.954800 0.954348 \n", + "cut_Good -0.197076 -0.178975 -0.244465 0.324476 -0.140993 -0.137115 \n", + "cut_Ideal -0.266915 -0.293529 -0.159223 -0.225245 -0.308059 -0.303008 \n", + "cut_Premium -0.106465 -0.150300 -0.286254 0.108195 -0.104058 -0.105738 \n", + "cut_Very Good -0.086479 -0.163226 -0.164405 -0.010754 -0.162451 -0.152861 \n", + "color_E -0.174812 -0.199907 -0.120239 0.132333 -0.204134 -0.202557 \n", + "color_F -0.253824 -0.269185 -0.048395 -0.001821 -0.268046 -0.268242 \n", + "color_G -0.264394 -0.282227 -0.076425 -0.032431 -0.262968 -0.263331 \n", + "color_H 0.260031 0.235182 0.080885 -0.015329 0.229047 0.223693 \n", + "color_I 0.117366 0.102229 -0.113178 0.080852 0.144121 0.146060 \n", + "color_J 0.202485 0.292015 0.215944 -0.024922 0.241304 0.244249 \n", + "clarity_IF -0.181797 -0.220384 -0.112810 -0.049651 -0.255666 -0.252669 \n", + "clarity_SI1 -0.212170 -0.222121 -0.108779 0.010751 -0.204318 -0.201694 \n", + "clarity_SI2 -0.162937 -0.168896 -0.141779 0.073969 -0.108325 -0.104168 \n", + "clarity_VS1 -0.202684 -0.220126 -0.116339 0.007504 -0.220583 -0.218858 \n", + "clarity_VS2 -0.190586 -0.206457 -0.112282 0.012328 -0.194913 -0.192109 \n", + "clarity_VVS1 -0.187562 -0.229651 -0.101446 -0.032463 -0.271730 -0.270343 \n", + "clarity_VVS2 -0.193781 -0.224968 -0.091511 -0.030425 -0.254022 -0.251780 \n", + "depth_binned 0.245417 0.295187 0.870892 -0.566471 0.205631 0.206883 \n", + "table_binned 0.074231 0.114544 -0.377101 0.817051 
0.134186 0.131247 \n", + "price_binned 0.747669 0.815253 0.160678 0.042235 0.811691 0.817555 \n", + "cut_Good -0.197076 -0.178975 -0.244465 0.324476 -0.140993 -0.137115 \n", + "cut_Ideal -0.266915 -0.293529 -0.159223 -0.225245 -0.308059 -0.303008 \n", + "cut_Premium -0.106465 -0.150300 -0.286254 0.108195 -0.104058 -0.105738 \n", + "cut_Very Good -0.086479 -0.163226 -0.164405 -0.010754 -0.162451 -0.152861 \n", + "color_E -0.174812 -0.199907 -0.120239 0.132333 -0.204134 -0.202557 \n", + "color_F -0.253824 -0.269185 -0.048395 -0.001821 -0.268046 -0.268242 \n", + "color_G -0.264394 -0.282227 -0.076425 -0.032431 -0.262968 -0.263331 \n", + "color_H 0.260031 0.235182 0.080885 -0.015329 0.229047 0.223693 \n", + "color_I 0.117366 0.102229 -0.113178 0.080852 0.144121 0.146060 \n", + "color_J 0.202485 0.292015 0.215944 -0.024922 0.241304 0.244249 \n", + "clarity_IF -0.181797 -0.220384 -0.112810 -0.049651 -0.255666 -0.252669 \n", + "clarity_SI1 -0.212170 -0.222121 -0.108779 0.010751 -0.204318 -0.201694 \n", + "clarity_SI2 -0.162937 -0.168896 -0.141779 0.073969 -0.108325 -0.104168 \n", + "clarity_VS1 -0.202684 -0.220126 -0.116339 0.007504 -0.220583 -0.218858 \n", + "clarity_VS2 -0.190586 -0.206457 -0.112282 0.012328 -0.194913 -0.192109 \n", + "clarity_VVS1 -0.187562 -0.229651 -0.101446 -0.032463 -0.271730 -0.270343 \n", + "clarity_VVS2 -0.193781 -0.224968 -0.091511 -0.030425 -0.254022 -0.251780 \n", + "\n", + " z carat_binned cut_Good cut_Ideal ... color_H \\\n", + "index 0.874093 0.936744 -0.197076 -0.266915 ... 0.260031 \n", + "carat 0.962961 0.980069 -0.178975 -0.293529 ... 0.235182 \n", + "depth 0.422732 0.299586 -0.244465 -0.159223 ... 0.080885 \n", + "table -0.067003 -0.006563 0.324476 -0.225245 ... -0.015329 \n", + "x 0.966991 0.954800 -0.140993 -0.308059 ... 0.229047 \n", + "y 0.966578 0.954348 -0.137115 -0.303008 ... 0.223693 \n", + "z 1.000000 0.948784 -0.191932 -0.311613 ... 0.226114 \n", + "carat_binned 0.948784 1.000000 -0.175908 -0.285435 ... 
0.273119 \n", + "cut_Good -0.191932 -0.175908 1.000000 -0.131819 ... -0.135046 \n", + "cut_Ideal -0.311613 -0.285435 -0.131819 1.000000 ... -0.071273 \n", + "cut_Premium -0.161545 -0.138476 -0.180437 -0.141673 ... -0.101136 \n", + "cut_Very Good -0.178681 -0.160568 -0.158199 -0.124212 ... -0.066590 \n", + "color_E -0.210177 -0.199353 0.180753 0.043481 ... -0.192503 \n", + "color_F -0.256080 -0.267207 0.020500 0.079422 ... -0.199995 \n", + "color_G -0.268521 -0.273979 0.029838 0.102871 ... -0.255060 \n", + "color_H 0.226114 0.273119 -0.135046 -0.071273 ... 1.000000 \n", + "color_I 0.102372 0.129941 0.064618 -0.038568 ... -0.309138 \n", + "color_J 0.279789 0.219450 -0.098352 -0.072643 ... -0.264988 \n", + "clarity_IF -0.253221 -0.213292 -0.008462 0.203602 ... -0.054606 \n", + "clarity_SI1 -0.210001 -0.227317 0.081039 0.051568 ... -0.024638 \n", + "clarity_SI2 -0.130004 -0.162642 0.110280 0.019205 ... -0.037742 \n", + "clarity_VS1 -0.225411 -0.224820 0.045321 0.096886 ... -0.065186 \n", + "clarity_VS2 -0.200809 -0.208082 0.030034 0.081414 ... -0.071251 \n", + "clarity_VVS1 -0.266684 -0.221868 0.002891 0.158572 ... -0.052581 \n", + "clarity_VVS2 -0.248962 -0.224522 0.018263 0.129101 ... -0.068533 \n", + "depth_binned 0.393501 0.276906 -0.352815 -0.126332 ... -0.037413 \n", + "table_binned 0.034246 0.076420 0.218950 -0.309130 ... -0.059176 \n", + "price_binned 0.792153 0.798011 -0.109808 -0.163178 ... 0.151799 \n", + "cut_Good -0.191932 -0.175908 1.000000 -0.131819 ... -0.135046 \n", + "cut_Ideal -0.311613 -0.285435 -0.131819 1.000000 ... -0.071273 \n", + "cut_Premium -0.161545 -0.138476 -0.180437 -0.141673 ... -0.101136 \n", + "cut_Very Good -0.178681 -0.160568 -0.158199 -0.124212 ... -0.066590 \n", + "color_E -0.210177 -0.199353 0.180753 0.043481 ... -0.192503 \n", + "color_F -0.256080 -0.267207 0.020500 0.079422 ... -0.199995 \n", + "color_G -0.268521 -0.273979 0.029838 0.102871 ... -0.255060 \n", + "color_H 0.226114 0.273119 -0.135046 -0.071273 ... 
1.000000 \n", + "color_I 0.102372 0.129941 0.064618 -0.038568 ... -0.309138 \n", + "color_J 0.279789 0.219450 -0.098352 -0.072643 ... -0.264988 \n", + "clarity_IF -0.253221 -0.213292 -0.008462 0.203602 ... -0.054606 \n", + "clarity_SI1 -0.210001 -0.227317 0.081039 0.051568 ... -0.024638 \n", + "clarity_SI2 -0.130004 -0.162642 0.110280 0.019205 ... -0.037742 \n", + "clarity_VS1 -0.225411 -0.224820 0.045321 0.096886 ... -0.065186 \n", + "clarity_VS2 -0.200809 -0.208082 0.030034 0.081414 ... -0.071251 \n", + "clarity_VVS1 -0.266684 -0.221868 0.002891 0.158572 ... -0.052581 \n", + "clarity_VVS2 -0.248962 -0.224522 0.018263 0.129101 ... -0.068533 \n", + "\n", + " color_I color_J clarity_IF clarity_SI1 clarity_SI2 \\\n", + "index 0.117366 0.202485 -0.181797 -0.212170 -0.162937 \n", + "carat 0.102229 0.292015 -0.220384 -0.222121 -0.168896 \n", + "depth -0.113178 0.215944 -0.112810 -0.108779 -0.141779 \n", + "table 0.080852 -0.024922 -0.049651 0.010751 0.073969 \n", + "x 0.144121 0.241304 -0.255666 -0.204318 -0.108325 \n", + "y 0.146060 0.244249 -0.252669 -0.201694 -0.104168 \n", + "z 0.102372 0.279789 -0.253221 -0.210001 -0.130004 \n", + "carat_binned 0.129941 0.219450 -0.213292 -0.227317 -0.162642 \n", + "cut_Good 0.064618 -0.098352 -0.008462 0.081039 0.110280 \n", + "cut_Ideal -0.038568 -0.072643 0.203602 0.051568 0.019205 \n", + "cut_Premium 0.105213 -0.084217 0.012019 0.073734 0.076342 \n", + "cut_Very Good 0.062781 -0.077472 0.062992 0.077184 0.071627 \n", + "color_E -0.146245 -0.125359 0.006989 0.048760 0.060037 \n", + "color_F -0.151937 -0.130238 0.067814 0.026771 0.056315 \n", + "color_G -0.193770 -0.166096 0.118431 0.009543 0.012232 \n", + "color_H -0.309138 -0.264988 -0.054606 -0.024638 -0.037742 \n", + "color_I 1.000000 -0.201312 -0.056505 -0.029603 -0.010066 \n", + "color_J -0.201312 1.000000 -0.042570 -0.032981 -0.082478 \n", + "clarity_IF -0.056505 -0.042570 1.000000 -0.049155 -0.065849 \n", + "clarity_SI1 -0.029603 -0.032981 -0.049155 1.000000 -0.099840 
\n", + "clarity_SI2 -0.010066 -0.082478 -0.065849 -0.099840 1.000000 \n", + "clarity_VS1 -0.008767 -0.024548 -0.041445 -0.062839 -0.084180 \n", + "clarity_VS2 -0.001448 0.005467 -0.044484 -0.067447 -0.090353 \n", + "clarity_VVS1 -0.038316 -0.052719 -0.033043 -0.050101 -0.067116 \n", + "clarity_VVS2 -0.051227 -0.036439 -0.034892 -0.052904 -0.070871 \n", + "depth_binned -0.075038 0.318499 -0.079937 -0.107482 -0.137301 \n", + "table_binned 0.017798 0.061547 -0.052251 0.019997 0.046550 \n", + "price_binned 0.080443 0.194458 -0.116621 -0.094520 0.030333 \n", + "cut_Good 0.064618 -0.098352 -0.008462 0.081039 0.110280 \n", + "cut_Ideal -0.038568 -0.072643 0.203602 0.051568 0.019205 \n", + "cut_Premium 0.105213 -0.084217 0.012019 0.073734 0.076342 \n", + "cut_Very Good 0.062781 -0.077472 0.062992 0.077184 0.071627 \n", + "color_E -0.146245 -0.125359 0.006989 0.048760 0.060037 \n", + "color_F -0.151937 -0.130238 0.067814 0.026771 0.056315 \n", + "color_G -0.193770 -0.166096 0.118431 0.009543 0.012232 \n", + "color_H -0.309138 -0.264988 -0.054606 -0.024638 -0.037742 \n", + "color_I 1.000000 -0.201312 -0.056505 -0.029603 -0.010066 \n", + "color_J -0.201312 1.000000 -0.042570 -0.032981 -0.082478 \n", + "clarity_IF -0.056505 -0.042570 1.000000 -0.049155 -0.065849 \n", + "clarity_SI1 -0.029603 -0.032981 -0.049155 1.000000 -0.099840 \n", + "clarity_SI2 -0.010066 -0.082478 -0.065849 -0.099840 1.000000 \n", + "clarity_VS1 -0.008767 -0.024548 -0.041445 -0.062839 -0.084180 \n", + "clarity_VS2 -0.001448 0.005467 -0.044484 -0.067447 -0.090353 \n", + "clarity_VVS1 -0.038316 -0.052719 -0.033043 -0.050101 -0.067116 \n", + "clarity_VVS2 -0.051227 -0.036439 -0.034892 -0.052904 -0.070871 \n", + "\n", + " clarity_VS1 clarity_VS2 clarity_VVS1 clarity_VVS2 \n", + "index -0.202684 -0.190586 -0.187562 -0.193781 \n", + "carat -0.220126 -0.206457 -0.229651 -0.224968 \n", + "depth -0.116339 -0.112282 -0.101446 -0.091511 \n", + "table 0.007504 0.012328 -0.032463 -0.030425 \n", + "x -0.220583 
-0.194913 -0.271730 -0.254022 \n", + "y -0.218858 -0.192109 -0.270343 -0.251780 \n", + "z -0.225411 -0.200809 -0.266684 -0.248962 \n", + "carat_binned -0.224820 -0.208082 -0.221868 -0.224522 \n", + "cut_Good 0.045321 0.030034 0.002891 0.018263 \n", + "cut_Ideal 0.096886 0.081414 0.158572 0.129101 \n", + "cut_Premium 0.058620 0.089749 0.029293 0.012928 \n", + "cut_Very Good 0.059445 0.073481 0.070278 0.087436 \n", + "color_E 0.028625 0.023490 0.045116 0.055769 \n", + "color_F 0.046575 0.032305 0.065653 0.059278 \n", + "color_G 0.064229 0.036578 0.072317 0.078458 \n", + "color_H -0.065186 -0.071251 -0.052581 -0.068533 \n", + "color_I -0.008767 -0.001448 -0.038316 -0.051227 \n", + "color_J -0.024548 0.005467 -0.052719 -0.036439 \n", + "clarity_IF -0.041445 -0.044484 -0.033043 -0.034892 \n", + "clarity_SI1 -0.062839 -0.067447 -0.050101 -0.052904 \n", + "clarity_SI2 -0.084180 -0.090353 -0.067116 -0.070871 \n", + "clarity_VS1 1.000000 -0.056868 -0.042242 -0.044606 \n", + "clarity_VS2 -0.056868 1.000000 -0.045340 -0.047877 \n", + "clarity_VVS1 -0.042242 -0.045340 1.000000 -0.035564 \n", + "clarity_VVS2 -0.044606 -0.047877 -0.035564 1.000000 \n", + "depth_binned -0.094953 -0.096364 -0.082692 -0.080568 \n", + "table_binned 0.006478 0.018598 -0.041154 -0.040427 \n", + "price_binned -0.097141 -0.070453 -0.143954 -0.134063 \n", + "cut_Good 0.045321 0.030034 0.002891 0.018263 \n", + "cut_Ideal 0.096886 0.081414 0.158572 0.129101 \n", + "cut_Premium 0.058620 0.089749 0.029293 0.012928 \n", + "cut_Very Good 0.059445 0.073481 0.070278 0.087436 \n", + "color_E 0.028625 0.023490 0.045116 0.055769 \n", + "color_F 0.046575 0.032305 0.065653 0.059278 \n", + "color_G 0.064229 0.036578 0.072317 0.078458 \n", + "color_H -0.065186 -0.071251 -0.052581 -0.068533 \n", + "color_I -0.008767 -0.001448 -0.038316 -0.051227 \n", + "color_J -0.024548 0.005467 -0.052719 -0.036439 \n", + "clarity_IF -0.041445 -0.044484 -0.033043 -0.034892 \n", + "clarity_SI1 -0.062839 -0.067447 -0.050101 -0.052904 
\n", + "clarity_SI2 -0.084180 -0.090353 -0.067116 -0.070871 \n", + "clarity_VS1 1.000000 -0.056868 -0.042242 -0.044606 \n", + "clarity_VS2 -0.056868 1.000000 -0.045340 -0.047877 \n", + "clarity_VVS1 -0.042242 -0.045340 1.000000 -0.035564 \n", + "clarity_VVS2 -0.044606 -0.047877 -0.035564 1.000000 \n", + "\n", + "[45 rows x 45 columns]\n", + "Пропуски в данных:\n", + " index 0\n", + "carat 0\n", + "color 0\n", + "depth 0\n", + "table 0\n", + "price 0\n", + "x 0\n", + "y 0\n", + "z 0\n", + "clarity 0\n", + "cut 0\n", + "carat_binned 0\n", + "cut_Good 0\n", + "cut_Ideal 0\n", + "cut_Premium 0\n", + "cut_Very Good 0\n", + "color_E 0\n", + "color_F 0\n", + "color_G 0\n", + "color_H 0\n", + "color_I 0\n", + "color_J 0\n", + "clarity_IF 0\n", + "clarity_SI1 0\n", + "clarity_SI2 0\n", + "clarity_VS1 0\n", + "clarity_VS2 0\n", + "clarity_VVS1 0\n", + "clarity_VVS2 0\n", + "depth_binned 0\n", + "table_binned 0\n", + "price_binned 0\n", + "dtype: int64\n", + "Сводка по данным:\n", + " index carat depth table price \\\n", + "count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n", + "mean 35157.000000 2.257318 63.299607 58.124889 9377.150124 \n", + "std 20298.336426 1.249989 3.131217 2.946560 5572.610635 \n", + "min 0.000000 0.200000 44.000000 43.000000 327.000000 \n", + "25% 17578.500000 1.200000 61.400000 56.000000 4916.000000 \n", + "50% 35157.000000 2.040000 63.300000 58.000000 9664.000000 \n", + "75% 52735.500000 3.110000 65.800000 60.000000 13945.000000 \n", + "max 70314.000000 4.500000 79.000000 79.000000 18806.000000 \n", + "\n", + " x y z carat_binned cut_Good \\\n", + "count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n", + "mean 7.939428 7.885353 5.016576 2.000000 0.143753 \n", + "std 1.696162 1.661164 1.150922 1.414224 0.350842 \n", + "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 6.760000 6.740000 4.190000 1.000000 0.000000 \n", + "50% 8.050000 8.010000 5.140000 2.000000 0.000000 \n", + "75% 9.420000 9.340000 
5.970000 3.000000 0.000000 \n", + "max 10.230000 10.160000 6.720000 4.000000 1.000000 \n", + "\n", + " ... clarity_IF clarity_SI1 clarity_SI2 clarity_VS1 \\\n", + "count ... 70315.000000 70315.000000 70315.000000 70315.000000 \n", + "mean ... 0.031402 0.069359 0.117969 0.050316 \n", + "std ... 0.174402 0.254066 0.322574 0.218599 \n", + "min ... 0.000000 0.000000 0.000000 0.000000 \n", + "25% ... 0.000000 0.000000 0.000000 0.000000 \n", + "50% ... 0.000000 0.000000 0.000000 0.000000 \n", + "75% ... 0.000000 0.000000 0.000000 0.000000 \n", + "max ... 1.000000 1.000000 1.000000 1.000000 \n", + "\n", + " clarity_VS2 clarity_VVS1 clarity_VVS2 depth_binned table_binned \\\n", + "count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n", + "mean 0.057527 0.032582 0.036194 2.244059 1.599346 \n", + "std 0.232848 0.177541 0.186775 0.581360 0.537429 \n", + "min 0.000000 0.000000 0.000000 0.000000 0.000000 \n", + "25% 0.000000 0.000000 0.000000 2.000000 1.000000 \n", + "50% 0.000000 0.000000 0.000000 2.000000 2.000000 \n", + "75% 0.000000 0.000000 0.000000 3.000000 2.000000 \n", + "max 1.000000 1.000000 1.000000 4.000000 4.000000 \n", + "\n", + " price_binned \n", + "count 70315.000000 \n", + "mean 1.960620 \n", + "std 1.454469 \n", + "min 0.000000 \n", + "25% 1.000000 \n", + "50% 2.000000 \n", + "75% 3.000000 \n", + "max 4.000000 \n", + "\n", + "[8 rows x 29 columns]\n" ] } ], @@ -934,8 +1212,16 @@ "from sklearn.metrics import mean_squared_error, accuracy_score, f1_score\n", "import time\n", "\n", - "X = data.drop(columns=['price']) # Признаки\n", - "y = data['price'] # Целевая переменная\n", + "categorical_features = ['cut', 'color', 'clarity']\n", + "encoder = OneHotEncoder(sparse_output=False, drop='first')\n", + "encoded_data = pd.DataFrame(encoder.fit_transform(data[categorical_features]))\n", + "encoded_data.columns = encoder.get_feature_names_out(categorical_features)\n", + "\n", + "data_encoded = pd.concat([data.drop(columns=categorical_features), 
encoded_data], axis=1)\n", + "\n", + "X = data_encoded.drop(columns=['price']) # Признаки\n", + "y = data_encoded['price'] # Целевая переменная\n", + "\n", "\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "\n", @@ -972,6 +1258,13 @@ "print(\"Пропуски в данных:\\n\", data.isnull().sum())\n", "print(\"Сводка по данным:\\n\", data.describe())\n" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "В итоге MSE получился равным 769447.43, что можно считать относительно высоким значением. В последующих работах я буду уделять больше времени работе с выборками данных для повышения точности предсказаний." + ] } ], "metadata": {