исправил ошибку

This commit is contained in:
salih 2024-11-30 13:19:01 +04:00
parent c5871e1ba6
commit 8ec15fa9bc

View File

@ -2,7 +2,7 @@
"cells": [ "cells": [
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 85, "execution_count": 1,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -79,7 +79,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 86, "execution_count": 2,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -163,7 +163,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 87, "execution_count": 3,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -214,7 +214,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 88, "execution_count": 4,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -258,7 +258,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 89, "execution_count": 5,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -285,7 +285,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 90, "execution_count": 6,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -308,7 +308,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 91, "execution_count": 7,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -383,7 +383,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 92, "execution_count": 8,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -392,7 +392,7 @@
"(37760, 8091, 8092)" "(37760, 8091, 8092)"
] ]
}, },
"execution_count": 92, "execution_count": 8,
"metadata": {}, "metadata": {},
"output_type": "execute_result" "output_type": "execute_result"
} }
@ -417,7 +417,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 93, "execution_count": 9,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -493,7 +493,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 94, "execution_count": 10,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -550,7 +550,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 111, "execution_count": 11,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -603,7 +603,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 131, "execution_count": 12,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -656,7 +656,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 138, "execution_count": 13,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -704,7 +704,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 139, "execution_count": 14,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -748,7 +748,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 140, "execution_count": 15,
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [ "source": [
@ -774,7 +774,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 141, "execution_count": 16,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -831,7 +831,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 145, "execution_count": 17,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
@ -906,25 +906,303 @@
"print(feature_matrix.head())" "print(feature_matrix.head())"
] ]
}, },
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Оцениваем качество каждого набора<br>\n",
"В коде есть комментарии, указывающие, что мы сейчас оцениваем."
]
},
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 146, "execution_count": 18,
"metadata": {}, "metadata": {},
"outputs": [ "outputs": [
{ {
"ename": "ValueError", "name": "stdout",
"evalue": "\nAll the 5 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n1 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to 
float: 'E'\n\n--------------------------------------------------------------------------------\n4 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to float: 'G'\n", "output_type": "stream",
"output_type": "error", "text": [
"traceback": [ "Предсказательная способность (MSE): 769447.425412744\n",
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", "Скорость обучения: 0.9312052726745605 секунд\n",
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)", "Скорость предсказания: 0.009320497512817383 секунд\n",
"Cell \u001b[1;32mIn[146], line 11\u001b[0m\n\u001b[0;32m 7\u001b[0m y \u001b[38;5;241m=\u001b[39m data[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mprice\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[0;32m 9\u001b[0m model \u001b[38;5;241m=\u001b[39m LinearRegression()\n\u001b[1;32m---> 11\u001b[0m scores \u001b[38;5;241m=\u001b[39m \u001b[43mcross_val_score\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmodel\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m5\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mneg_mean_squared_error\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 13\u001b[0m mse_scores \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m-\u001b[39mscores\n\u001b[0;32m 14\u001b[0m mean_mse \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mmean(mse_scores)\n", "Надежность (стабильность MSE): 9415.05041335384\n",
"File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:213\u001b[0m, in \u001b[0;36mvalidate_params.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 209\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 210\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 211\u001b[0m )\n\u001b[0;32m 212\u001b[0m ):\n\u001b[1;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidParameterError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;66;03m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[38;5;66;03m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;66;03m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[0;32m 218\u001b[0m \u001b[38;5;66;03m# message to avoid confusion.\u001b[39;00m\n\u001b[0;32m 219\u001b[0m msg \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msub(\n\u001b[0;32m 220\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mw+ must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 221\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 222\u001b[0m \u001b[38;5;28mstr\u001b[39m(e),\n\u001b[0;32m 223\u001b[0m )\n", "Корреляционная матрица признаков:\n",
"File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:712\u001b[0m, in \u001b[0;36mcross_val_score\u001b[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, params, pre_dispatch, error_score)\u001b[0m\n\u001b[0;32m 709\u001b[0m \u001b[38;5;66;03m# To ensure multimetric format is not supported\u001b[39;00m\n\u001b[0;32m 710\u001b[0m scorer \u001b[38;5;241m=\u001b[39m check_scoring(estimator, scoring\u001b[38;5;241m=\u001b[39mscoring)\n\u001b[1;32m--> 712\u001b[0m cv_results \u001b[38;5;241m=\u001b[39m \u001b[43mcross_validate\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 713\u001b[0m \u001b[43m \u001b[49m\u001b[43mestimator\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 714\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 715\u001b[0m \u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 716\u001b[0m \u001b[43m \u001b[49m\u001b[43mgroups\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mgroups\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 717\u001b[0m \u001b[43m \u001b[49m\u001b[43mscoring\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m{\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mscore\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mscorer\u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 718\u001b[0m \u001b[43m \u001b[49m\u001b[43mcv\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcv\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 719\u001b[0m \u001b[43m \u001b[49m\u001b[43mn_jobs\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mn_jobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 720\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mverbose\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 721\u001b[0m \u001b[43m \u001b[49m\u001b[43mfit_params\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfit_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 722\u001b[0m \u001b[43m \u001b[49m\u001b[43mparams\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 723\u001b[0m \u001b[43m \u001b[49m\u001b[43mpre_dispatch\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mpre_dispatch\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 724\u001b[0m \u001b[43m \u001b[49m\u001b[43merror_score\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merror_score\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 725\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 726\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cv_results[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtest_score\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n", " index carat depth table x y \\\n",
"File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_param_validation.py:213\u001b[0m, in \u001b[0;36mvalidate_params.<locals>.decorator.<locals>.wrapper\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 207\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 208\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[0;32m 209\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[0;32m 210\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[0;32m 211\u001b[0m )\n\u001b[0;32m 212\u001b[0m ):\n\u001b[1;32m--> 213\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 214\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m InvalidParameterError \u001b[38;5;28;01mas\u001b[39;00m e:\n\u001b[0;32m 215\u001b[0m \u001b[38;5;66;03m# When the function is just a wrapper around an estimator, we allow\u001b[39;00m\n\u001b[0;32m 216\u001b[0m \u001b[38;5;66;03m# the function to delegate validation to the estimator, but we replace\u001b[39;00m\n\u001b[0;32m 217\u001b[0m \u001b[38;5;66;03m# the name of the estimator by the name of the function in the error\u001b[39;00m\n\u001b[0;32m 218\u001b[0m \u001b[38;5;66;03m# message to avoid confusion.\u001b[39;00m\n\u001b[0;32m 219\u001b[0m msg \u001b[38;5;241m=\u001b[39m re\u001b[38;5;241m.\u001b[39msub(\n\u001b[0;32m 220\u001b[0m \u001b[38;5;124mr\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;124m\\\u001b[39m\u001b[38;5;124mw+ must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 221\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mparameter of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfunc\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__qualname__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must be\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 222\u001b[0m \u001b[38;5;28mstr\u001b[39m(e),\n\u001b[0;32m 223\u001b[0m )\n", "index 1.000000 0.918976 0.256560 -0.012994 0.881745 0.881997 \n",
"File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:443\u001b[0m, in \u001b[0;36mcross_validate\u001b[1;34m(estimator, X, y, groups, scoring, cv, n_jobs, verbose, fit_params, params, pre_dispatch, return_train_score, return_estimator, return_indices, error_score)\u001b[0m\n\u001b[0;32m 422\u001b[0m parallel \u001b[38;5;241m=\u001b[39m Parallel(n_jobs\u001b[38;5;241m=\u001b[39mn_jobs, verbose\u001b[38;5;241m=\u001b[39mverbose, pre_dispatch\u001b[38;5;241m=\u001b[39mpre_dispatch)\n\u001b[0;32m 423\u001b[0m results \u001b[38;5;241m=\u001b[39m parallel(\n\u001b[0;32m 424\u001b[0m delayed(_fit_and_score)(\n\u001b[0;32m 425\u001b[0m clone(estimator),\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m train, test \u001b[38;5;129;01min\u001b[39;00m indices\n\u001b[0;32m 441\u001b[0m )\n\u001b[1;32m--> 443\u001b[0m \u001b[43m_warn_or_raise_about_fit_failures\u001b[49m\u001b[43m(\u001b[49m\u001b[43mresults\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43merror_score\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 445\u001b[0m \u001b[38;5;66;03m# For callable scoring, the return type is only know after calling. If the\u001b[39;00m\n\u001b[0;32m 446\u001b[0m \u001b[38;5;66;03m# return type is a dictionary, the error scores can now be inserted with\u001b[39;00m\n\u001b[0;32m 447\u001b[0m \u001b[38;5;66;03m# the correct key.\u001b[39;00m\n\u001b[0;32m 448\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mcallable\u001b[39m(scoring):\n", "carat 0.918976 1.000000 0.317029 0.011338 0.965007 0.964633 \n",
"File \u001b[1;32mc:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py:529\u001b[0m, in \u001b[0;36m_warn_or_raise_about_fit_failures\u001b[1;34m(results, error_score)\u001b[0m\n\u001b[0;32m 522\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m num_failed_fits \u001b[38;5;241m==\u001b[39m num_fits:\n\u001b[0;32m 523\u001b[0m all_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 524\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mAll the \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 525\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mIt is very likely that your model is misconfigured.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 526\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mYou can try to debug the error by setting error_score=\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 527\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 528\u001b[0m )\n\u001b[1;32m--> 529\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(all_fits_failed_message)\n\u001b[0;32m 531\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 532\u001b[0m some_fits_failed_message \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 533\u001b[0m 
\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mnum_failed_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m fits failed out of a total of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mnum_fits\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 534\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe score on these train-test partitions for these parameters\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 538\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mBelow are more details about the failures:\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;132;01m{\u001b[39;00mfit_errors_summary\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 539\u001b[0m )\n", "depth 0.256560 0.317029 1.000000 -0.523388 0.214227 0.212123 \n",
"\u001b[1;31mValueError\u001b[0m: \nAll the 5 fits failed.\nIt is very likely that your model is misconfigured.\nYou can try to debug the error by setting error_score='raise'.\n\nBelow are more details about the failures:\n--------------------------------------------------------------------------------\n1 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could 
not convert string to float: 'E'\n\n--------------------------------------------------------------------------------\n4 fits failed with the following error:\nTraceback (most recent call last):\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\model_selection\\_validation.py\", line 888, in _fit_and_score\n estimator.fit(X_train, y_train, **fit_params)\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\base.py\", line 1473, in wrapper\n return fit_method(estimator, *args, **kwargs)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 629, in fit\n X, y, X_offset, y_offset, X_scale = _preprocess_data(\n ^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\linear_model\\_base.py\", line 175, in _preprocess_data\n X = check_array(\n ^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\validation.py\", line 1012, in check_array\n array = _asarray_with_order(array, order=order, dtype=dtype, xp=xp)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"c:\\Users\\salih\\OneDrive\\Рабочий стол\\3 курас\\МИИ\\laba1\\AIM-PIbd-31-Yaruskin-S-A\\aimenv\\Lib\\site-packages\\sklearn\\utils\\_array_api.py\", line 745, in _asarray_with_order\n array = numpy.asarray(array, order=order, dtype=dtype)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\nValueError: could not convert string to float: 'G'\n" "table -0.012994 0.011338 -0.523388 1.000000 0.068165 0.061429 \n",
"x 0.881745 0.965007 0.214227 0.068165 1.000000 0.998661 \n",
"y 0.881997 0.964633 0.212123 0.061429 0.998661 1.000000 \n",
"z 0.874093 0.962961 0.422732 -0.067003 0.966991 0.966578 \n",
"carat_binned 0.936744 0.980069 0.299586 -0.006563 0.954800 0.954348 \n",
"cut_Good -0.197076 -0.178975 -0.244465 0.324476 -0.140993 -0.137115 \n",
"cut_Ideal -0.266915 -0.293529 -0.159223 -0.225245 -0.308059 -0.303008 \n",
"cut_Premium -0.106465 -0.150300 -0.286254 0.108195 -0.104058 -0.105738 \n",
"cut_Very Good -0.086479 -0.163226 -0.164405 -0.010754 -0.162451 -0.152861 \n",
"color_E -0.174812 -0.199907 -0.120239 0.132333 -0.204134 -0.202557 \n",
"color_F -0.253824 -0.269185 -0.048395 -0.001821 -0.268046 -0.268242 \n",
"color_G -0.264394 -0.282227 -0.076425 -0.032431 -0.262968 -0.263331 \n",
"color_H 0.260031 0.235182 0.080885 -0.015329 0.229047 0.223693 \n",
"color_I 0.117366 0.102229 -0.113178 0.080852 0.144121 0.146060 \n",
"color_J 0.202485 0.292015 0.215944 -0.024922 0.241304 0.244249 \n",
"clarity_IF -0.181797 -0.220384 -0.112810 -0.049651 -0.255666 -0.252669 \n",
"clarity_SI1 -0.212170 -0.222121 -0.108779 0.010751 -0.204318 -0.201694 \n",
"clarity_SI2 -0.162937 -0.168896 -0.141779 0.073969 -0.108325 -0.104168 \n",
"clarity_VS1 -0.202684 -0.220126 -0.116339 0.007504 -0.220583 -0.218858 \n",
"clarity_VS2 -0.190586 -0.206457 -0.112282 0.012328 -0.194913 -0.192109 \n",
"clarity_VVS1 -0.187562 -0.229651 -0.101446 -0.032463 -0.271730 -0.270343 \n",
"clarity_VVS2 -0.193781 -0.224968 -0.091511 -0.030425 -0.254022 -0.251780 \n",
"depth_binned 0.245417 0.295187 0.870892 -0.566471 0.205631 0.206883 \n",
"table_binned 0.074231 0.114544 -0.377101 0.817051 0.134186 0.131247 \n",
"price_binned 0.747669 0.815253 0.160678 0.042235 0.811691 0.817555 \n",
"cut_Good -0.197076 -0.178975 -0.244465 0.324476 -0.140993 -0.137115 \n",
"cut_Ideal -0.266915 -0.293529 -0.159223 -0.225245 -0.308059 -0.303008 \n",
"cut_Premium -0.106465 -0.150300 -0.286254 0.108195 -0.104058 -0.105738 \n",
"cut_Very Good -0.086479 -0.163226 -0.164405 -0.010754 -0.162451 -0.152861 \n",
"color_E -0.174812 -0.199907 -0.120239 0.132333 -0.204134 -0.202557 \n",
"color_F -0.253824 -0.269185 -0.048395 -0.001821 -0.268046 -0.268242 \n",
"color_G -0.264394 -0.282227 -0.076425 -0.032431 -0.262968 -0.263331 \n",
"color_H 0.260031 0.235182 0.080885 -0.015329 0.229047 0.223693 \n",
"color_I 0.117366 0.102229 -0.113178 0.080852 0.144121 0.146060 \n",
"color_J 0.202485 0.292015 0.215944 -0.024922 0.241304 0.244249 \n",
"clarity_IF -0.181797 -0.220384 -0.112810 -0.049651 -0.255666 -0.252669 \n",
"clarity_SI1 -0.212170 -0.222121 -0.108779 0.010751 -0.204318 -0.201694 \n",
"clarity_SI2 -0.162937 -0.168896 -0.141779 0.073969 -0.108325 -0.104168 \n",
"clarity_VS1 -0.202684 -0.220126 -0.116339 0.007504 -0.220583 -0.218858 \n",
"clarity_VS2 -0.190586 -0.206457 -0.112282 0.012328 -0.194913 -0.192109 \n",
"clarity_VVS1 -0.187562 -0.229651 -0.101446 -0.032463 -0.271730 -0.270343 \n",
"clarity_VVS2 -0.193781 -0.224968 -0.091511 -0.030425 -0.254022 -0.251780 \n",
"\n",
" z carat_binned cut_Good cut_Ideal ... color_H \\\n",
"index 0.874093 0.936744 -0.197076 -0.266915 ... 0.260031 \n",
"carat 0.962961 0.980069 -0.178975 -0.293529 ... 0.235182 \n",
"depth 0.422732 0.299586 -0.244465 -0.159223 ... 0.080885 \n",
"table -0.067003 -0.006563 0.324476 -0.225245 ... -0.015329 \n",
"x 0.966991 0.954800 -0.140993 -0.308059 ... 0.229047 \n",
"y 0.966578 0.954348 -0.137115 -0.303008 ... 0.223693 \n",
"z 1.000000 0.948784 -0.191932 -0.311613 ... 0.226114 \n",
"carat_binned 0.948784 1.000000 -0.175908 -0.285435 ... 0.273119 \n",
"cut_Good -0.191932 -0.175908 1.000000 -0.131819 ... -0.135046 \n",
"cut_Ideal -0.311613 -0.285435 -0.131819 1.000000 ... -0.071273 \n",
"cut_Premium -0.161545 -0.138476 -0.180437 -0.141673 ... -0.101136 \n",
"cut_Very Good -0.178681 -0.160568 -0.158199 -0.124212 ... -0.066590 \n",
"color_E -0.210177 -0.199353 0.180753 0.043481 ... -0.192503 \n",
"color_F -0.256080 -0.267207 0.020500 0.079422 ... -0.199995 \n",
"color_G -0.268521 -0.273979 0.029838 0.102871 ... -0.255060 \n",
"color_H 0.226114 0.273119 -0.135046 -0.071273 ... 1.000000 \n",
"color_I 0.102372 0.129941 0.064618 -0.038568 ... -0.309138 \n",
"color_J 0.279789 0.219450 -0.098352 -0.072643 ... -0.264988 \n",
"clarity_IF -0.253221 -0.213292 -0.008462 0.203602 ... -0.054606 \n",
"clarity_SI1 -0.210001 -0.227317 0.081039 0.051568 ... -0.024638 \n",
"clarity_SI2 -0.130004 -0.162642 0.110280 0.019205 ... -0.037742 \n",
"clarity_VS1 -0.225411 -0.224820 0.045321 0.096886 ... -0.065186 \n",
"clarity_VS2 -0.200809 -0.208082 0.030034 0.081414 ... -0.071251 \n",
"clarity_VVS1 -0.266684 -0.221868 0.002891 0.158572 ... -0.052581 \n",
"clarity_VVS2 -0.248962 -0.224522 0.018263 0.129101 ... -0.068533 \n",
"depth_binned 0.393501 0.276906 -0.352815 -0.126332 ... -0.037413 \n",
"table_binned 0.034246 0.076420 0.218950 -0.309130 ... -0.059176 \n",
"price_binned 0.792153 0.798011 -0.109808 -0.163178 ... 0.151799 \n",
"cut_Good -0.191932 -0.175908 1.000000 -0.131819 ... -0.135046 \n",
"cut_Ideal -0.311613 -0.285435 -0.131819 1.000000 ... -0.071273 \n",
"cut_Premium -0.161545 -0.138476 -0.180437 -0.141673 ... -0.101136 \n",
"cut_Very Good -0.178681 -0.160568 -0.158199 -0.124212 ... -0.066590 \n",
"color_E -0.210177 -0.199353 0.180753 0.043481 ... -0.192503 \n",
"color_F -0.256080 -0.267207 0.020500 0.079422 ... -0.199995 \n",
"color_G -0.268521 -0.273979 0.029838 0.102871 ... -0.255060 \n",
"color_H 0.226114 0.273119 -0.135046 -0.071273 ... 1.000000 \n",
"color_I 0.102372 0.129941 0.064618 -0.038568 ... -0.309138 \n",
"color_J 0.279789 0.219450 -0.098352 -0.072643 ... -0.264988 \n",
"clarity_IF -0.253221 -0.213292 -0.008462 0.203602 ... -0.054606 \n",
"clarity_SI1 -0.210001 -0.227317 0.081039 0.051568 ... -0.024638 \n",
"clarity_SI2 -0.130004 -0.162642 0.110280 0.019205 ... -0.037742 \n",
"clarity_VS1 -0.225411 -0.224820 0.045321 0.096886 ... -0.065186 \n",
"clarity_VS2 -0.200809 -0.208082 0.030034 0.081414 ... -0.071251 \n",
"clarity_VVS1 -0.266684 -0.221868 0.002891 0.158572 ... -0.052581 \n",
"clarity_VVS2 -0.248962 -0.224522 0.018263 0.129101 ... -0.068533 \n",
"\n",
" color_I color_J clarity_IF clarity_SI1 clarity_SI2 \\\n",
"index 0.117366 0.202485 -0.181797 -0.212170 -0.162937 \n",
"carat 0.102229 0.292015 -0.220384 -0.222121 -0.168896 \n",
"depth -0.113178 0.215944 -0.112810 -0.108779 -0.141779 \n",
"table 0.080852 -0.024922 -0.049651 0.010751 0.073969 \n",
"x 0.144121 0.241304 -0.255666 -0.204318 -0.108325 \n",
"y 0.146060 0.244249 -0.252669 -0.201694 -0.104168 \n",
"z 0.102372 0.279789 -0.253221 -0.210001 -0.130004 \n",
"carat_binned 0.129941 0.219450 -0.213292 -0.227317 -0.162642 \n",
"cut_Good 0.064618 -0.098352 -0.008462 0.081039 0.110280 \n",
"cut_Ideal -0.038568 -0.072643 0.203602 0.051568 0.019205 \n",
"cut_Premium 0.105213 -0.084217 0.012019 0.073734 0.076342 \n",
"cut_Very Good 0.062781 -0.077472 0.062992 0.077184 0.071627 \n",
"color_E -0.146245 -0.125359 0.006989 0.048760 0.060037 \n",
"color_F -0.151937 -0.130238 0.067814 0.026771 0.056315 \n",
"color_G -0.193770 -0.166096 0.118431 0.009543 0.012232 \n",
"color_H -0.309138 -0.264988 -0.054606 -0.024638 -0.037742 \n",
"color_I 1.000000 -0.201312 -0.056505 -0.029603 -0.010066 \n",
"color_J -0.201312 1.000000 -0.042570 -0.032981 -0.082478 \n",
"clarity_IF -0.056505 -0.042570 1.000000 -0.049155 -0.065849 \n",
"clarity_SI1 -0.029603 -0.032981 -0.049155 1.000000 -0.099840 \n",
"clarity_SI2 -0.010066 -0.082478 -0.065849 -0.099840 1.000000 \n",
"clarity_VS1 -0.008767 -0.024548 -0.041445 -0.062839 -0.084180 \n",
"clarity_VS2 -0.001448 0.005467 -0.044484 -0.067447 -0.090353 \n",
"clarity_VVS1 -0.038316 -0.052719 -0.033043 -0.050101 -0.067116 \n",
"clarity_VVS2 -0.051227 -0.036439 -0.034892 -0.052904 -0.070871 \n",
"depth_binned -0.075038 0.318499 -0.079937 -0.107482 -0.137301 \n",
"table_binned 0.017798 0.061547 -0.052251 0.019997 0.046550 \n",
"price_binned 0.080443 0.194458 -0.116621 -0.094520 0.030333 \n",
"cut_Good 0.064618 -0.098352 -0.008462 0.081039 0.110280 \n",
"cut_Ideal -0.038568 -0.072643 0.203602 0.051568 0.019205 \n",
"cut_Premium 0.105213 -0.084217 0.012019 0.073734 0.076342 \n",
"cut_Very Good 0.062781 -0.077472 0.062992 0.077184 0.071627 \n",
"color_E -0.146245 -0.125359 0.006989 0.048760 0.060037 \n",
"color_F -0.151937 -0.130238 0.067814 0.026771 0.056315 \n",
"color_G -0.193770 -0.166096 0.118431 0.009543 0.012232 \n",
"color_H -0.309138 -0.264988 -0.054606 -0.024638 -0.037742 \n",
"color_I 1.000000 -0.201312 -0.056505 -0.029603 -0.010066 \n",
"color_J -0.201312 1.000000 -0.042570 -0.032981 -0.082478 \n",
"clarity_IF -0.056505 -0.042570 1.000000 -0.049155 -0.065849 \n",
"clarity_SI1 -0.029603 -0.032981 -0.049155 1.000000 -0.099840 \n",
"clarity_SI2 -0.010066 -0.082478 -0.065849 -0.099840 1.000000 \n",
"clarity_VS1 -0.008767 -0.024548 -0.041445 -0.062839 -0.084180 \n",
"clarity_VS2 -0.001448 0.005467 -0.044484 -0.067447 -0.090353 \n",
"clarity_VVS1 -0.038316 -0.052719 -0.033043 -0.050101 -0.067116 \n",
"clarity_VVS2 -0.051227 -0.036439 -0.034892 -0.052904 -0.070871 \n",
"\n",
" clarity_VS1 clarity_VS2 clarity_VVS1 clarity_VVS2 \n",
"index -0.202684 -0.190586 -0.187562 -0.193781 \n",
"carat -0.220126 -0.206457 -0.229651 -0.224968 \n",
"depth -0.116339 -0.112282 -0.101446 -0.091511 \n",
"table 0.007504 0.012328 -0.032463 -0.030425 \n",
"x -0.220583 -0.194913 -0.271730 -0.254022 \n",
"y -0.218858 -0.192109 -0.270343 -0.251780 \n",
"z -0.225411 -0.200809 -0.266684 -0.248962 \n",
"carat_binned -0.224820 -0.208082 -0.221868 -0.224522 \n",
"cut_Good 0.045321 0.030034 0.002891 0.018263 \n",
"cut_Ideal 0.096886 0.081414 0.158572 0.129101 \n",
"cut_Premium 0.058620 0.089749 0.029293 0.012928 \n",
"cut_Very Good 0.059445 0.073481 0.070278 0.087436 \n",
"color_E 0.028625 0.023490 0.045116 0.055769 \n",
"color_F 0.046575 0.032305 0.065653 0.059278 \n",
"color_G 0.064229 0.036578 0.072317 0.078458 \n",
"color_H -0.065186 -0.071251 -0.052581 -0.068533 \n",
"color_I -0.008767 -0.001448 -0.038316 -0.051227 \n",
"color_J -0.024548 0.005467 -0.052719 -0.036439 \n",
"clarity_IF -0.041445 -0.044484 -0.033043 -0.034892 \n",
"clarity_SI1 -0.062839 -0.067447 -0.050101 -0.052904 \n",
"clarity_SI2 -0.084180 -0.090353 -0.067116 -0.070871 \n",
"clarity_VS1 1.000000 -0.056868 -0.042242 -0.044606 \n",
"clarity_VS2 -0.056868 1.000000 -0.045340 -0.047877 \n",
"clarity_VVS1 -0.042242 -0.045340 1.000000 -0.035564 \n",
"clarity_VVS2 -0.044606 -0.047877 -0.035564 1.000000 \n",
"depth_binned -0.094953 -0.096364 -0.082692 -0.080568 \n",
"table_binned 0.006478 0.018598 -0.041154 -0.040427 \n",
"price_binned -0.097141 -0.070453 -0.143954 -0.134063 \n",
"cut_Good 0.045321 0.030034 0.002891 0.018263 \n",
"cut_Ideal 0.096886 0.081414 0.158572 0.129101 \n",
"cut_Premium 0.058620 0.089749 0.029293 0.012928 \n",
"cut_Very Good 0.059445 0.073481 0.070278 0.087436 \n",
"color_E 0.028625 0.023490 0.045116 0.055769 \n",
"color_F 0.046575 0.032305 0.065653 0.059278 \n",
"color_G 0.064229 0.036578 0.072317 0.078458 \n",
"color_H -0.065186 -0.071251 -0.052581 -0.068533 \n",
"color_I -0.008767 -0.001448 -0.038316 -0.051227 \n",
"color_J -0.024548 0.005467 -0.052719 -0.036439 \n",
"clarity_IF -0.041445 -0.044484 -0.033043 -0.034892 \n",
"clarity_SI1 -0.062839 -0.067447 -0.050101 -0.052904 \n",
"clarity_SI2 -0.084180 -0.090353 -0.067116 -0.070871 \n",
"clarity_VS1 1.000000 -0.056868 -0.042242 -0.044606 \n",
"clarity_VS2 -0.056868 1.000000 -0.045340 -0.047877 \n",
"clarity_VVS1 -0.042242 -0.045340 1.000000 -0.035564 \n",
"clarity_VVS2 -0.044606 -0.047877 -0.035564 1.000000 \n",
"\n",
"[45 rows x 45 columns]\n",
"Пропуски в данных:\n",
" index 0\n",
"carat 0\n",
"color 0\n",
"depth 0\n",
"table 0\n",
"price 0\n",
"x 0\n",
"y 0\n",
"z 0\n",
"clarity 0\n",
"cut 0\n",
"carat_binned 0\n",
"cut_Good 0\n",
"cut_Ideal 0\n",
"cut_Premium 0\n",
"cut_Very Good 0\n",
"color_E 0\n",
"color_F 0\n",
"color_G 0\n",
"color_H 0\n",
"color_I 0\n",
"color_J 0\n",
"clarity_IF 0\n",
"clarity_SI1 0\n",
"clarity_SI2 0\n",
"clarity_VS1 0\n",
"clarity_VS2 0\n",
"clarity_VVS1 0\n",
"clarity_VVS2 0\n",
"depth_binned 0\n",
"table_binned 0\n",
"price_binned 0\n",
"dtype: int64\n",
"Сводка по данным:\n",
" index carat depth table price \\\n",
"count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean 35157.000000 2.257318 63.299607 58.124889 9377.150124 \n",
"std 20298.336426 1.249989 3.131217 2.946560 5572.610635 \n",
"min 0.000000 0.200000 44.000000 43.000000 327.000000 \n",
"25% 17578.500000 1.200000 61.400000 56.000000 4916.000000 \n",
"50% 35157.000000 2.040000 63.300000 58.000000 9664.000000 \n",
"75% 52735.500000 3.110000 65.800000 60.000000 13945.000000 \n",
"max 70314.000000 4.500000 79.000000 79.000000 18806.000000 \n",
"\n",
" x y z carat_binned cut_Good \\\n",
"count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean 7.939428 7.885353 5.016576 2.000000 0.143753 \n",
"std 1.696162 1.661164 1.150922 1.414224 0.350842 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 6.760000 6.740000 4.190000 1.000000 0.000000 \n",
"50% 8.050000 8.010000 5.140000 2.000000 0.000000 \n",
"75% 9.420000 9.340000 5.970000 3.000000 0.000000 \n",
"max 10.230000 10.160000 6.720000 4.000000 1.000000 \n",
"\n",
" ... clarity_IF clarity_SI1 clarity_SI2 clarity_VS1 \\\n",
"count ... 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean ... 0.031402 0.069359 0.117969 0.050316 \n",
"std ... 0.174402 0.254066 0.322574 0.218599 \n",
"min ... 0.000000 0.000000 0.000000 0.000000 \n",
"25% ... 0.000000 0.000000 0.000000 0.000000 \n",
"50% ... 0.000000 0.000000 0.000000 0.000000 \n",
"75% ... 0.000000 0.000000 0.000000 0.000000 \n",
"max ... 1.000000 1.000000 1.000000 1.000000 \n",
"\n",
" clarity_VS2 clarity_VVS1 clarity_VVS2 depth_binned table_binned \\\n",
"count 70315.000000 70315.000000 70315.000000 70315.000000 70315.000000 \n",
"mean 0.057527 0.032582 0.036194 2.244059 1.599346 \n",
"std 0.232848 0.177541 0.186775 0.581360 0.537429 \n",
"min 0.000000 0.000000 0.000000 0.000000 0.000000 \n",
"25% 0.000000 0.000000 0.000000 2.000000 1.000000 \n",
"50% 0.000000 0.000000 0.000000 2.000000 2.000000 \n",
"75% 0.000000 0.000000 0.000000 3.000000 2.000000 \n",
"max 1.000000 1.000000 1.000000 4.000000 4.000000 \n",
"\n",
" price_binned \n",
"count 70315.000000 \n",
"mean 1.960620 \n",
"std 1.454469 \n",
"min 0.000000 \n",
"25% 1.000000 \n",
"50% 2.000000 \n",
"75% 3.000000 \n",
"max 4.000000 \n",
"\n",
"[8 rows x 29 columns]\n"
] ]
} }
], ],
@ -934,8 +1212,16 @@
"from sklearn.metrics import mean_squared_error, accuracy_score, f1_score\n", "from sklearn.metrics import mean_squared_error, accuracy_score, f1_score\n",
"import time\n", "import time\n",
"\n", "\n",
"X = data.drop(columns=['price']) # Признаки\n", "categorical_features = ['cut', 'color', 'clarity']\n",
"y = data['price'] # Целевая переменная\n", "encoder = OneHotEncoder(sparse_output=False, drop='first')\n",
"encoded_data = pd.DataFrame(encoder.fit_transform(data[categorical_features]))\n",
"encoded_data.columns = encoder.get_feature_names_out(categorical_features)\n",
"\n",
"data_encoded = pd.concat([data.drop(columns=categorical_features), encoded_data], axis=1)\n",
"\n",
"X = data_encoded.drop(columns=['price']) # Признаки\n",
"y = data_encoded['price'] # Целевая переменная\n",
"\n",
"\n", "\n",
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n", "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
"\n", "\n",
@ -972,6 +1258,13 @@
"print(\"Пропуски в данных:\\n\", data.isnull().sum())\n", "print(\"Пропуски в данных:\\n\", data.isnull().sum())\n",
"print(\"Сводка по данным:\\n\", data.describe())\n" "print(\"Сводка по данным:\\n\", data.describe())\n"
] ]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"По итогу MSE у меня равен 769447.43, что можно считать относительно высоким. В последующих работах я буду лучше больше уделять времени на выборки данных, для повышения точности предсказаний."
]
} }
], ],
"metadata": { "metadata": {