From 3e2ba790626da01c18150b5ab504a7f6ab7abd24 Mon Sep 17 00:00:00 2001 From: Allllen4a Date: Fri, 1 Nov 2024 17:49:40 +0400 Subject: [PATCH] =?UTF-8?q?=D0=9F=D0=BE=D0=BF=D1=8B=D1=82=D0=B0=D0=BB?= =?UTF-8?q?=D0=B0=D1=81=D1=8C=20=D0=B4=D0=BE=D0=B1=D0=B0=D0=B2=D0=B8=D1=82?= =?UTF-8?q?=D1=8C=20=D0=BA=D0=BE=D0=BD=D0=B2=D0=B5=D0=B9=D0=B5=D1=80=20?= =?UTF-8?q?=D0=B4=D0=BB=D1=8F=20=D1=80=D0=B5=D0=B3=D1=80=D0=B5=D1=81=D1=81?= =?UTF-8?q?=D0=B8=D0=B8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- lab_4/lab4.1.ipynb | 2065 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 1775 insertions(+), 290 deletions(-) diff --git a/lab_4/lab4.1.ipynb b/lab_4/lab4.1.ipynb index aef60c3..2f1598d 100644 --- a/lab_4/lab4.1.ipynb +++ b/lab_4/lab4.1.ipynb @@ -53,7 +53,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Определение достижимого уровня качества модели для первой задачи" + "## Определение достижимого уровня качества модели для первой задачи " ] }, { @@ -72,7 +72,7 @@ }, { "cell_type": "code", - "execution_count": 221, + "execution_count": 160, "metadata": {}, "outputs": [ { @@ -134,7 +134,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 161, "metadata": {}, "outputs": [ { @@ -845,7 +845,7 @@ }, { "cell_type": "code", - "execution_count": 223, + "execution_count": 162, "metadata": {}, "outputs": [], "source": [ @@ -934,7 +934,7 @@ }, { "cell_type": "code", - "execution_count": 263, + "execution_count": 163, "metadata": {}, "outputs": [ { @@ -958,53 +958,71 @@ " \n", " \n", " \n", - " numeric__Open\n", - " numeric__High\n", - " numeric__Low\n", - " numeric__Adj Close\n", - " numeric__Volume\n", + " Close\n", + " Open\n", + " Adj Close\n", + " High\n", + " Low\n", + " Volume\n", + " above_average_volume\n", + " volatility\n", " \n", " \n", " \n", " \n", - " 2484\n", - " -0.717267\n", - " -0.718936\n", - " -0.721563\n", - " -0.700283\n", - " -0.304340\n", + " 7159\n", + " 2.052122\n", 
+ " 2.047553\n", + " 2.057055\n", + " 2.035800\n", + " 2.068394\n", + " -1.046507\n", + " -0.733850\n", + " 0.700004\n", " \n", " \n", - " 1576\n", - " -0.835490\n", - " -0.835755\n", - " -0.834432\n", - " -0.792049\n", - " 1.970579\n", + " 4505\n", + " -0.493609\n", + " -0.482248\n", + " -0.509368\n", + " -0.485819\n", + " -0.493841\n", + " 0.708938\n", + " 1.362677\n", + " 0.575000\n", " \n", " \n", - " 6595\n", - " 0.665106\n", - " 0.687359\n", - " 0.679824\n", - " 0.653502\n", - " -0.279264\n", + " 421\n", + " -0.867869\n", + " -0.867429\n", + " -0.818396\n", + " -0.868235\n", + " -0.866632\n", + " -0.450983\n", + " -0.733850\n", + " 0.031250\n", " \n", " \n", - " 7412\n", - " 2.358932\n", - " 2.375059\n", - " 2.374211\n", - " 2.413670\n", - " -0.380946\n", + " 1595\n", + " -0.819432\n", + " -0.817932\n", + " -0.778575\n", + " -0.818012\n", + " -0.819050\n", + " 0.558091\n", + " 1.362677\n", + " 0.148437\n", " \n", " \n", - " 7413\n", - " 2.400766\n", - " 2.441531\n", - " 2.359243\n", - " 2.384602\n", - " -0.515472\n", + " 3676\n", + " -0.373633\n", + " -0.364031\n", + " -0.412080\n", + " -0.367150\n", + " -0.368421\n", + " 1.153036\n", + " 1.362677\n", + " 0.420000\n", " \n", " \n", " ...\n", @@ -1013,83 +1031,101 @@ " ...\n", " ...\n", " ...\n", + " ...\n", + " ...\n", + " ...\n", " \n", " \n", - " 5519\n", - " 0.186241\n", - " 0.192637\n", - " 0.195457\n", - " 0.119036\n", - " -0.336428\n", + " 5976\n", + " 0.890812\n", + " 0.897589\n", + " 0.761079\n", + " 0.896985\n", + " 0.899914\n", + " -0.027099\n", + " -0.733850\n", + " 1.020001\n", " \n", " \n", - " 4531\n", - " -0.474942\n", - " -0.473560\n", - " -0.483945\n", - " -0.505016\n", - " 0.416194\n", + " 1305\n", + " -0.808836\n", + " -0.812807\n", + " -0.769864\n", + " -0.809815\n", + " -0.811178\n", + " 1.139386\n", + " 1.362677\n", + " 0.164062\n", " \n", " \n", - " 535\n", - " -0.864464\n", - " -0.865282\n", - " -0.863666\n", - " -0.816533\n", - " -0.502725\n", + " 6085\n", + " 0.792446\n", + " 
0.786081\n", + " 0.683373\n", + " 0.781419\n", + " 0.796750\n", + " -0.571535\n", + " -0.733850\n", + " 0.540001\n", " \n", " \n", - " 787\n", - " -0.856235\n", - " -0.857125\n", - " -0.855130\n", - " -0.809579\n", - " -0.282496\n", + " 5470\n", + " 0.216858\n", + " 0.226603\n", + " 0.129761\n", + " 0.218514\n", + " 0.222586\n", + " -0.303594\n", + " -0.733850\n", + " 0.584999\n", " \n", " \n", - " 7987\n", - " 1.826366\n", - " 1.814159\n", - " 1.806921\n", - " 1.972431\n", - " 0.243087\n", + " 5781\n", + " 0.681859\n", + " 0.655790\n", + " 0.560632\n", + " 0.672651\n", + " 0.666218\n", + " -0.556786\n", + " -0.733850\n", + " 1.200001\n", " \n", " \n", "\n", - "

6428 rows × 5 columns

\n", + "

6428 rows × 8 columns

\n", "" ], "text/plain": [ - " numeric__Open numeric__High numeric__Low numeric__Adj Close \\\n", - "2484 -0.717267 -0.718936 -0.721563 -0.700283 \n", - "1576 -0.835490 -0.835755 -0.834432 -0.792049 \n", - "6595 0.665106 0.687359 0.679824 0.653502 \n", - "7412 2.358932 2.375059 2.374211 2.413670 \n", - "7413 2.400766 2.441531 2.359243 2.384602 \n", - "... ... ... ... ... \n", - "5519 0.186241 0.192637 0.195457 0.119036 \n", - "4531 -0.474942 -0.473560 -0.483945 -0.505016 \n", - "535 -0.864464 -0.865282 -0.863666 -0.816533 \n", - "787 -0.856235 -0.857125 -0.855130 -0.809579 \n", - "7987 1.826366 1.814159 1.806921 1.972431 \n", + " Close Open Adj Close High Low Volume \\\n", + "7159 2.052122 2.047553 2.057055 2.035800 2.068394 -1.046507 \n", + "4505 -0.493609 -0.482248 -0.509368 -0.485819 -0.493841 0.708938 \n", + "421 -0.867869 -0.867429 -0.818396 -0.868235 -0.866632 -0.450983 \n", + "1595 -0.819432 -0.817932 -0.778575 -0.818012 -0.819050 0.558091 \n", + "3676 -0.373633 -0.364031 -0.412080 -0.367150 -0.368421 1.153036 \n", + "... ... ... ... ... ... ... \n", + "5976 0.890812 0.897589 0.761079 0.896985 0.899914 -0.027099 \n", + "1305 -0.808836 -0.812807 -0.769864 -0.809815 -0.811178 1.139386 \n", + "6085 0.792446 0.786081 0.683373 0.781419 0.796750 -0.571535 \n", + "5470 0.216858 0.226603 0.129761 0.218514 0.222586 -0.303594 \n", + "5781 0.681859 0.655790 0.560632 0.672651 0.666218 -0.556786 \n", "\n", - " numeric__Volume \n", - "2484 -0.304340 \n", - "1576 1.970579 \n", - "6595 -0.279264 \n", - "7412 -0.380946 \n", - "7413 -0.515472 \n", - "... ... \n", - "5519 -0.336428 \n", - "4531 0.416194 \n", - "535 -0.502725 \n", - "787 -0.282496 \n", - "7987 0.243087 \n", + " above_average_volume volatility \n", + "7159 -0.733850 0.700004 \n", + "4505 1.362677 0.575000 \n", + "421 -0.733850 0.031250 \n", + "1595 1.362677 0.148437 \n", + "3676 1.362677 0.420000 \n", + "... ... ... 
\n", + "5976 -0.733850 1.020001 \n", + "1305 1.362677 0.164062 \n", + "6085 -0.733850 0.540001 \n", + "5470 -0.733850 0.584999 \n", + "5781 -0.733850 1.200001 \n", "\n", - "[6428 rows x 5 columns]" + "[6428 rows x 8 columns]" ] }, - "execution_count": 263, + "execution_count": 163, "metadata": {}, "output_type": "execute_result" } @@ -1129,7 +1165,7 @@ }, { "cell_type": "code", - "execution_count": 224, + "execution_count": 164, "metadata": {}, "outputs": [], "source": [ @@ -1172,7 +1208,7 @@ }, { "cell_type": "code", - "execution_count": 225, + "execution_count": 165, "metadata": {}, "outputs": [ { @@ -1252,18 +1288,45 @@ }, { "cell_type": "code", - "execution_count": 226, + "execution_count": 159, "metadata": {}, "outputs": [ { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" + "ename": "KeyboardInterrupt", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mKeyboardInterrupt\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[159], line 13\u001b[0m\n\u001b[0;32m 10\u001b[0m disp\u001b[38;5;241m.\u001b[39max_\u001b[38;5;241m.\u001b[39mset_title(key)\n\u001b[0;32m 12\u001b[0m plt\u001b[38;5;241m.\u001b[39msubplots_adjust(top\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m, bottom\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0\u001b[39m, hspace\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.4\u001b[39m, wspace\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m0.1\u001b[39m)\n\u001b[1;32m---> 13\u001b[0m \u001b[43mplt\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\pyplot.py:612\u001b[0m, in \u001b[0;36mshow\u001b[1;34m(*args, **kwargs)\u001b[0m\n\u001b[0;32m 568\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 569\u001b[0m \u001b[38;5;124;03mDisplay all open figures.\u001b[39;00m\n\u001b[0;32m 570\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 609\u001b[0m \u001b[38;5;124;03mexplicitly there.\u001b[39;00m\n\u001b[0;32m 610\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 611\u001b[0m _warn_if_gui_out_of_main_thread()\n\u001b[1;32m--> 612\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_get_backend_mod\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mshow\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib_inline\\backend_inline.py:90\u001b[0m, in \u001b[0;36mshow\u001b[1;34m(close, block)\u001b[0m\n\u001b[0;32m 88\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 89\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m figure_manager \u001b[38;5;129;01min\u001b[39;00m Gcf\u001b[38;5;241m.\u001b[39mget_all_fig_managers():\n\u001b[1;32m---> 90\u001b[0m \u001b[43mdisplay\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 91\u001b[0m \u001b[43m \u001b[49m\u001b[43mfigure_manager\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[43m,\u001b[49m\n\u001b[0;32m 92\u001b[0m \u001b[43m \u001b[49m\u001b[43mmetadata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m_fetch_figure_metadata\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfigure_manager\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 93\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 94\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 95\u001b[0m show\u001b[38;5;241m.\u001b[39m_to_draw \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\IPython\\core\\display_functions.py:298\u001b[0m, in \u001b[0;36mdisplay\u001b[1;34m(include, exclude, metadata, transient, display_id, raw, clear, *objs, **kwargs)\u001b[0m\n\u001b[0;32m 296\u001b[0m publish_display_data(data\u001b[38;5;241m=\u001b[39mobj, metadata\u001b[38;5;241m=\u001b[39mmetadata, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[0;32m 297\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 298\u001b[0m format_dict, 
md_dict \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mformat\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43minclude\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mexclude\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mexclude\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 299\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m format_dict:\n\u001b[0;32m 300\u001b[0m \u001b[38;5;66;03m# nothing to display (e.g. _ipython_display_ took over)\u001b[39;00m\n\u001b[0;32m 301\u001b[0m \u001b[38;5;28;01mcontinue\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\IPython\\core\\formatters.py:182\u001b[0m, in \u001b[0;36mDisplayFormatter.format\u001b[1;34m(self, obj, include, exclude)\u001b[0m\n\u001b[0;32m 180\u001b[0m md \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[0;32m 181\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 182\u001b[0m data \u001b[38;5;241m=\u001b[39m \u001b[43mformatter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 183\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m:\n\u001b[0;32m 184\u001b[0m \u001b[38;5;66;03m# FIXME: log the exception\u001b[39;00m\n\u001b[0;32m 185\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\decorator.py:232\u001b[0m, in \u001b[0;36mdecorate..fun\u001b[1;34m(*args, **kw)\u001b[0m\n\u001b[0;32m 230\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m kwsyntax:\n\u001b[0;32m 231\u001b[0m args, kw \u001b[38;5;241m=\u001b[39m fix(args, kw, sig)\n\u001b[1;32m--> 232\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mcaller\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfunc\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mextras\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[43margs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\IPython\\core\\formatters.py:226\u001b[0m, in \u001b[0;36mcatch_format_error\u001b[1;34m(method, self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 224\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"show traceback on failed format call\"\"\"\u001b[39;00m\n\u001b[0;32m 225\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[1;32m--> 226\u001b[0m r \u001b[38;5;241m=\u001b[39m \u001b[43mmethod\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 227\u001b[0m \u001b[38;5;28;01mexcept\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m:\n\u001b[0;32m 228\u001b[0m \u001b[38;5;66;03m# don't warn on NotImplementedErrors\u001b[39;00m\n\u001b[0;32m 229\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_check_return(\u001b[38;5;28;01mNone\u001b[39;00m, args[\u001b[38;5;241m0\u001b[39m])\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\IPython\\core\\formatters.py:343\u001b[0m, in \u001b[0;36mBaseFormatter.__call__\u001b[1;34m(self, obj)\u001b[0m\n\u001b[0;32m 341\u001b[0m \u001b[38;5;28;01mpass\u001b[39;00m\n\u001b[0;32m 342\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m--> 343\u001b[0m 
\u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mprinter\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 344\u001b[0m \u001b[38;5;66;03m# Finally look for special method names\u001b[39;00m\n\u001b[0;32m 345\u001b[0m method \u001b[38;5;241m=\u001b[39m get_real_method(obj, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mprint_method)\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\IPython\\core\\pylabtools.py:170\u001b[0m, in \u001b[0;36mprint_figure\u001b[1;34m(fig, fmt, bbox_inches, base64, **kwargs)\u001b[0m\n\u001b[0;32m 167\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mbackend_bases\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m FigureCanvasBase\n\u001b[0;32m 168\u001b[0m FigureCanvasBase(fig)\n\u001b[1;32m--> 170\u001b[0m \u001b[43mfig\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcanvas\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mprint_figure\u001b[49m\u001b[43m(\u001b[49m\u001b[43mbytes_io\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkw\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 171\u001b[0m data \u001b[38;5;241m=\u001b[39m bytes_io\u001b[38;5;241m.\u001b[39mgetvalue()\n\u001b[0;32m 172\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m fmt \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m'\u001b[39m\u001b[38;5;124msvg\u001b[39m\u001b[38;5;124m'\u001b[39m:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\backend_bases.py:2175\u001b[0m, in \u001b[0;36mFigureCanvasBase.print_figure\u001b[1;34m(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, pad_inches, bbox_extra_artists, backend, **kwargs)\u001b[0m\n\u001b[0;32m 2172\u001b[0m \u001b[38;5;66;03m# we do this instead of 
`self.figure.draw_without_rendering`\u001b[39;00m\n\u001b[0;32m 2173\u001b[0m \u001b[38;5;66;03m# so that we can inject the orientation\u001b[39;00m\n\u001b[0;32m 2174\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mgetattr\u001b[39m(renderer, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_draw_disabled\u001b[39m\u001b[38;5;124m\"\u001b[39m, nullcontext)():\n\u001b[1;32m-> 2175\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 2176\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches:\n\u001b[0;32m 2177\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m bbox_inches \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtight\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\artist.py:95\u001b[0m, in \u001b[0;36m_finalize_rasterization..draw_wrapper\u001b[1;34m(artist, renderer, *args, **kwargs)\u001b[0m\n\u001b[0;32m 93\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(draw)\n\u001b[0;32m 94\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdraw_wrapper\u001b[39m(artist, renderer, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m---> 95\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 96\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m renderer\u001b[38;5;241m.\u001b[39m_rasterizing:\n\u001b[0;32m 
97\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstop_rasterizing()\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[1;34m(artist, renderer)\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[1;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\figure.py:3162\u001b[0m, in \u001b[0;36mFigure.draw\u001b[1;34m(self, renderer)\u001b[0m\n\u001b[0;32m 3159\u001b[0m \u001b[38;5;66;03m# ValueError can occur when resizing a window.\u001b[39;00m\n\u001b[0;32m 3161\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mpatch\u001b[38;5;241m.\u001b[39mdraw(renderer)\n\u001b[1;32m-> 3162\u001b[0m \u001b[43mmimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_draw_list_compositing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 3163\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martists\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuppressComposite\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3165\u001b[0m renderer\u001b[38;5;241m.\u001b[39mclose_group(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfigure\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3166\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\image.py:132\u001b[0m, in \u001b[0;36m_draw_list_compositing_images\u001b[1;34m(renderer, parent, artists, suppress_composite)\u001b[0m\n\u001b[0;32m 130\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m not_composite \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_images:\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m artists:\n\u001b[1;32m--> 132\u001b[0m \u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 133\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 134\u001b[0m \u001b[38;5;66;03m# Composite any adjacent images together\u001b[39;00m\n\u001b[0;32m 135\u001b[0m image_group \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[1;34m(artist, renderer)\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[1;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axes\\_base.py:3137\u001b[0m, in \u001b[0;36m_AxesBase.draw\u001b[1;34m(self, renderer)\u001b[0m\n\u001b[0;32m 3134\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artists_rasterized:\n\u001b[0;32m 3135\u001b[0m _draw_rasterized(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfigure, artists_rasterized, renderer)\n\u001b[1;32m-> 3137\u001b[0m \u001b[43mmimage\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_draw_list_compositing_images\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 3138\u001b[0m \u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43martists\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfigure\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msuppressComposite\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 3140\u001b[0m renderer\u001b[38;5;241m.\u001b[39mclose_group(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124maxes\u001b[39m\u001b[38;5;124m'\u001b[39m)\n\u001b[0;32m 3141\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mstale \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\image.py:132\u001b[0m, in \u001b[0;36m_draw_list_compositing_images\u001b[1;34m(renderer, parent, artists, suppress_composite)\u001b[0m\n\u001b[0;32m 130\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m not_composite \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m has_images:\n\u001b[0;32m 131\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m a \u001b[38;5;129;01min\u001b[39;00m artists:\n\u001b[1;32m--> 132\u001b[0m \u001b[43ma\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 133\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 134\u001b[0m \u001b[38;5;66;03m# Composite any adjacent images together\u001b[39;00m\n\u001b[0;32m 135\u001b[0m image_group \u001b[38;5;241m=\u001b[39m []\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\artist.py:72\u001b[0m, in \u001b[0;36mallow_rasterization..draw_wrapper\u001b[1;34m(artist, renderer)\u001b[0m\n\u001b[0;32m 69\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 70\u001b[0m renderer\u001b[38;5;241m.\u001b[39mstart_filter()\n\u001b[1;32m---> 72\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mdraw\u001b[49m\u001b[43m(\u001b[49m\u001b[43martist\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mrenderer\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 73\u001b[0m \u001b[38;5;28;01mfinally\u001b[39;00m:\n\u001b[0;32m 74\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m artist\u001b[38;5;241m.\u001b[39mget_agg_filter() \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axis.py:1423\u001b[0m, in \u001b[0;36mAxis.draw\u001b[1;34m(self, renderer)\u001b[0m\n\u001b[0;32m 1420\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m\n\u001b[0;32m 1421\u001b[0m 
renderer\u001b[38;5;241m.\u001b[39mopen_group(\u001b[38;5;18m__name__\u001b[39m, gid\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_gid())\n\u001b[1;32m-> 1423\u001b[0m ticks_to_draw \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_update_ticks\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1424\u001b[0m tlb1, tlb2 \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_ticklabel_bboxes(ticks_to_draw, renderer)\n\u001b[0;32m 1426\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tick \u001b[38;5;129;01min\u001b[39;00m ticks_to_draw:\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axis.py:1302\u001b[0m, in \u001b[0;36mAxis._update_ticks\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1300\u001b[0m major_locs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_majorticklocs()\n\u001b[0;32m 1301\u001b[0m major_labels \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmajor\u001b[38;5;241m.\u001b[39mformatter\u001b[38;5;241m.\u001b[39mformat_ticks(major_locs)\n\u001b[1;32m-> 1302\u001b[0m major_ticks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_major_ticks\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mlen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mmajor_locs\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1303\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m tick, loc, label \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mzip\u001b[39m(major_ticks, major_locs, major_labels):\n\u001b[0;32m 1304\u001b[0m tick\u001b[38;5;241m.\u001b[39mupdate_position(loc)\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axis.py:1670\u001b[0m, in 
\u001b[0;36mAxis.get_major_ticks\u001b[1;34m(self, numticks)\u001b[0m\n\u001b[0;32m 1666\u001b[0m numticks \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mget_majorticklocs())\n\u001b[0;32m 1668\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmajorTicks) \u001b[38;5;241m<\u001b[39m numticks:\n\u001b[0;32m 1669\u001b[0m \u001b[38;5;66;03m# Update the new tick label properties from the old.\u001b[39;00m\n\u001b[1;32m-> 1670\u001b[0m tick \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_tick\u001b[49m\u001b[43m(\u001b[49m\u001b[43mmajor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[0;32m 1671\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmajorTicks\u001b[38;5;241m.\u001b[39mappend(tick)\n\u001b[0;32m 1672\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_copy_tick_props(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mmajorTicks[\u001b[38;5;241m0\u001b[39m], tick)\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axis.py:1598\u001b[0m, in \u001b[0;36mAxis._get_tick\u001b[1;34m(self, major)\u001b[0m\n\u001b[0;32m 1594\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[0;32m 1595\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe Axis subclass \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m must define \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 1596\u001b[0m 
\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_tick_class or reimplement _get_tick()\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m 1597\u001b[0m tick_kw \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_major_tick_kw \u001b[38;5;28;01mif\u001b[39;00m major \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_minor_tick_kw\n\u001b[1;32m-> 1598\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_tick_class\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43maxes\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mmajor\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmajor\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mtick_kw\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axis.py:456\u001b[0m, in \u001b[0;36mYTick.__init__\u001b[1;34m(self, *args, **kwargs)\u001b[0m\n\u001b[0;32m 455\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m__init__\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[1;32m--> 456\u001b[0m \u001b[38;5;28;43msuper\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[38;5;21;43m__init__\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 457\u001b[0m \u001b[38;5;66;03m# x in axes coords, y in data coords\u001b[39;00m\n\u001b[0;32m 
458\u001b[0m ax \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39maxes\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\axis.py:170\u001b[0m, in \u001b[0;36mTick.__init__\u001b[1;34m(self, axes, loc, size, width, color, tickdir, pad, labelsize, labelcolor, labelfontfamily, zorder, gridOn, tick1On, tick2On, label1On, label2On, major, labelrotation, grid_color, grid_linestyle, grid_linewidth, grid_alpha, **kwargs)\u001b[0m\n\u001b[0;32m 159\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtick2line \u001b[38;5;241m=\u001b[39m mlines\u001b[38;5;241m.\u001b[39mLine2D(\n\u001b[0;32m 160\u001b[0m [], [],\n\u001b[0;32m 161\u001b[0m color\u001b[38;5;241m=\u001b[39mcolor, linestyle\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mnone\u001b[39m\u001b[38;5;124m\"\u001b[39m, zorder\u001b[38;5;241m=\u001b[39mzorder, visible\u001b[38;5;241m=\u001b[39mtick2On,\n\u001b[0;32m 162\u001b[0m markeredgecolor\u001b[38;5;241m=\u001b[39mcolor, markersize\u001b[38;5;241m=\u001b[39msize, markeredgewidth\u001b[38;5;241m=\u001b[39mwidth,\n\u001b[0;32m 163\u001b[0m )\n\u001b[0;32m 164\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mgridline \u001b[38;5;241m=\u001b[39m mlines\u001b[38;5;241m.\u001b[39mLine2D(\n\u001b[0;32m 165\u001b[0m [], [],\n\u001b[0;32m 166\u001b[0m color\u001b[38;5;241m=\u001b[39mgrid_color, alpha\u001b[38;5;241m=\u001b[39mgrid_alpha, visible\u001b[38;5;241m=\u001b[39mgridOn,\n\u001b[0;32m 167\u001b[0m linestyle\u001b[38;5;241m=\u001b[39mgrid_linestyle, linewidth\u001b[38;5;241m=\u001b[39mgrid_linewidth, marker\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 168\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mgrid_kw,\n\u001b[0;32m 169\u001b[0m )\n\u001b[1;32m--> 170\u001b[0m 
\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mgridline\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mget_path\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39m_interpolation_steps \u001b[38;5;241m=\u001b[39m \\\n\u001b[0;32m 171\u001b[0m GRIDLINE_INTERPOLATION_STEPS\n\u001b[0;32m 172\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel1 \u001b[38;5;241m=\u001b[39m mtext\u001b[38;5;241m.\u001b[39mText(\n\u001b[0;32m 173\u001b[0m np\u001b[38;5;241m.\u001b[39mnan, np\u001b[38;5;241m.\u001b[39mnan,\n\u001b[0;32m 174\u001b[0m fontsize\u001b[38;5;241m=\u001b[39mlabelsize, color\u001b[38;5;241m=\u001b[39mlabelcolor, visible\u001b[38;5;241m=\u001b[39mlabel1On,\n\u001b[0;32m 175\u001b[0m fontfamily\u001b[38;5;241m=\u001b[39mlabelfontfamily, rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_labelrotation[\u001b[38;5;241m1\u001b[39m])\n\u001b[0;32m 176\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mlabel2 \u001b[38;5;241m=\u001b[39m mtext\u001b[38;5;241m.\u001b[39mText(\n\u001b[0;32m 177\u001b[0m np\u001b[38;5;241m.\u001b[39mnan, np\u001b[38;5;241m.\u001b[39mnan,\n\u001b[0;32m 178\u001b[0m fontsize\u001b[38;5;241m=\u001b[39mlabelsize, color\u001b[38;5;241m=\u001b[39mlabelcolor, visible\u001b[38;5;241m=\u001b[39mlabel2On,\n\u001b[0;32m 179\u001b[0m fontfamily\u001b[38;5;241m=\u001b[39mlabelfontfamily, rotation\u001b[38;5;241m=\u001b[39m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_labelrotation[\u001b[38;5;241m1\u001b[39m])\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\lines.py:1037\u001b[0m, in \u001b[0;36mLine2D.get_path\u001b[1;34m(self)\u001b[0m\n\u001b[0;32m 1035\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Return the `~matplotlib.path.Path` associated with this line.\"\"\"\u001b[39;00m\n\u001b[0;32m 1036\u001b[0m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_invalidy \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_invalidx:\n\u001b[1;32m-> 1037\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrecache\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 1038\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_path\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\matplotlib\\lines.py:683\u001b[0m, in \u001b[0;36mLine2D.recache\u001b[1;34m(self, always)\u001b[0m\n\u001b[0;32m 680\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 681\u001b[0m y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_y\n\u001b[1;32m--> 683\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_xy \u001b[38;5;241m=\u001b[39m np\u001b[38;5;241m.\u001b[39mcolumn_stack(\u001b[43mnp\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mbroadcast_arrays\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m)\u001b[49m)\u001b[38;5;241m.\u001b[39mastype(\u001b[38;5;28mfloat\u001b[39m)\n\u001b[0;32m 684\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_x, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_y \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_xy\u001b[38;5;241m.\u001b[39mT \u001b[38;5;66;03m# views\u001b[39;00m\n\u001b[0;32m 686\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_subslice \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mFalse\u001b[39;00m\n", + "File \u001b[1;32mc:\\Users\\a3012\\AIM-PIbd-31-Zhirnova-A-E\\aimenv\\Lib\\site-packages\\numpy\\lib\\_stride_tricks_impl.py:560\u001b[0m, in \u001b[0;36mbroadcast_arrays\u001b[1;34m(subok, 
*args)\u001b[0m\n\u001b[0;32m 556\u001b[0m args \u001b[38;5;241m=\u001b[39m [np\u001b[38;5;241m.\u001b[39marray(_m, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m, subok\u001b[38;5;241m=\u001b[39msubok) \u001b[38;5;28;01mfor\u001b[39;00m _m \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m 558\u001b[0m shape \u001b[38;5;241m=\u001b[39m _broadcast_shape(\u001b[38;5;241m*\u001b[39margs)\n\u001b[1;32m--> 560\u001b[0m result \u001b[38;5;241m=\u001b[39m [array \u001b[38;5;28;01mif\u001b[39;00m array\u001b[38;5;241m.\u001b[39mshape \u001b[38;5;241m==\u001b[39m shape\n\u001b[0;32m 561\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m _broadcast_to(array, shape, subok\u001b[38;5;241m=\u001b[39msubok, readonly\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n\u001b[0;32m 562\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m array \u001b[38;5;129;01min\u001b[39;00m args]\n\u001b[0;32m 563\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mtuple\u001b[39m(result)\n", + "\u001b[1;31mKeyboardInterrupt\u001b[0m: " + ] } ], "source": [ @@ -1302,145 +1365,145 @@ }, { "cell_type": "code", - "execution_count": 227, + "execution_count": 166, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 Precision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_testPrecision_trainPrecision_testRecall_trainRecall_testAccuracy_trainAccuracy_testF1_trainF1_test
logistic1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000logistic1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
ridge1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000ridge1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
decision_tree1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000decision_tree1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
knn1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000knn1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
naive_bayes1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000naive_bayes1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000gradient_boosting1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000random_forest1.0000001.0000001.0000001.0000001.0000001.0000001.0000001.000000
mlp1.0000001.0000000.9942220.9946710.9979780.9981340.9971030.997329mlp1.0000001.0000000.9942220.9946710.9979780.9981340.9971030.997329
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 227, + "execution_count": 166, "metadata": {}, "output_type": "execute_result" } @@ -1496,118 +1559,118 @@ }, { "cell_type": "code", - "execution_count": 228, + "execution_count": 167, "metadata": {}, "outputs": [ { "data": { "text/html": [ "\n", - "\n", + "
\n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", " \n", "
 Accuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_testAccuracy_testF1_testROC_AUC_testCohen_kappa_testMCC_test
logistic1.0000001.0000001.0000001.0000001.000000logistic1.0000001.0000001.0000001.0000001.000000
ridge1.0000001.0000001.0000001.0000001.000000ridge1.0000001.0000001.0000001.0000001.000000
decision_tree1.0000001.0000001.0000001.0000001.000000decision_tree1.0000001.0000001.0000001.0000001.000000
knn1.0000001.0000001.0000001.0000001.000000knn1.0000001.0000001.0000001.0000001.000000
naive_bayes1.0000001.0000001.0000001.0000001.000000naive_bayes1.0000001.0000001.0000001.0000001.000000
gradient_boosting1.0000001.0000001.0000001.0000001.000000gradient_boosting1.0000001.0000001.0000001.0000001.000000
random_forest1.0000001.0000001.0000001.0000001.000000random_forest1.0000001.0000001.0000001.0000001.000000
mlp0.9981340.9973291.0000000.9958950.995904mlp0.9981340.9973291.0000000.9958950.995904
\n" ], "text/plain": [ - "" + "" ] }, - "execution_count": 228, + "execution_count": 167, "metadata": {}, "output_type": "execute_result" } @@ -1653,7 +1716,7 @@ }, { "cell_type": "code", - "execution_count": 229, + "execution_count": 168, "metadata": {}, "outputs": [ { @@ -1681,7 +1744,7 @@ }, { "cell_type": "code", - "execution_count": 230, + "execution_count": 169, "metadata": {}, "outputs": [ { @@ -1737,7 +1800,7 @@ "Index: []" ] }, - "execution_count": 230, + "execution_count": 169, "metadata": {}, "output_type": "execute_result" } @@ -1769,7 +1832,7 @@ }, { "cell_type": "code", - "execution_count": 231, + "execution_count": 170, "metadata": {}, "outputs": [ { @@ -1933,7 +1996,7 @@ }, { "cell_type": "code", - "execution_count": 233, + "execution_count": 171, "metadata": {}, "outputs": [ { @@ -1953,7 +2016,7 @@ " 'model__n_estimators': 10}" ] }, - "execution_count": 233, + "execution_count": 171, "metadata": {}, "output_type": "execute_result" } @@ -1988,9 +2051,21 @@ }, { "cell_type": "code", - "execution_count": 258, + "execution_count": 172, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "NameError", + "evalue": "name 'numeric_features' is not defined", + "output_type": "error", + "traceback": [ + "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[1;31mNameError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[1;32mIn[172], line 10\u001b[0m\n\u001b[0;32m 6\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msklearn\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m metrics\n\u001b[0;32m 8\u001b[0m \u001b[38;5;66;03m# Определение трансформера (пример)\u001b[39;00m\n\u001b[0;32m 9\u001b[0m pipeline_end \u001b[38;5;241m=\u001b[39m ColumnTransformer([\n\u001b[1;32m---> 10\u001b[0m (\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mnumeric\u001b[39m\u001b[38;5;124m'\u001b[39m, StandardScaler(), \u001b[43mnumeric_features\u001b[49m), \u001b[38;5;66;03m# numeric_features - это 
список числовых признаков\u001b[39;00m\n\u001b[0;32m 11\u001b[0m \u001b[38;5;66;03m# Добавьте другие трансформеры, если требуется\u001b[39;00m\n\u001b[0;32m 12\u001b[0m ])\n\u001b[0;32m 14\u001b[0m \u001b[38;5;66;03m# Объявление модели\u001b[39;00m\n\u001b[0;32m 15\u001b[0m optimized_model \u001b[38;5;241m=\u001b[39m RandomForestClassifier(\n\u001b[0;32m 16\u001b[0m random_state\u001b[38;5;241m=\u001b[39mrandom_state,\n\u001b[0;32m 17\u001b[0m criterion\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mgini\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 20\u001b[0m n_estimators\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m10\u001b[39m,\n\u001b[0;32m 21\u001b[0m )\n", + "\u001b[1;31mNameError\u001b[0m: name 'numeric_features' is not defined" + ] + } + ], "source": [ "from sklearn.pipeline import Pipeline\n", "from sklearn.preprocessing import StandardScaler\n", @@ -2051,7 +2126,7 @@ }, { "cell_type": "code", - "execution_count": 259, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -2075,7 +2150,7 @@ }, { "cell_type": "code", - "execution_count": 260, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2190,7 +2265,7 @@ }, { "cell_type": "code", - "execution_count": 261, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2292,7 +2367,7 @@ }, { "cell_type": "code", - "execution_count": 262, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2333,7 +2408,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Определение достижимого уровня качества модели для второй задачи" + "## Определение достижимого уровня качества модели для второй задачи (добавляю конвейер для решения задачи регрессии)" ] }, { @@ -2352,7 +2427,7 @@ }, { "cell_type": "code", - "execution_count": 239, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -2440,7 +2515,7 @@ }, { "cell_type": "code", - "execution_count": 240, + "execution_count": null, "metadata": {}, "outputs": 
[ { @@ -3158,7 +3233,7 @@ }, { "cell_type": "code", - "execution_count": 241, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3247,7 +3322,7 @@ }, { "cell_type": "code", - "execution_count": 242, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3478,7 +3553,7 @@ }, { "cell_type": "code", - "execution_count": 243, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -3521,7 +3596,7 @@ }, { "cell_type": "code", - "execution_count": 244, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3608,7 +3683,7 @@ }, { "cell_type": "code", - "execution_count": 245, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3655,7 +3730,7 @@ }, { "cell_type": "code", - "execution_count": 247, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -3853,7 +3928,7 @@ }, { "cell_type": "code", - "execution_count": 248, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4005,7 +4080,7 @@ }, { "cell_type": "code", - "execution_count": 249, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4033,7 +4108,7 @@ }, { "cell_type": "code", - "execution_count": 250, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4121,7 +4196,7 @@ }, { "cell_type": "code", - "execution_count": 251, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4285,7 +4360,7 @@ }, { "cell_type": "code", - "execution_count": 252, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4332,7 +4407,7 @@ }, { "cell_type": "code", - "execution_count": 264, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4374,7 +4449,7 @@ }, { "cell_type": "code", - "execution_count": 254, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -4398,7 +4473,7 @@ }, { "cell_type": "code", - "execution_count": 255, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4513,7 +4588,7 @@ }, { "cell_type": "code", - "execution_count": 256, + "execution_count": 
null, "metadata": {}, "outputs": [ { @@ -4615,7 +4690,7 @@ }, { "cell_type": "code", - "execution_count": 257, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -4651,6 +4726,1416 @@ "\n", "В зеленом квадрате значение 558 указывает на количество правильно классифицированных объектов, отнесенных к классу \"More\". Это также является показателем высокой точности модели в определении объектов данного класса." ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Определение достижимого уровня качества модели для второй задачи (задача регрессии)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__2. Прогнозирование цены закрытия акций:__\n", + "\n", + "\n", + "Описание: Оценить, какая будет цена закрытия акций Starbucks на следующий день или через несколько дней на основе исторических данных.\n", + "Целевая переменная: Цена закрытия (Close). (среднее значение)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Загрузка данных и создание целевой переменной" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Среднее значение поля 'Close': 30.058856538825285\n", + " Date Open High Low Close Adj Close Volume \\\n", + "0 1992-06-26 0.328125 0.347656 0.320313 0.335938 0.260703 224358400 \n", + "1 1992-06-29 0.339844 0.367188 0.332031 0.359375 0.278891 58732800 \n", + "2 1992-06-30 0.367188 0.371094 0.343750 0.347656 0.269797 34777600 \n", + "3 1992-07-01 0.351563 0.359375 0.339844 0.355469 0.275860 18316800 \n", + "4 1992-07-02 0.359375 0.359375 0.347656 0.355469 0.275860 13996800 \n", + "\n", + " above_average_close Close_Next_Day \n", + "0 0 0.359375 \n", + "1 0 0.347656 \n", + "2 0 0.355469 \n", + "3 0 0.355469 \n", + "4 0 0.355469 \n", + "Статистическое описание DataFrame:\n", + " Open High Low Close Adj Close \\\n", + "count 8035.000000 8035.000000 8035.000000 8035.000000 
8035.000000 \n", + "mean 30.048051 30.345221 29.745172 30.052733 26.667480 \n", + "std 33.613031 33.904070 33.312079 33.613521 31.724640 \n", + "min 0.328125 0.347656 0.320313 0.335938 0.260703 \n", + "25% 4.391563 4.531250 4.304844 4.399219 3.413997 \n", + "50% 13.325000 13.485000 13.150000 13.330000 10.352452 \n", + "75% 55.250000 55.715000 54.829999 55.254999 47.461098 \n", + "max 126.080002 126.320000 124.809998 126.059998 118.010414 \n", + "\n", + " Volume above_average_close Close_Next_Day \n", + "count 8.035000e+03 8035.000000 8035.000000 \n", + "mean 1.470584e+07 0.347480 30.062556 \n", + "std 1.340058e+07 0.476199 33.616368 \n", + "min 1.504000e+06 0.000000 0.347656 \n", + "25% 7.818550e+06 0.000000 4.403125 \n", + "50% 1.170240e+07 0.000000 13.330000 \n", + "75% 1.778850e+07 1.000000 55.274999 \n", + "max 5.855088e+08 1.000000 126.059998 \n" + ] + } + ], + "source": [ + "import pandas as pd\n", + "from sklearn import set_config\n", + "\n", + "set_config(transform_output=\"pandas\")\n", + "\n", + "# Загрузка данных о ценах акций Starbucks из CSV файла\n", + "df = pd.read_csv(\".//static//csv//Starbucks Dataset.csv\")\n", + "\n", + "# Опция для настройки генерации случайных чисел (если это нужно для других частей кода)\n", + "random_state = 42\n", + "\n", + "# Вычисление среднего значения поля \"Close\"\n", + "average_close = df['Close'].mean()\n", + "print(f\"Среднее значение поля 'Close': {average_close}\")\n", + "\n", + "# Создание новой колонки, указывающей, выше или ниже среднего значение цена закрытия\n", + "df['above_average_close'] = (df['Close'] > average_close).astype(int)\n", + "\n", + "# Создание целевой переменной для прогнозирования (цена закрытия на следующий день)\n", + "df['Close_Next_Day'] = df['Close'].shift(-1)\n", + "\n", + "# Удаление последней строки, где нет значения для следующего дня\n", + "df.dropna(inplace=True)\n", + "\n", + "# Вывод DataFrame с новой колонкой\n", + "print(df.head())\n", + "\n", + "# Примерный анализ данных\n", 
+ "print(\"Статистическое описание DataFrame:\")\n", + "print(df.describe())" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Разделение набора данных на обучающую и тестовые выборки (80/20) для задачи регрессии\n", + "\n", + "Целевой признак -- above_average_close" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "'X_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolumeClose_Next_Day
55522014-07-1439.49000239.49000239.20999939.27999932.493519456200039.445000
34222006-01-2515.34000015.38000015.09500015.18000011.780375727660015.745000
62142017-02-2856.70999957.06000156.54999956.86999948.946602875070057.139999
35012006-05-1818.22500018.25000017.96500017.99000013.9610621336600018.165001
26882003-02-265.6575005.6825005.5200005.5500004.307055167384005.772500
...........................
52262013-03-2728.43000028.47500028.10500028.45500023.144903745700028.475000
53902013-11-1840.50999840.66999840.10500040.27000033.065239831640039.959999
8601995-11-201.3554691.3671881.3281251.3320311.033717309984001.343750
76032022-09-0285.47000185.76999782.55000382.94000279.6838071033680084.519997
72702021-05-10114.570000116.089996114.209999114.300003106.5773095759500113.550003
\n", + "

6428 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "5552 2014-07-14 39.490002 39.490002 39.209999 39.279999 32.493519 \n", + "3422 2006-01-25 15.340000 15.380000 15.095000 15.180000 11.780375 \n", + "6214 2017-02-28 56.709999 57.060001 56.549999 56.869999 48.946602 \n", + "3501 2006-05-18 18.225000 18.250000 17.965000 17.990000 13.961062 \n", + "2688 2003-02-26 5.657500 5.682500 5.520000 5.550000 4.307055 \n", + "... ... ... ... ... ... ... \n", + "5226 2013-03-27 28.430000 28.475000 28.105000 28.455000 23.144903 \n", + "5390 2013-11-18 40.509998 40.669998 40.105000 40.270000 33.065239 \n", + "860 1995-11-20 1.355469 1.367188 1.328125 1.332031 1.033717 \n", + "7603 2022-09-02 85.470001 85.769997 82.550003 82.940002 79.683807 \n", + "7270 2021-05-10 114.570000 116.089996 114.209999 114.300003 106.577309 \n", + "\n", + " Volume Close_Next_Day \n", + "5552 4562000 39.445000 \n", + "3422 7276600 15.745000 \n", + "6214 8750700 57.139999 \n", + "3501 13366000 18.165001 \n", + "2688 16738400 5.772500 \n", + "... ... ... \n", + "5226 7457000 28.475000 \n", + "5390 8316400 39.959999 \n", + "860 30998400 1.343750 \n", + "7603 10336800 84.519997 \n", + "7270 5759500 113.550003 \n", + "\n", + "[6428 rows x 8 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_train'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_average_close
55521
34220
62141
35010
26880
......
52260
53901
8600
76031
72701
\n", + "

6428 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_average_close\n", + "5552 1\n", + "3422 0\n", + "6214 1\n", + "3501 0\n", + "2688 0\n", + "... ...\n", + "5226 0\n", + "5390 1\n", + "860 0\n", + "7603 1\n", + "7270 1\n", + "\n", + "[6428 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'X_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DateOpenHighLowCloseAdj CloseVolumeClose_Next_Day
66372018-10-3158.98000059.11999958.20999958.27000051.7544561156040058.630001
66322018-10-2458.57000059.27999957.95000158.06000151.5679401218970058.959999
73272021-07-30122.190002122.980003121.099998121.430000113.6760715712300120.370003
7301995-05-170.9375000.9414060.9023440.9101560.706323258112000.912109
15151998-06-253.2265633.3281253.2187503.2851562.549432346992003.382813
...........................
57772015-06-0451.86999952.18000051.57000051.72000143.400497623080052.189999
77192023-02-21105.500000105.949997104.709999104.779999101.7522435438000104.769997
16771999-02-172.9726563.0234382.9062502.9101562.258415177760002.933594
9211996-02-161.0312501.0546881.0156251.0312500.80029778096001.031250
3221993-10-050.8359380.8359380.8046880.8203130.63660091136000.812500
\n", + "

1607 rows × 8 columns

\n", + "
" + ], + "text/plain": [ + " Date Open High Low Close Adj Close \\\n", + "6637 2018-10-31 58.980000 59.119999 58.209999 58.270000 51.754456 \n", + "6632 2018-10-24 58.570000 59.279999 57.950001 58.060001 51.567940 \n", + "7327 2021-07-30 122.190002 122.980003 121.099998 121.430000 113.676071 \n", + "730 1995-05-17 0.937500 0.941406 0.902344 0.910156 0.706323 \n", + "1515 1998-06-25 3.226563 3.328125 3.218750 3.285156 2.549432 \n", + "... ... ... ... ... ... ... \n", + "5777 2015-06-04 51.869999 52.180000 51.570000 51.720001 43.400497 \n", + "7719 2023-02-21 105.500000 105.949997 104.709999 104.779999 101.752243 \n", + "1677 1999-02-17 2.972656 3.023438 2.906250 2.910156 2.258415 \n", + "921 1996-02-16 1.031250 1.054688 1.015625 1.031250 0.800297 \n", + "322 1993-10-05 0.835938 0.835938 0.804688 0.820313 0.636600 \n", + "\n", + " Volume Close_Next_Day \n", + "6637 11560400 58.630001 \n", + "6632 12189700 58.959999 \n", + "7327 5712300 120.370003 \n", + "730 25811200 0.912109 \n", + "1515 34699200 3.382813 \n", + "... ... ... \n", + "5777 6230800 52.189999 \n", + "7719 5438000 104.769997 \n", + "1677 17776000 2.933594 \n", + "921 7809600 1.031250 \n", + "322 9113600 0.812500 \n", + "\n", + "[1607 rows x 8 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/plain": [ + "'y_test'" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
above_average_close
66371
66321
73271
7300
15150
......
57771
77191
16770
9210
3220
\n", + "

1607 rows × 1 columns

\n", + "
" + ], + "text/plain": [ + " above_average_close\n", + "6637 1\n", + "6632 1\n", + "7327 1\n", + "730 0\n", + "1515 0\n", + "... ...\n", + "5777 1\n", + "7719 1\n", + "1677 0\n", + "921 0\n", + "322 0\n", + "\n", + "[1607 rows x 1 columns]" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "from typing import Tuple\n", + "import pandas as pd\n", + "from pandas import DataFrame\n", + "from sklearn.model_selection import train_test_split\n", + "\n", + "def split_into_train_test(\n", + " df_input: DataFrame,\n", + " target_colname: str = \"above_average_close\",\n", + " frac_train: float = 0.8,\n", + " random_state: int = None,\n", + ") -> Tuple[DataFrame, DataFrame, DataFrame, DataFrame]:\n", + " \n", + " if not (0 < frac_train < 1):\n", + " raise ValueError(\"Fraction must be between 0 and 1.\")\n", + " \n", + " # Проверка наличия целевого признака\n", + " if target_colname not in df_input.columns:\n", + " raise ValueError(f\"{target_colname} is not a column in the DataFrame.\")\n", + " \n", + " # Разделяем данные на признаки и целевую переменную\n", + " X = df_input.drop(columns=[target_colname]) # Признаки\n", + " y = df_input[[target_colname]] # Целевая переменная\n", + "\n", + " # Разделяем данные на обучающую и тестовую выборки\n", + " X_train, X_test, y_train, y_test = train_test_split(\n", + " X, y,\n", + " test_size=(1.0 - frac_train),\n", + " random_state=random_state\n", + " )\n", + " \n", + " return X_train, X_test, y_train, y_test\n", + "\n", + "# Применение функции для разделения данных\n", + "X_train, X_test, y_train, y_test = split_into_train_test(\n", + " df, \n", + " target_colname=\"above_average_close\", \n", + " frac_train=0.8, \n", + " random_state=42 # Убедитесь, что вы задали нужное значение random_state\n", + ")\n", + "\n", + "# Для отображения результатов\n", + "display(\"X_train\", X_train)\n", + "display(\"y_train\", y_train)\n", + "\n", + "display(\"X_test\", X_test)\n", + "display(\"y_test\", y_test)" + 
class StarbucksFeatures(BaseEstimator, TransformerMixin):
    """Stateless transformer that appends a ``Length_to_Width_Ratio`` column.

    The new column is computed as ``X["x"] / X["y"]``, so the input must
    expose columns named ``"x"`` and ``"y"``.

    NOTE(review): the column lists declared next to this class use
    Close/Open/Adj Close/High/Low/Volume and contain neither "x" nor "y" —
    confirm this transformer is meant for this dataset before adding it to a
    pipeline.
    """

    def __init__(self):
        pass

    def fit(self, X, y=None):
        """Nothing is learned from the data; return the transformer itself."""
        return self

    def transform(self, X, y=None):
        """Return a copy of ``X`` with the ratio column appended.

        Working on a copy keeps the caller's frame untouched; the previous
        version wrote the new column straight into the object it was given,
        silently changing the data seen by every later cell.
        """
        X_out = X.copy()
        X_out["Length_to_Width_Ratio"] = X_out["x"] / X_out["y"]
        return X_out

    def get_feature_names_out(self, features_in):
        """Return the input feature names followed by the added column name."""
        return np.append(features_in, ["Length_to_Width_Ratio"], axis=0)
# Categorical branch: impute first, then encode.
preprocessing_cat = Pipeline(
    [
        ("imputer", cat_imputer),
        ("encoder", cat_encoder),
    ]
)

# Route numeric and categorical columns through their own branches; every
# column not listed is passed through unchanged.
features_preprocessing = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("preprocessing_num", preprocessing_num, num_columns),
        ("preprocessing_cat", preprocessing_cat, cat_columns),
    ],
    remainder="passthrough",
)

# Drop unwanted columns by name.
# NOTE(review): selecting columns by string name requires the previous step to
# emit a DataFrame — confirm pandas output is enabled earlier in the notebook
# (e.g. sklearn.set_config(transform_output="pandas")).
drop_columns = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("drop_columns", "drop", columns_to_drop),
    ],
    remainder="passthrough",
)

# Feature post-processing.
# NOTE(review): this step is not part of `pipeline` below, and "Cabin_type" is
# not among the columns declared for this dataset — it looks like a leftover;
# confirm before wiring it in.
features_postprocessing = ColumnTransformer(
    verbose_feature_names_out=False,
    transformers=[
        ("preprocessing_cat", preprocessing_cat, ["Cabin_type"]),
    ],
    remainder="passthrough",
)

# Final pipeline: preprocessing -> column drop -> model.
# The forest is seeded so that re-running the notebook reproduces the scores.
pipeline = Pipeline(
    [
        ("features_preprocessing", features_preprocessing),
        ("drop_columns", drop_columns),
        ("model", RandomForestRegressor(random_state=42)),
    ]
)


def train_pipeline(X, y):
    """Fit the module-level ``pipeline`` on ``X``/``y`` and return it.

    The pipeline is fitted in place, exactly as before; returning it lets the
    caller use the fitted estimator directly instead of reaching for the
    global afterwards.
    """
    return pipeline.fit(X, y)
def train_multiple_models(X, y, models, cv=5, scoring=None):
    """Cross-validate every model behind the shared preprocessing steps.

    Args:
        X: Feature table passed to ``cross_val_score``.
        y: Target values. A single-column 2-D target (for example the
            one-column DataFrame produced by the train/test split) is
            flattened to 1-D, because several regressors warn when they are
            handed a column vector.
        models: Mapping ``name -> unfitted estimator``.
        cv: Cross-validation setting forwarded to ``cross_val_score``
            (default: 5 folds, as before).
        scoring: Scoring forwarded to ``cross_val_score``. ``None`` keeps the
            estimator's own ``score`` method, which was the previous behavior.

    Returns:
        dict: ``name -> {"mean_score": float, "std_dev": float}``.
    """
    import numpy as np

    target = np.asarray(y)
    if target.ndim == 2 and target.shape[1] == 1:
        # Only a genuine column vector is flattened; multi-output targets are
        # left exactly as they were passed in.
        y = target.ravel()

    results = {}
    for model_name, model in models.items():
        # Each model gets its own pipeline around the shared preprocessing.
        model_pipeline = Pipeline(
            [
                ("features_preprocessing", features_preprocessing),
                ("drop_columns", drop_columns),
                ("model", model),
            ]
        )

        scores = cross_val_score(model_pipeline, X, y, cv=cv, scoring=scoring)
        results[model_name] = {
            "mean_score": scores.mean(),
            "std_dev": scores.std(),
        }

    return results
Они продемонстрировали высокую эффективность в предсказании закрытия акций.\n", + "Проблемы SVR: Резкое отличие в результатах SVR выявляет необходимость более тщательной настройки или выбора других подходов к решению задачи, поскольку текущие параметры не обеспечили адекватного уровня прогноза." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Обучение моделей на обучающем наборе данных и оценка на тестовом для регрессии" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: logistic\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: ridge\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: decision_tree\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: knn\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: naive_bayes\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", + "MAE (test): 0.0\n", + "R2 (train): 1.0\n", + "R2 (test): 1.0\n", + "STD (train): 0.0\n", + "STD (test): 0.0\n", + "----------------------------------------\n", + "Model: gradient_boosting\n", + "MSE (train): 0.0\n", + "MSE (test): 0.0\n", + "MAE (train): 0.0\n", 
# Guard against running this cell before its inputs exist.
# NOTE(review): `pipeline_end` is used below but is not covered by these checks.
defined_names = locals()
if 'class_models' not in defined_names:
    raise ValueError("class_models is not defined")
if not all(name in defined_names for name in ('X_train', 'X_test', 'y_train', 'y_test')):
    raise ValueError("Train/test data is not defined")

# Flatten the targets to 1-D so that `y - prediction` below is element-wise.
y_train = np.ravel(y_train)
y_test = np.ravel(y_test)

# Placeholder list for results (nothing is appended to it in this cell).
results = []

# Metric label -> scoring function, in the order the values are stored.
metric_functions = (
    ("MSE", metrics.mean_squared_error),
    ("MAE", metrics.mean_absolute_error),
    ("R2", metrics.r2_score),
)

for model_name in class_models.keys():
    print(f"Model: {model_name}")

    model_entry = class_models[model_name]
    model = model_entry["model"]

    # Shared preprocessing followed by the current model.
    model_pipeline = Pipeline([("pipeline", pipeline_end), ("model", model)])
    model_pipeline.fit(X_train, y_train)

    y_train_predict = model_pipeline.predict(X_train)
    y_test_predict = model_pipeline.predict(X_test)

    # Keep the fitted pipeline and the test predictions next to the model.
    model_entry["pipeline"] = model_pipeline
    model_entry["preds"] = y_test_predict

    evaluation_splits = (
        ("train", y_train, y_train_predict),
        ("test", y_test, y_test_predict),
    )

    # Regression metrics, train first and test second for every metric.
    for metric_label, metric_function in metric_functions:
        for split_label, y_true, y_predicted in evaluation_splits:
            model_entry[f"{metric_label}_{split_label}"] = metric_function(y_true, y_predicted)

    # Spread of the residuals.
    for split_label, y_true, y_predicted in evaluation_splits:
        model_entry[f"STD_{split_label}"] = np.std(y_true - y_predicted)

    # Report for the current model.
    for metric_label in ("MSE", "MAE", "R2", "STD"):
        for split_label in ("train", "test"):
            metric_key = f"{metric_label}_{split_label}"
            print(f"{metric_label} ({split_label}): {model_entry[metric_key]}")
    print("-" * 40)  # separator between models
# 1. Load the price history and index it by trading date.
# NOTE(review): shift(-1) below treats the next row as "the next day", so the
# CSV is assumed to be in chronological order — confirm.
raw_prices = (
    pd.read_csv(".//static//csv//Starbucks Dataset.csv")
    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))
    .set_index('Date')
)

FEATURE_COLUMNS = ['Open', 'High', 'Low', 'Close', 'Volume']

# 2. Build the target: is the NEXT row's close above the average close?
# The last row has no "next close" and is removed by dropna().
data = raw_prices.assign(Close_shifted=raw_prices['Close'].shift(-1)).dropna()
average_close = data['Close'].mean()
data = data.assign(
    above_average_close=(data['Close_shifted'] > average_close).astype(int)  # 1 if above average, else 0
)

# Predictors and target
X = data[FEATURE_COLUMNS]
y = data['above_average_close']

# Split the frame built in THIS cell. Previously X and y were created but never
# split, so the model below was silently trained and scored on whatever
# X_train / y_train an earlier cell had left in the kernel.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 3. Model registry and shared preprocessing step
class_models = {
    "RandomForest": {
        "model": RandomForestRegressor(n_estimators=100, random_state=42),
    }
}

pipeline_end = StandardScaler()
results = []

# 4. Fit and evaluate every registered model
for model_name in class_models.keys():
    print(f"Model: {model_name}")

    model = class_models[model_name]["model"]
    model_pipeline = Pipeline([("scaler", pipeline_end), ("model", model)])

    model_pipeline.fit(X_train, y_train)

    y_train_predict = model_pipeline.predict(X_train)
    y_test_predict = model_pipeline.predict(X_test)

    class_models[model_name]["preds"] = y_test_predict

    class_models[model_name]["MSE_train"] = metrics.mean_squared_error(y_train, y_train_predict)
    class_models[model_name]["MSE_test"] = metrics.mean_squared_error(y_test, y_test_predict)
    class_models[model_name]["MAE_train"] = metrics.mean_absolute_error(y_train, y_train_predict)
    class_models[model_name]["MAE_test"] = metrics.mean_absolute_error(y_test, y_test_predict)
    class_models[model_name]["R2_train"] = metrics.r2_score(y_train, y_train_predict)
    class_models[model_name]["R2_test"] = metrics.r2_score(y_test, y_test_predict)

    print(f"MSE (train): {class_models[model_name]['MSE_train']}")
    print(f"MSE (test): {class_models[model_name]['MSE_test']}")
    print(f"MAE (train): {class_models[model_name]['MAE_train']}")
    print(f"MAE (test): {class_models[model_name]['MAE_test']}")
    print(f"R2 (train): {class_models[model_name]['R2_train']}")
    print(f"R2 (test): {class_models[model_name]['R2_test']}")
    print("-" * 40)

# Forecast for the day after the most recent row in the file. The last row of a
# shuffled test split is an arbitrary historical day, so the input is taken
# from the raw price table instead; that row has no known next-day close and
# was never part of the training data.
latest_data = raw_prices[FEATURE_COLUMNS].iloc[[-1]]
latest_prediction = model_pipeline.predict(latest_data)
predicted_above_average = 1 if latest_prediction[0] > 0.5 else 0  # threshold the regressor output into a binary label
if predicted_above_average == 1:
    print("Прогноз: Цена закроется выше среднего значения завтрашнего дня.")
else:
    print("Прогноз: Цена закроется ниже среднего значения завтрашнего дня.")

# 1. Regression target: the next row's closing price.
# The target lives in its own column of a NEW frame. Previously it overwrote the
# binary `above_average_close` column and dropped rows from `data` in place, so
# every re-run of the cell removed one more row and left a misleading column name.
regression_data = data.assign(next_close=data['Close'].shift(-1)).dropna()

# Predictors and target
X = regression_data[['Open', 'High', 'Low', 'Close', 'Volume']]
y = regression_data['next_close']

# Train / test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 2. Random forest regressor; seeded so that the search result is reproducible.
model = RandomForestRegressor(random_state=42)

# Grid of hyperparameters to search
param_grid = {
    'n_estimators': [50, 100, 200],      # number of trees
    'max_depth': [None, 10, 20, 30],     # maximum tree depth
    'min_samples_split': [2, 5, 10]      # minimum samples required to split a node
}

# 3. Exhaustive grid search with 5-fold cross-validation
grid_search = GridSearchCV(estimator=model, param_grid=param_grid,
                           scoring='neg_mean_squared_error', cv=5, n_jobs=-1, verbose=2)

grid_search.fit(X_train, y_train)

# 4. Search results. The scorer is negated MSE, so the sign is flipped back.
print("Лучшие параметры:", grid_search.best_params_)
print("Лучший результат (MSE):", -grid_search.best_score_)
# Both configurations are scored the same way (same folds, same seed), so the
# two MSE values can be compared directly. Previously the "old" score was the
# best of a full 5-fold grid search while the "new" one was a 2-fold score of a
# single candidate from that same grid, both with unseeded forests, so the
# difference mostly reflected the evaluation protocol and random noise.
CV_FOLDS = 5
SEED = 42

# 1. "Old" configuration: the untuned baseline.
# NOTE(review): these are RandomForestRegressor's documented defaults in
# current scikit-learn — confirm for the installed version.
old_param_grid = {
    'n_estimators': [100],
    'max_depth': [None],
    'min_samples_split': [2]
}

old_grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=SEED),
                               param_grid=old_param_grid,
                               scoring='neg_mean_squared_error', cv=CV_FOLDS, n_jobs=-1)
old_grid_search.fit(X_train, y_train)

# 2. Baseline result. The scorer is negated MSE, so the sign is flipped back.
old_best_params = old_grid_search.best_params_
old_best_mse = -old_grid_search.best_score_

# 3. "New" configuration: the hyperparameters selected by the grid search.
new_param_grid = {
    'n_estimators': [200],
    'max_depth': [10],
    'min_samples_split': [10]
}

new_grid_search = GridSearchCV(estimator=RandomForestRegressor(random_state=SEED),
                               param_grid=new_param_grid,
                               scoring='neg_mean_squared_error', cv=CV_FOLDS, n_jobs=-1)
new_grid_search.fit(X_train, y_train)

# 4. Tuned result
new_best_params = new_grid_search.best_params_
new_best_mse = -new_grid_search.best_score_

# 5. GridSearchCV refits the best candidate on the whole training set by
# default, so that fitted model is reused instead of training another forest.
model_best = new_grid_search.best_estimator_

# Predictions on the held-out test set
y_pred = model_best.predict(X_test)

# Test-set error
mse = metrics.mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)

print("Старые параметры:", old_best_params)
print("Лучший результат (MSE) на старых параметрах:", old_best_mse)
print("\nНовые параметры:", new_best_params)
print("Лучший результат (MSE) на новых параметрах:", new_best_mse)
print("Среднеквадратическая ошибка (MSE) на тестовых данных:", mse)
print("Корень среднеквадратичной ошибки (RMSE) на тестовых данных:", rmse)

# Bar chart of the two cross-validated errors
fig, ax = plt.subplots(figsize=(10, 5))
ax.bar(['Старые параметры', 'Новые параметры'], [old_best_mse, new_best_mse], color=['blue', 'orange'])
ax.set_xlabel('Подбор параметров')
ax.set_ylabel('Среднеквадратическая ошибка (MSE)')
ax.set_title('Сравнение MSE для старых и новых параметров')
plt.show()