1257 lines
276 KiB
Plaintext
1257 lines
276 KiB
Plaintext
|
{
|
|||
|
"cells": [
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Загрузка данных"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',\n",
|
|||
|
" 'SP_open', 'SP_high', 'SP_low', 'SP_close', 'SP_Ajclose', 'SP_volume',\n",
|
|||
|
" 'DJ_open', 'DJ_high', 'DJ_low', 'DJ_close', 'DJ_Ajclose', 'DJ_volume',\n",
|
|||
|
" 'EG_open', 'EG_high', 'EG_low', 'EG_close', 'EG_Ajclose', 'EG_volume',\n",
|
|||
|
" 'EU_Price', 'EU_open', 'EU_high', 'EU_low', 'EU_Trend', 'OF_Price',\n",
|
|||
|
" 'OF_Open', 'OF_High', 'OF_Low', 'OF_Volume', 'OF_Trend', 'OS_Price',\n",
|
|||
|
" 'OS_Open', 'OS_High', 'OS_Low', 'OS_Trend', 'SF_Price', 'SF_Open',\n",
|
|||
|
" 'SF_High', 'SF_Low', 'SF_Volume', 'SF_Trend', 'USB_Price', 'USB_Open',\n",
|
|||
|
" 'USB_High', 'USB_Low', 'USB_Trend', 'PLT_Price', 'PLT_Open', 'PLT_High',\n",
|
|||
|
" 'PLT_Low', 'PLT_Trend', 'PLD_Price', 'PLD_Open', 'PLD_High', 'PLD_Low',\n",
|
|||
|
" 'PLD_Trend', 'RHO_PRICE', 'USDI_Price', 'USDI_Open', 'USDI_High',\n",
|
|||
|
" 'USDI_Low', 'USDI_Volume', 'USDI_Trend', 'GDX_Open', 'GDX_High',\n",
|
|||
|
" 'GDX_Low', 'GDX_Close', 'GDX_Adj Close', 'GDX_Volume', 'USO_Open',\n",
|
|||
|
" 'USO_High', 'USO_Low', 'USO_Close', 'USO_Adj Close', 'USO_Volume'],\n",
|
|||
|
" dtype='object')\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"text/html": [
|
|||
|
"<div>\n",
|
|||
|
"<style scoped>\n",
|
|||
|
" .dataframe tbody tr th:only-of-type {\n",
|
|||
|
" vertical-align: middle;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe tbody tr th {\n",
|
|||
|
" vertical-align: top;\n",
|
|||
|
" }\n",
|
|||
|
"\n",
|
|||
|
" .dataframe thead th {\n",
|
|||
|
" text-align: right;\n",
|
|||
|
" }\n",
|
|||
|
"</style>\n",
|
|||
|
"<table border=\"1\" class=\"dataframe\">\n",
|
|||
|
" <thead>\n",
|
|||
|
" <tr style=\"text-align: right;\">\n",
|
|||
|
" <th></th>\n",
|
|||
|
" <th>Date</th>\n",
|
|||
|
" <th>Open</th>\n",
|
|||
|
" <th>High</th>\n",
|
|||
|
" <th>Low</th>\n",
|
|||
|
" <th>Close</th>\n",
|
|||
|
" <th>Adj Close</th>\n",
|
|||
|
" <th>Volume</th>\n",
|
|||
|
" <th>SP_open</th>\n",
|
|||
|
" <th>SP_high</th>\n",
|
|||
|
" <th>SP_low</th>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <th>GDX_Low</th>\n",
|
|||
|
" <th>GDX_Close</th>\n",
|
|||
|
" <th>GDX_Adj Close</th>\n",
|
|||
|
" <th>GDX_Volume</th>\n",
|
|||
|
" <th>USO_Open</th>\n",
|
|||
|
" <th>USO_High</th>\n",
|
|||
|
" <th>USO_Low</th>\n",
|
|||
|
" <th>USO_Close</th>\n",
|
|||
|
" <th>USO_Adj Close</th>\n",
|
|||
|
" <th>USO_Volume</th>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </thead>\n",
|
|||
|
" <tbody>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>0</th>\n",
|
|||
|
" <td>2011-12-15</td>\n",
|
|||
|
" <td>154.740005</td>\n",
|
|||
|
" <td>154.949997</td>\n",
|
|||
|
" <td>151.710007</td>\n",
|
|||
|
" <td>152.330002</td>\n",
|
|||
|
" <td>152.330002</td>\n",
|
|||
|
" <td>21521900</td>\n",
|
|||
|
" <td>123.029999</td>\n",
|
|||
|
" <td>123.199997</td>\n",
|
|||
|
" <td>121.989998</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>51.570000</td>\n",
|
|||
|
" <td>51.680000</td>\n",
|
|||
|
" <td>48.973877</td>\n",
|
|||
|
" <td>20605600</td>\n",
|
|||
|
" <td>36.900002</td>\n",
|
|||
|
" <td>36.939999</td>\n",
|
|||
|
" <td>36.049999</td>\n",
|
|||
|
" <td>36.130001</td>\n",
|
|||
|
" <td>36.130001</td>\n",
|
|||
|
" <td>12616700</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1</th>\n",
|
|||
|
" <td>2011-12-16</td>\n",
|
|||
|
" <td>154.309998</td>\n",
|
|||
|
" <td>155.369995</td>\n",
|
|||
|
" <td>153.899994</td>\n",
|
|||
|
" <td>155.229996</td>\n",
|
|||
|
" <td>155.229996</td>\n",
|
|||
|
" <td>18124300</td>\n",
|
|||
|
" <td>122.230003</td>\n",
|
|||
|
" <td>122.949997</td>\n",
|
|||
|
" <td>121.300003</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>52.040001</td>\n",
|
|||
|
" <td>52.680000</td>\n",
|
|||
|
" <td>49.921513</td>\n",
|
|||
|
" <td>16285400</td>\n",
|
|||
|
" <td>36.180000</td>\n",
|
|||
|
" <td>36.500000</td>\n",
|
|||
|
" <td>35.730000</td>\n",
|
|||
|
" <td>36.270000</td>\n",
|
|||
|
" <td>36.270000</td>\n",
|
|||
|
" <td>12578800</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>2</th>\n",
|
|||
|
" <td>2011-12-19</td>\n",
|
|||
|
" <td>155.479996</td>\n",
|
|||
|
" <td>155.860001</td>\n",
|
|||
|
" <td>154.360001</td>\n",
|
|||
|
" <td>154.869995</td>\n",
|
|||
|
" <td>154.869995</td>\n",
|
|||
|
" <td>12547200</td>\n",
|
|||
|
" <td>122.059998</td>\n",
|
|||
|
" <td>122.320000</td>\n",
|
|||
|
" <td>120.029999</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>51.029999</td>\n",
|
|||
|
" <td>51.169998</td>\n",
|
|||
|
" <td>48.490578</td>\n",
|
|||
|
" <td>15120200</td>\n",
|
|||
|
" <td>36.389999</td>\n",
|
|||
|
" <td>36.450001</td>\n",
|
|||
|
" <td>35.930000</td>\n",
|
|||
|
" <td>36.200001</td>\n",
|
|||
|
" <td>36.200001</td>\n",
|
|||
|
" <td>7418200</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>3</th>\n",
|
|||
|
" <td>2011-12-20</td>\n",
|
|||
|
" <td>156.820007</td>\n",
|
|||
|
" <td>157.429993</td>\n",
|
|||
|
" <td>156.580002</td>\n",
|
|||
|
" <td>156.979996</td>\n",
|
|||
|
" <td>156.979996</td>\n",
|
|||
|
" <td>9136300</td>\n",
|
|||
|
" <td>122.180000</td>\n",
|
|||
|
" <td>124.139999</td>\n",
|
|||
|
" <td>120.370003</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>52.369999</td>\n",
|
|||
|
" <td>52.990002</td>\n",
|
|||
|
" <td>50.215282</td>\n",
|
|||
|
" <td>11644900</td>\n",
|
|||
|
" <td>37.299999</td>\n",
|
|||
|
" <td>37.610001</td>\n",
|
|||
|
" <td>37.220001</td>\n",
|
|||
|
" <td>37.560001</td>\n",
|
|||
|
" <td>37.560001</td>\n",
|
|||
|
" <td>10041600</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>4</th>\n",
|
|||
|
" <td>2011-12-21</td>\n",
|
|||
|
" <td>156.979996</td>\n",
|
|||
|
" <td>157.529999</td>\n",
|
|||
|
" <td>156.130005</td>\n",
|
|||
|
" <td>157.160004</td>\n",
|
|||
|
" <td>157.160004</td>\n",
|
|||
|
" <td>11996100</td>\n",
|
|||
|
" <td>123.930000</td>\n",
|
|||
|
" <td>124.360001</td>\n",
|
|||
|
" <td>122.750000</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>52.419998</td>\n",
|
|||
|
" <td>52.959999</td>\n",
|
|||
|
" <td>50.186852</td>\n",
|
|||
|
" <td>8724300</td>\n",
|
|||
|
" <td>37.669998</td>\n",
|
|||
|
" <td>38.240002</td>\n",
|
|||
|
" <td>37.520000</td>\n",
|
|||
|
" <td>38.110001</td>\n",
|
|||
|
" <td>38.110001</td>\n",
|
|||
|
" <td>10728000</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>...</th>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1713</th>\n",
|
|||
|
" <td>2018-12-24</td>\n",
|
|||
|
" <td>119.570000</td>\n",
|
|||
|
" <td>120.139999</td>\n",
|
|||
|
" <td>119.570000</td>\n",
|
|||
|
" <td>120.019997</td>\n",
|
|||
|
" <td>120.019997</td>\n",
|
|||
|
" <td>9736400</td>\n",
|
|||
|
" <td>239.039993</td>\n",
|
|||
|
" <td>240.839996</td>\n",
|
|||
|
" <td>234.270004</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>20.650000</td>\n",
|
|||
|
" <td>21.090000</td>\n",
|
|||
|
" <td>21.090000</td>\n",
|
|||
|
" <td>60507000</td>\n",
|
|||
|
" <td>9.490000</td>\n",
|
|||
|
" <td>9.520000</td>\n",
|
|||
|
" <td>9.280000</td>\n",
|
|||
|
" <td>9.290000</td>\n",
|
|||
|
" <td>9.290000</td>\n",
|
|||
|
" <td>21598200</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1714</th>\n",
|
|||
|
" <td>2018-12-26</td>\n",
|
|||
|
" <td>120.620003</td>\n",
|
|||
|
" <td>121.000000</td>\n",
|
|||
|
" <td>119.570000</td>\n",
|
|||
|
" <td>119.660004</td>\n",
|
|||
|
" <td>119.660004</td>\n",
|
|||
|
" <td>14293500</td>\n",
|
|||
|
" <td>235.970001</td>\n",
|
|||
|
" <td>246.179993</td>\n",
|
|||
|
" <td>233.759995</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>20.530001</td>\n",
|
|||
|
" <td>20.620001</td>\n",
|
|||
|
" <td>20.620001</td>\n",
|
|||
|
" <td>76365200</td>\n",
|
|||
|
" <td>9.250000</td>\n",
|
|||
|
" <td>9.920000</td>\n",
|
|||
|
" <td>9.230000</td>\n",
|
|||
|
" <td>9.900000</td>\n",
|
|||
|
" <td>9.900000</td>\n",
|
|||
|
" <td>40978800</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1715</th>\n",
|
|||
|
" <td>2018-12-27</td>\n",
|
|||
|
" <td>120.570000</td>\n",
|
|||
|
" <td>120.900002</td>\n",
|
|||
|
" <td>120.139999</td>\n",
|
|||
|
" <td>120.570000</td>\n",
|
|||
|
" <td>120.570000</td>\n",
|
|||
|
" <td>11874400</td>\n",
|
|||
|
" <td>242.570007</td>\n",
|
|||
|
" <td>248.289993</td>\n",
|
|||
|
" <td>238.960007</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>20.700001</td>\n",
|
|||
|
" <td>20.969999</td>\n",
|
|||
|
" <td>20.969999</td>\n",
|
|||
|
" <td>52393000</td>\n",
|
|||
|
" <td>9.590000</td>\n",
|
|||
|
" <td>9.650000</td>\n",
|
|||
|
" <td>9.370000</td>\n",
|
|||
|
" <td>9.620000</td>\n",
|
|||
|
" <td>9.620000</td>\n",
|
|||
|
" <td>36578700</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1716</th>\n",
|
|||
|
" <td>2018-12-28</td>\n",
|
|||
|
" <td>120.800003</td>\n",
|
|||
|
" <td>121.080002</td>\n",
|
|||
|
" <td>120.720001</td>\n",
|
|||
|
" <td>121.059998</td>\n",
|
|||
|
" <td>121.059998</td>\n",
|
|||
|
" <td>6864700</td>\n",
|
|||
|
" <td>249.580002</td>\n",
|
|||
|
" <td>251.399994</td>\n",
|
|||
|
" <td>246.449997</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>20.570000</td>\n",
|
|||
|
" <td>20.600000</td>\n",
|
|||
|
" <td>20.600000</td>\n",
|
|||
|
" <td>49835000</td>\n",
|
|||
|
" <td>9.540000</td>\n",
|
|||
|
" <td>9.650000</td>\n",
|
|||
|
" <td>9.380000</td>\n",
|
|||
|
" <td>9.530000</td>\n",
|
|||
|
" <td>9.530000</td>\n",
|
|||
|
" <td>22803400</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" <tr>\n",
|
|||
|
" <th>1717</th>\n",
|
|||
|
" <td>2018-12-31</td>\n",
|
|||
|
" <td>120.980003</td>\n",
|
|||
|
" <td>121.260002</td>\n",
|
|||
|
" <td>120.830002</td>\n",
|
|||
|
" <td>121.250000</td>\n",
|
|||
|
" <td>121.250000</td>\n",
|
|||
|
" <td>8449400</td>\n",
|
|||
|
" <td>249.559998</td>\n",
|
|||
|
" <td>250.190002</td>\n",
|
|||
|
" <td>247.470001</td>\n",
|
|||
|
" <td>...</td>\n",
|
|||
|
" <td>20.559999</td>\n",
|
|||
|
" <td>21.090000</td>\n",
|
|||
|
" <td>21.090000</td>\n",
|
|||
|
" <td>53866600</td>\n",
|
|||
|
" <td>9.630000</td>\n",
|
|||
|
" <td>9.710000</td>\n",
|
|||
|
" <td>9.440000</td>\n",
|
|||
|
" <td>9.660000</td>\n",
|
|||
|
" <td>9.660000</td>\n",
|
|||
|
" <td>28417400</td>\n",
|
|||
|
" </tr>\n",
|
|||
|
" </tbody>\n",
|
|||
|
"</table>\n",
|
|||
|
"<p>1718 rows × 81 columns</p>\n",
|
|||
|
"</div>"
|
|||
|
],
|
|||
|
"text/plain": [
|
|||
|
" Date Open High Low Close Adj Close \\\n",
|
|||
|
"0 2011-12-15 154.740005 154.949997 151.710007 152.330002 152.330002 \n",
|
|||
|
"1 2011-12-16 154.309998 155.369995 153.899994 155.229996 155.229996 \n",
|
|||
|
"2 2011-12-19 155.479996 155.860001 154.360001 154.869995 154.869995 \n",
|
|||
|
"3 2011-12-20 156.820007 157.429993 156.580002 156.979996 156.979996 \n",
|
|||
|
"4 2011-12-21 156.979996 157.529999 156.130005 157.160004 157.160004 \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"1713 2018-12-24 119.570000 120.139999 119.570000 120.019997 120.019997 \n",
|
|||
|
"1714 2018-12-26 120.620003 121.000000 119.570000 119.660004 119.660004 \n",
|
|||
|
"1715 2018-12-27 120.570000 120.900002 120.139999 120.570000 120.570000 \n",
|
|||
|
"1716 2018-12-28 120.800003 121.080002 120.720001 121.059998 121.059998 \n",
|
|||
|
"1717 2018-12-31 120.980003 121.260002 120.830002 121.250000 121.250000 \n",
|
|||
|
"\n",
|
|||
|
" Volume SP_open SP_high SP_low ... GDX_Low GDX_Close \\\n",
|
|||
|
"0 21521900 123.029999 123.199997 121.989998 ... 51.570000 51.680000 \n",
|
|||
|
"1 18124300 122.230003 122.949997 121.300003 ... 52.040001 52.680000 \n",
|
|||
|
"2 12547200 122.059998 122.320000 120.029999 ... 51.029999 51.169998 \n",
|
|||
|
"3 9136300 122.180000 124.139999 120.370003 ... 52.369999 52.990002 \n",
|
|||
|
"4 11996100 123.930000 124.360001 122.750000 ... 52.419998 52.959999 \n",
|
|||
|
"... ... ... ... ... ... ... ... \n",
|
|||
|
"1713 9736400 239.039993 240.839996 234.270004 ... 20.650000 21.090000 \n",
|
|||
|
"1714 14293500 235.970001 246.179993 233.759995 ... 20.530001 20.620001 \n",
|
|||
|
"1715 11874400 242.570007 248.289993 238.960007 ... 20.700001 20.969999 \n",
|
|||
|
"1716 6864700 249.580002 251.399994 246.449997 ... 20.570000 20.600000 \n",
|
|||
|
"1717 8449400 249.559998 250.190002 247.470001 ... 20.559999 21.090000 \n",
|
|||
|
"\n",
|
|||
|
" GDX_Adj Close GDX_Volume USO_Open USO_High USO_Low USO_Close \\\n",
|
|||
|
"0 48.973877 20605600 36.900002 36.939999 36.049999 36.130001 \n",
|
|||
|
"1 49.921513 16285400 36.180000 36.500000 35.730000 36.270000 \n",
|
|||
|
"2 48.490578 15120200 36.389999 36.450001 35.930000 36.200001 \n",
|
|||
|
"3 50.215282 11644900 37.299999 37.610001 37.220001 37.560001 \n",
|
|||
|
"4 50.186852 8724300 37.669998 38.240002 37.520000 38.110001 \n",
|
|||
|
"... ... ... ... ... ... ... \n",
|
|||
|
"1713 21.090000 60507000 9.490000 9.520000 9.280000 9.290000 \n",
|
|||
|
"1714 20.620001 76365200 9.250000 9.920000 9.230000 9.900000 \n",
|
|||
|
"1715 20.969999 52393000 9.590000 9.650000 9.370000 9.620000 \n",
|
|||
|
"1716 20.600000 49835000 9.540000 9.650000 9.380000 9.530000 \n",
|
|||
|
"1717 21.090000 53866600 9.630000 9.710000 9.440000 9.660000 \n",
|
|||
|
"\n",
|
|||
|
" USO_Adj Close USO_Volume \n",
|
|||
|
"0 36.130001 12616700 \n",
|
|||
|
"1 36.270000 12578800 \n",
|
|||
|
"2 36.200001 7418200 \n",
|
|||
|
"3 37.560001 10041600 \n",
|
|||
|
"4 38.110001 10728000 \n",
|
|||
|
"... ... ... \n",
|
|||
|
"1713 9.290000 21598200 \n",
|
|||
|
"1714 9.900000 40978800 \n",
|
|||
|
"1715 9.620000 36578700 \n",
|
|||
|
"1716 9.530000 22803400 \n",
|
|||
|
"1717 9.660000 28417400 \n",
|
|||
|
"\n",
|
|||
|
"[1718 rows x 81 columns]"
|
|||
|
]
|
|||
|
},
|
|||
|
"execution_count": 1,
|
|||
|
"metadata": {},
|
|||
|
"output_type": "execute_result"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"# Load the dataset, print its column names and display the frame\n",
|
|||
|
"df = pd.read_csv(\"../static/csv/FINAL_USO.csv\")\n",
|
|||
|
"print(df.columns)\n",
|
|||
|
"df"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## **1-я бизнес-цель (регрессия)**: \n",
|
|||
|
"\n",
|
|||
|
"Предсказание цены на золото с целью принятия инвесторами решения о покупке товаров."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Целевой признак: цена закрытия Adj Close.\n",
|
|||
|
"\n",
|
|||
|
"Вход: Volume, High, Low, Close, Open.\\\n",
|
|||
|
"Достижимый уровень качества: предсказания должны иметь погрешность в среднем не более 5$. Для проверки будет использоваться метрика MAE (средняя абсолютная ошибка)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.discriminant_analysis import StandardScaler\n",
|
|||
|
"from sklearn.impute import SimpleImputer\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.model_selection import GridSearchCV, train_test_split\n",
|
|||
|
"from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score\n",
|
|||
|
"from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.model_selection import cross_val_predict\n",
|
|||
|
"from sklearn.metrics import mean_squared_error\n",
|
|||
|
"import numpy as np\n",
|
|||
|
"from sklearn import metrics\n",
|
|||
|
"import sklearn.preprocessing as preproc\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, Ridge\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error\n",
|
|||
|
"from mlxtend.evaluate import bias_variance_decomp\n",
|
|||
|
"from sklearn.neural_network import MLPRegressor\n",
|
|||
|
"\n",
|
|||
|
"# Загрузка данных\n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//FINAL_USO.csv\")\n",
|
|||
|
"data = df['Volume', 'High', 'Open', 'Close', 'Low','Adj Close']\n",
|
|||
|
"\n",
|
|||
|
"X = data.drop('Adj Close', axis=1)\n",
|
|||
|
"y = data['Adj Close']\n",
|
|||
|
"\n",
|
|||
|
"# Split the data into training and test sets (NOTE(review): the default split shuffles these date-ordered rows - confirm that is intended)\n",
|
|||
|
"X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Numeric feature preprocessing\n",
|
|||
|
"# fill missing values with the column median\n",
|
|||
|
"num_imputer = SimpleImputer(strategy=\"median\")\n",
|
|||
|
"\n",
|
|||
|
"preprocessing_num = Pipeline(\n",
|
|||
|
" [\n",
|
|||
|
" (\"imputer\", num_imputer)\n",
|
|||
|
" ]\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"# There are no categorical features, so no categorical preprocessing is needed\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Overall preprocessing (numeric columns only): applies the imputer pipeline to every column of X\n",
|
|||
|
"preprocessing = ColumnTransformer(\n",
|
|||
|
" [\n",
|
|||
|
" (\"nums\", preprocessing_num, X.columns)\n",
|
|||
|
" ]\n",
|
|||
|
")\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Линейная регрессия:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Лучшие гиперпараметры: {'preprocessing': MinMaxScaler()}\n",
|
|||
|
"Cредняя абсолютная ошибка (MAE) = 1.8424538380756087e-14\n",
|
|||
|
"Смещение: -5.1553225998619436e-11\n",
|
|||
|
"Дисперсия: 3.270386026049708e-11\n",
|
|||
|
"R^2 = 1.0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pipeline_lin_reg = Pipeline([\n",
|
|||
|
" ('preprocessing', preprocessing),\n",
|
|||
|
" ('model', LinearRegression())]\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"# Define the hyperparameter grid (candidate values) to search over\n",
|
|||
|
"param_grid = {\n",
|
|||
|
" # how the features are scaled; NOTE(review): this sets the whole 'preprocessing' step, so the imputer pipeline is presumably replaced by the scaler - confirm intended\n",
|
|||
|
" 'preprocessing': [StandardScaler(), preproc.MinMaxScaler(), preproc.MaxAbsScaler(), None]\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Build a GridSearchCV that picks the grid point with the highest\n",
|
|||
|
"# negative root-mean-squared error (negated so that the search maximises instead of minimises)\n",
|
|||
|
"grid_search = GridSearchCV(pipeline_lin_reg, param_grid, cv=5, scoring='neg_root_mean_squared_error', n_jobs=-1)\n",
|
|||
|
"\n",
|
|||
|
"# Fit the model over the hyperparameter grid\n",
|
|||
|
"grid_search.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Лучшие гиперпараметры: \", grid_search.best_params_)\n",
|
|||
|
"\n",
|
|||
|
"# Best linear regression model\n",
|
|||
|
"best_model = grid_search.best_estimator_\n",
|
|||
|
"\n",
|
|||
|
"y_pred = best_model.predict(X_test)\n",
|
|||
|
"\n",
|
|||
|
"print(f'Cредняя абсолютная ошибка (MAE) = {mean_absolute_error(y_test, y_pred)}')\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Bias/variance estimate; NOTE(review): these are the mean and std of the CV scores (negative RMSE) averaged over all grid candidates, not a bias-variance decomposition\n",
|
|||
|
"cv_results = grid_search.cv_results_\n",
|
|||
|
"mean_test_score = cv_results['mean_test_score']\n",
|
|||
|
"std_test_score = cv_results['std_test_score']\n",
|
|||
|
"\n",
|
|||
|
"print(f\"Смещение: {mean_test_score.mean()}\")\n",
|
|||
|
"print(f\"Дисперсия: {std_test_score.mean()}\")\n",
|
|||
|
"\n",
|
|||
|
"from sklearn.metrics import r2_score\n",
|
|||
|
"\n",
|
|||
|
"print(f'R^2 = {r2_score(y_test, y_pred)}')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Гребневая регрессия"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 14,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Лучшие гиперпараметры: {'model__alpha': 0, 'preprocessing': StandardScaler()}\n",
|
|||
|
"Cредняя абсолютная ошибка (MAE) = 5.494726121130867e-13\n",
|
|||
|
"Смещение: -0.4263701358095246\n",
|
|||
|
"Дисперсия: 0.02072744817291101\n",
|
|||
|
"R^2 = 1.0\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"pipeline_ridge = Pipeline([\n",
|
|||
|
" ('preprocessing', preprocessing),\n",
|
|||
|
" ('model', Ridge())]\n",
|
|||
|
")\n",
|
|||
|
"\n",
|
|||
|
"# Define the hyperparameter grid (candidate values) to search over\n",
|
|||
|
"param_grid = {\n",
|
|||
|
" # how the features are scaled; NOTE(review): this sets the whole 'preprocessing' step, so the imputer pipeline is presumably replaced by the scaler - confirm intended\n",
|
|||
|
" 'preprocessing': [StandardScaler(), preproc.MinMaxScaler(), preproc.MaxAbsScaler(), None],\n",
|
|||
|
" # regularisation strength\n",
|
|||
|
" 'model__alpha': [0, 0.5, 1.0, 1.5, 2.0, 5.0, 10.0] \n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Build a GridSearchCV that picks the grid point with the highest\n",
|
|||
|
"# negative root-mean-squared error (negated so that the search maximises instead of minimises)\n",
|
|||
|
"grid_search = GridSearchCV(pipeline_ridge, param_grid, cv=5, scoring='neg_root_mean_squared_error', n_jobs=-1, verbose=0)\n",
|
|||
|
"\n",
|
|||
|
"# Fit the model over the hyperparameter grid\n",
|
|||
|
"grid_search.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Лучшие гиперпараметры: \", grid_search.best_params_)\n",
|
|||
|
"\n",
|
|||
|
"# Best regression model found by the search\n",
|
|||
|
"best_model = grid_search.best_estimator_\n",
|
|||
|
"\n",
|
|||
|
"y_pred = best_model.predict(X_test)\n",
|
|||
|
"\n",
|
|||
|
"print(f'Cредняя абсолютная ошибка (MAE) = {mean_absolute_error(y_test, y_pred)}')\n",
|
|||
|
"\n",
|
|||
|
"# NOTE(review): the values printed below as bias/variance are the mean and std of the CV scores (negative RMSE) averaged over all grid candidates\n",
|
|||
|
"cv_results = grid_search.cv_results_\n",
|
|||
|
"mean_test_score = cv_results['mean_test_score']\n",
|
|||
|
"std_test_score = cv_results['std_test_score']\n",
|
|||
|
"\n",
|
|||
|
"print(f\"Смещение: {mean_test_score.mean()}\")\n",
|
|||
|
"print(f\"Дисперсия: {std_test_score.mean()}\")\n",
|
|||
|
"\n",
|
|||
|
"print(f'R^2 = {r2_score(y_test, y_pred)}')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Гребневая регрессия не дала более точных результатов, чем линейная: MAE обеих моделей практически равна нулю (≈5.5e-13 против ≈1.8e-14)."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## Метод градиентного бустинга (набор деревьев решений)"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 19,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Лучшие гиперпараметры: {'model__learning_rate': 0.1, 'model__max_depth': 5, 'model__n_estimators': 300, 'preprocessing': None}\n",
|
|||
|
"Cредняя абсолютная ошибка (MAE) = 0.040833243038698064\n",
|
|||
|
"Смещение: -0.2177327926836486\n",
|
|||
|
"Дисперсия: 0.021373424060567556\n",
|
|||
|
"R^2 = 0.9999842165416633\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"from sklearn.metrics import roc_auc_score, confusion_matrix, accuracy_score\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, RandomForestClassifier, GradientBoostingClassifier\n",
|
|||
|
"# Pipeline\n",
|
|||
|
"pipeline_grad = Pipeline([\n",
|
|||
|
" ('preprocessing', preprocessing),\n",
|
|||
|
" ('model', GradientBoostingRegressor())\n",
|
|||
|
"])\n",
|
|||
|
"\n",
|
|||
|
"# Define the hyperparameter grid\n",
|
|||
|
"param_grid = {\n",
|
|||
|
" 'preprocessing': [StandardScaler(), preproc.MinMaxScaler(), preproc.MaxAbsScaler(), None],\n",
|
|||
|
" 'model__n_estimators': [100, 200, 300],\n",
|
|||
|
" # learning rate\n",
|
|||
|
" 'model__learning_rate': [0.1, 0.2],\n",
|
|||
|
" # maximum tree depth\n",
|
|||
|
" 'model__max_depth': [3, 5, 7]\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Create the GridSearchCV object (2-fold CV here, unlike the 5-fold search in the linear and ridge cells)\n",
|
|||
|
"grid_search = GridSearchCV(pipeline_grad, param_grid, cv=2, scoring='neg_root_mean_squared_error', n_jobs=-1)\n",
|
|||
|
"\n",
|
|||
|
"# Fit the model over the hyperparameter grid\n",
|
|||
|
"grid_search.fit(X_train, y_train)\n",
|
|||
|
"\n",
|
|||
|
"print(\"Лучшие гиперпараметры: \", grid_search.best_params_)\n",
|
|||
|
"\n",
|
|||
|
"# Best gradient boosting model found by the search\n",
|
|||
|
"best_model = grid_search.best_estimator_\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"y_pred = best_model.predict(X_test)\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"print(f'Cредняя абсолютная ошибка (MAE) = {mean_absolute_error(y_test, y_pred)}')\n",
|
|||
|
"\n",
|
|||
|
"\n",
|
|||
|
"# Cross-validated predictions on the training set; NOTE(review): y_cv_pred is not used anywhere in this cell - confirm it is needed\n",
|
|||
|
"y_cv_pred = cross_val_predict(best_model, X_train, y_train, cv=3)\n",
|
|||
|
"\n",
|
|||
|
"cv_results = grid_search.cv_results_\n",
|
|||
|
"mean_test_score = cv_results['mean_test_score']\n",
|
|||
|
"std_test_score = cv_results['std_test_score']\n",
|
|||
|
"\n",
|
|||
|
"print(f\"Смещение: {mean_test_score.mean()}\")\n",
|
|||
|
"print(f\"Дисперсия: {std_test_score.mean()}\")\n",
|
|||
|
"\n",
|
|||
|
"print(f'R^2 = {r2_score(y_test, y_pred)}')"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Вывод**: \n",
|
|||
|
"\n",
|
|||
|
"Все 3 модели регрессии показали допустимый уровень \"погрешности\". \n",
|
|||
|
"\n",
|
|||
|
"R² (коэффициент детерминации): 0.99 — это очень высокий уровень, указывающий на то, что модель объясняет 99% вариации целевой переменной. Это свидетельствует о высокой предсказательной способности модели.\n",
|
|||
|
"\n",
|
|||
|
"Из всех моделей градиентный бустинг показал самую высокую \"погрешность\" (MAE ≈ 0.04), но и она значительно ниже допустимых 5$; наименьшую MAE дала линейная регрессия."
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"## **2-я бизнес-цель (классификация):** \n",
|
|||
|
"\n",
|
|||
|
"Определить оптимальные коэффициенты для различных факторов, влияющих на цену золота. \n",
|
|||
|
"\n",
|
|||
|
"Целевой признак: Adj Close.\n",
|
|||
|
"\n",
|
|||
|
"Вход: Volume, High, Low, Close, Open. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Результаты для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Best Parameters: {'model__C': 10, 'model__solver': 'liblinear'}\n",
|
|||
|
"Accuracy: 0.9825581395348837\n",
|
|||
|
"Precision: 1.0\n",
|
|||
|
"Recall: 0.9469026548672567\n",
|
|||
|
"F1-score: 0.9727272727272728\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABOwklEQVR4nO3deVxU1fsH8M8AMiAwICogCojgAokbpiEumAjinppftwLFNZfUXCt3kzJL00xNU9Q0LTUrS41UXMmve2lIgrikgAoBArLO+f3hj/k2Asp4LwzTfN697iu598y5zx1HeHjOOfcqhBACRERERBKY6DsAIiIiMnxMKIiIiEgyJhREREQkGRMKIiIikowJBREREUnGhIKIiIgkY0JBREREkjGhICIiIsmYUBAREZFkTCiM1LVr1xAUFARbW1soFArs3btX1v5v3LgBhUKByMhIWfs1ZAEBAQgICJCtv6ysLIwcORJOTk5QKBSYPHmybH1XFdHR0VAoFIiOjpalv8jISCgUCty4cUOW/giYP38+FAqFvsOgKoAJhR4lJCRgzJgxaNCgASwsLKBSqeDv749PPvkEjx49qtBzh4aG4vfff8d7772HrVu3onXr1hV6vsoUFhYGhUIBlUpV6vt47do1KBQKKBQKLFu2TOf+7969i/nz5+PixYsyRPv8lixZgsjISIwbNw5bt27Fa6+9VqHnq1+/Pnr27Fmh55DLkiVLZE+Sn1ScnBRvZmZmqFu3LsLCwnDnzp0KPTdRlSRIL/bt2ycsLS2FnZ2dmDRpkvj888/Fp59+KgYNGiSqVasmRo0aVWHnzsnJEQDEO++8U2HnUKvV4tGjR6KwsLDCzlGW0NBQYWZmJkxNTcXOnTtLHJ83b56wsLAQAMSHH36oc/9nzpwRAMSmTZt0el1eXp7Iy8vT+Xxladu2rfD395etv2dxc3MTPXr0qLTzCSFEUVGRePTokSgqKtLpdVZWViI0NLTE/sLCQvHo0SOhVqslx7Zp0yYBQCxcuFBs3bpVrF+/XoSHhwtTU1Ph4eEhHj16JPkchqCgoMBorpWezky/6YxxSkxMxKBBg+Dm5obDhw+jTp06mmPjx49HfHw8fvzxxwo7//379wEAdnZ2FXYOhUIBCwuLCuv/WZRKJfz9/fHVV19h4MCBWse2b9+OHj16YPfu3ZUSS05ODqpXrw5zc3NZ+7137x68vb1l66+wsBBqtVr2OKUwMTGR9XNkamoKU1NT2foDgJCQEE2Fb+TIkahVqxY++OADfP/99yU+exVJCIHc3FxYWlpW2jkBwMzMDGZm/FFCHPLQi6VLlyIrKwtffPGFVjJRzNPTE2+++abm68LCQixatAgeHh5QKpWoX78+3n77beTl5Wm9rrgkfeLECbRp0wYWFhZo0KABtmzZomkzf/58uLm5AQCmT58OhUKB+vXrA3g8VFD8538qbYw0KioK7du3h52dHaytrdG4cWO8/fbbmuNlzaE4fPgwOnToACsrK9jZ2aFPnz6IjY0t9Xzx8fEICwuDnZ0dbG1tMXz4cOTk5JT9xj5hyJAh2L9/P9LT0zX7zpw5g2vXrmHIkCEl2qelpWHatGnw8fGBtbU1VCoVQkJCcOnSJU2b6OhovPjiiwCA4cOHa8rdxdcZEBCApk2b4ty5c+jYsSOqV6+ueV+enEMRGhoKCwuLEtcfHByMGjVq4O7du6VeV/G8gsTERPz444+aGIrnBdy7dw/h4eFwdHSEhYUFmjdvjs2bN2v1Ufz3s2zZMqxYsULz2frjjz/K9d6WpbyfVbVajfnz58PZ2RnVq1dH586d8ccff6B+/foICwsrca3/nENx7do19O/fH05OTrCwsEC9evUwaNAgZGRkAHiczGZnZ2Pz5s2a96a4z7LmUOzfvx+dOnWCjY0NVCoVXnzxRWzfvv253oMOHToAeDyk+U9Xr17FgAEDYG9vDwsLC7Ru3Rrff/99idf/9ttv6NSpEy
wtLVGvXj0sXrwYmzZtKhF38b/3gwcPonXr1rC0tMS6desAAOnp6Zg8eTJcXFygVCrh6emJDz74AGq1WutcO3bsgK+vr+a6fXx88Mknn2iOFxQUYMGCBWjYsCEsLCxQs2ZNtG/fHlFRUZo2pX1/kPN7FhkOppV68MMPP6BBgwZo165dudqPHDkSmzdvxoABA/DWW2/h9OnTiIiIQGxsLL799luttvHx8RgwYADCw8MRGhqKjRs3IiwsDL6+vnjhhRfQr18/2NnZYcqUKRg8eDC6d+8Oa2trneK/cuUKevbsiWbNmmHhwoVQKpWIj4/HyZMnn/q6X375BSEhIWjQoAHmz5+PR48eYdWqVfD398f58+dLJDMDBw6Eu7s7IiIicP78eWzYsAEODg744IMPyhVnv379MHbsWOzZswcjRowA8Lg60aRJE7Rq1apE++vXr2Pv3r149dVX4e7ujpSUFKxbtw6dOnXCH3/8AWdnZ3h5eWHhwoWYO3cuRo8erfnh8c+/y9TUVISEhGDQoEEYNmwYHB0dS43vk08+weHDhxEaGoqYmBiYmppi3bp1+Pnnn7F161Y4OzuX+jovLy9s3boVU6ZMQb169fDWW28BAGrXro1Hjx4hICAA8fHxmDBhAtzd3fHNN98gLCwM6enpWokqAGzatAm5ubkYPXo0lEol7O3ty/XelqW8n9XZs2dj6dKl6NWrF4KDg3Hp0iUEBwcjNzf3qf3n5+cjODgYeXl5mDhxIpycnHDnzh3s27cP6enpsLW1xdatWzFy5Ei0adMGo0ePBgB4eHiU2WdkZCRGjBiBF154AbNnz4adnR0uXLiAAwcOlJp4PkvxD/0aNWpo9l25cgX+/v6oW7cuZs2aBSsrK3z99dfo27cvdu/ejVdeeQUAcOfOHXTu3BkKhQKzZ8+GlZUVNmzYAKVSWeq54uLiMHjwYIwZMwajRo1C48aNkZOTg06dOuHOnTsYM2YMXF1dcerUKcyePRtJSUlYsWIFgMe/FAwePBhdunTR/JuKjY3FyZMnNZ+T+fPnIyIiQvN+ZmZm4uzZszh//jy6du1a5nsg5/csMiD6HnMxNhkZGQKA6NOnT7naX7x4UQAQI0eO1No/bdo0AUAcPnxYs8/NzU0AEMeOHdPsu3fvnlAqleKtt97S7EtMTCx1/kBoaKhwc3MrEcO8efPEPz8qy5cvFwDE/fv3y4y7+Bz/nGfQokUL4eDgIFJTUzX7Ll26JExMTMTrr79e4nwjRozQ6vOVV14RNWvWLPOc/7wOKysrIYQQAwYMEF26dBFCPB6Pd3JyEgsWLCj1PcjNzS0xVp+YmCiUSqVYuHChZt/T5lB06tRJABBr164t9VinTp209h08eFAAEIsXLxbXr18X1tbWom/fvs+8RiFKn9OwYsUKAUB8+eWXmn35+fnCz89PWFtbi8zMTM11ARAqlUrcu3fvuc/3T+X9rCYnJwszM7MS1zl//nwBQGvuw5EjRwQAceTIESGEEBcuXBAAxDfffPPUWMuaQ1E87yExMVEIIUR6erqwsbERbdu2LTEP4FnzLIr7+uWXX8T9+/fF7du3xa5du0Tt2rWFUqkUt2/f1rTt0qWL8PHxEbm5uVr9t2vXTjRs2FCzb+LEiUKhUIgLFy5o9qWmpgp7e3utuIX437/3AwcOaMW1aNEiYWVlJf7880+t/bNmzRKmpqbi1q1bQggh3nzzTaFSqZ46z6l58+bPnDfz5PeHivieRYaBQx6VLDMzEwBgY2NTrvY//fQTAGDq1Kla+4t/K31yroW3t7fmt2bg8W+tjRs3xvXr15875icVz7347rvvSpRQy5KUlISLFy8iLCxM67fgZs2aoWvXrprr/KexY8dqfd2hQwekpqZq3sPyGDJkCKKjo5GcnIzDhw8jOTm5zN86lUolTEwe/5MoKipCamqqZjjn/Pnz5T6nUqnE8OHDy9U2KCgIY8aMwcKFC9GvXz9YWFhoytbP46effoKTkxMGDx
6s2VetWjVMmjQJWVlZOHr0qFb7/v37o3bt2s99vifPDTz7s3ro0CEUFhbijTfe0Go3ceLEZ57D1tYWAHDw4EGdhr/
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Best Parameters: {'model__max_depth': None, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"Precision: 1.0\n",
|
|||
|
"Recall: 1.0\n",
|
|||
|
"F1-score: 1.0\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABSbElEQVR4nO3deVhUZfsH8O+AMqwDIrviiDvkrrmEayK45BKauYOipoH7/lauFeWSplmalpppVq6vS6apuJK5V4YkuG+oICAo6zy/P3w5P0dAZzyHZZzvx+tcl5zlOfcZDjP33M9zzlEJIQSIiIiIZLAo6QCIiIjI9DGhICIiItmYUBAREZFsTCiIiIhINiYUREREJBsTCiIiIpKNCQURERHJxoSCiIiIZGNCQURERLIxoSgiFy5cQGBgIBwdHaFSqbBlyxZF2798+TJUKhVWrVqlaLumrE2bNmjTpo1i7aWlpWHIkCHw8PCASqXCmDFjFGvbVPA8K91Kw++ncuXKCA0N1ZtX0PvfqlWroFKpcPny5WKPUaVSYcaMGcW+X3PzUicU8fHxeOedd1ClShVYW1tDo9HA398fn3/+OR49elSk+w4JCcFff/2Fjz76CGvWrEHjxo2LdH/FKTQ0FCqVChqNpsDX8cKFC1CpVFCpVJg3b57R7d+8eRMzZszAmTNnFIj2xX388cdYtWoVRowYgTVr1mDAgAFFur/KlStLr5tKpYKdnR2aNGmC7777rkj3a2qefp2enDIyMko6vHyOHj2KGTNmIDk52ajtoqKiEBwcDA8PD1hZWcHNzQ1dunTBpk2biiZQBZXE+9/OnTuZNJSwMiUdQFHZsWMH3nrrLajVagwcOBC1a9dGVlYWDh8+jIkTJ+LcuXP4+uuvi2Tfjx49QnR0NN577z1EREQUyT60Wi0ePXqEsmXLFkn7z1OmTBk8fPgQ27ZtQ69evfSWrV27FtbW1i/85n7z5k3MnDkTlStXRv369Q3ebvfu3S+0v8Ls27cPzZo1w/Tp0xVt91nq16+P8ePHAwBu3bqFFStWICQkBJmZmRg6dGixxVHaPfk6PcnKyqoEonm2o0ePYubMmQgNDYWTk5NB20yfPh2zZs1C9erV8c4770Cr1SIxMRE7d+5Ejx49sHbtWvTt27doAzdQbGwsLCz+/7tpYe9/AwYMQO/evaFWq4skjp07d2LJkiUFJhWPHj1CmTIv7cddqfFSvsKXLl1C7969odVqsW/fPnh6ekrLwsPDERcXhx07dhTZ/u/evQsABr95vAiVSgVra+sia/951Go1/P398cMPP+RLKNatW4fOnTtj48aNxRLLw4cPYWtrq/iHyZ07d+Dn56dYezk5OdDpdM+Ms0KFCujfv7/0c2hoKKpUqYIFCxYwoXjC06+TUnQ6HbKyskr0b2vDhg2YNWsWevbsiXXr1ul9aZg4cSJ+/fVXZGdnl1h8T3s6QSjs/c/S0hKWlpbFFZaekvx9mhXxEho+fLgAII4cOWLQ+tnZ2WLWrFmiSpUqwsrKSmi1WjF16lSRkZGht55WqxWdO3cWhw4dEq+++qpQq9XCx8dHrF69Wlpn+vTpAoDepNVqhRBChISESP9/Ut42T9q9e7fw9/cXjo6Ows7OTtSoUUNMnTpVWn7p0iUBQKxcuVJvu71794oWLVoIW1tb4ejoKLp27Sr++eefAvd34cIFERISIhwdHYVGoxGhoaEiPT39ua9XSEiIsLOzE6tWrRJqtVrcv39fWvbHH38IAGLjxo0CgJg7d660LDExUYwfP17Url1b2NnZCQcHB9GhQwdx5swZaZ39+/fne/2ePM7WrVuLV155RZw4cUK0bNlS2NjYiNGjR0vLWrduLbU1cOBAoVar8x1/YGCgcHJyEjdu3Cjw+AqL4dKlS0IIIRISEsTgwYOFm5ubUKvVom7dumLVqlV6beT9fubOnSsWLFggqlSpIiwsLMTp06cLfV3zzq+nNW7cWFhZWe
nNO3jwoOjZs6fw9vYWVlZWomLFimLMmDHi4cOHeuvl/a6uX78uunXrJuzs7ISLi4sYP368yMnJ0Vv3/v37IiQkRGg0GuHo6CgGDhwoTp8+Lfs8i42NFf369RMajUa4uLiI999/X+h0OnH16lXRtWtX4eDgINzd3cW8efMKfW0MeZ2elJaWJsaNGycqVqworKysRI0aNcTcuXOFTqfTWw+ACA8PF99//73w8/MTZcqUEZs3bxZCCHH9+nUxaNAg4ebmJqysrISfn5/45ptv8u1r0aJFws/PT9jY2AgnJyfRqFEjsXbtWr3XoLBzqSC1atUSzs7OIjU19bmvRUHvA2fPnhUhISHCx8dHqNVq4e7uLgYNGiTu3bunt21qaqoYPXq00Gq1wsrKSri6uoqAgABx8uRJaZ1///1XBAcHC3d3d6FWq0WFChXE22+/LZKTk6V1tFqtCAkJKfR4897zVq5cWeCx79y5U7Rq1UrY29sLBwcH0bhxY+n1E8Kwcz0kJKTA1zkPADF9+nS9/Z46dUp06NBBODg4CDs7O/H666+L6OhovXXyYj58+LAYO3ascHFxEba2tqJ79+7izp07z/39mJuXskKxbds2VKlSBa+99ppB6w8ZMgSrV69Gz549MX78eBw7dgyRkZGIiYnB5s2b9daNi4tDz549ERYWhpCQEHz77bcIDQ1Fo0aN8MorryA4OBhOTk4YO3Ys+vTpg06dOsHe3t6o+M+dO4c33ngDdevWxaxZs6BWqxEXF4cjR448c7vffvsNHTt2RJUqVTBjxgw8evQIixcvhr+/P06dOoXKlSvrrd+rVy/4+PggMjISp06dwooVK+Dm5oZPP/3UoDiDg4MxfPhwbNq0CYMHDwbwuDpRq1YtNGzYMN/6Fy9exJYtW/DWW2/Bx8cHCQkJWLZsGVq3bo1//vkHXl5e8PX1xaxZszBt2jQMGzYMLVu2BAC932ViYiI6duyI3r17o3///nB3dy8wvs8//xz79u1DSEgIoqOjYWlpiWXLlmH37t1Ys2YNvLy8CtzO19cXa9aswdixY1GxYkWptO7q6opHjx6hTZs2iIuLQ0REBHx8fPDzzz8jNDQUycnJGD16tF5bK1euREZGBoYNGwa1Wg1nZ2eDXts8OTk5uH79OsqVK6c3/+eff8bDhw8xYsQIlC9fHn/88QcWL16M69ev4+eff9ZbNzc3F0FBQWjatCnmzZuH3377DfPnz0fVqlUxYsQIAIAQAt26dcPhw4cxfPhw+Pr6YvPmzQgJCckXk7Hn2dtvvw1fX1988skn2LFjBz788EM4Oztj2bJleP311/Hpp59i7dq1mDBhAl599VW0atXqua9LdnY27t27pzfP1tYWtra2EEKga9eu2L9/P8LCwlC/fn38+uuvmDhxIm7cuIEFCxbobbdv3z789NNPiIiIgIuLCypXroyEhAQ0a9YMKpUKERERcHV1xS+//IKwsDCkpqZKA3SXL1+OUaNGoWfPnhg9ejQyMjLw559/4tixY+jbty+Cg4Px77//4ocffsCCBQvg4uIC4PG5VJALFy7g/PnzGDx4MBwcHJ77OhRkz549uHjxIgYNGgQPDw+pe/fcuXP4/fffoVKpAADDhw/Hhg0bEBERAT8/PyQmJuLw4cOIiYlBw4YNkZWVhaCgIGRmZmLkyJHw8PDAjRs3sH37diQnJ8PR0THfvo19/1u1ahUGDx6MV155BVOnToWTkxNOnz6NXbt2SV06hpzr77zzDm7evIk9e/ZgzZo1z32Nzp07h5YtW0Kj0WDSpEkoW7Ysli1bhjZt2uDAgQNo2rSp3vojR45EuXLlMH36dFy+fBkLFy5EREQEfvzxR4N/L2ahpDMapaWkpAgAolu3bgatf+bMGQFADBkyRG/+hAkTBACxb98+aZ5WqxUAxMGDB6V5d+7cEWq1WowfP16a9+S30ycZWqFYsGCBACDu3r1baNwFfTOpX7++cHNzE4mJidK8s2fPCgsLCz
Fw4MB8+xs8eLBem2+++aYoX758oft88jjs7OyEEEL07NlTtGvXTgghRG5urvDw8BAzZ84s8DXIyMgQubm5+Y5DrVa
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Best Parameters: {'model__learning_rate': 0.01, 'model__max_depth': 3, 'model__n_estimators': 100}\n",
|
|||
|
"Accuracy: 1.0\n",
|
|||
|
"Precision: 1.0\n",
|
|||
|
"Recall: 1.0\n",
|
|||
|
"F1-score: 1.0\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAhQAAAHHCAYAAADnOMH5AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAABVB0lEQVR4nO3deXxMZ/s/8M9kIpN1EiErEUlsiX0pJdaKRCgiVCmaEJQGbeye1tpq+tCWUkWptZTW+tilCEWq1i4amkQstQSJJJLIOvfvD9/Mz0jCjHOyTPN5e53XS845c5/rzEwm11z3fZ+jEEIIEBEREUlgUt4BEBERkfFjQkFERESSMaEgIiIiyZhQEBERkWRMKIiIiEgyJhREREQkGRMKIiIikowJBREREUnGhIKIiIgkY0Khh7i4OPj7+8PW1hYKhQI7d+6Utf1r165BoVBg7dq1srZrzDp37ozOnTvL1l5GRgZGjBgBZ2dnKBQKvP/++7K1XdEV9/6aPXs2FApF+QX1L2Msz2dF+KypXbs2QkNDddYV9xm7du1aKBQKXLt2rcxjVCgUmD17dpkf19gZTUKRkJCAd955B56enjA3N4darYavry++/PJLPH78uFSPHRISgj/++APz5s3Dhg0b0KpVq1I9XlkKDQ2FQqGAWq0u9nmMi4uDQqGAQqHAZ599ZnD7t2/fxuzZs3Hx4kUZon15n3zyCdauXYsxY8Zgw4YNGDp0aKkfU6PRYP369ejWrRuqV6+OKlWqwNHREf7+/vjmm2+Qk5NT6jGUJ0Nf+8I/IE8vjo6O6NKlC/bv31+6weohKysLs2fPRnR0dHmHUqzo6GgEBwfD2dkZZmZmcHR0RK9evbB9+/byDu2FyuMzdt++fUwa5CaMwJ49e4SFhYWws7MT48ePF99884346quvxMCBA0WVKlXEyJEjS+3YWVlZAoD44IMPSu0YGo1GPH78WOTn55faMUoSEhIiTE1NhVKpFFu2bCmyfdasWcLc3FwAEAsWLDC4/TNnzggAYs2aNQY9LicnR+Tk5Bh8vJK0adNG+Pr6ytbei2RlZYmAgAABQLRr105ERkaK1atXi88++0z06tVLKJVKMXz48DKJJTExschrkJeXJx4/flyqxzX0tV+zZo0AIObOnSs2bNgg1q9fLxYsWCAaNmwoAIjdu3eXarwvcv/+fQFAzJo1q8i2sng+n2fmzJkCgKhbt66YOXOm+Pbbb8X8+fNF586dBQCxceNGIUTx74Wylp2dLXJzc7U/l/QZm5+fLx4/fiw0Gk2pxBEeHi5K+hP4+PFjkZeXVyrH/TczLY8kxhCJiYkYOHAg3N3dceTIEbi4uGi3hYeHIz4+Hnv37i2149+/fx8AYGdnV2rHUCgUMDc3L7X2X0SlUsHX1xfff/89BgwYoLNt06ZN6NmzJ7Zt21YmsWRlZcHS0hJmZmaytnvv3j34+PjI1l5+fj40Gk2JcUZERODgwYNYtGgR3nvvPZ1tEydORFxcHKKioiQdQwpTU1OYmlbMX//AwECdb6hhYWFwcnLC999/j9dff70cIytZeT6fW7duxdy5c9G/f39s2rQJVapU0W6bPHkyDh48iLy8vHKJrTgqlUrn55I+Y5VKJZRKZVmFpaM8P4+NWnlnNC8yevRoAUCcPHlSr/3z8vLE3LlzhaenpzAzMxPu7u5i+vTpIjs7W2c/d3d30bNnT/Hzzz+LV155RahUKuHh4SHWrVun3WfWrFkCgM7i7u4uhHjyzb7w/08rfMzTDh06JHx9fYWtra2wsrIS9erVE9OnT9duL+lbw+HDh0X79u2FpaWlsLW1Fb179xZ//fVXsceLi4sTISEhwtbWVqjVahEaGioyMzNf+HyFhIQIKysrsXbtWqFSqcTDhw+123799VcBQGzbtq1IhSI5OVlMnDhRNGrUSFhZWQkbGxvRvXt3cfHiRe0+R48eLfL8PX2enTp1Eg0bNhRnz54VHTp0EB
YWFuK9997TbuvUqZO2rbfffluoVKoi5+/v7y/s7OzErVu3ij2/kmJITEwUQgiRlJQkhg8fLhwdHYVKpRJNmjQRa9eu1Wmj8PVZsGCBWLhwofD09BQmJibiwoULxR7zxo0bQqlUiu7duz/nmdf1vGPk5OSIGTNmiBYtWgi1Wi0sLS1F+/btxZEjR4q08/DhQxESEiLUarWwtbUVb7/9trhw4UKR91dx71MhhNiwYYNo0aKFMDc3F1WrVhVvvvmmuHHjhs4+ha/bpUuXROfOnYWFhYVwdXUV//3vf7X7vOi1L05hheLMmTM66zUajVCr1eLtt9/WWZ+RkSEmTJggatasKczMzES9evXEggULinyj1fcz4cyZM8Lf319Uq1ZNmJubi9q1a4thw4bpvD7PLoXViuKeTwAiPDxc7NixQzRs2FCYmZkJHx8fsX///iLnfvToUdGyZUuhUqmEp6enWL58eYmv0bMaNGgg7O3tRXp6+gv3Le6z5rfffhMhISHCw8NDqFQq4eTkJIYNGyYePHig89j09HTx3nvvCXd3d2FmZiYcHByEn5+fOHfunHafv//+WwQHBwsnJyehUqlEjRo1xJtvvilSU1O1+7i7u4uQkBCd5624z9jC90Ph72qhffv2iY4dOwpra2thY2MjWrVqpa3ACCHE8ePHRf/+/YWbm5swMzMTNWvWFO+//77IysrS7hMSElLs61no6de20Pnz50X37t2FjY2NsLKyEq+99pqIiYnR2acw5hMnToiIiAhRvXp1YWlpKYKCgsS9e/de+PoYu4r5FeUpu3fvhqenJ9q1a6fX/iNGjMC6devQv39/TJw4EadPn0ZkZCRiY2OxY8cOnX3j4+PRv39/hIWFISQkBKtXr0ZoaChatmyJhg0bIjg4GHZ2doiIiMCgQYPQo0cPWFtbGxT/pUuX8Prrr6NJkyaYO3cuVCoV4uPjcfLkyec+7qeffkJgYCA8PT0xe/ZsPH78GEuWLIGvry/Onz+P2rVr6+w/YMAAeHh4IDIyEufPn8eqVavg6OiI//73v3rFGRwcjNGjR2P79u0YPnw4gCfViQYNGqBFixZF9r969Sp27tyJN954Ax4eHkhKSsKKFSvQqVMn/PXXX3B1dYW3tzfmzp2LmTNnYtSoUejQoQMA6LyWycnJCAwMxMCBAzFkyBA4OTkVG9+XX36JI0eOICQkBDExMVAqlVixYgUOHTqEDRs2wNXVtdjHeXt7Y8OGDYiIiEDNmjUxceJEAICDgwMeP36Mzp07Iz4+HmPHjoWHhwd+/PFHhIaGIjU1tUhlYc2aNcjOzsaoUaOgUqlgb29f7DH379+PgoICDBky5AXPelHFHSM9PR2rVq3CoEGDMHLkSDx69AjffvstAgIC8Ouvv6JZs2YAACEE+vTpgxMnTmD06NHw9vbGjh07EBISotex582bhxkzZmDAgAEYMWIE7t+/jyVLlqBjx464cOGCzjfIhw8fonv37ggODsaAAQOwdetWTJ06FY0bN0ZgYKBer31J0tLS8ODBAwghcO/ePSxZsgQZGRk6z6cQAr1798bRo0cRFhaGZs2a4eDBg5g8eTJu3bqFhQsXavfV5zPh3r178Pf3h4ODA6ZNmwY7Oztcu3ZNO/7AwcEBy5Ytw5gxY9C3b18EBwcDAJo0afLcczlx4gS2b9+Od999FzY2Nli8eDH69euHGzduoFq1agCACxcuoHv37nBxccGcOXNQUFCAuXPnwsHB4YXPVVxcHC5fvozhw4fDxsbmhfsXJyoqClevXsWwYcPg7OyMS5cu4ZtvvsGlS5fwyy+/aAebjh49Glu3bsXYsWPh4+OD5ORknDhxArGxsWjRogVyc3MREBCAnJwcjBs3Ds7Ozrh16xb27NmD1NRU2NraFjm2oZ+xa9euxfDhw9GwYUNMnz4ddnZ2uHDhAg4cOIC33noLAPDjjz8iKysLY8aMQbVq1fDrr79iyZIl+Oeff/Djjz8CAN555x3cvn
0bUVFR2LBhwwufo0uXLqFDhw5Qq9WYMmUKqlSpghUrVqBz5844duwY2rRpo7P/uHHjULVqVcyaNQvXrl3DokWLMHb
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 640x480 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"import matplotlib.pyplot as plt\n",
|
|||
|
"import seaborn as sns\n",
|
|||
|
"from sklearn.model_selection import train_test_split, GridSearchCV\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score, accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, ConfusionMatrixDisplay\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//FINAL_USO.csv\")\n",
|
|||
|
"\n",
|
|||
|
"numerical_cols = ['Volume', 'High', 'Open', 'Close', 'Low']\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": (LinearRegression(), {}),\n",
|
|||
|
" \"Random Forest Regression\": (RandomForestRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Regression\": (GradientBoostingRegressor(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Adj Close']\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи регрессии\n",
|
|||
|
"X_train_reg, X_test_reg, y_train_reg, y_test_reg = train_test_split(X_reg, y_reg, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей и их гиперпараметров для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": (LogisticRegression(), {\n",
|
|||
|
" 'model__C': [0.1, 1, 10],\n",
|
|||
|
" 'model__solver': ['liblinear', 'lbfgs']\n",
|
|||
|
" }),\n",
|
|||
|
" \"Random Forest Classification\": (RandomForestClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__max_depth': [None, 10, 20]\n",
|
|||
|
" }),\n",
|
|||
|
" \"Gradient Boosting Classification\": (GradientBoostingClassifier(), {\n",
|
|||
|
" 'model__n_estimators': [100, 200],\n",
|
|||
|
" 'model__learning_rate': [0.01, 0.1],\n",
|
|||
|
" 'model__max_depth': [3, 5]\n",
|
|||
|
" })\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[ numerical_cols]\n",
|
|||
|
"y_class = (df['Adj Close'] > df['Adj Close'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на обучающую и тестовую выборки для задачи классификации\n",
|
|||
|
"X_train_class, X_test_class, y_train_class, y_test_class = train_test_split(X_class, y_class, test_size=0.2, random_state=42)\n",
|
|||
|
"\n",
|
|||
|
"# Обучаем и оцениваем модели для задачи классификации\n",
|
|||
|
"print(\"Результаты для задачи классификации:\")\n",
|
|||
|
"for name, (model, params) in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" grid_search = GridSearchCV(pipeline, params, cv=5, scoring='accuracy')\n",
|
|||
|
" grid_search.fit(X_train_class, y_train_class)\n",
|
|||
|
" best_model = grid_search.best_estimator_\n",
|
|||
|
" y_pred_class = best_model.predict(X_test_class)\n",
|
|||
|
" accuracy = accuracy_score(y_test_class, y_pred_class)\n",
|
|||
|
" precision = precision_score(y_test_class, y_pred_class)\n",
|
|||
|
" recall = recall_score(y_test_class, y_pred_class)\n",
|
|||
|
" f1 = f1_score(y_test_class, y_pred_class)\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Best Parameters: {grid_search.best_params_}\")\n",
|
|||
|
" print(f\"Accuracy: {accuracy}\")\n",
|
|||
|
" print(f\"Precision: {precision}\")\n",
|
|||
|
" print(f\"Recall: {recall}\")\n",
|
|||
|
" print(f\"F1-score: {f1}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
" # Визуализация матрицы ошибок\n",
|
|||
|
" cm = confusion_matrix(y_test_class, y_pred_class)\n",
|
|||
|
" disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=['Less', 'More'])\n",
|
|||
|
" disp.plot(cmap=plt.cm.Blues)\n",
|
|||
|
" plt.title(f'Confusion Matrix for {name}')\n",
|
|||
|
" plt.show()"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"**Вывод**:\n",
|
|||
|
"\n",
|
|||
|
"Градиентный бустинг и случайный лес выдали наилучшие результаты. "
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Давайте проанализируем полученные значения метрик и определим, являются ли они нормальными или их можно улучшить.\n",
|
|||
|
"\n",
|
|||
|
"### Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"\n",
|
|||
|
"### Вывод для задачи регрессии:\n",
|
|||
|
"\n",
|
|||
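|
"- **Linear Regression** демонстрирует наилучшие результаты по метрикам MAE и R²: при перекрёстной проверке (см. вывод ниже) MAE ≈ 3.5e-14 и R² = 1.0; по-видимому, целевая переменная `Adj Close` практически линейно выражается через признаки (в частности, через `Close`).\n",
|
|||
|
"- **Random Forest Regression** и **Gradient Boosting Regression** при перекрёстной проверке заметно уступают линейной регрессии: средний R² ≈ -0.87 и -0.85 при большом разбросе между фолдами, то есть их качество нестабильно.\n",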
|
|||
|
"\n",
|
|||
|
"### Вывод для задачи классификации:\n",
|
|||
|
"\n",
|
|||
|
"- **Random Forest Classification** и **Gradient Boosting Classification** демонстрируют наилучшие результаты: на тестовой выборке все метрики (Accuracy, Precision, Recall, F1-score) равны 1.0.\n",
|
|||
|
"- **Logistic Regression** также показывает хорошие результаты, но уступает ансамблевым моделям.\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "markdown",
|
|||
|
"metadata": {},
|
|||
|
"source": [
|
|||
|
"Для оценки смещения (bias) и дисперсии (variance) моделей можно использовать метод перекрестной проверки (cross-validation). Этот метод позволяет оценить, насколько хорошо модель обобщается на новых данных.\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Для задачи регрессии мы будем использовать метрики MAE (Mean Absolute Error) и R² (R-squared) для оценки смещения и дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации:\n",
|
|||
|
"Для задачи классификации мы будем использовать метрики Accuracy, Precision, Recall и F1-score для оценки смещения и дисперсии.\n",
|
|||
|
"\n",
|
|||
|
"Пример кода для оценки смещения и дисперсии:"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": null,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"name": "stdout",
|
|||
|
"output_type": "stream",
|
|||
|
"text": [
|
|||
|
"Оценка смещения и дисперсии для задачи регрессии:\n",
|
|||
|
"Model: Linear Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 3.475420657900542e-14, Std = 2.3108544967235046e-14\n",
|
|||
|
"R² (Cross-Validation): Mean = 1.0, Std = 0.0\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 4.770713368258853, Std = 9.027907502951276\n",
|
|||
|
"R² (Cross-Validation): Mean = -0.8676362010013315, Std = 3.6735082182967664\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Regression\n",
|
|||
|
"MAE (Cross-Validation): Mean = 4.790726208613611, Std = 8.978223486805094\n",
|
|||
|
"R² (Cross-Validation): Mean = -0.8531326799804774, Std = 3.6480201756306525\n",
|
|||
|
"\n",
|
|||
|
"Оценка смещения и дисперсии для задачи классификации:\n",
|
|||
|
"Model: Logistic Regression\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.9469472506610617, Std = 0.09607008028935687\n",
|
|||
|
"Precision (Cross-Validation): Mean = 0.9903846153846153, Std = 0.019230769230769253\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.8244897959183675, Std = 0.34090796763789555\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.8430120359555126, Std = 0.29664350339720796\n",
|
|||
|
"\n",
|
|||
|
"Model: Random Forest Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.99533527696793, Std = 0.009329446064139945\n",
|
|||
|
"Precision (Cross-Validation): Mean = 1.0, Std = 0.0\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.9795918367346939, Std = 0.04081632653061225\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.9904843365764995, Std = 0.016633019819396317\n",
|
|||
|
"\n",
|
|||
|
"Model: Gradient Boosting Classification\n",
|
|||
|
"Accuracy (Cross-Validation): Mean = 0.9988338192419824, Std = 0.0023323615160349754\n",
|
|||
|
"Precision (Cross-Validation): Mean = 1.0, Std = 0.0\n",
|
|||
|
"Recall (Cross-Validation): Mean = 0.9959183673469388, Std = 0.008163265306122458\n",
|
|||
|
"F1-score (Cross-Validation): Mean = 0.9979381443298969, Std = 0.004123711340206171\n",
|
|||
|
"\n"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
|
|||
|
"from sklearn.model_selection import cross_val_score\n",
|
|||
|
"from sklearn.preprocessing import StandardScaler\n",
|
|||
|
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
|
|||
|
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
|
|||
|
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
|
|||
|
"from sklearn.pipeline import Pipeline\n",
|
|||
|
"from sklearn.compose import ColumnTransformer\n",
|
|||
|
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Загружаем набор данных\n",
|
|||
|
"df = pd.read_csv(\"..//static//csv//FINAL_USO.csv\")\n",
|
|||
|
"\n",
|
|||
|
"# Определяем числовые столбцы\n",
|
|||
|
"numerical_cols = ['Volume', 'High', 'Open', 'Close', 'Low']\n",
|
|||
|
"\n",
|
|||
|
"# Создаем преобразователь для числовых столбцов\n",
|
|||
|
"preprocessor = ColumnTransformer(\n",
|
|||
|
" transformers=[\n",
|
|||
|
" ('num', StandardScaler(), numerical_cols)\n",
|
|||
|
" ])\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи регрессии\n",
|
|||
|
"X_reg = df[numerical_cols]\n",
|
|||
|
"y_reg = df['Adj Close']\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи регрессии\n",
|
|||
|
"models_reg = {\n",
|
|||
|
" \"Linear Regression\": LinearRegression(),\n",
|
|||
|
" \"Random Forest Regression\": RandomForestRegressor(),\n",
|
|||
|
" \"Gradient Boosting Regression\": GradientBoostingRegressor()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи регрессии\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи регрессии:\")\n",
|
|||
|
"for name, model in models_reg.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" mae_scores = -cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='neg_mean_absolute_error')\n",
|
|||
|
" r2_scores = cross_val_score(pipeline, X_reg, y_reg, cv=5, scoring='r2')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"MAE (Cross-Validation): Mean = {mae_scores.mean()}, Std = {mae_scores.std()}\")\n",
|
|||
|
" print(f\"R² (Cross-Validation): Mean = {r2_scores.mean()}, Std = {r2_scores.std()}\")\n",
|
|||
|
" print()\n",
|
|||
|
"\n",
|
|||
|
"# Разделяем данные на признаки (X) и целевую переменную (y) для задачи классификации\n",
|
|||
|
"X_class = df[numerical_cols]\n",
|
|||
|
"y_class = (df['Adj Close'] > df['Adj Close'].mean()).astype(int)\n",
|
|||
|
"\n",
|
|||
|
"# Список моделей для задачи классификации\n",
|
|||
|
"models_class = {\n",
|
|||
|
" \"Logistic Regression\": LogisticRegression(),\n",
|
|||
|
" \"Random Forest Classification\": RandomForestClassifier(),\n",
|
|||
|
" \"Gradient Boosting Classification\": GradientBoostingClassifier()\n",
|
|||
|
"}\n",
|
|||
|
"\n",
|
|||
|
"# Оценка смещения и дисперсии для задачи классификации\n",
|
|||
|
"print(\"Оценка смещения и дисперсии для задачи классификации:\")\n",
|
|||
|
"for name, model in models_class.items():\n",
|
|||
|
" pipeline = Pipeline(steps=[\n",
|
|||
|
" ('preprocessor', preprocessor),\n",
|
|||
|
" ('model', model)\n",
|
|||
|
" ])\n",
|
|||
|
" accuracy_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='accuracy')\n",
|
|||
|
" precision_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='precision')\n",
|
|||
|
" recall_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='recall')\n",
|
|||
|
" f1_scores = cross_val_score(pipeline, X_class, y_class, cv=5, scoring='f1')\n",
|
|||
|
" print(f\"Model: {name}\")\n",
|
|||
|
" print(f\"Accuracy (Cross-Validation): Mean = {accuracy_scores.mean()}, Std = {accuracy_scores.std()}\")\n",
|
|||
|
" print(f\"Precision (Cross-Validation): Mean = {precision_scores.mean()}, Std = {precision_scores.std()}\")\n",
|
|||
|
" print(f\"Recall (Cross-Validation): Mean = {recall_scores.mean()}, Std = {recall_scores.std()}\")\n",
|
|||
|
" print(f\"F1-score (Cross-Validation): Mean = {f1_scores.mean()}, Std = {f1_scores.std()}\")\n",
|
|||
|
" print()\n"
|
|||
|
]
|
|||
|
},
|
|||
|
{
|
|||
|
"cell_type": "code",
|
|||
|
"execution_count": 13,
|
|||
|
"metadata": {},
|
|||
|
"outputs": [
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABccAAAJOCAYAAABycr+9AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAACOp0lEQVR4nOzdeXgT5f7+8TuFNKUrIIWCFAoFZQcF2RUQbFkE8YgoHGUREQVUwA08IlTRuoMKsriAC7gjHj0CZfcguADiEQQEZFGkbEIDLU1LO78//DXfhqRtCmnTZt6v6+pFMnkm85kkz+TJzZOJxTAMQwAAAAAAAAAAmEiQvwsAAAAAAAAAAKC0EY4DAAAAAAAAAEyHcBwAAAAAAAAAYDqE4wAAAAAAAAAA0yEcBwAAAAAAAACYDuE4AAAAAAAAAMB0CMcBAAAAAAAAAKZDOA4AAAAAAAAAMB3CcQAAAAAAAACA6RCOA3AaNmyYwsPDS3Wb+/fvl8Vi0YIFC0p1u4Hso48+UtWqVXXmzBl/l1Kg9u3b6+GHH/a6/ZkzZ3TnnXcqJiZGFotF48aNK7ni4GSxWDR16lR/l1FqLuZ4tHbtWlksFq1du9bndQEAYDbFGYN8//33Cg4O1oEDB3xex5w5c1SnTh05HA6v13n33XfVqFEjWa1WVa5c2ec1wd2wYcMUFxfn7zJKVdeuXdW1a9cLWjcuLk7Dhg3zaT1AeUY4DniwYMECWSwWWSwWrV+/3u12wzAUGxsri8Wi66+/3g8VFl9OTo5q1aoli8WipUuX+rscn8jIyNDUqVNLJIzKe/49/d19990+356v5OTkaMqUKbr33ntd/qMjLi5OFotFPXr08Lje66+/7ty/TZs2eWzz8MMPy2Kx6JZbbvF4e16wWNDfM88842z7yCOPaNasWUpNTfVqv55++mktWLBA99xzj959913dfvvtXq13ofIer7y/sLAwtW3bVu+8806Jbhd/mzp1qiwWi4KCgvT777+73W6321WpUiVZLBaNHTvWDxUCAFC4/J8nLBaLKlasqEsvvVTDhg3ToUOH/F1eQPnXv/6lQYMGqW7dus5lXbt2dXn8K1WqpBYtWmjGjBnKzc11Wf/2229X586d1a5dO3Xp0kU7duxw3jZs2DBlZWVp7ty5XtWyc+dODRs2TPHx8Xr99dc1b9483+xkAfLGTHl/VqtVcXFxuu+++3Tq1KkS3TZcP/9MmzbNY5t//vOfslgspT4JDYD3Kvq7AKAsCwkJ0aJFi9S5c2eX5evWrdMff/whm83mp8qKb/Xq1Tp8+LDi4uK0cOFC9erVy98lXbSMjAwlJSVJ0gX/r3lhrrvuOg0ZMsRt+WWXXebzbfnKF198oV27dumuu+5yuy0kJERr1qxRamqqYmJiXG5buHChQkJClJmZ6fF+DcPQ+++/r7i4OH3xxRc6ffq0IiIiPLYdNGiQevfu7bb8iiuucF6+4YYbFBkZqddee01PPPFEkfu1evVqtW/fXlOmTCmyra+0atVKDzzwgCTp8OHDeuONNzR06FA5HA6NHDmy1Orwp7Nnz6piRf8NFWw2m95//323bxksXrzYTxUBAFA8TzzxhOrVq6fMzEx9++23WrBggdavX69t27YpJCTE3+WVe1u3btXKlSu1YcMGt9tq166t5ORkSdLx48e1aNEijR8/XseOHdNTTz3lbDd58mTn+H7cuHEaPXq01qxZI+nv8fPQoUP10ksv6d5775XFYim0nrVr1yo3N1cvv/yyGjRo4KvdLNLs2bMVHh6u9PR0rVq1Sq+++qq2bNnicaJXIHr99dfd/tOjNIWEhOj999/XY4895rI8PT1dn3/+OX0dKOOYOQ4Uonfv3vr444917tw5l+WLFi1S69at3QLGsuy9997TlVdeqfHjx2vJkiVKT0/3d0ll3mWXXabbbrvN7a9t27aFrpeRkeFx+blz55SVlXVRNRX1vM2fP1+dOnXSpZde6n
Zbp06dFB4erg8//NBl+R9//KH//ve/6tOnT4H3u3btWv3xxx966623dO7cuULDySuvvNLj49a0aVNnm6CgIA0YMEDvvPOODMModJ8k6ejRoz79Wqo3z8Wll17qrP2hhx7S+vXrFR4erunTp/usDm/5q7+GhIT4NRzv3bu33n//fbflixYtKvT1CgBAWdGrVy/ddtttuvPOO/XGG2/owQcf1N69e/Xvf//b36WViILGwSVl/vz5qlOnjtq3b+92W1RUlHMsN27cOH399deqW7euXn31VeXk5Djb5Z/4YhiGgoJcY5KBAwfqwIEDzsC8MEePHpUkn45bvXlMBwwYoNtuu02jRo3SRx99pFtuuUXffPONvv/+e5/V4Y3c3NwCJ9uUJKvV6teJa71799Yvv/yin376yWX5559/rqysLF133XV+qgyANwjHgUIMGjRIJ06c0IoVK5zLsrKy9Mknn2jw4MEe18nNzdWMGTPUtGlThYSEqEaNGho1apROnjzp0u7zzz9Xnz59VKtWLdlsNsXHx+vJJ590GahJf8+IbtasmX755Rd169ZNoaGhuvTSS/Xcc895vR9nz57VZ599pltvvVUDBw7U2bNn9fnnnxfY/rffflNiYqLCwsJUq1YtPfHEE24B5gcffKDWrVsrIiJCkZGRat68uV5++WW3+7n55ptVtWpVhYaGqn379vrPf/5TZL0FnT8t/7nk9u/fr+joaElSUlKS8+ts+c9NuHPnTg0YMEBVq1ZVSEiI2rRp4/MPInnPz+bNm3XNNdcoNDRUjz76qPMrdi+88IJmzJih+Ph42Ww2/fLLL5L+ngl99dVXKywsTJUrV9YNN9zg8hVO6f++JvnLL79o8ODBqlKlitu3GPLLzMzUsmXLCjx1SkhIiP7xj39o0aJFLsvff/99ValSRYmJiQXe98KFC9WkSRN169ZNPXr00MKFC719iAp03XXX6cCBA9q6dWuBbfLO47xv3z795z//cT7P+/fvl/T3B5ARI0aoRo0aCgkJUcuWLfX222+73EdRz4W3oqOj1ahRI+3du9dlubd9Pjc3V1OnTlWtWrUUGhqqbt266ZdffnE751/e17DXrVun0aNHq3r16qpdu7bz9qVLlzpfOxEREerTp4+2b9/usq3U1FQNHz5ctWvXls1mU82aNXXDDTc4HzdJ2rRpkxITE1WtWjVVqlRJ9erV0x133OFyP57O9/njjz+qV69eioyMVHh4uLp3765vv/3WpU3ePnzzzTeaMGGCoqOjFRYWphtvvFHHjh3z9iHX4MGDtXXrVu3cudNl31avXl3gMdib14QknTp1SsOGDVNUVJQqV66soUOHFvj14ws9luzevVs33XSTYmJiFBISotq1a+vWW29VWlqadw8AACDgXH311ZLkNp7wxJv3c8MwNG3aNNWuXds5vti+fbvb+CJvXHm+vPfs/PdZ3M8p54+DJcnhcGjKlClq0KCBbDabYmNj9fDDD7udu9vhcGj8+PGKjo5WRESE+vXrpz/++KPIxybPkiVLdO211xY5o1v6eyx81VVX6fTp084QO79Vq1bpjTfecDkVoCS1bt1aVatWLfTzk/T3afnyvuUYHR3tNo567bXX1LRpU9lsNtWqVUtjxoxxG3sU9pgWR0Gvs++++049e/ZUVFSUQkND1aVLF33zzTdu669du1Zt2rRRSEiI4uPjNXfuXI+vobxT3C1cuNC5b8uWLZMkHTp0SHfccYdq1Kghm82mpk2b6q233nLb1quvvqqmTZsqNDRUVapUUZs2bVw+r5w+fVrjxo1TXFycbDabqlevruuuu05btmxxtvF0zvH09HQ98MADio2Nlc1m0+WXX64XXnjB7XNt3j4sWbJEzZo1c9aatx/e6NChg+rVq+f2OWvhwoXq2bOnqlat6nE9b14TkjRv3jzFx8erUqVKatu2rf773/96vD9v+935srOzlZSUpIYNGyokJESXXHKJOnfu7JKDAIGM06oAhY
iLi1OHDh30/vvvO09DsnTpUqWlpenWW2/VK6+84rbOqFGjtGDBAg0fPlz33Xef9u3bp5kzZ+rHH3/UN998I6vVKun
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1500x600 with 2 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
},
|
|||
|
{
|
|||
|
"data": {
|
|||
|
"image/png": "iVBORw0KGgoAAAANSUhEUgAABpMAAASlCAYAAABEPCH1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjkuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8hTgPZAAAACXBIWXMAAA9hAAAPYQGoP6dpAAEAAElEQVR4nOzdeXhV1dk/7ieBkICAiEBApICzKAXFSnHWIjhhcUSxMql1oqKpVnEAsVW+alV8FUWtqK2o1NlXKYIItVYqrYpWKyoq4sCopUGGMGT//uiPvMZkYwIhJ4n3fV1cctZZ++znHPaKe+Vz9tpZSZIkAQAAAAAAAOXIznQBAAAAAAAA1FzCJAAAAAAAAFIJkwAAAAAAAEglTAIAAAAAACCVMAkAAAAAAIBUwiQAAAAAAABSCZMAAAAAAABIJUwCAAAAAAAglTAJAAAAAACAVMIkACIi4oMPPohevXrF1ltvHVlZWfHUU09luqQSgwYNig4dOmRs//fff39kZWXFvHnzSrXfeOONscMOO0S9evWia9euERHRoUOHGDRoULXXePXVV0dWVla17zeTNue4OOSQQ+KQQw6p0noAANi4TTl/mzFjRmRlZcWMGTO2SE3fpbxz/ppg3rx5kZWVFffff3/Gaihv7lPevDJtPlUdsrKy4uqrr672/WbK5hwXmR5rQM0nTALqlDvuuCOysrKie/fumS6l1hk4cGD885//jGuvvTb+8Ic/xD777LPF91lYWBijRo2KLl26ROPGjaNhw4ax5557xqWXXhpffPHFFt//5pgyZUr86le/iv333z/uu+++uO6667b4PleuXBlXX311jTu5z8rKiqysrDjzzDPLff6KK64o6bN06dJqrg4A4Ptrwy/xN/zJy8uLXXbZJYYOHRqLFi3KdHk1XibO+SP++0v9448/Plq3bh0NGjSIVq1aRZ8+feKJJ56olv1vjkzMKydNmlTjAqMNX/bLzs6OTz/9tMzzhYWF0bBhw8jKyoqhQ4dmoEKAyquf6QIAqtKECROiQ4cOMWvWrJg7d27stNNOmS6pVli1alXMnDkzrrjiimo7kf3oo4+iZ8+eMX/+/DjppJPi5z//eTRo0CDeeuutuPfee+PJJ5+M999/v1pq+S6nn356nHLKKZGbm1vS9uKLL0Z2dnbce++90aBBg5L29957L7Kzt8x3NVauXBmjRo2KiChzVc2VV14Zl1122RbZb0Xk5eXF448/HnfccUepzyMi4uGHH468vLxYvXp1hqoDAPh+u+aaa6Jjx46xevXqePnll+POO++MSZMmxdtvvx2NGjWqtjruueeeKC4urtQ2Bx10UKxatarMOWZ1SDvn35JGjhwZ11xzTey8885x9tlnR/v27ePLL7+MSZMmxQknnBATJkyI/v37V0st3+Xbc5+0eWV586mqNGnSpBg7dmy5gdKqVauifv3M/fozNzc3Hn744fjVr35Vqr02BIMA3+bKJKDO+Pjjj+OVV16Jm2++OVq2bBkTJkzIdEmpVqxYkekSSlmyZElERDRr1qzKXnNj73HdunVx/PHHx6JFi2LGjBnx8MMPx/nnnx9nnXVW3HbbbfHRRx/FSSedVGW1bK569epFXl5eqWXkFi9eHA0bNiwzqczNzY2cnJzqLjHq168feXl51b7fDY444ogoLCyMP/3pT6XaX3nllfj444/j6KOPzlBlAAAceeSR8bOf/SzOPPPMuP/+++PCCy+Mjz/+OJ5++unUbbbEnCUnJ6fSgUJ2dnbk5eVtsS9sbUzaOf+mSpIkVq1alfr8Y489Ftdcc02ceOKJ8c4778SoUaNiyJAhcckll8T06dNj8uTJ0bRp0yqppSp8e+6TNq8sbz5VXfLy8jIaJh111FHx8MMPl2l/6KGHzJGAWkeYBNQZEyZMiG222SaOPvroOPHEE1PDpGXLlsVFF10UHTp0iNzc3Nh+++1jwIABpZbfWr16dVx99dWxyy67RF
5eXrRp0yaOP/74+PDDDyMifS3h8tYnHjRoUDRu3Dg+/PDDOOqoo6JJkyZx2mmnRUTEX/7ylzjppJPiBz/4QeTm5ka7du3ioosuKneCMWfOnDj55JOjZcuW0bBhw9h1113jiiuuiIiI6dOnR1ZWVjz55JNltnvooYciKysrZs6cWe7ncfXVV0f79u0jIuKSSy6JrKysUuuYv/HGG3HkkUdG06ZNo3HjxvGTn/wk/va3v5V6jQ3LZ/z5z3+O8847L1q1ahXbb799ufuLiHj88cfjzTffjCuuuCIOOOCAMs83bdo0rr322tTtIyJ++9vfxn777RfbbrttNGzYMLp16xaPPfZYmX5Tp06NAw44IJo1axaNGzeOXXfdNS6//PJSfW677bbYY489olGjRrHNNtvEPvvsEw899FCZ97dhje+srKy47777YsWKFSVLhmz4Ny9v3fDvOubWrFkTI0aMiG7dusXWW28dW221VRx44IExffr0kteYN29etGzZMiIiRo0aVbLfDd++K++eSevWrYtf//rXseOOO0Zubm506NAhLr/88igqKirVr0OHDnHMMcfEyy+/HPvuu2/k5eXFDjvsEL///e83+m/wTW3bto2DDjqo1OcW8d9x2blz59hzzz3L3e7RRx+Nbt26RcOGDaNFixbxs5/9LD7//PMy/Z566qnYc889Iy8vL/bcc89yj/WIiOLi4hgzZkzssccekZeXF/n5+XH22WfHv//97+98D991HAAA1BWHHXZYRPz3C3kRG5+zVOb86k9/+lMcfPDB0aRJk2jatGn86Ec/KnU+Vd49kx555JHo1q1byTadO3eOW2+9teT5tLlXRc4jN7yvzz//PPr27RuNGzeOli1bxsUXXxzr16/f6Ge0sXP+yp5nP//887HPPvtEw4YN46677krd51VXXRXNmzeP8ePHl/sFtd69e8cxxxyTuv1bb70VgwYNih122CHy8vKidevWMWTIkPjyyy9L9Vu+fHlceOGFJfOTVq1axeGHHx6vv/56SZ8PPvggTjjhhGjdunXk5eXF9ttvH6ecckr85z//KfX+Nsx9NjavTLtn0ncdLxWZLw8aNCjGjh0bEVFqSccNyrtnUmXmuH/961+joKAgWrZsGVtttVUcd9xxJaFZRfTv3z9mz54dc+bMKWlbuHBhvPjii6lXmC1evDjOOOOMyM/Pj7y8vOjSpUs88MADZfotW7YsBg0aFFtvvXU0a9YsBg4cGMuWLSv3NefMmRMnnnhiNG/ePPLy8mKfffaJZ5555jvrr8hxAHx/WOYOqDMmTJgQxx9/fDRo0CBOPfXUuPPOO+Pvf/97/OhHPyrp8/XXX8eBBx4Y7777bgwZMiT23nvvWLp0aTzzzDPx2WefRYsWLWL9+vVxzDHHxLRp0+KUU06JYcOGxfLly2Pq1Knx9ttvx4477ljp2tatWxe9e/eOAw44IH7729+WLCXx6KOPxsqVK+Pcc8+NbbfdNmbNmhW33XZbfPbZZ/Hoo4+WbP/WW2/FgQceGDk5OfHzn/88OnToEB9++GH87//+b1x77bVxyCGHRLt27WLChAlx3HHHlflcdtxxx+jRo0e5tR1//PHRrFmzuOiii+LUU0+No446Kho3bhwREe+8804ceOCB0bRp0/jVr34VOTk5cdddd8UhhxwSf/7zn8vcm+q8886Lli1bxogRIzb6TcYNJ62nn356pT/LDW699dY49thj47TTTos1a9bEI488EieddFI8++yzJd/weuedd+KYY46JH/7wh3HNNddEbm5uzJ07N/7617+WvM4999wTF1xwQZx44okxbNiwWL16dbz11lvx6quvpp7c/+EPf4i77747Zs2aFb/73e8iImK//fYrt29FjrnCwsL43e9+F6eeemqcddZZsXz58rj33nujd+/eMWvWrOjatWu0bNky7rzzzjj33HPjuOOOi+OPPz4iIn74wx+mfkZnnnlmPPDAA3HiiSfGL3/5y3j11V
dj9OjR8e6775YJY+bOnRsnnnhinHHGGTFw4MAYP358DBo0KLp16xZ77LFHhf5N+vfvH8OGDYuvv/46GjduHOvWrYt
|
|||
|
"text/plain": [
|
|||
|
"<Figure size 1700x1200 with 4 Axes>"
|
|||
|
]
|
|||
|
},
|
|||
|
"metadata": {},
|
|||
|
"output_type": "display_data"
|
|||
|
}
|
|||
|
],
|
|||
|
"source": [
|
|||
|
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"from sklearn.model_selection import cross_val_score\n",
"from sklearn.model_selection import cross_validate\n",
"from sklearn.preprocessing import StandardScaler\n",
"from sklearn.linear_model import LinearRegression, LogisticRegression\n",
"from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier\n",
"from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier\n",
"from sklearn.pipeline import Pipeline\n",
"from sklearn.compose import ColumnTransformer\n",
"from sklearn.preprocessing import OneHotEncoder\n",
|
|||
|
"\n",
|
|||
|
"# Load the dataset (the path is resolved relative to the notebook's working directory)\n",
"df = pd.read_csv(\"..//static//csv//FINAL_USO.csv\")\n",
"\n",
"# Numeric feature columns fed to the models; no categorical features are used here\n",
"numerical_cols = ['Volume', 'High', 'Open', 'Close', 'Low']\n",
"\n",
"# Standardize the numeric columns; with its default settings ColumnTransformer\n",
"# passes only the listed columns on to the model\n",
"numeric_scaling = ('num', StandardScaler(), numerical_cols)\n",
"preprocessor = ColumnTransformer(transformers=[numeric_scaling])\n",
|
|||
|
"\n",
|
|||
|
"# Split the data into features (X) and target (y) for the regression task\n",
"X_reg = df[numerical_cols]\n",
"y_reg = df['Adj Close']\n",
"\n",
"# Regression models to compare. The stochastic estimators get a fixed seed so\n",
"# that Restart & Run All reproduces the same scores.\n",
"models_reg = {\n",
"    \"Linear Regression\": LinearRegression(),\n",
"    \"Random Forest Regression\": RandomForestRegressor(random_state=42),\n",
"    \"Gradient Boosting Regression\": GradientBoostingRegressor(random_state=42)\n",
"}\n",
"\n",
"# Mean and spread (std) of the 5-fold cross-validated scores, one entry per model\n",
"mae_means = []\n",
"mae_stds = []\n",
"r2_means = []\n",
"r2_stds = []\n",
"\n",
"for name, model in models_reg.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    # A single cross_validate call scores both metrics on the same fitted folds,\n",
"    # instead of refitting every model once per metric.\n",
"    cv_results = cross_validate(\n",
"        pipeline, X_reg, y_reg, cv=5,\n",
"        scoring={'mae': 'neg_mean_absolute_error', 'r2': 'r2'}\n",
"    )\n",
"    # The scorer returns negated MAE (higher is better); flip the sign back\n",
"    mae_scores = -cv_results['test_mae']\n",
"    r2_scores = cv_results['test_r2']\n",
"    mae_means.append(mae_scores.mean())\n",
"    mae_stds.append(mae_scores.std())\n",
"    r2_means.append(r2_scores.mean())\n",
"    r2_stds.append(r2_scores.std())\n",
|
|||
|
"\n",
|
|||
|
"# Visualize the regression results: one bar chart per metric, error bars show the std across folds\n",
"fig, ax = plt.subplots(1, 2, figsize=(15, 6))\n",
"\n",
"# (axes, means, stds, y-label, title) for each panel\n",
"regression_panels = [\n",
"    (ax[0], mae_means, mae_stds, 'MAE', 'Mean Absolute Error (MAE) for Regression Models'),\n",
"    (ax[1], r2_means, r2_stds, 'R²', 'R-squared (R²) for Regression Models'),\n",
"]\n",
"for panel, means, stds, ylabel, title in regression_panels:\n",
"    panel.bar(models_reg.keys(), means, yerr=stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"    panel.set_ylabel(ylabel)\n",
"    panel.set_title(title)\n",
"    panel.yaxis.grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()\n",
|
|||
|
"\n",
|
|||
|
"# Split the data into features (X) and target (y) for the classification task.\n",
"# The target is 1 when 'Adj Close' is above its overall mean, otherwise 0.\n",
"X_class = df[numerical_cols]\n",
"y_class = (df['Adj Close'] > df['Adj Close'].mean()).astype(int)\n",
"\n",
"# Classification models to compare. The stochastic estimators get a fixed seed so\n",
"# that Restart & Run All reproduces the same scores.\n",
"models_class = {\n",
"    \"Logistic Regression\": LogisticRegression(),\n",
"    \"Random Forest Classification\": RandomForestClassifier(random_state=42),\n",
"    \"Gradient Boosting Classification\": GradientBoostingClassifier(random_state=42)\n",
"}\n",
"\n",
"# Mean and spread (std) of the 5-fold cross-validated scores, one entry per model\n",
"accuracy_means = []\n",
"accuracy_stds = []\n",
"precision_means = []\n",
"precision_stds = []\n",
"recall_means = []\n",
"recall_stds = []\n",
"f1_means = []\n",
"f1_stds = []\n",
"\n",
"for name, model in models_class.items():\n",
"    pipeline = Pipeline(steps=[\n",
"        ('preprocessor', preprocessor),\n",
"        ('model', model)\n",
"    ])\n",
"    # A single cross_validate call scores all four metrics on the same fitted folds,\n",
"    # instead of refitting every model once per metric.\n",
"    cv_results = cross_validate(\n",
"        pipeline, X_class, y_class, cv=5,\n",
"        scoring=['accuracy', 'precision', 'recall', 'f1']\n",
"    )\n",
"    accuracy_scores = cv_results['test_accuracy']\n",
"    precision_scores = cv_results['test_precision']\n",
"    recall_scores = cv_results['test_recall']\n",
"    f1_scores = cv_results['test_f1']\n",
"    accuracy_means.append(accuracy_scores.mean())\n",
"    accuracy_stds.append(accuracy_scores.std())\n",
"    precision_means.append(precision_scores.mean())\n",
"    precision_stds.append(precision_scores.std())\n",
"    recall_means.append(recall_scores.mean())\n",
"    recall_stds.append(recall_scores.std())\n",
"    f1_means.append(f1_scores.mean())\n",
"    f1_stds.append(f1_scores.std())\n",
|
|||
|
"\n",
|
|||
|
"# Visualize the classification results: one bar chart per metric, error bars show the std across folds\n",
"fig, ax = plt.subplots(2, 2, figsize=(17, 12))\n",
"\n",
"# (means, stds, label) per metric, listed in the row-major order of the 2x2 grid\n",
"classification_panels = [\n",
"    (accuracy_means, accuracy_stds, 'Accuracy'),\n",
"    (precision_means, precision_stds, 'Precision'),\n",
"    (recall_means, recall_stds, 'Recall'),\n",
"    (f1_means, f1_stds, 'F1-score'),\n",
"]\n",
"for panel, (means, stds, label) in zip(ax.flat, classification_panels):\n",
"    panel.bar(models_class.keys(), means, yerr=stds, align='center', alpha=0.5, ecolor='black', capsize=10)\n",
"    panel.set_ylabel(label)\n",
"    panel.set_title(label + ' for Classification Models')\n",
"    panel.yaxis.grid(True)\n",
"\n",
"plt.tight_layout()\n",
"plt.show()"
|
|||
|
]
|
|||
|
}
|
|||
|
],
|
|||
|
"metadata": {
|
|||
|
"kernelspec": {
|
|||
|
"display_name": "Python 3",
|
|||
|
"language": "python",
|
|||
|
"name": "python3"
|
|||
|
},
|
|||
|
"language_info": {
|
|||
|
"codemirror_mode": {
|
|||
|
"name": "ipython",
|
|||
|
"version": 3
|
|||
|
},
|
|||
|
"file_extension": ".py",
|
|||
|
"mimetype": "text/x-python",
|
|||
|
"name": "python",
|
|||
|
"nbconvert_exporter": "python",
|
|||
|
"pygments_lexer": "ipython3",
|
|||
|
"version": "3.12.6"
|
|||
|
}
|
|||
|
},
|
|||
|
"nbformat": 4,
|
|||
|
"nbformat_minor": 2
|
|||
|
}
|