596 lines
85 KiB
Plaintext
596 lines
85 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# Лабораторная работа 3\n",
|
|
"\n",
|
|
"Датасет - **Цены на кофе**\thttps://www.kaggle.com/datasets/mayankanand2701/starbucks-stock-price-dataset\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Бизнес-цели**: \n",
|
|
"1. Прогнозирование цены закрытия акции для поддержки принятия решений по инвестициям.\n",
|
|
"2. Оценка волатильности акций Starbucks для долгосрочных стратегий инвестирования."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"**Цели технического проекта**: \n",
|
|
"1. Создание модели машинного обучения для прогнозирования цены закрытия акций на основе исторических данных (дат, цен открытия, максимальных и минимальных цен, объёма торгов).\n",
|
|
"2. Разработка системы, которая вычисляет и анализирует волатильность на основе исторической ценовой информации и объёмов торгов."
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" Date Open High Low Close Adj Close \\\n",
|
|
"0 1992-06-26 0.328125 0.347656 0.320313 0.335938 0.260703 \n",
|
|
"1 1992-06-29 0.339844 0.367188 0.332031 0.359375 0.278891 \n",
|
|
"2 1992-06-30 0.367188 0.371094 0.343750 0.347656 0.269797 \n",
|
|
"3 1992-07-01 0.351563 0.359375 0.339844 0.355469 0.275860 \n",
|
|
"4 1992-07-02 0.359375 0.359375 0.347656 0.355469 0.275860 \n",
|
|
"... ... ... ... ... ... ... \n",
|
|
"8031 2024-05-17 75.269997 78.000000 74.919998 77.849998 77.849998 \n",
|
|
"8032 2024-05-20 77.680000 78.320000 76.709999 77.540001 77.540001 \n",
|
|
"8033 2024-05-21 77.559998 78.220001 77.500000 77.720001 77.720001 \n",
|
|
"8034 2024-05-22 77.699997 81.019997 77.440002 80.720001 80.720001 \n",
|
|
"8035 2024-05-23 80.099998 80.699997 79.169998 79.260002 79.260002 \n",
|
|
"\n",
|
|
" Volume \n",
|
|
"0 224358400 \n",
|
|
"1 58732800 \n",
|
|
"2 34777600 \n",
|
|
"3 18316800 \n",
|
|
"4 13996800 \n",
|
|
"... ... \n",
|
|
"8031 14436500 \n",
|
|
"8032 11183800 \n",
|
|
"8033 8916600 \n",
|
|
"8034 22063400 \n",
|
|
"8035 4651418 \n",
|
|
"\n",
|
|
"[8036 rows x 7 columns]\n",
|
|
"0 8212\n",
|
|
"1 8215\n",
|
|
"2 8216\n",
|
|
"3 8217\n",
|
|
"4 8218\n",
|
|
" ... \n",
|
|
"8031 19860\n",
|
|
"8032 19863\n",
|
|
"8033 19864\n",
|
|
"8034 19865\n",
|
|
"8035 19866\n",
|
|
"Name: Date_numeric, Length: 8036, dtype: int64\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"from sklearn.model_selection import train_test_split\n",
|
|
"from imblearn.under_sampling import RandomUnderSampler\n",
|
|
"\n",
|
|
"df = pd.read_csv(\"data/Coffe.csv\")\n",
|
|
"print(df)\n",
|
|
"\n",
|
|
"df['Date'] = pd.to_datetime(df['Date'])\n",
|
|
"df['Date_numeric'] = (df['Date'] - pd.Timestamp('1970-01-01')).dt.days\n",
|
|
"print(df['Date_numeric'])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 4,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Index(['Date', 'Open', 'High', 'Low', 'Close', 'Adj Close', 'Volume',\n",
|
|
" 'Date_numeric', 'Close_binned'],\n",
|
|
" dtype='object')\n",
|
|
"Обучающая выборка: (4821, 9)\n",
|
|
"Close\n",
|
|
"0.750000 17\n",
|
|
"0.765625 15\n",
|
|
"0.882813 11\n",
|
|
"0.753906 9\n",
|
|
"0.773438 9\n",
|
|
" ..\n",
|
|
"7.760000 1\n",
|
|
"88.459999 1\n",
|
|
"104.330002 1\n",
|
|
"10.850000 1\n",
|
|
"100.930000 1\n",
|
|
"Name: count, Length: 3690, dtype: int64\n",
|
|
"Контрольная выборка: (1607, 9)\n",
|
|
"Close\n",
|
|
"0.835938 6\n",
|
|
"0.781250 5\n",
|
|
"0.757813 5\n",
|
|
"1.851563 4\n",
|
|
"0.738281 4\n",
|
|
" ..\n",
|
|
"100.620003 1\n",
|
|
"6.020000 1\n",
|
|
"85.959999 1\n",
|
|
"91.529999 1\n",
|
|
"111.000000 1\n",
|
|
"Name: count, Length: 1436, dtype: int64\n",
|
|
"Тестовая выборка: (1607, 9)\n",
|
|
"Close\n",
|
|
"0.703125 6\n",
|
|
"0.851563 6\n",
|
|
"0.750000 6\n",
|
|
"0.742188 5\n",
|
|
"0.781250 5\n",
|
|
" ..\n",
|
|
"47.275002 1\n",
|
|
"31.760000 1\n",
|
|
"75.500000 1\n",
|
|
"2.406250 1\n",
|
|
"8.107500 1\n",
|
|
"Name: count, Length: 1427, dtype: int64\n",
|
|
"Обучающая выборка: (4821, 9)\n",
|
|
"Close_binned\n",
|
|
"High 1639\n",
|
|
"Low 1591\n",
|
|
"Medium 1591\n",
|
|
"Name: count, dtype: int64\n",
|
|
"Контрольная выборка: (1607, 9)\n",
|
|
"Close_binned\n",
|
|
"High 546\n",
|
|
"Medium 531\n",
|
|
"Low 530\n",
|
|
"Name: count, dtype: int64\n",
|
|
"Тестовая выборка: (1607, 9)\n",
|
|
"Close_binned\n",
|
|
"High 547\n",
|
|
"Low 530\n",
|
|
"Medium 530\n",
|
|
"Name: count, dtype: int64\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"def split_stratified_into_train_val_test(\n",
|
|
" df_input,\n",
|
|
" stratify_colname=\"y\",\n",
|
|
" frac_train=0.6,\n",
|
|
" frac_val=0.15,\n",
|
|
" frac_test=0.25,\n",
|
|
" random_state=None,\n",
|
|
"):\n",
|
|
" if frac_train + frac_val + frac_test != 1.0:\n",
|
|
" raise ValueError(\n",
|
|
" \"fractions %f, %f, %f do not add up to 1.0\"\n",
|
|
" % (frac_train, frac_val, frac_test)\n",
|
|
" )\n",
|
|
"\n",
|
|
" if stratify_colname not in df_input.columns:\n",
|
|
" raise ValueError(\"%s is not a column in the dataframe\" % (stratify_colname))\n",
|
|
"\n",
|
|
" X = df_input \n",
|
|
" y = df_input[\n",
|
|
" [stratify_colname]\n",
|
|
" ] \n",
|
|
"\n",
|
|
" df_train, df_temp, y_train, y_temp = train_test_split(\n",
|
|
" X, y, stratify=y, test_size=(1.0 - frac_train), random_state=random_state\n",
|
|
" )\n",
|
|
"\n",
|
|
" relative_frac_test = frac_test / (frac_val + frac_test)\n",
|
|
" df_val, df_test, y_val, y_test = train_test_split(\n",
|
|
" df_temp,\n",
|
|
" y_temp,\n",
|
|
" stratify=y_temp,\n",
|
|
" test_size=relative_frac_test,\n",
|
|
" random_state=random_state,\n",
|
|
" )\n",
|
|
"\n",
|
|
" assert len(df_input) == len(df_train) + len(df_val) + len(df_test)\n",
|
|
"\n",
|
|
" return df_train, df_val, df_test\n",
|
|
"\n",
|
|
"bins = [df['Close'].min(), df['Close'].quantile(0.33), df['Close'].quantile(0.66), df['Close'].max()]\n",
|
|
"labels = ['Low', 'Medium', 'High']\n",
|
|
"df['Close_binned'] = pd.cut(df['Close'], bins=bins, labels=labels)\n",
|
|
"df = df.dropna()\n",
|
|
"# Now stratify using the binned values\n",
|
|
"df_train, df_val, df_test = split_stratified_into_train_val_test(\n",
|
|
" df, stratify_colname=\"Close_binned\", frac_train=0.60, frac_val=0.20, frac_test=0.20\n",
|
|
")\n",
|
|
"\n",
|
|
"print(df_train.columns) \n",
|
|
" \n",
|
|
"print(\"Обучающая выборка: \", df_train.shape)\n",
|
|
"print(df_train.Close.value_counts()) \n",
|
|
"\n",
|
|
"print(\"Контрольная выборка: \", df_val.shape)\n",
|
|
"print(df_val.Close.value_counts())\n",
|
|
"\n",
|
|
"print(\"Тестовая выборка: \", df_test.shape)\n",
|
|
"print(df_test.Close.value_counts())\n",
|
|
"\n",
|
|
"print(\"Обучающая выборка: \", df_train.shape)\n",
|
|
"print(df_train['Close_binned'].value_counts())\n",
|
|
"\n",
|
|
"print(\"Контрольная выборка: \", df_val.shape)\n",
|
|
"print(df_val['Close_binned'].value_counts())\n",
|
|
"\n",
|
|
"print(\"Тестовая выборка: \", df_test.shape)\n",
|
|
"print(df_test['Close_binned'].value_counts())\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 5,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Обучающая выборка после undersampling: (4773, 9)\n",
|
|
"Close\n",
|
|
"0.750000 17\n",
|
|
"0.765625 15\n",
|
|
"0.882813 11\n",
|
|
"0.773438 9\n",
|
|
"0.753906 9\n",
|
|
" ..\n",
|
|
"58.810001 1\n",
|
|
"40.535000 1\n",
|
|
"91.860001 1\n",
|
|
"90.779999 1\n",
|
|
"96.970001 1\n",
|
|
"Name: count, Length: 3651, dtype: int64\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"rus = RandomUnderSampler(random_state=42)\n",
|
|
"X_resampled, y_resampled = rus.fit_resample(df_train, df_train[\"Close_binned\"])\n",
|
|
"df_train_rus = pd.DataFrame(X_resampled)\n",
|
|
"print(\"Обучающая выборка после undersampling: \", df_train_rus.shape)\n",
|
|
"print(df_train_rus.Close.value_counts())"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 6,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" Date Open High Low Close Adj Close \\\n",
|
|
"75 1992-10-13 0.464844 0.472656 0.457031 0.472656 0.366803 \n",
|
|
"7819 2023-07-17 100.830002 101.809998 100.040001 100.930000 98.501541 \n",
|
|
"6447 2018-01-31 57.230000 57.450001 56.700001 56.810001 49.579262 \n",
|
|
"706 1995-04-12 0.769531 0.789063 0.769531 0.785156 0.609317 \n",
|
|
"4437 2010-02-05 10.895000 11.020000 10.630000 10.850000 8.420099 \n",
|
|
"... ... ... ... ... ... ... \n",
|
|
"4113 2008-10-22 5.120000 5.245000 4.880000 4.995000 3.876349 \n",
|
|
"4544 2010-07-12 12.635000 12.760000 12.490000 12.635000 9.845955 \n",
|
|
"6517 2018-05-11 57.720001 57.860001 57.070000 57.270000 50.514595 \n",
|
|
"3336 2005-09-21 11.642500 11.775000 11.530000 11.667500 9.054512 \n",
|
|
"3122 2004-11-15 13.797500 13.860000 13.687500 13.790000 10.701671 \n",
|
|
"\n",
|
|
" Volume Date_numeric Close_binned_Low Close_binned_Medium \\\n",
|
|
"75 4390400 8321 True False \n",
|
|
"7819 5244500 19555 False False \n",
|
|
"6447 13118400 17562 False False \n",
|
|
"706 10294400 9232 True False \n",
|
|
"4437 22069800 14645 False True \n",
|
|
"... ... ... ... ... \n",
|
|
"4113 29681400 14174 True False \n",
|
|
"4544 12906200 14802 False True \n",
|
|
"6517 5843400 17662 False False \n",
|
|
"3336 16207600 13047 False True \n",
|
|
"3122 10700400 12737 False True \n",
|
|
"\n",
|
|
" Close_binned_High Volume_binned Price_change \n",
|
|
"75 False 0 0.007812 \n",
|
|
"7819 True 0 0.099998 \n",
|
|
"6447 True 2 -0.419999 \n",
|
|
"706 False 1 0.015625 \n",
|
|
"4437 False 3 -0.045000 \n",
|
|
"... ... ... ... \n",
|
|
"4113 False 3 -0.125000 \n",
|
|
"4544 False 2 0.000000 \n",
|
|
"6517 True 0 -0.450001 \n",
|
|
"3336 False 2 0.025000 \n",
|
|
"3122 False 1 -0.007500 \n",
|
|
"\n",
|
|
"[4821 rows x 13 columns]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"df_train = pd.get_dummies(df_train, columns=['Close_binned'])\n",
|
|
"df_train['Volume_binned'] = pd.qcut(df_train['Volume'], q=4, labels=False)\n",
|
|
"df_train['Price_change'] = df_train['Close'] - df_train['Open']\n",
|
|
"print(df_train) "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 7,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
" Date Open High Low Close Adj Close Volume \\\n",
|
|
"75 1992-10-13 -0.881061 -0.882121 -0.880250 -0.881109 0.366803 -0.731159 \n",
|
|
"7819 2023-07-17 2.093371 2.095750 2.098759 2.096893 98.501541 -0.670368 \n",
|
|
"6447 2018-01-31 0.801237 0.792199 0.802249 0.788980 49.579262 -0.109940 \n",
|
|
"706 1995-04-12 -0.872031 -0.872824 -0.870902 -0.871845 0.609317 -0.310940 \n",
|
|
"4437 2010-02-05 -0.571952 -0.572180 -0.575927 -0.573479 8.420099 0.527179 \n",
|
|
"... ... ... ... ... ... ... ... \n",
|
|
"4113 2008-10-22 -0.743100 -0.741883 -0.747938 -0.747047 3.876349 1.068937 \n",
|
|
"4544 2010-07-12 -0.520385 -0.521049 -0.520286 -0.520563 9.845955 -0.125044 \n",
|
|
"6517 2018-05-11 0.815758 0.804247 0.813318 0.802616 50.514595 -0.627741 \n",
|
|
"3336 2005-09-21 -0.549799 -0.549994 -0.549004 -0.549244 9.054512 0.109935 \n",
|
|
"3122 2004-11-15 -0.485933 -0.488725 -0.484463 -0.486324 10.701671 -0.282042 \n",
|
|
"\n",
|
|
" Date_numeric Close_binned_Low Close_binned_Medium Close_binned_High \\\n",
|
|
"75 8321 True False False \n",
|
|
"7819 19555 False False True \n",
|
|
"6447 17562 False False True \n",
|
|
"706 9232 True False False \n",
|
|
"4437 14645 False True False \n",
|
|
"... ... ... ... ... \n",
|
|
"4113 14174 True False False \n",
|
|
"4544 14802 False True False \n",
|
|
"6517 17662 False False True \n",
|
|
"3336 13047 False True False \n",
|
|
"3122 12737 False True False \n",
|
|
"\n",
|
|
" Volume_binned Price_change Volatility \n",
|
|
"75 0 0.007812 -0.001871 \n",
|
|
"7819 0 0.099998 -0.003009 \n",
|
|
"6447 2 -0.419999 -0.010050 \n",
|
|
"706 1 0.015625 -0.001922 \n",
|
|
"4437 3 -0.045000 0.003747 \n",
|
|
"... ... ... ... \n",
|
|
"4113 3 -0.125000 0.006055 \n",
|
|
"4544 2 0.000000 -0.000763 \n",
|
|
"6517 0 -0.450001 -0.009070 \n",
|
|
"3336 2 0.025000 -0.000990 \n",
|
|
"3122 1 -0.007500 -0.004262 \n",
|
|
"\n",
|
|
"[4821 rows x 14 columns]\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.preprocessing import StandardScaler\n",
|
|
"\n",
|
|
"scaler = StandardScaler()\n",
|
|
"df_train[['Open', 'Close', 'High', 'Low', 'Volume']] = scaler.fit_transform(\n",
|
|
" df_train[['Open', 'Close', 'High', 'Low', 'Volume']])\n",
|
|
"df_train['Volatility'] = df_train['High'] - df_train['Low']\n",
|
|
"print(df_train) "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stderr",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"c:\\Python312\\Lib\\site-packages\\featuretools\\synthesis\\deep_feature_synthesis.py:169: UserWarning: Only one dataframe in entityset, changing max_depth to 1 since deeper features cannot be created\n",
|
|
" warnings.warn(\n"
|
|
]
|
|
},
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"[<Feature: Open>,\n",
|
|
" <Feature: High>,\n",
|
|
" <Feature: Low>,\n",
|
|
" <Feature: Close>,\n",
|
|
" <Feature: Adj Close>,\n",
|
|
" <Feature: Volume>,\n",
|
|
" <Feature: Date_numeric>,\n",
|
|
" <Feature: Close_binned_Low>,\n",
|
|
" <Feature: Close_binned_Medium>,\n",
|
|
" <Feature: Close_binned_High>,\n",
|
|
" <Feature: Volume_binned>,\n",
|
|
" <Feature: Price_change>,\n",
|
|
" <Feature: Volatility>,\n",
|
|
" <Feature: DAY(Date)>,\n",
|
|
" <Feature: MONTH(Date)>,\n",
|
|
" <Feature: WEEKDAY(Date)>,\n",
|
|
" <Feature: YEAR(Date)>]"
|
|
]
|
|
},
|
|
"execution_count": 8,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"import featuretools as ft\n",
|
|
"\n",
|
|
"es = ft.EntitySet(id=\"stocks\")\n",
|
|
"es = es.add_dataframe(\n",
|
|
" dataframe_name=\"stock_data\", \n",
|
|
" dataframe=df_train, \n",
|
|
" index=\"Date\")\n",
|
|
"\n",
|
|
"feature_matrix, feature_defs = ft.dfs(\n",
|
|
" entityset=es, \n",
|
|
" target_dataframe_name=\"stock_data\")\n",
|
|
"\n",
|
|
"feature_defs"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 9,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Open float64\n",
|
|
"High float64\n",
|
|
"Low float64\n",
|
|
"Adj Close float64\n",
|
|
"Volume float64\n",
|
|
"Date_numeric int64\n",
|
|
"Close_binned_Low bool\n",
|
|
"Close_binned_Medium bool\n",
|
|
"Close_binned_High bool\n",
|
|
"Volume_binned int64\n",
|
|
"Price_change float64\n",
|
|
"Volatility float64\n",
|
|
"dtype: object\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Оценка предсказательной способности\n",
|
|
"from sklearn.linear_model import LinearRegression\n",
|
|
"from sklearn.metrics import mean_absolute_error, mean_squared_error\n",
|
|
"df_train_regression = df_train.copy()\n",
|
|
"\n",
|
|
"X_train = df_train_regression.drop(['Close', 'Date'], axis=1)\n",
|
|
"y_train = df_train_regression['Close']\n",
|
|
"X_test = df_test.drop(['Close', 'Date'], axis=1)\n",
|
|
"y_test = df_test['Close']\n",
|
|
"\n",
|
|
"X_train_encoded = pd.get_dummies(X_train, drop_first=True)\n",
|
|
"X_test_encoded = pd.get_dummies(X_test, drop_first=True)\n",
|
|
"\n",
|
|
"X_test_encoded = X_test_encoded.reindex(columns=X_train_encoded.columns, fill_value=0)\n",
|
|
"\n",
|
|
"print(X_train_encoded.dtypes)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"Средняя абсолютная ошибка: 0.28573230577357767\n",
|
|
"Среднеквадратичная ошибка: 0.2813734754209575\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"model = LinearRegression()\n",
|
|
"model.fit(X_train_encoded, y_train)\n",
|
|
"\n",
|
|
"predictions = model.predict(X_test_encoded)\n",
|
|
"\n",
|
|
"mae = mean_absolute_error(y_test, predictions)\n",
|
|
"mse = mean_squared_error(y_test, predictions)\n",
|
|
"print(\"Средняя абсолютная ошибка:\", mae)\n",
|
|
"print(\"Среднеквадратичная ошибка:\", mse)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"время, затраченное на обучение модели: 0.025032997131347656. Время, затраченное на предсказание: 0.0\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Оценка скорости вычисления\n",
|
|
"import time\n",
|
|
"start_time = time.time()\n",
|
|
"model.fit(X_train_encoded, y_train)\n",
|
|
"training_time = time.time() - start_time\n",
|
|
"\n",
|
|
"start_time = time.time()\n",
|
|
"predictions = model.predict(X_test_encoded)\n",
|
|
"prediction_time = time.time() - start_time\n",
|
|
"\n",
|
|
"print(f'время, затраченное на обучение модели: {training_time}. Время, затраченное на предсказание: {prediction_time}')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 12,
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"image/png": "",
|
|
"text/plain": [
|
|
"<Figure size 640x480 with 2 Axes>"
|
|
]
|
|
},
|
|
"metadata": {},
|
|
"output_type": "display_data"
|
|
}
|
|
],
|
|
"source": [
|
|
"# Оценка корреляции\n",
|
|
"import seaborn as sns\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"\n",
|
|
"corr_matrix = df_train_regression.corr()\n",
|
|
"sns.heatmap(corr_matrix, annot=False)\n",
|
|
"plt.show()"
|
|
]
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.12.5"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|