небольшая правка коментов
This commit is contained in:
parent
39f65612d8
commit
998128cb0f
@ -86,7 +86,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 47,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -107,8 +107,8 @@
|
|||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"# 3. Проверим датасет на наличие пропусков и удалим строки с недостающими данными\n",
|
"# 3. Проверим датасет на наличие пропусков и удалим строки с недостающими данными\n",
|
||||||
"print(data.isnull().sum()) # Суммируем пропуски по каждому столбцу\n",
|
"print(data.isnull().sum()) \n",
|
||||||
"data.dropna(inplace=True) # Удаляем строки с пропусками\n",
|
"data.dropna(inplace=True) \n",
|
||||||
"print(\"Данные после очистки:\", data.shape)\n"
|
"print(\"Данные после очистки:\", data.shape)\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -124,7 +124,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 48,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -144,8 +144,8 @@
|
|||||||
}
|
}
|
||||||
],
|
],
|
||||||
"source": [
|
"source": [
|
||||||
"print(data.isnull().sum()) # Вывод количества пропусков\n",
|
"print(data.isnull().sum()) \n",
|
||||||
"data.dropna(inplace=True) # Удаление строк с пропущенными значениями\n",
|
"data.dropna(inplace=True) \n",
|
||||||
"print(f\"Количество строк после удаления пропусков: {data.shape[0]}\")\n"
|
"print(f\"Количество строк после удаления пропусков: {data.shape[0]}\")\n"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
@ -215,7 +215,7 @@
|
|||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 51,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
@ -232,7 +232,7 @@
|
|||||||
"from sklearn.model_selection import train_test_split\n",
|
"from sklearn.model_selection import train_test_split\n",
|
||||||
"\n",
|
"\n",
|
||||||
"X = data[scaled_columns]\n",
|
"X = data[scaled_columns]\n",
|
||||||
"y = data['Close'] # Заменить на целевую переменную, если другая\n",
|
"y = data['Close']\n",
|
||||||
"X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n",
|
"X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n",
|
||||||
"X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n",
|
"X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n",
|
||||||
"\n",
|
"\n",
|
||||||
@ -573,7 +573,6 @@
|
|||||||
" dataframe_name=\"stock_data\",\n",
|
" dataframe_name=\"stock_data\",\n",
|
||||||
" dataframe=data,\n",
|
" dataframe=data,\n",
|
||||||
" index=\"datetime\", \n",
|
" index=\"datetime\", \n",
|
||||||
" # Убираем time_index, так как datetime уже используется как индекс\n",
|
|
||||||
" logical_types={\n",
|
" logical_types={\n",
|
||||||
" \"open\": ww.logical_types.Double, \n",
|
" \"open\": ww.logical_types.Double, \n",
|
||||||
" \"high\": ww.logical_types.Double,\n",
|
" \"high\": ww.logical_types.Double,\n",
|
||||||
@ -594,13 +593,10 @@
|
|||||||
"# Выводим имена столбцов в feature_matrix, чтобы убедиться, какие признаки были сгенерированы\n",
|
"# Выводим имена столбцов в feature_matrix, чтобы убедиться, какие признаки были сгенерированы\n",
|
||||||
"print(\"Generated feature columns:\", feature_matrix.columns)\n",
|
"print(\"Generated feature columns:\", feature_matrix.columns)\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Объединяем новые признаки с исходными данными, добавив суффиксы для дублирующихся столбцов\n",
|
|
||||||
"data_featuretools = data.join(feature_matrix, lsuffix='_orig', rsuffix='_feature')\n",
|
"data_featuretools = data.join(feature_matrix, lsuffix='_orig', rsuffix='_feature')\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Масштабирование данных\n",
|
|
||||||
"scaler = StandardScaler()\n",
|
"scaler = StandardScaler()\n",
|
||||||
"\n",
|
"\n",
|
||||||
"# Используем правильные имена столбцов для масштабирования\n",
|
|
||||||
"scaled_columns = ['open_orig', 'high_orig', 'low_orig', 'close_orig', 'volume_orig']\n",
|
"scaled_columns = ['open_orig', 'high_orig', 'low_orig', 'close_orig', 'volume_orig']\n",
|
||||||
"\n",
|
"\n",
|
||||||
"data_featuretools[scaled_columns] = scaler.fit_transform(data_featuretools[scaled_columns])\n",
|
"data_featuretools[scaled_columns] = scaler.fit_transform(data_featuretools[scaled_columns])\n",
|
||||||
@ -609,7 +605,7 @@
|
|||||||
"\n",
|
"\n",
|
||||||
"feature_columns = [col for col in feature_matrix.columns if 'feature' in col]\n",
|
"feature_columns = [col for col in feature_matrix.columns if 'feature' in col]\n",
|
||||||
"X = data_featuretools[scaled_columns + feature_columns]\n",
|
"X = data_featuretools[scaled_columns + feature_columns]\n",
|
||||||
"y = data_featuretools['close_orig'] # Целевая переменная\n",
|
"y = data_featuretools['close_orig'] \n",
|
||||||
"\n",
|
"\n",
|
||||||
"X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n",
|
"X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.4, random_state=42)\n",
|
||||||
"X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n",
|
"X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42)\n",
|
||||||
|
Loading…
Reference in New Issue
Block a user