102 lines
4.0 KiB
Python
102 lines
4.0 KiB
Python
|
import pandas as pd
|
|||
|
from sklearn.ensemble import RandomForestRegressor
|
|||
|
from sklearn.metrics import mean_squared_error
|
|||
|
from sklearn.model_selection import train_test_split
|
|||
|
from sklearn.preprocessing import LabelEncoder
|
|||
|
from sqlalchemy import create_engine
|
|||
|
|
|||
|
|
|||
|
# NOTE(review): credentials are embedded in this URL; it is kept only as the
# backward-compatible default. Prefer passing `db_url` from configuration.
_DEFAULT_DB_URL = 'mysql+pymysql://wind:wind@193.124.203.110:3306/wind_towers'

# Name of the label-encoded copy of the BarTrend column.
_BARTREND_ENCODED = 'BarTrend_encoded'


def _fit_and_score(feature_frame, target_series):
    """Fit a random forest on the first 80% of rows; return (model, test MSE).

    The split is not shuffled, so the model is evaluated on the most recent
    20% of the rows.
    """
    X_train, X_test, y_train, y_test = train_test_split(
        feature_frame.values, target_series.values, test_size=0.2, shuffle=False
    )
    model = RandomForestRegressor()
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    return model, mse


def run_prediction_module(db_url: str = _DEFAULT_DB_URL, lags: int = 3) -> dict:
    """Train one random-forest model per weather column and predict each value.

    Reads rows from the ``weather_data`` table, appends ``lags`` shifted
    copies of every column (suffix ``_t-<lag>``), trains a separate
    ``RandomForestRegressor`` for every numeric column plus a label-encoded
    ``BarTrend``, prints the test MSE of every model, and finally evaluates
    every model on the most recent complete row.

    Args:
        db_url: SQLAlchemy connection URL of the database to read from.
        lags: Number of shifted copies of the data to add as features.

    Returns:
        Mapping of column name to the value predicted for the latest row.
        The ``BarTrend`` entry holds the decoded (original) label.

    Raises:
        ValueError: If fewer than two complete rows remain after the lagged
            rows containing missing values are dropped.
    """
    engine = create_engine(db_url)

    # NOTE(review): the reference timestamp is hard-coded in the query, so the
    # same 36-hour-back cutoff is used on every call.
    query = """
        SELECT BarTrend, CRC, DateStamp, DewPoint, HeatIndex, ETDay, HumIn, HumOut,
               Pressure, RainDay, RainMonth, RainRate, RainStorm, RainYear,
               TempIn, TempOut, WindDir, WindSpeed, WindSpeed10Min
        FROM weather_data
        WHERE DateStamp >= '2024-10-14 21:00:00' - INTERVAL 36 HOUR;
    """
    try:
        df = pd.read_sql(query, engine)
    finally:
        # Release the connection pool even if the query fails.
        engine.dispose()

    df['DateStamp'] = pd.to_datetime(df['DateStamp'])
    df = df.set_index('DateStamp').sort_index()

    # Current values followed by `lags` shifted copies of every column.
    shifted_dfs = [df]
    shifted_dfs.extend(
        df.shift(lag).add_suffix(f'_t-{lag}') for lag in range(1, lags + 1)
    )
    df_with_lags = pd.concat(shifted_dfs, axis=1).dropna()

    if len(df_with_lags) < 2:
        # An 80/20 split needs at least one training and one test row.
        raise ValueError(
            'Not enough complete rows to train on: '
            f'{len(df_with_lags)} left after dropping rows with missing values.'
        )

    # Convert BarTrend to a numeric code. The explicit int64 cast keeps the
    # column through the dtype filter below regardless of the integer type
    # the encoder returns on the current platform.
    le = LabelEncoder()
    df_with_lags[_BARTREND_ENCODED] = (
        le.fit_transform(df_with_lags['BarTrend']).astype('int64')
    )

    # Keep numeric data only.
    df_with_lags = df_with_lags.select_dtypes(include=['float64', 'int64'])

    # Every numeric source column is a target; the encoded BarTrend goes last.
    target_columns = [col for col in df.columns if col in df_with_lags.columns]
    target_columns.append(_BARTREND_ENCODED)

    # BarTrend and its encoded copy carry the same information, so neither may
    # be used as a feature for the other.
    bartrend_pair = {'BarTrend', _BARTREND_ENCODED}

    # NOTE(review): features still contain the same-timestamp readings of the
    # other columns, so the result is an estimate for the latest row rather
    # than a forecast of a future one - confirm this is the intent.
    trained = {}
    for target_column in target_columns:
        if target_column in bartrend_pair:
            excluded = bartrend_pair
        else:
            excluded = {target_column}
        feature_frame = df_with_lags.drop(columns=list(excluded), errors='ignore')

        model, mse = _fit_and_score(feature_frame, df_with_lags[target_column])

        # Remember the latest feature row in exactly the column layout this
        # model was trained on; the layout differs from model to model.
        trained[target_column] = (model, feature_frame.iloc[[-1]].values)

        label = 'BarTrend' if target_column == _BARTREND_ENCODED else target_column
        quality = "хорошая" if mse < 1.0 else "плохая"
        print(f"MSE для {label}: {mse} ({quality})")

    predictions = {}
    highest_class = len(le.classes_) - 1
    for target_column, (model, last_row) in trained.items():
        prediction = model.predict(last_row)[0]
        if target_column == _BARTREND_ENCODED:
            # Round to the nearest class index (truncation biases downwards)
            # and clamp it to the range the encoder knows about.
            class_index = min(max(int(round(prediction)), 0), highest_class)
            prediction = le.inverse_transform([class_index])[0]
            target_column = 'BarTrend'
        predictions[target_column] = prediction
        print(f"Предсказание для {target_column}: {prediction}")

    return predictions  # Predicted values keyed by column name
|