import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sqlalchemy import create_engine

# NOTE(review): the connection URL embeds credentials in source control;
# consider moving it to configuration or an environment variable.
_DB_URL = 'mysql+pymysql://wind:wind@193.124.203.110:3306/wind_towers'

# NOTE(review): the reference timestamp is hard-coded, so the 36-hour window
# never moves — confirm whether it should follow the current time.
_WEATHER_QUERY = """
    SELECT BarTrend, CRC, DateStamp, DewPoint, HeatIndex, ETDay, HumIn, HumOut,
           Pressure, RainDay, RainMonth, RainRate, RainStorm, RainYear,
           TempIn, TempOut, WindDir, WindSpeed, WindSpeed10Min
    FROM weather_data
    WHERE DateStamp >= '2024-10-14 21:00:00' - INTERVAL 36 HOUR;
"""

_LAGS = 3            # number of previous rows appended as lag features
_TEST_SIZE = 0.2     # share of the most recent rows held out for scoring
_GOOD_MSE = 1.0      # MSE below this is reported as "good"

_ENCODED_TARGET = 'BarTrend_encoded'
_RAW_TREND = 'BarTrend'


def _load_weather_frame():
    """Read the weather rows and return them indexed and sorted by DateStamp."""
    engine = create_engine(_DB_URL)
    try:
        frame = pd.read_sql(_WEATHER_QUERY, engine)
    finally:
        # Release pooled connections even when the query fails.
        engine.dispose()
    frame['DateStamp'] = pd.to_datetime(frame['DateStamp'])
    return frame.set_index('DateStamp').sort_index()


def _add_lag_columns(frame, lags):
    """Append ``lags`` shifted copies of every column and drop incomplete rows."""
    pieces = [frame]
    pieces.extend(
        frame.shift(lag).add_suffix(f'_t-{lag}') for lag in range(1, lags + 1)
    )
    return pd.concat(pieces, axis=1).dropna().copy()


def _fit_and_score(table, target, excluded=()):
    """Fit a random forest for ``target``; return (model, mse, feature names).

    The split is chronological (no shuffling), so the MSE is measured on the
    most recent rows. ``excluded`` lists extra columns to keep out of the
    features.
    """
    feature_columns = [
        column for column in table.columns
        if column != target and column not in excluded
    ]
    X_train, X_test, y_train, y_test = train_test_split(
        table[feature_columns].values,
        table[target].values,
        test_size=_TEST_SIZE,
        shuffle=False,
    )
    model = RandomForestRegressor()
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    return model, mse, feature_columns


def run_prediction_module():
    """Train one random-forest model per weather column and predict each one.

    Loads the weather rows from the database, adds lag features, fits a
    ``RandomForestRegressor`` for every numeric column plus a label-encoded
    ``BarTrend``, prints each model's hold-out MSE and prediction, and returns
    the predictions.

    Returns:
        dict: column name -> value predicted for the most recent row. The
        ``'BarTrend'`` entry holds the decoded original label.

    Raises:
        ValueError: if fewer than two complete rows remain after adding lags,
            which is too few for a train/test split.
    """
    df = _load_weather_frame()
    df_with_lags = _add_lag_columns(df, _LAGS)
    if len(df_with_lags) < 2:
        raise ValueError(
            "Not enough complete rows after adding lag features to train "
            f"and evaluate a model (got {len(df_with_lags)})."
        )

    # Convert BarTrend to a numeric code. The explicit int64 cast keeps the
    # column through the dtype filter below on platforms where the encoder
    # returns a 32-bit integer.
    le = LabelEncoder()
    df_with_lags[_ENCODED_TARGET] = (
        le.fit_transform(df_with_lags[_RAW_TREND]).astype('int64')
    )

    # Keep numeric columns only.
    numeric = df_with_lags.select_dtypes(include=['float64', 'int64'])

    # NOTE(review): the features include the other columns' readings from the
    # same timestamp, so each model estimates the latest row's own value
    # rather than a future step — confirm whether real forecasting is intended.
    models = {}
    feature_columns = {}

    # One model per original column that survived the numeric filter.
    for target_column in df.columns:
        if target_column not in numeric.columns:
            continue
        # A numeric BarTrend must not see its own encoded copy as a feature.
        excluded = (_ENCODED_TARGET,) if target_column == _RAW_TREND else ()
        model, mse, columns = _fit_and_score(numeric, target_column, excluded)
        models[target_column] = model
        feature_columns[target_column] = columns
        quality = "хорошая" if mse < _GOOD_MSE else "плохая"
        print(f"MSE для {target_column}: {mse} ({quality})")

    # Separate model for the encoded BarTrend; the raw column is excluded
    # for the same leakage reason.
    model_bartrend, mse_bartrend, columns_bartrend = _fit_and_score(
        numeric, _ENCODED_TARGET, excluded=(_RAW_TREND,)
    )
    models[_ENCODED_TARGET] = model_bartrend
    feature_columns[_ENCODED_TARGET] = columns_bartrend
    quality_bartrend = "хорошая" if mse_bartrend < _GOOD_MSE else "плохая"
    print(f"MSE для BarTrend: {mse_bartrend} ({quality_bartrend})")

    # Each model gets the latest row restricted to the exact columns it was
    # trained on. Sharing one row across models would feed them shifted
    # columns without raising any error.
    latest_row = numeric.iloc[[-1]]
    highest_class = len(le.classes_) - 1
    predictions = {}
    for target_column, model in models.items():
        features = latest_row[feature_columns[target_column]].values
        prediction = model.predict(features)[0]
        name = target_column
        if target_column == _ENCODED_TARGET:
            # The regressor returns a float: round to the nearest class index
            # and clamp it before decoding back to the original label.
            class_index = max(0, min(int(round(prediction)), highest_class))
            prediction = le.inverse_transform([class_index])[0]
            name = _RAW_TREND
        predictions[name] = prediction
        print(f"Предсказание для {name}: {prediction}")

    # Dictionary of predicted values keyed by column name.
    return predictions