EvaluationEfficiencyOptimiz.../davisAPI/prediction.py

102 lines
4.0 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sqlalchemy import create_engine
def run_prediction_module(db_url=None):
    """Train per-column random forests on recent weather rows and predict from the latest row.

    Reads rows from the ``weather_data`` table, appends copies of every column
    shifted by 1..3 rows as lag features, fits one ``RandomForestRegressor``
    per numeric target column (plus one for the label-encoded ``BarTrend``),
    prints the hold-out MSE of each model, and finally predicts every target
    from the most recent row of that model's own feature matrix.

    Args:
        db_url: Optional SQLAlchemy connection URL. When ``None`` the
            original hard-coded URL is used, so existing callers are
            unaffected.

    Returns:
        dict mapping column name to predicted value. The ``BarTrend`` entry
        holds the decoded original label rather than the encoded number.

    Raises:
        ValueError: If fewer than two complete rows remain after building
            the lag features (not enough data for a train/test split).
    """
    if db_url is None:
        # NOTE(review): credentials are embedded in source; prefer passing
        # db_url from configuration or the environment.
        db_url = 'mysql+pymysql://wind:wind@193.124.203.110:3306/wind_towers'
    engine = create_engine(db_url)
    query = """
    SELECT BarTrend, CRC, DateStamp, DewPoint, HeatIndex, ETDay, HumIn, HumOut,
    Pressure, RainDay, RainMonth, RainRate, RainStorm, RainYear,
    TempIn, TempOut, WindDir, WindSpeed, WindSpeed10Min
    FROM weather_data
    WHERE DateStamp >= '2024-10-14 21:00:00' - INTERVAL 36 HOUR;
    """
    try:
        df = pd.read_sql(query, engine)
    finally:
        # Release the connection pool even if the query fails.
        engine.dispose()

    df['DateStamp'] = pd.to_datetime(df['DateStamp'])
    df.set_index('DateStamp', inplace=True)
    df.sort_index(inplace=True)

    # Build lag features: every column shifted by 1..lags rows, suffixed "_t-<lag>".
    lags = 3
    shifted_dfs = [df]
    for lag in range(1, lags + 1):
        shifted_dfs.append(df.shift(lag).add_suffix(f'_t-{lag}'))
    # dropna() removes the leading rows whose lagged values do not exist
    # (and any row with a missing value in any column).
    df_with_lags = pd.concat(shifted_dfs, axis=1).dropna()

    if len(df_with_lags) < 2:
        raise ValueError(
            "Not enough complete rows to train: need at least 2 after "
            f"building lag features, got {len(df_with_lags)}"
        )

    # Convert BarTrend to a numeric representation.
    le = LabelEncoder()
    df_with_lags['BarTrend_encoded'] = le.fit_transform(df_with_lags['BarTrend'])
    # Keep numeric columns only.
    df_with_lags = df_with_lags.select_dtypes(include=['float64', 'int64'])

    # Trained model and the matching latest feature row, keyed by target column.
    models = {}
    last_rows = {}

    # Train one model per original column that survived the numeric filter.
    for target_column in df.columns:
        if target_column not in df_with_lags.columns:
            continue
        model, mse, last_row = _fit_and_score(df_with_lags, target_column)
        models[target_column] = model
        last_rows[target_column] = last_row
        quality = "хорошая" if mse < 1.0 else "плохая"
        print(f"MSE для {target_column}: {mse} ({quality})")

    # Train the model for BarTrend_encoded separately (it is not a column of df).
    model_bartrend, mse_bartrend, last_row_bartrend = _fit_and_score(
        df_with_lags, 'BarTrend_encoded'
    )
    models['BarTrend_encoded'] = model_bartrend
    last_rows['BarTrend_encoded'] = last_row_bartrend
    quality_bartrend = "хорошая" if mse_bartrend < 1.0 else "плохая"
    print(f"MSE для BarTrend: {mse_bartrend} ({quality_bartrend})")

    predictions = {}
    for target_column, model in models.items():
        # Each model must see the feature layout it was trained on, i.e. the
        # frame with *its own* target column removed.
        prediction = model.predict(last_rows[target_column])[0]
        if target_column == 'BarTrend_encoded':
            # Round to the nearest class index (truncation would bias toward
            # the lower class) and clamp to the range of known labels.
            class_index = int(round(prediction))
            class_index = min(max(class_index, 0), len(le.classes_) - 1)
            prediction = le.inverse_transform([class_index])[0]
            predictions['BarTrend'] = prediction
            print(f"Предсказание для BarTrend: {prediction}")
        else:
            predictions[target_column] = prediction
            print(f"Предсказание для {target_column}: {prediction}")
    return predictions  # Dict of predicted values keyed by column name.


def _fit_and_score(frame, target_column):
    """Fit a random forest for one target column and score it on the tail of the data.

    Args:
        frame: Numeric DataFrame holding the target and all feature columns.
        target_column: Name of the column to predict; every other column of
            ``frame`` is used as a feature.

    Returns:
        Tuple ``(model, mse, last_row)`` where ``model`` is the fitted
        regressor, ``mse`` is the mean squared error on the hold-out split,
        and ``last_row`` is the final feature row reshaped to ``(1, n_features)``.
    """
    features = frame.drop(columns=[target_column]).values
    target = frame[target_column].values
    # shuffle=False keeps row order, so the last 20% of rows form the test set.
    X_train, X_test, y_train, y_test = train_test_split(
        features, target, test_size=0.2, shuffle=False
    )
    model = RandomForestRegressor()
    model.fit(X_train, y_train)
    mse = mean_squared_error(y_test, model.predict(X_test))
    return model, mse, features[-1].reshape(1, -1)