Compare commits
6 Commits: 40c9520274 ... 1663477b19
| Author | SHA1 | Date |
|---|---|---|
| | 1663477b19 | |
| | 56dfeb5d60 | |
| | 9de6ce68ba | |
| | 8419a3a28e | |
| | 203945b3e4 | |
| | 01d05083dd | |
analysis/README.md (new file, 12 lines)
@@ -0,0 +1,12 @@
```markdown
# Price_Pulse

python -m venv venv

.\venv\Scripts\activate

pip install -r requirements.txt

python app.py

/predict_price
/plot
```
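Since app.py registers its blueprint with url_prefix='/api', the endpoints listed above are actually served at /api/predict_price and /api/plot. A minimal client sketch, assuming the Flask dev server is running locally on its default port 5000 and that the requests package is available (neither is confirmed anywhere visible here, since requirements.txt is stored as a binary file):

```python
# Hypothetical client calls against the running Flask app; the host,
# port, and /api prefix follow app.py's defaults, not a documented API.
import requests

resp = requests.get("http://localhost:5000/api/predict_price")
print(resp.json())  # {'forecast': [...], 'min_price_day': {...}}

png = requests.get("http://localhost:5000/api/plot")
with open("forecast.png", "wb") as f:
    f.write(png.content)  # saves the PNG returned by /api/plot
```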
analysis/app.py (new file, 168 lines)
@@ -0,0 +1,168 @@
```python
import numpy as np
import pandas as pd
from datetime import timedelta
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import io
import joblib
from flask import Flask, request, jsonify, Blueprint, send_file
from flasgger import Swagger


app = Flask(__name__)
api = Blueprint('api', __name__)
Swagger(app)

# Load the model and scaler
model = load_model("my_model_1H.keras")
scaler = MinMaxScaler(feature_range=(0, 1))

# Load the data
column_names = ['product_url', 'price', 'datetime']

df = pd.read_csv('parsed_data_public_price_history_all.csv')

# Convert the 'datetime' column to datetime dtype
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)

# Filter out price outliers with an IQR-style rule on the 0.55-0.75 quantile range
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
q_range = q_hi - q_low
df = df[(df['price'] < q_hi + 1.5 * q_range) & (df['price'] > q_low - 1.5 * q_range)]

# Keep only 'price' and 'datetime', floor timestamps to the hour,
# and average prices within each hour
df_hourly_avg = df[['price', 'datetime']].copy()
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()

df_hourly_avg.set_index('datetime', inplace=True)


# Prepare data for forecasting
def prepare_data(df, days_forward=7):
    last_date = df.index[-1]
    scaled_data = scaler.fit_transform(df[['price']].values)
    n = 3  # number of time steps (adjustable)
    X_test = []

    # Build X_test from sliding windows of the last n values
    for i in range(n, len(scaled_data)):
        X_test.append(scaled_data[i - n:i, 0])
    X_test = np.array(X_test)
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # Predict 7 days ahead, one step at a time
    predictions = []
    current_input = X_test[-1]  # initial state for the forecast

    for _ in range(days_forward):
        pred = model.predict(np.expand_dims(current_input, axis=0))
        predictions.append(pred[0, 0])

        # Update current_input: append the new prediction, drop the oldest value
        current_input = np.append(current_input[1:], pred).reshape(n, 1)

    # Scale the predictions back to the original price range
    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()

    future_dates = [last_date + timedelta(days=i) for i in range(1, days_forward + 1)]
    forecast_df = pd.DataFrame({'date': future_dates, 'predicted_price': predictions})
    return forecast_df


# Build the plot
def plot_price(forecast_df):
    plt.figure(figsize=(14, 7))
    plt.plot(df_hourly_avg.index, df_hourly_avg['price'], label='Actual Price', color='blue')
    plt.plot(forecast_df['date'], forecast_df['predicted_price'], label='Predicted Price', color='orange')
    plt.title("Price Prediction")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.grid(True)

    img = io.BytesIO()
    plt.savefig(img, format='png')
    img.seek(0)
    plt.close()
    return img


@api.route('/predict_price', methods=['GET'])
def predict_price():
    """
    Predict the price 7 days ahead
    ---
    responses:
      200:
        description: JSON with price predictions and the day of the minimum price
        schema:
          type: object
          properties:
            forecast:
              type: array
              items:
                type: object
                properties:
                  date:
                    type: string
                    format: date
                  predicted_price:
                    type: number
            min_price_day:
              type: object
              properties:
                date:
                  type: string
                  format: date
                price:
                  type: number
    """
    forecast_df = prepare_data(df_hourly_avg)
    forecast_list = forecast_df.to_dict(orient='records')  # convert to a list of dicts

    # Cast 'predicted_price' values to plain float for JSON serialization
    for record in forecast_list:
        record['predicted_price'] = float(record['predicted_price'])

    # Find the day with the minimum predicted price
    min_price_day = forecast_df.loc[forecast_df['predicted_price'].idxmin()]

    # Cast the minimum price to float
    min_price_day_price = float(min_price_day['predicted_price'])

    # Build the response
    return jsonify({
        'forecast': forecast_list,
        'min_price_day': {
            'date': min_price_day['date'].strftime('%Y-%m-%d'),
            'price': min_price_day_price
        }
    })


# Endpoint for retrieving the plot
@api.route('/plot', methods=['GET'])
def plot():
    """
    Get a plot of predicted and actual prices
    ---
    responses:
      200:
        description: Returns a plot of predicted and actual prices in PNG format
        content:
          image/png:
            schema:
              type: string
              format: binary
    """
    forecast_df = prepare_data(df_hourly_avg)
    img = plot_price(forecast_df)
    return send_file(img, mimetype='image/png')


app.register_blueprint(api, url_prefix='/api')

if __name__ == "__main__":
    app.run(debug=True)
```
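The heart of prepare_data is the recursive multi-step forecast: each new prediction is appended to the input window and the oldest value dropped, so every step after the first feeds on model output rather than ground truth. A toy sketch of just that loop, with a stand-in for the LSTM (the function and the values below are hypothetical, for illustration only):

```python
import numpy as np

n = 3
window = np.array([[0.2], [0.4], [0.6]])  # last n scaled prices (made up)

def fake_predict(x):
    # Stand-in for model.predict: returns the window mean, shaped (1, 1)
    return np.array([[x.mean()]])

preds = []
for _ in range(7):  # 7 steps ahead, like days_forward
    pred = fake_predict(window)
    preds.append(pred[0, 0])
    # Drop the oldest value, append the new prediction
    window = np.append(window[1:], pred).reshape(n, 1)

print(np.round(preds, 3))  # each step is built on previous predictions
```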
analysis/docker-compose.yml (new file, 9 lines)
@@ -0,0 +1,9 @@
```yaml
version: '3'
services:
  clickhouse:
    image: yandex/clickhouse-server:latest
    ports:
      - "8123:8123"
      - "9000:9000"
    volumes:
      - ./clickhouse-data:/var/lib/clickhouse
```
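Nothing in app.py or the training script talks to this ClickHouse instance yet; the compose file only provisions it. For reference, a minimal connectivity sketch against the exposed native port, assuming the clickhouse-driver package (whether this project uses that client library is not visible here):

```python
# Hypothetical smoke test; assumes `pip install clickhouse-driver`
# and the compose service running on localhost.
from clickhouse_driver import Client

client = Client(host='localhost', port=9000)
print(client.execute('SELECT version()'))  # e.g. [('21.x.x.x',)]
```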
analysis/my_model_1H.keras (new file, binary)
Binary file not shown.
analysis/parsed_data_public_price_history_all.csv (new file, 136052 lines)
File diff suppressed because it is too large.
analysis/platforms_train_v2.py (new file, 127 lines)
@@ -0,0 +1,127 @@
```python
# -*- coding: utf-8 -*-
"""Platforms_train_v2.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1yD7QxO8rUrHXvYLn_z5eofUKenJqXZoU
"""

import os
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
import matplotlib.pyplot as plt
import seaborn as sns

column_names = ['product_url', 'price', 'datetime']

df = pd.read_csv('parsed_data_public_price_history_all.csv')

# Convert the 'datetime' column to datetime dtype
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)

# Filter out price outliers with an IQR-style rule on the 0.55-0.75 quantile range
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
q_range = q_hi - q_low
df = df[(df['price'] < q_hi + 1.5 * q_range) & (df['price'] > q_low - 1.5 * q_range)]
df.describe()

# Keep only the 'price' and 'datetime' columns
df_hourly_avg = df[['price', 'datetime']].copy()

# Floor timestamps to the hour
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')

# Group by hour and compute the mean price
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()

df_hourly_avg.set_index('datetime', inplace=True)

# Values only
df_hourly_avg_arr = df_hourly_avg.values

# Train/test split
split = int(0.8 * len(df_hourly_avg_arr))
train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]

# Normalise data by scaling to a range of 0 to 1 to improve learning
# and convergence of the model
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df_hourly_avg_arr)

# Create a data structure with n time steps and 1 output
n = 3
X_train, y_train = [], []
for i in range(n, len(train)):
    X_train.append(scaled_data[i - n:i, 0])
    y_train.append(scaled_data[i, 0])

# Convert X_train and y_train to numpy arrays for training the LSTM model
X_train, y_train = np.array(X_train), np.array(y_train)

# Reshape the data: LSTM expects 3-D input (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Create and fit the LSTM network
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, epochs=1000, batch_size=1, verbose=2)

inputs = df_hourly_avg_arr[len(df_hourly_avg_arr) - len(test) - n:]
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

# Create the test data set
X_test = []
for i in range(n, inputs.shape[0]):
    X_test.append(inputs[i - n:i, 0])

# Convert to a numpy array
X_test = np.array(X_test)

# Reshape to 3-D
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predict_price = model.predict(X_test)
predict_price = scaler.inverse_transform(predict_price)

print(X_test.shape)

rmse = np.sqrt(np.mean(np.power((test - predict_price), 2)))

# Plot predicted vs. actual values
train = df_hourly_avg[:split]
test = df_hourly_avg[split:].copy()
test['Predictions'] = predict_price

plt.figure(figsize=(20, 10))
sns.set_style("whitegrid")
plt.plot(train['price'], label='Training')
plt.plot(test['price'], label='Actual')
plt.plot(test['Predictions'], label='Predicted')
plt.title("AZN Close Price - LSTM", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Price', color='black', fontsize=15)
plt.legend()

model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
```
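The script imports mean_absolute_error and r2_score but never calls them, and rmse is computed but never printed. A small follow-up sketch that reports all three, assuming it runs right after the rmse line, while test still holds the raw hold-out array (it is later reassigned for plotting):

```python
from sklearn.metrics import mean_absolute_error, r2_score

# test: (m, 1) hold-out prices; predict_price: (m, 1) inverse-transformed predictions
mae = mean_absolute_error(test, predict_price)
r2 = r2_score(test, predict_price)
print(f"RMSE: {rmse:.4f}  MAE: {mae:.4f}  R^2: {r2:.4f}")
```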
analysis/requirements.txt (new file, binary)
Binary file not shown.