Compare commits
6 Commits
40c9520274
...
1663477b19
Author | SHA1 | Date | |
---|---|---|---|
1663477b19 | |||
56dfeb5d60 | |||
9de6ce68ba | |||
8419a3a28e | |||
203945b3e4 | |||
01d05083dd |
12
analysis/README.md
Normal file
12
analysis/README.md
Normal file
@ -0,0 +1,12 @@
# Price_Pulse

Price-forecasting service (Flask + LSTM).

## Setup

Create a virtual environment:

    python -m venv venv

Activate it (Windows):

    .\venv\Scripts\activate

Install the dependencies:

    pip install -r requirements.txt

Run the application:

    python app.py

## Endpoints

- `/predict_price` — JSON forecast of prices for the next 7 days
- `/plot` — PNG chart of actual vs. predicted prices
168
analysis/app.py
Normal file
168
analysis/app.py
Normal file
@ -0,0 +1,168 @@
|
|||||||
|
import numpy as np
|
||||||
|
import pandas as pd
|
||||||
|
from datetime import timedelta
|
||||||
|
from tensorflow.keras.models import load_model
|
||||||
|
from sklearn.preprocessing import MinMaxScaler
|
||||||
|
import matplotlib.pyplot as plt
|
||||||
|
import io
|
||||||
|
import joblib
|
||||||
|
from flask import Flask, request, jsonify, Blueprint, send_file
|
||||||
|
from flasgger import Swagger
|
||||||
|
|
||||||
|
|
||||||
|
# --- Flask application setup --------------------------------------------------
app = Flask(__name__)
api = Blueprint('api', __name__)
Swagger(app)

# Load the trained LSTM model and create the normalisation scaler.
# NOTE(review): the scaler is created unfitted here and re-fitted on every
# call to prepare_data() — confirm this matches how the model was trained.
model = load_model("my_model_1H.keras")
scaler = MinMaxScaler(feature_range=(0, 1))

# Load the raw price history.
column_names = ['product_url', 'price', 'datetime']

df = pd.read_csv('parsed_data_public_price_history_all.csv')

# Normalise column types: parse mixed-format timestamps as UTC, force floats.
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)

# Drop price outliers with an IQR-style fence over the 0.55-0.75 quantile band.
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
q_range = q_hi - q_low
df = df[(df['price'] < q_hi + 1.5 * q_range) & (df['price'] > q_low - 1.5 * q_range)]

# Aggregate to hourly average prices, indexed by hour.
# .copy() prevents a SettingWithCopyWarning: the original code assigned into a
# slice view of df, which pandas may or may not propagate.
df_hourly_avg = df[['price', 'datetime']].copy()
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()

df_hourly_avg.set_index('datetime', inplace=True)
|
# Подготовка данных для прогнозирования
def prepare_data(df, days_forward=7):
    """Roll the LSTM forward ``days_forward`` steps and return the forecast.

    Parameters
    ----------
    df : pandas.DataFrame
        Price series with a datetime index and a 'price' column.
    days_forward : int
        Number of future steps to predict (default 7).

    Returns
    -------
    pandas.DataFrame with columns 'date' and 'predicted_price'.
    """
    last_date = df.index[-1]
    scaled_data = scaler.fit_transform(df[['price']].values)

    n = 3  # window length: number of time steps fed to the model

    # Only the most recent window seeds the recursive forecast. The original
    # code built every historical window into X_test and then used only
    # X_test[-1] — O(len) of wasted work; the last n values are equivalent.
    current_input = scaled_data[-n:, 0].reshape(n, 1)

    # Predict days_forward steps ahead, feeding each prediction back in.
    predictions = []
    for _ in range(days_forward):
        pred = model.predict(np.expand_dims(current_input, axis=0))
        predictions.append(pred[0, 0])

        # Slide the window: drop the oldest value, append the new prediction.
        current_input = np.append(current_input[1:], pred).reshape(n, 1)

    # Map predictions back to the original price scale.
    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()

    # NOTE(review): the series is hourly-averaged (model "my_model_1H") but
    # future dates advance by whole days — confirm the intended step size.
    future_dates = [last_date + timedelta(days=i) for i in range(1, days_forward + 1)]
    forecast_df = pd.DataFrame({'date': future_dates, 'predicted_price': predictions})
    return forecast_df
|
||||||
|
|
||||||
|
|
||||||
|
# Построение графика
|
||||||
|
# Построение графика
def plot_price(forecast_df):
    """Render actual vs. predicted prices and return the PNG as a BytesIO."""
    plt.figure(figsize=(14, 7))
    plt.plot(df_hourly_avg.index, df_hourly_avg['price'],
             label='Actual Price', color='blue')
    plt.plot(forecast_df['date'], forecast_df['predicted_price'],
             label='Predicted Price', color='orange')
    plt.title("Price Prediction")
    plt.xlabel("Date")
    plt.ylabel("Price")
    plt.legend()
    plt.grid(True)

    # Serialise the current figure into an in-memory PNG buffer.
    buffer = io.BytesIO()
    plt.savefig(buffer, format='png')
    buffer.seek(0)
    plt.close()
    return buffer
|
||||||
|
|
||||||
|
|
||||||
|
@api.route('/predict_price', methods=['GET'])
def predict_price():
    """
    Предсказание цены на 7 дней вперед
    ---
    responses:
      200:
        description: JSON с предсказаниями цен и днем минимальной цены
        schema:
          type: object
          properties:
            forecast:
              type: array
              items:
                type: object
                properties:
                  date:
                    type: string
                    format: date
                  predicted_price:
                    type: number
            min_price_day:
              type: object
              properties:
                date:
                  type: string
                  format: date
                price:
                  type: number
    """
    forecast_df = prepare_data(df_hourly_avg)

    # Convert to plain Python records; numpy scalars are not JSON
    # serialisable, so cast each prediction to float.
    records = forecast_df.to_dict(orient='records')
    for entry in records:
        entry['predicted_price'] = float(entry['predicted_price'])

    # Row with the lowest predicted price over the horizon.
    cheapest = forecast_df.loc[forecast_df['predicted_price'].idxmin()]
    cheapest_price = float(cheapest['predicted_price'])

    return jsonify({
        'forecast': records,
        'min_price_day': {
            'date': cheapest['date'].strftime('%Y-%m-%d'),
            'price': cheapest_price
        }
    })
|
||||||
|
|
||||||
|
|
||||||
|
# Эндпоинт для получения графика
|
||||||
|
# Эндпоинт для получения графика
@api.route('/plot', methods=['GET'])
def plot():
    """
    Получение графика предсказанных и фактических цен
    ---
    responses:
      200:
        description: Возвращает график предсказанных и фактических цен в формате PNG
        content:
          image/png:
            schema:
              type: string
              format: binary
    """
    # Recompute the forecast, render it, and stream the PNG back.
    image = plot_price(prepare_data(df_hourly_avg))
    return send_file(image, mimetype='image/png')
|
||||||
|
|
||||||
|
# Mount all API routes under the /api prefix.
app.register_blueprint(api, url_prefix='/api')

if __name__ == "__main__":
    # Development server only; use a WSGI server in production.
    app.run(debug=True)
|
9
analysis/docker-compose.yml
Normal file
9
analysis/docker-compose.yml
Normal file
# Single-node ClickHouse for local development.
version: '3'
services:
  clickhouse:
    image: yandex/clickhouse-server:latest
    ports:
      - "8123:8123"   # HTTP interface
      - "9000:9000"   # native TCP interface
    volumes:
      - ./clickhouse-data:/var/lib/clickhouse
BIN
analysis/my_model_1H.keras
Normal file
BIN
analysis/my_model_1H.keras
Normal file
Binary file not shown.
136052
analysis/parsed_data_public_price_history_all.csv
Normal file
136052
analysis/parsed_data_public_price_history_all.csv
Normal file
File diff suppressed because it is too large
Load Diff
127
analysis/platforms_train_v2.py
Normal file
127
analysis/platforms_train_v2.py
Normal file
# -*- coding: utf-8 -*-
"""Platforms_train_v2.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1yD7QxO8rUrHXvYLn_z5eofUKenJqXZoU

Trains an LSTM on hourly-averaged price history and saves the model.
"""

import os
import numpy as np
import pandas as pd
from datetime import datetime
# Duplicate imports removed: RandomForestRegressor and mean_squared_error were
# each imported twice in the original.
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
import matplotlib.pyplot as plt
import seaborn as sns

column_names = ['product_url', 'price', 'datetime']

df = pd.read_csv('parsed_data_public_price_history_all.csv')

# Parse mixed-format timestamps as UTC and force float prices.
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)

# Outlier filter: IQR-style fence over the 0.55-0.75 quantile band.
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
q_range = q_hi - q_low
df = df[(df['price'] < q_hi + 1.5 * q_range) & (df['price'] > q_low - 1.5 * q_range)]
df.describe()

# Keep only 'price' and 'datetime'. .copy() prevents SettingWithCopyWarning
# on the column assignment below (the original mutated a slice view).
df_hourly_avg = df[['price', 'datetime']].copy()

# Floor timestamps to the hour. (The original comment said "day"; the code
# floors to '1H' — the comment was wrong, not the code.)
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')

# Average price per hour, indexed by hour.
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()

df_hourly_avg.set_index('datetime', inplace=True)

# Raw numpy values only.
df_hourly_avg_arr = df_hourly_avg.values

# 80/20 chronological train/test split.
split = int(0.8*len(df_hourly_avg_arr))

train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]

# Normalise to [0, 1] to improve learning and convergence of the model.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df_hourly_avg_arr)

# Build supervised windows: n past steps -> 1 next value.
n = 3
X_train, y_train = [], []
for i in range(n, len(train)):
    X_train.append(scaled_data[i - n:i, 0])
    y_train.append(scaled_data[i, 0])

# Convert to numpy arrays for training the LSTM model.
X_train, y_train = np.array(X_train), np.array(y_train)

# Reshape: LSTM expects 3-D input (samples, time steps, features).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Create and fit the LSTM network.
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))

model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, epochs=1000, batch_size=1, verbose=2)

# Test inputs include n leading points so the first test window is full.
inputs = df_hourly_avg_arr[len(df_hourly_avg_arr) - len(test) - n:]
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

# Create the test data set.
X_test = []
for i in range(n, inputs.shape[0]):
    X_test.append(inputs[i - n:i, 0])

X_test = np.array(X_test)

# Reshape data to be 3-D.
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predict_price = model.predict(X_test)
predict_price = scaler.inverse_transform(predict_price)

print(X_test.shape)

rmse = np.sqrt(np.mean(np.power((test - predict_price), 2)))

# Plot predicted vs actual values.
train = df_hourly_avg[:split]
# .copy() so adding the Predictions column does not mutate a slice view.
test = df_hourly_avg[split:].copy()
test['Predictions'] = predict_price

plt.figure(figsize=(20, 10))
sns.set_style("whitegrid")
plt.plot(train['price'], label='Training')
plt.plot(test['price'], label='Actual')
plt.plot(test['Predictions'], label='Predicted')
plt.title("AZN Close Price - LSTM", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Price', color = 'black', fontsize = 15)
plt.legend()

# NOTE(review): hard-coded Colab Drive path; saving fails outside Colab.
model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
|
BIN
analysis/requirements.txt
Normal file
BIN
analysis/requirements.txt
Normal file
Binary file not shown.
Loading…
Reference in New Issue
Block a user