done

2024-11-11 20:13:53 +04:00 · 2024-11-11 20:13:53 +04:00 · 9de6ce68ba
commit 9de6ce68ba
parent 8419a3a28e
6 changed files with 136058 additions and 136089 deletions
--- a/analysis/app.py
+++ b/analysis/app.py
@ -21,11 +21,7 @@ scaler = MinMaxScaler(feature_range=(0, 1))
 # Загружаем данные
 column_names = ['product_url', 'price', 'datetime']

-df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
-df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names, )
-df3 = pd.read_csv('price_history.csv', names=column_names,)
-
-df = pd.concat([df1, df2, df3])
+df = pd.read_csv('parsed_data_public_price_history_all.csv')

 # Преобразуем колонку 'datetime' в тип данных datetime
 df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
--- a/analysis/parsed_data_public_price_history.csv
+++ b/analysis/parsed_data_public_price_history.csv
--- a/analysis/parsed_data_public_price_history_1.csv
+++ b/analysis/parsed_data_public_price_history_1.csv
--- a/analysis/parsed_data_public_price_history_all.csv
+++ b/analysis/parsed_data_public_price_history_all.csv
--- a/analysis/platforms_train_v2.py
+++ b/analysis/platforms_train_v2.py
@ -25,23 +25,12 @@ import seaborn as sns

 column_names = ['product_url', 'price', 'datetime']

-df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
-df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names,)
-df3 = pd.read_csv('price_history.csv', names=column_names,)
-
-df = pd.concat([df1, df2, df3])
+df = pd.read_csv('parsed_data_public_price_history_all.csv')

 # Преобразуем колонку 'datetime' в тип данных datetime
 df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
 df['price'] = df['price'].astype(float)

-df.head()
-
-df.describe()
-
-df.info()
-
-len(df.product_url.unique())

 q_low = df['price'].quantile(0.55)
 q_hi = df['price'].quantile(0.75)
@ -55,35 +44,21 @@ df_hourly_avg = df[['price', 'datetime']]
 # Округляем время до дня
 df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')

-df_hourly_avg.head()

 # Группируем по каждому часу и вычисляем среднее значение цены
 df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()

-# Выводим описательную статистику
-df_hourly_avg.describe()
-
-# Просмотр первых строк
-df_hourly_avg.head()
-
-df_hourly_avg
-
 df_hourly_avg.set_index('datetime', inplace=True)

-df_hourly_avg
-
 #only values
 df_hourly_avg_arr = df_hourly_avg.values

 #Split
 split = int(0.8*len(df_hourly_avg_arr))

-
 train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]


-train.shape, test.shape
-
 #Normalise data by scaling to a range of 0 to 1 to improve learning and convergence of model.
 # Feature scaling and fitting scaled data
 scaler = MinMaxScaler(feature_range=(0, 1))
@ -103,7 +78,6 @@ X_train, y_train = np.array(X_train), np.array(y_train)
 # Reshape the data as LSTM expects 3-D data (samples, time steps, features)

 X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
-X_train.shape

 # create and fit the LSTM network
 model = Sequential()
@ -134,7 +108,6 @@ predict_price = scaler.inverse_transform(predict_price)
 print(X_test.shape)

 rmse = np.sqrt(np.mean(np.power((test - predict_price),2)))
-rmse

 # Plot predicted vs actual values
 train = df_hourly_avg[:split]
@ -149,6 +122,6 @@ plt.plot(test['Predictions'], label='Predicted')
 plt.title("AZN Close Price - LSTM", color = 'black', fontsize = 20)
 plt.xlabel('Date', color = 'black', fontsize = 15)
 plt.ylabel('Price', color = 'black', fontsize = 15)
-plt.legend();
+plt.legend()

 model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
--- a/analysis/price_history.csv
+++ b/analysis/price_history.csv