This commit is contained in:
VictoriaPresnyakova 2024-11-11 20:13:53 +04:00
parent 8419a3a28e
commit 9de6ce68ba
6 changed files with 136058 additions and 136089 deletions

View File

@@ -21,11 +21,7 @@ scaler = MinMaxScaler(feature_range=(0, 1))
# Load the data
column_names = ['product_url', 'price', 'datetime']
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names)
df3 = pd.read_csv('price_history.csv', names=column_names)
df = pd.concat([df1, df2, df3])
df = pd.read_csv('parsed_data_public_price_history_all.csv')
# Convert the 'datetime' column to the datetime dtype
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
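# Sketch (an assumption, not part of this commit): the combined file read above
# could be produced once from the three original sources removed here, e.g.:
df_all = pd.concat([df1, df2, df3], ignore_index=True)
df_all.to_csv('parsed_data_public_price_history_all.csv', index=False)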

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -25,23 +25,12 @@ import seaborn as sns
column_names = ['product_url', 'price', 'datetime']
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names)
df3 = pd.read_csv('price_history.csv', names=column_names)
df = pd.concat([df1, df2, df3])
df = pd.read_csv('parsed_data_public_price_history_all.csv')
# Convert the 'datetime' column to the datetime dtype
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)
df.head()
df.describe()
df.info()
# Number of distinct products in the dataset
len(df.product_url.unique())
# Quantile bounds used to trim price outliers
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
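# Sketch of the elided filter (an assumption; the actual line sits outside this
# hunk): keep only prices between the 0.55 and 0.75 quantiles to trim outliers.
df = df[(df['price'] >= q_low) & (df['price'] <= q_hi)]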
@@ -55,35 +44,21 @@ df_hourly_avg = df[['price', 'datetime']]
# Round timestamps down to the hour
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
df_hourly_avg.head()
# Group by hour and compute the mean price
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
# Descriptive statistics of the hourly averages
df_hourly_avg.describe()
# Preview the first rows
df_hourly_avg.head()
df_hourly_avg
df_hourly_avg.set_index('datetime', inplace=True)
df_hourly_avg
# Keep only the price values as a NumPy array
df_hourly_avg_arr = df_hourly_avg.values
# 80/20 train/test split
split = int(0.8*len(df_hourly_avg_arr))
train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]
train.shape, test.shape
# Normalise the data to the range [0, 1] to improve learning and convergence of the model
# Feature scaling and fitting scaled data
scaler = MinMaxScaler(feature_range=(0, 1))
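# Sketch of the elided steps (an assumption; they sit outside this hunk):
# scale the training split and build sliding windows of the last n prices
# as features, with the following price as the target.
scaled_train = scaler.fit_transform(train)
n = 60  # lookback window length; the actual value is defined outside this hunk
X_train, y_train = [], []
for i in range(n, len(scaled_train)):
    X_train.append(scaled_train[i - n:i, 0])
    y_train.append(scaled_train[i, 0])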
@@ -103,7 +78,6 @@ X_train, y_train = np.array(X_train), np.array(y_train)
# Reshape the data as LSTM expects 3-D data (samples, time steps, features)
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
X_train.shape
# create and fit the LSTM network
model = Sequential()
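# Sketch of the elided layer definitions (an assumption; only Sequential() and
# fit() are visible in this hunk), assuming `from keras.layers import LSTM, Dense`
# earlier in the notebook:
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')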
@@ -116,12 +90,12 @@ model.fit(X_train, y_train, epochs=1000, batch_size=1, verbose=2)
inputs = df_hourly_avg_arr[len(df_hourly_avg_arr) - len(test) - n:]
inputs = inputs.reshape(-1,1)
inputs = scaler.transform(inputs)
# Create test data set
X_test = []
for i in range(n,inputs.shape[0]):
X_test.append(inputs[i-n:i,0])
for i in range(n, inputs.shape[0]):
X_test.append(inputs[i-n:i, 0])
# Convert data to numpy array
X_test = np.array(X_test)
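# Sketch of the elided steps (an assumption; they sit outside this hunk):
# reshape the test windows to the 3-D (samples, time steps, features) shape
# the LSTM expects, then predict scaled prices.
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predict_price = model.predict(X_test)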
@@ -134,7 +108,6 @@ predict_price = scaler.inverse_transform(predict_price)
print(X_test.shape)
# Root mean squared error on the original price scale
rmse = np.sqrt(np.mean(np.power((test - predict_price), 2)))
rmse
# Plot predicted vs actual values
train = df_hourly_avg[:split]
@@ -149,6 +122,6 @@ plt.plot(test['Predictions'], label='Predicted')
plt.title("AZN Close Price - LSTM", color='black', fontsize=20)
plt.xlabel('Date', color='black', fontsize=15)
plt.ylabel('Price', color='black', fontsize=15)
plt.legend();
plt.legend()
model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
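# Usage sketch (an assumption, not part of this commit): the saved model can be
# reloaded later for inference without retraining.
from keras.models import load_model
reloaded_model = load_model("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
reloaded_model.predict(X_test)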

File diff suppressed because it is too large