done
This commit is contained in:
parent
8419a3a28e
commit
9de6ce68ba
@ -21,11 +21,7 @@ scaler = MinMaxScaler(feature_range=(0, 1))
|
||||
# Загружаем данные
|
||||
column_names = ['product_url', 'price', 'datetime']
|
||||
|
||||
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
|
||||
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names, )
|
||||
df3 = pd.read_csv('price_history.csv', names=column_names,)
|
||||
|
||||
df = pd.concat([df1, df2, df3])
|
||||
df = pd.read_csv('parsed_data_public_price_history_all.csv')
|
||||
|
||||
# Преобразуем колонку 'datetime' в тип данных datetime
|
||||
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
|
||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
136052
analysis/parsed_data_public_price_history_all.csv
Normal file
136052
analysis/parsed_data_public_price_history_all.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -25,23 +25,12 @@ import seaborn as sns
|
||||
|
||||
column_names = ['product_url', 'price', 'datetime']
|
||||
|
||||
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
|
||||
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names,)
|
||||
df3 = pd.read_csv('price_history.csv', names=column_names,)
|
||||
|
||||
df = pd.concat([df1, df2, df3])
|
||||
df = pd.read_csv('parsed_data_public_price_history_all.csv')
|
||||
|
||||
# Преобразуем колонку 'datetime' в тип данных datetime
|
||||
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
|
||||
df['price'] = df['price'].astype(float)
|
||||
|
||||
df.head()
|
||||
|
||||
df.describe()
|
||||
|
||||
df.info()
|
||||
|
||||
len(df.product_url.unique())
|
||||
|
||||
q_low = df['price'].quantile(0.55)
|
||||
q_hi = df['price'].quantile(0.75)
|
||||
@ -55,35 +44,21 @@ df_hourly_avg = df[['price', 'datetime']]
|
||||
# Round timestamps down to the hour
|
||||
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
|
||||
|
||||
df_hourly_avg.head()
|
||||
|
||||
# Группируем по каждому часу и вычисляем среднее значение цены
|
||||
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
|
||||
|
||||
# Выводим описательную статистику
|
||||
df_hourly_avg.describe()
|
||||
|
||||
# Просмотр первых строк
|
||||
df_hourly_avg.head()
|
||||
|
||||
df_hourly_avg
|
||||
|
||||
df_hourly_avg.set_index('datetime', inplace=True)
|
||||
|
||||
df_hourly_avg
|
||||
|
||||
#only values
|
||||
df_hourly_avg_arr = df_hourly_avg.values
|
||||
|
||||
#Split
|
||||
split = int(0.8*len(df_hourly_avg_arr))
|
||||
|
||||
|
||||
train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]
|
||||
|
||||
|
||||
train.shape, test.shape
|
||||
|
||||
#Normalise data by scaling to a range of 0 to 1 to improve learning and convergence of model.
|
||||
# Feature scaling and fitting scaled data
|
||||
scaler = MinMaxScaler(feature_range=(0, 1))
|
||||
@ -103,7 +78,6 @@ X_train, y_train = np.array(X_train), np.array(y_train)
|
||||
# Reshape the data as LSTM expects 3-D data (samples, time steps, features)
|
||||
|
||||
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
|
||||
X_train.shape
|
||||
|
||||
# create and fit the LSTM network
|
||||
model = Sequential()
|
||||
@ -116,12 +90,12 @@ model.fit(X_train, y_train, epochs=1000, batch_size=1, verbose=2)
|
||||
|
||||
inputs = df_hourly_avg_arr [len(df_hourly_avg_arr) - len(test) - n:]
|
||||
inputs = inputs.reshape(-1,1)
|
||||
inputs = scaler.transform(inputs)
|
||||
inputs = scaler.transform(inputs)
|
||||
|
||||
# Create test data set
|
||||
X_test = []
|
||||
for i in range(n,inputs.shape[0]):
|
||||
X_test.append(inputs[i-n:i,0])
|
||||
for i in range(n, inputs.shape[0]):
|
||||
X_test.append(inputs[i-n:i, 0])
|
||||
|
||||
# Convert data to numpy array
|
||||
X_test = np.array(X_test)
|
||||
@ -134,7 +108,6 @@ predict_price = scaler.inverse_transform(predict_price)
|
||||
print(X_test.shape)
|
||||
|
||||
rmse = np.sqrt(np.mean(np.power((test - predict_price),2)))
|
||||
rmse
|
||||
|
||||
# Plot predicted vs actual values
|
||||
train = df_hourly_avg[:split]
|
||||
@ -149,6 +122,6 @@ plt.plot(test['Predictions'], label='Predicted')
|
||||
plt.title("AZN Close Price - LSTM", color = 'black', fontsize = 20)
|
||||
plt.xlabel('Date', color = 'black', fontsize = 15)
|
||||
plt.ylabel('Price', color = 'black', fontsize = 15)
|
||||
plt.legend();
|
||||
plt.legend()
|
||||
|
||||
model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user