done
This commit is contained in:
parent
8419a3a28e
commit
9de6ce68ba
@ -21,11 +21,7 @@ scaler = MinMaxScaler(feature_range=(0, 1))
|
|||||||
# Загружаем данные
|
# Загружаем данные
|
||||||
column_names = ['product_url', 'price', 'datetime']
|
column_names = ['product_url', 'price', 'datetime']
|
||||||
|
|
||||||
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
|
df = pd.read_csv('parsed_data_public_price_history_all.csv')
|
||||||
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names, )
|
|
||||||
df3 = pd.read_csv('price_history.csv', names=column_names,)
|
|
||||||
|
|
||||||
df = pd.concat([df1, df2, df3])
|
|
||||||
|
|
||||||
# Преобразуем колонку 'datetime' в тип данных datetime
|
# Преобразуем колонку 'datetime' в тип данных datetime
|
||||||
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
|
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
|
||||||
|
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
136052
analysis/parsed_data_public_price_history_all.csv
Normal file
136052
analysis/parsed_data_public_price_history_all.csv
Normal file
File diff suppressed because it is too large
Load Diff
@ -25,23 +25,12 @@ import seaborn as sns
|
|||||||
|
|
||||||
column_names = ['product_url', 'price', 'datetime']
|
column_names = ['product_url', 'price', 'datetime']
|
||||||
|
|
||||||
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
|
df = pd.read_csv('parsed_data_public_price_history_all.csv')
|
||||||
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names,)
|
|
||||||
df3 = pd.read_csv('price_history.csv', names=column_names,)
|
|
||||||
|
|
||||||
df = pd.concat([df1, df2, df3])
|
|
||||||
|
|
||||||
# Преобразуем колонку 'datetime' в тип данных datetime
|
# Преобразуем колонку 'datetime' в тип данных datetime
|
||||||
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
|
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
|
||||||
df['price'] = df['price'].astype(float)
|
df['price'] = df['price'].astype(float)
|
||||||
|
|
||||||
df.head()
|
|
||||||
|
|
||||||
df.describe()
|
|
||||||
|
|
||||||
df.info()
|
|
||||||
|
|
||||||
len(df.product_url.unique())
|
|
||||||
|
|
||||||
q_low = df['price'].quantile(0.55)
|
q_low = df['price'].quantile(0.55)
|
||||||
q_hi = df['price'].quantile(0.75)
|
q_hi = df['price'].quantile(0.75)
|
||||||
@ -55,35 +44,21 @@ df_hourly_avg = df[['price', 'datetime']]
|
|||||||
# Округляем время до дня
|
# Округляем время до дня
|
||||||
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
|
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
|
||||||
|
|
||||||
df_hourly_avg.head()
|
|
||||||
|
|
||||||
# Группируем по каждому часу и вычисляем среднее значение цены
|
# Группируем по каждому часу и вычисляем среднее значение цены
|
||||||
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
|
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
|
||||||
|
|
||||||
# Выводим описательную статистику
|
|
||||||
df_hourly_avg.describe()
|
|
||||||
|
|
||||||
# Просмотр первых строк
|
|
||||||
df_hourly_avg.head()
|
|
||||||
|
|
||||||
df_hourly_avg
|
|
||||||
|
|
||||||
df_hourly_avg.set_index('datetime', inplace=True)
|
df_hourly_avg.set_index('datetime', inplace=True)
|
||||||
|
|
||||||
df_hourly_avg
|
|
||||||
|
|
||||||
#only values
|
#only values
|
||||||
df_hourly_avg_arr = df_hourly_avg.values
|
df_hourly_avg_arr = df_hourly_avg.values
|
||||||
|
|
||||||
#Split
|
#Split
|
||||||
split = int(0.8*len(df_hourly_avg_arr))
|
split = int(0.8*len(df_hourly_avg_arr))
|
||||||
|
|
||||||
|
|
||||||
train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]
|
train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]
|
||||||
|
|
||||||
|
|
||||||
train.shape, test.shape
|
|
||||||
|
|
||||||
#Normalise data by scaling to a range of 0 to 1 to improve learning and convergence of model.
|
#Normalise data by scaling to a range of 0 to 1 to improve learning and convergence of model.
|
||||||
# Feature scaling and fitting scaled data
|
# Feature scaling and fitting scaled data
|
||||||
scaler = MinMaxScaler(feature_range=(0, 1))
|
scaler = MinMaxScaler(feature_range=(0, 1))
|
||||||
@ -103,7 +78,6 @@ X_train, y_train = np.array(X_train), np.array(y_train)
|
|||||||
# Reshape the data as LSTM expects 3-D data (samples, time steps, features)
|
# Reshape the data as LSTM expects 3-D data (samples, time steps, features)
|
||||||
|
|
||||||
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
|
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
|
||||||
X_train.shape
|
|
||||||
|
|
||||||
# create and fit the LSTM network
|
# create and fit the LSTM network
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
@ -120,8 +94,8 @@ inputs = scaler.transform(inputs)
|
|||||||
|
|
||||||
# Create test data set
|
# Create test data set
|
||||||
X_test = []
|
X_test = []
|
||||||
for i in range(n,inputs.shape[0]):
|
for i in range(n, inputs.shape[0]):
|
||||||
X_test.append(inputs[i-n:i,0])
|
X_test.append(inputs[i-n:i, 0])
|
||||||
|
|
||||||
# Convert data to numpy array
|
# Convert data to numpy array
|
||||||
X_test = np.array(X_test)
|
X_test = np.array(X_test)
|
||||||
@ -134,7 +108,6 @@ predict_price = scaler.inverse_transform(predict_price)
|
|||||||
print(X_test.shape)
|
print(X_test.shape)
|
||||||
|
|
||||||
rmse = np.sqrt(np.mean(np.power((test - predict_price),2)))
|
rmse = np.sqrt(np.mean(np.power((test - predict_price),2)))
|
||||||
rmse
|
|
||||||
|
|
||||||
# Plot predicted vs actual values
|
# Plot predicted vs actual values
|
||||||
train = df_hourly_avg[:split]
|
train = df_hourly_avg[:split]
|
||||||
@ -149,6 +122,6 @@ plt.plot(test['Predictions'], label='Predicted')
|
|||||||
plt.title("AZN Close Price - LSTM", color = 'black', fontsize = 20)
|
plt.title("AZN Close Price - LSTM", color = 'black', fontsize = 20)
|
||||||
plt.xlabel('Date', color = 'black', fontsize = 15)
|
plt.xlabel('Date', color = 'black', fontsize = 15)
|
||||||
plt.ylabel('Price', color = 'black', fontsize = 15)
|
plt.ylabel('Price', color = 'black', fontsize = 15)
|
||||||
plt.legend();
|
plt.legend()
|
||||||
|
|
||||||
model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
|
model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
|
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user