VictoriaPresnyakova 2024-11-11 20:13:53 +04:00
parent 8419a3a28e
commit 9de6ce68ba
6 changed files with 136058 additions and 136089 deletions

View File

@@ -21,11 +21,7 @@ scaler = MinMaxScaler(feature_range=(0, 1))
 # Load the data
 column_names = ['product_url', 'price', 'datetime']
-df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
-df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names)
-df3 = pd.read_csv('price_history.csv', names=column_names)
-df = pd.concat([df1, df2, df3])
+df = pd.read_csv('parsed_data_public_price_history_all.csv')
 # Convert the 'datetime' column to the datetime dtype
 df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
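The change above drops the three-way `pd.concat` in favour of a single pre-merged CSV. The merged file itself is not part of this diff; a plausible one-off script to produce it, reusing the file names from the removed lines (the merge step is an assumption, not code from this commit):

```python
import pandas as pd

column_names = ['product_url', 'price', 'datetime']

# Recreate the old three-source load, then write the single file
# that the new code reads. This merge script is assumed, not from the commit.
df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names)
df3 = pd.read_csv('price_history.csv', names=column_names)

pd.concat([df1, df2, df3], ignore_index=True).to_csv(
    'parsed_data_public_price_history_all.csv', index=False)
```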

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@@ -25,23 +25,12 @@ import seaborn as sns
 column_names = ['product_url', 'price', 'datetime']
-df1 = pd.read_csv('parsed_data_public_price_history_1.csv')
-df2 = pd.read_csv('parsed_data_public_price_history.csv', names=column_names)
-df3 = pd.read_csv('price_history.csv', names=column_names)
-df = pd.concat([df1, df2, df3])
+df = pd.read_csv('parsed_data_public_price_history_all.csv')
 # Convert the 'datetime' column to the datetime dtype
 df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
 df['price'] = df['price'].astype(float)
-df.head()
-df.describe()
-df.info()
-len(df.product_url.unique())
 q_low = df['price'].quantile(0.55)
 q_hi = df['price'].quantile(0.75)
@@ -55,35 +44,21 @@ df_hourly_avg = df[['price', 'datetime']]
 # Round timestamps down to the hour
 df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')
-df_hourly_avg.head()
 # Group by hour and compute the mean price
 df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
-# Print summary statistics
-df_hourly_avg.describe()
-# Preview the first rows
-df_hourly_avg.head()
-df_hourly_avg
 df_hourly_avg.set_index('datetime', inplace=True)
-df_hourly_avg
 # Only the values
 df_hourly_avg_arr = df_hourly_avg.values
 # Split
 split = int(0.8*len(df_hourly_avg_arr))
 train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]
-train.shape, test.shape
 # Normalise data by scaling to a range of 0 to 1 to improve learning and convergence of the model.
 # Feature scaling and fitting scaled data
 scaler = MinMaxScaler(feature_range=(0, 1))
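The hunk stops at the scaler's construction; the fit/transform calls live in the suppressed lines. A minimal sketch of that step, assuming the scaler is fitted on the training split only so that no test-period statistics leak into training:

```python
from sklearn.preprocessing import MinMaxScaler

# train and test come from the 80/20 split above, each of shape (n_rows, 1).
scaler = MinMaxScaler(feature_range=(0, 1))
train_scaled = scaler.fit_transform(train)  # learn min/max from train only
test_scaled = scaler.transform(test)        # reuse the same scaling for test
```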
@@ -103,7 +78,6 @@ X_train, y_train = np.array(X_train), np.array(y_train)
 # Reshape the data as LSTM expects 3-D data (samples, time steps, features)
 X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
-X_train.shape
 # Create and fit the LSTM network
 model = Sequential()
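The loop that builds `X_train`/`y_train` is collapsed out of this hunk; judging from the hunk header and the mirrored test-set loop below, it presumably slides a window of `n` past prices over the scaled training series. A sketch under that assumption (the actual value of `n` is not visible in this diff; 60 is purely illustrative):

```python
import numpy as np

n = 60  # window length; illustrative, the real value is in the suppressed lines
X_train, y_train = [], []
for i in range(n, len(train_scaled)):
    X_train.append(train_scaled[i - n:i, 0])  # previous n hourly prices
    y_train.append(train_scaled[i, 0])        # the price right after the window
X_train, y_train = np.array(X_train), np.array(y_train)
```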
@@ -116,12 +90,12 @@ model.fit(X_train, y_train, epochs=1000, batch_size=1, verbose=2)
 inputs = df_hourly_avg_arr[len(df_hourly_avg_arr) - len(test) - n:]
 inputs = inputs.reshape(-1, 1)
 inputs = scaler.transform(inputs)
 # Create the test data set
 X_test = []
-for i in range(n,inputs.shape[0]):
-    X_test.append(inputs[i-n:i,0])
+for i in range(n, inputs.shape[0]):
+    X_test.append(inputs[i-n:i, 0])
 # Convert data to a numpy array
 X_test = np.array(X_test)
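Between this hunk and the next, the suppressed lines must reshape `X_test` to 3-D and run the prediction, since the next hunk header already shows `scaler.inverse_transform(predict_price)`. A sketch consistent with the surrounding code:

```python
# LSTM expects (samples, time steps, features), same layout as in training
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
predict_price = model.predict(X_test)
# Map predictions back from the 0-1 scale to real prices
predict_price = scaler.inverse_transform(predict_price)
```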
@@ -134,7 +108,6 @@ predict_price = scaler.inverse_transform(predict_price)
 print(X_test.shape)
 rmse = np.sqrt(np.mean(np.power((test - predict_price), 2)))
-rmse
 # Plot predicted vs actual values
 train = df_hourly_avg[:split]
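The RMSE above is spelled out with raw numpy; an equivalent cross-check via scikit-learn, assuming `test` and `predict_price` are aligned arrays of the same shape:

```python
import numpy as np
from sklearn.metrics import mean_squared_error

rmse = np.sqrt(mean_squared_error(test, predict_price))
```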
@@ -149,6 +122,6 @@ plt.plot(test['Predictions'], label='Predicted')
 plt.title("AZN Close Price - LSTM", color='black', fontsize=20)
 plt.xlabel('Date', color='black', fontsize=15)
 plt.ylabel('Price', color='black', fontsize=15)
-plt.legend();
+plt.legend()
 model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
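The model is saved in the native `.keras` format, so a later session can restore it without retraining; the path below is copied from the line above:

```python
from keras.models import load_model

model = load_model("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")
```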

File diff suppressed because it is too large