Compare commits

..

21 Commits

Author SHA1 Message Date
danil.markov
8afc758987 Feature/parsing-service: save 2024-11-13 14:20:24 +04:00
danil.markov
4f5dda4dbf Merge branch 'feature/ozon-parser-v0.1' into feature/parsing-service
# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/config/DynamicProxyInterceptor.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/config/ProxyProvider.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/wildberries_parser/service/client/ClientImpl.java
2024-11-13 14:00:53 +04:00
danil.markov
59c41a4912 Feature/parsing-service: save 2024-11-13 13:59:52 +04:00
c4bb7a5ffa features: add proxy checking 2024-11-13 13:28:14 +04:00
danil.markov
fd71513bbf Feature/ozon-parser-v0.1: правка апишки 2024-11-12 22:40:34 +04:00
danil.markov
171cc650f1 Feature/ozon-parser-v0.1: Версия рабочая, парсит быстро + api 2024-11-12 15:40:00 +04:00
1df7dc94b8 features: change parse logic 2024-10-15 12:13:46 +04:00
danil.markov
42d947440c Feature/parsing-service: fix after pull 2024-10-15 11:34:24 +04:00
danil.markov
83b1c5d72c Merge remote-tracking branch 'origin/feature/parsing-service' into feature/parsing-service
# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/pool/WebDriverPool.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/parsing/ParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/parsing/OzonCategoryPageParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/scheduler/OzonProductUpdater.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/persistence/repository/ProductRepository.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/wildberries_parser/DebugRunner.java
2024-10-15 10:12:04 +04:00
danil.markov
82f648e16c Feature/parsing-service: save commit 2024-10-15 10:10:11 +04:00
84e0af60c9 features: add proxy, change webClient to restTemplate, add checking 2024-10-15 09:55:32 +04:00
danil.markov
9895aaff33 Feature/parsing-service: save commit 2024-10-14 21:43:57 +04:00
danil.markov
ae8ac061bc Merge remote-tracking branch 'origin/feature/parsing-service' into feature/parsing-service
# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/pool/WebDriverPool.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/page/CategoryPage.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/parsing/CategoryPageParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/parsing/ParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/scheduler/PartitionScheduler.java
2024-10-14 12:47:34 +04:00
danil.markov
a0271125a1 Feature/parsing-service: package refactor 2024-10-14 12:46:43 +04:00
84f344084c merge branches 2024-10-13 19:05:30 +04:00
5ae300389c feature: wb parser 2024-10-13 18:34:11 +04:00
danil.markov
ef2240e8ab Feature/parsing-service: Add parsing Ozon, need test on another system 2024-10-13 17:44:52 +04:00
f58b0a4a02 Feature/parsing-service intermediate commit 2024-10-12 13:51:32 +04:00
ffe6920b29 feature: price history and model 2024-10-03 17:26:35 +04:00
30ca5acc34 Feature/parsing-service intermediate commit, not final migration + run config 2024-10-02 11:51:30 +04:00
a24bf08f52 Feature/parsing-service init project 2024-09-26 21:37:10 +04:00
117 changed files with 3420 additions and 155858 deletions

3
.idea/.gitignore vendored
View File

@ -1,3 +0,0 @@
# Default ignored files
/shelf/
/workspace.xml

View File

@ -1,9 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<module type="JAVA_MODULE" version="4">
<component name="NewModuleRootManager" inherit-compiler-output="true">
<exclude-output />
<content url="file://$MODULE_DIR$" />
<orderEntry type="inheritedJdk" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

View File

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager">
<output url="file://$PROJECT_DIR$/out" />
</component>
</project>

View File

@ -1,8 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectModuleManager">
<modules>
<module fileurl="file://$PROJECT_DIR$/.idea/Price_Pulse.iml" filepath="$PROJECT_DIR$/.idea/Price_Pulse.iml" />
</modules>
</component>
</project>

View File

@ -1,6 +0,0 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="VcsDirectoryMappings">
<mapping directory="" vcs="Git" />
</component>
</project>

View File

@ -0,0 +1,18 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
<env name="JDBC_USERNAME" value="postgres" />
<env name="SERVER_PORT" value="8080" />
<env name="WEBDRIVER_CHROME_PATH" value="$PROJECT_DIR$/parsing-service/web-driver/chromedriver" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

View File

@ -1,12 +0,0 @@
# Price_Pulse
python -m venv venv
.\venv\Scripts\activate
pip install -r requirements.txt
python app.py
/predict_price
/plot

View File

@ -1,168 +0,0 @@
import numpy as np
import pandas as pd
from datetime import timedelta
from tensorflow.keras.models import load_model
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
import io
import joblib
from flask import Flask, request, jsonify, Blueprint, send_file
from flasgger import Swagger
# --- Application wiring -------------------------------------------------------
app = Flask(__name__)
api = Blueprint('api', __name__)
Swagger(app)

# Load the trained Keras model. The scaler is created unfitted here and is
# fitted on the price series inside prepare_data().
model = load_model("my_model_1H.keras")
scaler = MinMaxScaler(feature_range=(0, 1))

# Load the raw price history.
column_names = ['product_url', 'price', 'datetime']
df = pd.read_csv('parsed_data_public_price_history_all.csv')

# Parse the 'datetime' column into timezone-aware (UTC) timestamps.
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)

# Outlier filter: keep prices within 1.5 * (q_hi - q_low) of the two quantiles.
# NOTE(review): the lower quantile is 0.55 rather than the conventional 0.25 of
# an IQR filter -- confirm this is intentional before changing it, because the
# model was presumably trained on data filtered the same way.
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
q_range = q_hi - q_low
df = df[(df['price'] < q_hi + 1.5 * q_range) & (df['price'] > q_low - 1.5 * q_range)]

# Take an explicit copy: the 'datetime' column is overwritten on the next line,
# and assigning into a slice of `df` is a chained assignment (it triggers
# SettingWithCopyWarning and has undefined aliasing behaviour).
df_hourly_avg = df[['price', 'datetime']].copy()
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')

# One row per hour holding the mean price, indexed by the hour timestamp.
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
df_hourly_avg.set_index('datetime', inplace=True)
# Data preparation and recursive forecasting
def prepare_data(df, days_forward=7):
    """Forecast future prices by rolling the model forward step by step.

    The price column is scaled with the module-level ``scaler`` (re-fitted on
    every call), the last ``n`` scaled observations seed the model, and each
    prediction is fed back in as the newest input for the next step.

    Args:
        df: DataFrame with a 'price' column; ``df.index[-1]`` is used as the
            timestamp of the last observation.
        days_forward: Number of recursive prediction steps to produce.

    Returns:
        DataFrame with columns 'date' and 'predicted_price', one row per step.

    Raises:
        ValueError: If ``df`` has fewer rows than the model's input window.
    """
    n = 3  # number of time steps per model input window

    if len(df) < n:
        raise ValueError(
            f"prepare_data needs at least {n} rows to seed the model, got {len(df)}"
        )

    last_date = df.index[-1]
    scaled_data = scaler.fit_transform(df[['price']].values)

    # Seed with the LAST n observations. The previous implementation took the
    # final sliding window scaled_data[-n-1:-1], which left out the most recent
    # observation, so the first "future" value was really a re-prediction of
    # the last known point.
    current_input = scaled_data[-n:, 0].reshape(n, 1)

    predictions = []
    for _ in range(days_forward):
        pred = model.predict(np.expand_dims(current_input, axis=0))
        predictions.append(pred[0, 0])
        # Slide the window: drop the oldest value, append the new prediction.
        current_input = np.append(current_input[1:], pred).reshape(n, 1)

    # Map the predictions back to the original price scale.
    predictions = scaler.inverse_transform(np.array(predictions).reshape(-1, 1)).flatten()

    # NOTE(review): each step is labelled as one *day* ahead, while the series
    # passed in by the callers in this file is aggregated per hour -- confirm
    # which step size the model was trained for.
    future_dates = [last_date + timedelta(days=i) for i in range(1, days_forward + 1)]
    return pd.DataFrame({'date': future_dates, 'predicted_price': predictions})
# Chart rendering
def plot_price(forecast_df):
    """Draw actual hourly prices and the forecast on one chart.

    Args:
        forecast_df: DataFrame with 'date' and 'predicted_price' columns.

    Returns:
        io.BytesIO holding the PNG image, rewound to position 0.
    """
    figure, axes = plt.subplots(figsize=(14, 7))

    # Historical series comes from the module-level hourly averages.
    axes.plot(df_hourly_avg.index, df_hourly_avg['price'], label='Actual Price', color='blue')
    axes.plot(forecast_df['date'], forecast_df['predicted_price'], label='Predicted Price', color='orange')

    axes.set_title("Price Prediction")
    axes.set_xlabel("Date")
    axes.set_ylabel("Price")
    axes.legend()
    axes.grid(True)

    png_buffer = io.BytesIO()
    figure.savefig(png_buffer, format='png')
    png_buffer.seek(0)

    # Release the figure so repeated requests do not accumulate open figures.
    plt.close(figure)
    return png_buffer
# GET /predict_price: returns the forecast as JSON together with the day that
# has the lowest predicted price.
# The docstring below is parsed by flasgger into the Swagger specification, so
# its text is part of the served API documentation and is kept verbatim.
@api.route('/predict_price', methods=['GET'])
def predict_price():
    """
    Предсказание цены на 7 дней вперед
    ---
    responses:
      200:
        description: JSON с предсказаниями цен и днем минимальной цены
        schema:
          type: object
          properties:
            forecast:
              type: array
              items:
                type: object
                properties:
                  date:
                    type: string
                    format: date
                  predicted_price:
                    type: number
            min_price_day:
              type: object
              properties:
                date:
                  type: string
                  format: date
                price:
                  type: number
    """
    forecast_df = prepare_data(df_hourly_avg)

    # One dict per forecast row; predicted_price is converted from a NumPy
    # scalar to a plain float so that it can be JSON-serialised.
    forecast_list = [
        {**row, 'predicted_price': float(row['predicted_price'])}
        for row in forecast_df.to_dict(orient='records')
    ]

    # Row holding the lowest predicted price.
    cheapest_label = forecast_df['predicted_price'].idxmin()
    cheapest_row = forecast_df.loc[cheapest_label]

    min_price_day = {
        'date': cheapest_row['date'].strftime('%Y-%m-%d'),
        'price': float(cheapest_row['predicted_price']),
    }
    return jsonify({'forecast': forecast_list, 'min_price_day': min_price_day})
# GET /plot: returns the actual-vs-predicted price chart as a PNG image.
# The docstring below is parsed by flasgger into the Swagger specification, so
# its text is part of the served API documentation and is kept verbatim.
@api.route('/plot', methods=['GET'])
def plot():
    """
    Получение графика предсказанных и фактических цен
    ---
    responses:
      200:
        description: Возвращает график предсказанных и фактических цен в формате PNG
        content:
          image/png:
            schema:
              type: string
              format: binary
    """
    # Forecast first, then render it on top of the historical series.
    png_stream = plot_price(prepare_data(df_hourly_avg))
    return send_file(png_stream, mimetype='image/png')
# Expose every route of the `api` blueprint under the /api prefix.
app.register_blueprint(api, url_prefix='/api')

if __name__ == "__main__":
    import os

    # Flask's debug mode enables the interactive Werkzeug debugger, which lets
    # anyone who can reach the server execute code. It stays on by default for
    # local runs (same behaviour as before) but can now be switched off with
    # FLASK_DEBUG=0 instead of being hard-wired.
    app.run(debug=os.environ.get("FLASK_DEBUG", "1") == "1")

View File

@ -1,9 +0,0 @@
version: '3'
services:
clickhouse:
image: yandex/clickhouse-server:latest
ports:
- "8123:8123"
- "9000:9000"
volumes:
- ./clickhouse-data:/var/lib/clickhouse

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -1,127 +0,0 @@
# -*- coding: utf-8 -*-
"""Platforms_train_v2.ipynb
Automatically generated by Colab.
Original file is located at
https://colab.research.google.com/drive/1yD7QxO8rUrHXvYLn_z5eofUKenJqXZoU
"""
import os
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import joblib
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout, LSTM
import matplotlib.pyplot as plt
import seaborn as sns
column_names = ['product_url', 'price', 'datetime']
df = pd.read_csv('parsed_data_public_price_history_all.csv')

# Convert the 'datetime' column to timezone-aware (UTC) timestamps.
df['datetime'] = pd.to_datetime(df['datetime'], format='mixed', utc=True)
df['price'] = df['price'].astype(float)

# Outlier filter: keep prices within 1.5 * (q_hi - q_low) of the two quantiles.
# NOTE(review): the lower quantile is 0.55, not the usual 0.25 -- confirm.
q_low = df['price'].quantile(0.55)
q_hi = df['price'].quantile(0.75)
q_range = q_hi - q_low
df = df[(df['price'] < q_hi + 1.5 * q_range) & (df['price'] > q_low - 1.5 * q_range)]
df.describe()

# Keep only the 'price' and 'datetime' columns. Copy explicitly because the
# 'datetime' column is overwritten below; assigning into a slice of `df` is a
# chained assignment (SettingWithCopyWarning).
df_hourly_avg = df[['price', 'datetime']].copy()

# Floor timestamps to the hour.
df_hourly_avg['datetime'] = df_hourly_avg['datetime'].dt.floor('1H')

# Group by hour and take the mean price.
df_hourly_avg = df_hourly_avg.groupby('datetime').agg({'price': 'mean'}).reset_index()
df_hourly_avg.set_index('datetime', inplace=True)

# Values only, as a 2-D array with a single 'price' column.
df_hourly_avg_arr = df_hourly_avg.values

# Chronological 80/20 split.
split = int(0.8 * len(df_hourly_avg_arr))
train, test = df_hourly_avg_arr[:split], df_hourly_avg_arr[split:]

# Scale prices to [0, 1] to help the network converge.
# NOTE(review): the scaler is fitted on the whole series, test part included,
# so the test range leaks into training. Left unchanged here because changing
# the fit would alter the trained model.
scaler = MinMaxScaler(feature_range=(0, 1))
scaled_data = scaler.fit_transform(df_hourly_avg_arr)

# Build supervised samples: n previous values -> next value.
n = 3
X_train, y_train = [], []
for i in range(n, len(train)):
    X_train.append(scaled_data[i - n:i, 0])
    y_train.append(scaled_data[i, 0])

# Convert to NumPy arrays for Keras.
X_train, y_train = np.array(X_train), np.array(y_train)

# LSTM layers expect 3-D input: (samples, time steps, features).
X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))

# Build and fit the LSTM network.
model = Sequential()
model.add(LSTM(units=50, return_sequences=True, input_shape=(X_train.shape[1], 1)))
model.add(LSTM(units=50))
model.add(Dense(1))
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(X_train, y_train, epochs=1000, batch_size=1, verbose=2)

# Test inputs: the test part plus the n values before it, so that the first
# test point has a full window.
inputs = df_hourly_avg_arr[len(df_hourly_avg_arr) - len(test) - n:]
inputs = inputs.reshape(-1, 1)
inputs = scaler.transform(inputs)

# Create the test windows.
X_test = []
for i in range(n, inputs.shape[0]):
    X_test.append(inputs[i - n:i, 0])

# Convert to a 3-D NumPy array.
X_test = np.array(X_test)
X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

predict_price = model.predict(X_test)
predict_price = scaler.inverse_transform(predict_price)
print(X_test.shape)

# Root-mean-square error on the test part. Previously computed but never
# shown; print it so the run reports its quality metric.
rmse = np.sqrt(np.mean(np.power((test - predict_price), 2)))
print(f"RMSE: {rmse:.4f}")

# Plot predicted vs actual values.
train = df_hourly_avg[:split]
# Copy before adding a column: `df_hourly_avg[split:]` is a slice of the frame.
test = df_hourly_avg[split:].copy()
test['Predictions'] = predict_price
plt.figure(figsize=(20, 10))
sns.set_style("whitegrid")
plt.plot(train['price'], label='Training')
plt.plot(test['price'], label='Actual')
plt.plot(test['Predictions'], label='Predicted')
# NOTE(review): the title mentions "AZN Close Price", which looks like a
# leftover from a stock-price template -- confirm the intended title.
plt.title("AZN Close Price - LSTM", color = 'black', fontsize = 20)
plt.xlabel('Date', color = 'black', fontsize = 15)
plt.ylabel('Price', color = 'black', fontsize = 15)
plt.legend()
model.save("/content/drive/MyDrive/Colab Notebooks/Platforms/my_model_.keras")

Binary file not shown.

80
docker/docker-compose.yml Normal file
View File

@ -0,0 +1,80 @@
version: "3.8"
name: price-pulse
services:
postgres:
image: postgres:16
ports:
- "5432:5432"
environment:
POSTGRES_DB: parsed_data
POSTGRES_USER: postgres
POSTGRES_PASSWORD: postgres
deploy:
resources:
limits:
memory: 1024M
reservations:
memory: 256M
clickhouse:
image: clickhouse/clickhouse-server:latest
ports:
- "8123:8123" # HTTP interface; /play offers a query playground, though a dedicated client is preferable to the browser
- "9000:9000" # Native TCP protocol, used by clickhouse-client and native drivers
- "9009:9009" # Inter-server communication port (replication between ClickHouse servers)
volumes:
- clickhouse_data:/var/lib/clickhouse
- clickhouse_logs:/var/log/clickhouse
environment:
CLICKHOUSE_DB: parsed_data
CLICKHOUSE_USER: user
CLICKHOUSE_PASSWORD: password
deploy:
resources:
limits:
memory: 1024M
reservations:
memory: 256M
zookeeper:
image: confluentinc/cp-zookeeper:latest
environment:
ZOOKEEPER_CLIENT_PORT: 2181
ZOOKEEPER_TICK_TIME: 2000
ports:
- "2181:2181"
deploy:
resources:
limits:
memory: 1024M
reservations:
memory: 256M
kafka:
image: confluentinc/cp-kafka:latest
depends_on:
- zookeeper
ports:
- "9092:9092"
environment:
KAFKA_BROKER_ID: 1
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
volumes:
- kafka_data:/var/lib/kafka
deploy:
resources:
limits:
memory: 1024M
reservations:
memory: 256M
volumes:
clickhouse_data:
clickhouse_logs:
kafka_data:

23
my-app/.gitignore vendored
View File

@ -1,23 +0,0 @@
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
# dependencies
/node_modules
/.pnp
.pnp.js
# testing
/coverage
# production
/build
# misc
.DS_Store
.env.local
.env.development.local
.env.test.local
.env.production.local
npm-debug.log*
yarn-debug.log*
yarn-error.log*

View File

@ -1,70 +0,0 @@
# Getting Started with Create React App
This project was bootstrapped with [Create React App](https://github.com/facebook/create-react-app).
## Available Scripts
In the project directory, you can run:
### `npm start`
Runs the app in the development mode.\
Open [http://localhost:3000](http://localhost:3000) to view it in your browser.
The page will reload when you make changes.\
You may also see any lint errors in the console.
### `npm test`
Launches the test runner in the interactive watch mode.\
See the section about [running tests](https://facebook.github.io/create-react-app/docs/running-tests) for more information.
### `npm run build`
Builds the app for production to the `build` folder.\
It correctly bundles React in production mode and optimizes the build for the best performance.
The build is minified and the filenames include the hashes.\
Your app is ready to be deployed!
See the section about [deployment](https://facebook.github.io/create-react-app/docs/deployment) for more information.
### `npm run eject`
**Note: this is a one-way operation. Once you `eject`, you can't go back!**
If you aren't satisfied with the build tool and configuration choices, you can `eject` at any time. This command will remove the single build dependency from your project.
Instead, it will copy all the configuration files and the transitive dependencies (webpack, Babel, ESLint, etc) right into your project so you have full control over them. All of the commands except `eject` will still work, but they will point to the copied scripts so you can tweak them. At this point you're on your own.
You don't have to ever use `eject`. The curated feature set is suitable for small and middle deployments, and you shouldn't feel obligated to use this feature. However we understand that this tool wouldn't be useful if you couldn't customize it when you are ready for it.
## Learn More
You can learn more in the [Create React App documentation](https://facebook.github.io/create-react-app/docs/getting-started).
To learn React, check out the [React documentation](https://reactjs.org/).
### Code Splitting
This section has moved here: [https://facebook.github.io/create-react-app/docs/code-splitting](https://facebook.github.io/create-react-app/docs/code-splitting)
### Analyzing the Bundle Size
This section has moved here: [https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size](https://facebook.github.io/create-react-app/docs/analyzing-the-bundle-size)
### Making a Progressive Web App
This section has moved here: [https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app](https://facebook.github.io/create-react-app/docs/making-a-progressive-web-app)
### Advanced Configuration
This section has moved here: [https://facebook.github.io/create-react-app/docs/advanced-configuration](https://facebook.github.io/create-react-app/docs/advanced-configuration)
### Deployment
This section has moved here: [https://facebook.github.io/create-react-app/docs/deployment](https://facebook.github.io/create-react-app/docs/deployment)
### `npm run build` fails to minify
This section has moved here: [https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify](https://facebook.github.io/create-react-app/docs/troubleshooting#npm-run-build-fails-to-minify)

18406
my-app/package-lock.json generated

File diff suppressed because it is too large Load Diff

View File

@ -1,51 +0,0 @@
{
"name": "my-app",
"version": "0.1.0",
"private": true,
"dependencies": {
"@emotion/react": "^11.13.3",
"@emotion/styled": "^11.13.0",
"@mui/material": "^6.1.1",
"@testing-library/jest-dom": "^5.17.0",
"@testing-library/react": "^13.4.0",
"@testing-library/user-event": "^13.5.0",
"axios": "^1.7.7",
"bootstrap": "^5.3.3",
"react": "^18.3.1",
"react-datepicker": "^7.4.0",
"react-dom": "^18.3.1",
"react-router-dom": "^6.26.2",
"react-scripts": "5.0.1",
"recharts": "^2.12.7",
"web-vitals": "^2.1.4"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
},
"devDependencies": {
"autoprefixer": "^10.4.20",
"postcss": "^8.4.47",
"tailwindcss": "^3.4.13"
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 3.8 KiB

View File

@ -1,43 +0,0 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<link rel="icon" href="%PUBLIC_URL%/favicon.ico" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="theme-color" content="#000000" />
<meta
name="description"
content="Web site created using create-react-app"
/>
<link rel="apple-touch-icon" href="%PUBLIC_URL%/logo192.png" />
<!--
manifest.json provides metadata used when your web app is installed on a
user's mobile device or desktop. See https://developers.google.com/web/fundamentals/web-app-manifest/
-->
<link rel="manifest" href="%PUBLIC_URL%/manifest.json" />
<!--
Notice the use of %PUBLIC_URL% in the tags above.
It will be replaced with the URL of the `public` folder during the build.
Only files inside the `public` folder can be referenced from the HTML.
Unlike "/favicon.ico" or "favicon.ico", "%PUBLIC_URL%/favicon.ico" will
work correctly both with client-side routing and a non-root public URL.
Learn how to configure a non-root public URL by running `npm run build`.
-->
<title>React App</title>
</head>
<body>
<noscript>You need to enable JavaScript to run this app.</noscript>
<div id="root"></div>
<!--
This HTML file is a template.
If you open it directly in the browser, you will see an empty page.
You can add webfonts, meta tags, or analytics to this file.
The build step will place the bundled scripts into the <body> tag.
To begin the development, run `npm start` or `yarn start`.
To create a production bundle, use `npm run build` or `yarn build`.
-->
</body>
</html>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 5.2 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 9.4 KiB

Binary file not shown.

Before

Width:  |  Height:  |  Size: 175 KiB

View File

@ -1,25 +0,0 @@
{
"short_name": "React App",
"name": "Create React App Sample",
"icons": [
{
"src": "favicon.ico",
"sizes": "64x64 32x32 24x24 16x16",
"type": "image/x-icon"
},
{
"src": "logo192.png",
"type": "image/png",
"sizes": "192x192"
},
{
"src": "logo512.png",
"type": "image/png",
"sizes": "512x512"
}
],
"start_url": ".",
"display": "standalone",
"theme_color": "#000000",
"background_color": "#ffffff"
}

View File

@ -1,3 +0,0 @@
# https://www.robotstxt.org/robotstxt.html
User-agent: *
Disallow:

View File

@ -1,82 +0,0 @@
.homepage {
}
.title {
font-size: 32px;
font-weight: bold;
margin-bottom: 20px;
}
.main-image {
max-width: 66%;
border-radius: 10px;
margin-bottom: 20px;
display: block;
margin-left: auto;
margin-right: auto;
}
.datePickerInput {
border-radius: 0.5rem; /* Закругление углов */
background-color: #ffffff; /* Цвет фона */
min-width: 100%; /* Минимальная ширина */
height: 50px; /* Высота */
font-size: 20px; /* Размер шрифта */
border: 1px solid #c2c2bf; /* Цвет рамки */
justify-content: center;
}
.input-field {
margin-bottom: 20px;
}
/*.date-pickers {
display: flex;
gap: 10px;
align-items: center;
justify-content: center;
margin-bottom: 20px;
}*/
.datePicker {
min-width: 46.5%;
justify-content: center;
}
.date-separator {
font-size: 18px;
color: #333;
}
/*.date-picker {
width: 200px;
height: 40px;
border: 1px solid #ccc;
border-radius: 5px;
padding: 5px;
cursor: pointer;
background-color: #fff;
font-size: 20px;
color: #333;
transition: all 0.3s ease;
display: flex;
align-items: center;
justify-content: center;
text-align: center;
}*/
/*.react-datepicker__input-container {
padding: 0;
background-color: transparent;
font-size: inherit;
color: inherit;
width: 100%;
display: flex;
align-items: center;
justify-content: center;
}*/
.get-button {
margin-top: 20px;
}

View File

@ -1,383 +0,0 @@
import React, { useState, useEffect } from 'react';
import { ThemeProvider, createTheme } from '@mui/material/styles';
import DatePicker from 'react-datepicker';
import 'react-datepicker/dist/react-datepicker.css';
import { useNavigate } from "react-router-dom";
import { Grid, Button, Typography, Select, MenuItem, Alert, FormControl, TextField } from '@mui/material';
import axios from 'axios';
import './App.css';
function HomePage() {
const navigate = useNavigate();
const [startDate, setStartDate] = useState(new Date());
const [endDate, setEndDate] = useState(new Date());
const [selectedMarketplace, setSelectedMarketplace] = useState('');
const [categories, setCategories] = useState([]);
const [marketplaces, setMarketplaces] = useState([]);
const [productUrl, setProductUrl] = useState('');
const [showPriceHistoryError, setShowPriceHistoryError] = useState(false);
// Load the list of marketplaces once, after the first render.
useEffect(() => {
  axios
    .get('/api/v1/marketplaces')
    .then((response) => {
      setMarketplaces(response.data);
    })
    .catch((error) => {
      // Best effort: the list simply stays empty if the request fails.
      console.error('Error fetching marketplaces:', error);
    });
}, []);
// Reload the category list whenever the selected marketplace changes.
useEffect(() => {
  // Nothing selected: drop categories that belonged to the previous selection
  // instead of leaving stale options in the dropdown.
  if (!selectedMarketplace) {
    setCategories([]);
    return undefined;
  }

  // Set by the cleanup below so that a slow response for an older selection
  // cannot overwrite the categories of a newer one.
  let cancelled = false;

  const fetchCategories = async () => {
    try {
      // Encode the value so characters such as '&' or spaces cannot break the
      // query string.
      const response = await axios.get(
        `/api/v1/categories?marketplace=${encodeURIComponent(selectedMarketplace)}`
      );
      if (!cancelled) {
        setCategories(response.data);
      }
    } catch (error) {
      if (!cancelled) {
        console.error('Error fetching categories:', error);
      }
    }
  };
  fetchCategories();

  return () => {
    cancelled = true;
  };
}, [selectedMarketplace]);
// Opens the result page, handing over the chosen period and marketplace
// through the router's location state.
const handleSubmit = () => {
  const state = { startDate, endDate, selectedMarketplace };
  console.log('Отправлено:', startDate, endDate, selectedMarketplace);
  navigate('/result', { state });
};
// Keeps the product URL text field in sync with component state.
const handleProductUrlChange = ({ target }) => {
  setProductUrl(target.value);
};
// Checks that the entered product URL is known to the backend and, if it is,
// opens the price-history page for it; otherwise shows the error alert.
const handleViewPriceHistory = async () => {
  // Pasted links often carry stray whitespace; a blank value is an error.
  const trimmedUrl = productUrl.trim();
  if (!trimmedUrl) {
    setShowPriceHistoryError(true);
    return;
  }

  try {
    // The product URL contains '?', '&' and '/' itself, so it must be encoded
    // before being embedded as a query parameter; unencoded, everything after
    // its first '&' would be read as separate parameters.
    const response = await fetch(
      `/api/v1/products/info?productUrl=${encodeURIComponent(trimmedUrl)}`
    );
    if (response.ok) {
      // Product found: clear any error left from an earlier failed attempt.
      setShowPriceHistoryError(false);
      navigate('/viewProduct', { state: { productUrl: trimmedUrl } });
    } else {
      // Product not found.
      setShowPriceHistoryError(true);
    }
  } catch (error) {
    console.error("Ошибка проверки товара:", error);
    setShowPriceHistoryError(true);
  }
};
// Toggle behaviour for the marketplace buttons: clicking the active
// marketplace clears the selection, clicking another one selects it.
const handleButtonClick = (marketplaceName) => {
  const nextSelection = selectedMarketplace === marketplaceName ? '' : marketplaceName;
  setSelectedMarketplace(nextSelection);
};
// Stores the value chosen in a select control as the selected marketplace.
// NOTE(review): in this component the handler is attached to the *category*
// <Select>, whose options carry category ids -- confirm that overwriting the
// selected marketplace with a category id is intended.
const handleMarketplaceChange = ({ target }) => {
  setSelectedMarketplace(target.value);
};
const today = new Date();
const [startDateError, setStartDateError] = useState(false);
const [endDateError, setEndDateError] = useState(false);
// Accepts a new start date only if it is not later than `today`; otherwise
// raises the start-date error flag and keeps the previous date.
const handleStartDateChange = (date) => {
  const notAfterToday = date <= today;
  if (!notAfterToday) {
    setStartDateError(true);
    return;
  }
  setStartDate(date);
  setStartDateError(false);
};
// Accepts a new end date only if it lies between the start date and `today`
// (both inclusive); the error flag mirrors whether the date was rejected.
const handleEndDateChange = (date) => {
  const withinRange = date <= today && date >= startDate;
  if (withinRange) {
    setEndDate(date);
  }
  setEndDateError(!withinRange);
};
const theme = createTheme({
palette: {
primary: {
main: '#2a8e9e',
},
secondary: {
main: '#023247',
},
},
typography: {
fontFamily: ['Montserrat', 'sans-serif'].join(','),
h3: {
fontWeight: 700,
fontSize: '2.5rem',
},
body1: {
fontSize: '1.1rem',
},
},
});
return (
<ThemeProvider theme={theme}>
<div className="homepage" style={{ padding: '2rem', backgroundColor: '#d8eaff', height: '95vh', display: 'flex', flexDirection: 'column' }}>
<Grid container spacing={4} style={{ flexGrow: 1 }}>
<Grid item xs={12} md={6}>
<div
style={{
backgroundColor: '#fcfcf8',
borderRadius: '1rem',
padding: '2rem',
height: '90%',
display: 'flex',
flexDirection: 'column',
justifyContent: 'center',
}}
>
<Typography variant="h1" gutterBottom style={{ color: '#132a52', marginBottom: '1.5rem', fontWeight: 'bold' }}>
Price Pulse
</Typography>
<div className="description" style={{ color: '#16305e' }}>
<Typography variant="h3" gutterBottom>
<span style={{ fontWeight: 'bold' }}>
Платформа предназначена для сбора, анализа и рекомендаций,
<br></br>
исходя из собранных данных с общедоступных маркетплейсов.
</span>
</Typography>
<br></br>
<Typography variant="h4" gutterBottom>
Выберите маркетплейсы, которые вас интересуют:
</Typography>
<Grid container spacing={2}>
{marketplaces.map((marketplace) => (
<Grid item key={marketplace.name}>
<Button
variant="contained"
color="primary"
className="marketplace-button"
style={{
backgroundColor: selectedMarketplace === marketplace.name ? marketplace.bgColor : '#fcfcf8',
color: selectedMarketplace === marketplace.name ? marketplace.textColor : '#16305e',
borderRadius: '0.5rem',
padding: '1rem',
display: 'flex',
flexDirection: 'column',
maxHeight: '150px',
alignItems: 'center',
minWidth: '400px',
}}
onClick={() => handleButtonClick(marketplace.name)}
>
{marketplace.name === 'Wildberries' ? (
<img src="https://png.klev.club/uploads/posts/2024-04/png-klev-club-dejs-p-wildberries-logotip-png-16.png" alt="Wildberries" style={{ width: '60px', height: '60px', marginBottom: '0.5rem' }} />
) : (
<img src="https://pngimg.com/d/ozon_PNG3.png" alt="Ozon" style={{ width: '60px', height: '60px', marginBottom: '0.5rem' }} />
)}
<Typography variant="h5" gutterBottom style={{ color: selectedMarketplace === marketplace.name ? marketplace.textColor : '#16305e', marginBottom: '0rem' }}>
{marketplace.name}
</Typography>
</Button>
</Grid>
))}
</Grid>
<br></br>
{/* Комбобокс */}
<Typography variant="h4" gutterBottom>
Выберите категорию:
</Typography>
<FormControl variant="outlined" style={{ minWidth: '97%' }}>
<Select
labelId="marketplace-select-label"
value={selectedMarketplace}
onChange={handleMarketplaceChange}
style={{
color: '#023247',
borderColor: '#4875b2',
borderRadius: '0.5rem'// Цвет текста выпадающего списка
}}
inputProps={{
style: {
borderColor: '#4875b2', // Цвет рамки
},
}}
MenuProps={{
PaperProps: {
style: {
color: '#023247', // Цвет текста выпадающего меню
},
},
}}
>
{categories.map((category) => (
<MenuItem key={category.id} value={category.id}>
{category.name}
</MenuItem>
))}
</Select>
<br></br>
</FormControl>
<br></br>
<Typography variant="h4" gutterBottom>
Введите период для сбора данных:
</Typography>
{startDateError && (
<Alert severity="error" style={{ marginBottom: '10px' }}>
Дата начала периода не может быть после сегодняшнего дня.
</Alert>
)}
{endDateError && (
<Alert severity="error" style={{ marginBottom: '10px' }}>
Дата окончания периода не может быть после сегодняшнего дня или раньше даты начала.
</Alert>
)}
<div className="date-pickers" style={{ display: 'flex', alignItems: 'center', marginTop: '0.5rem' }}>
<DatePicker
selected={startDate}
onChange={handleStartDateChange}
className="datePickerInput"
wrapperClassName="datePicker"
dateFormat="dd.MM.yyyy"
popperPlacement="bottom"
showMonthDropdown
/>
<span className="date-separator" style={{ margin: '0 0.5rem', color: '#2a8e9e', fontSize: '30px' }}> - </span>
<DatePicker
selected={endDate}
onChange={handleEndDateChange}
className="datePickerInput"
wrapperClassName="datePicker"
dateFormat="dd.MM.yyyy"
popperPlacement="bottom"
showMonthDropdown
/>
</div>
<Button
variant="contained"
color="primary"
className="submit-button"
onClick={handleSubmit}
style={{
borderRadius: '0.5rem',
padding: '0.75rem 1.5rem',
backgroundColor: '#4875b2',
marginTop: '1.5rem',
minWidth: '97%'
}}
>
<Typography variant="h5" gutterBottom style={{ color: '#fcfcfb', marginBottom: '0rem' }}>
Получить рекомендации
</Typography>
</Button>
</div>
</div>
</Grid>
<Grid item xs={12} md={6}>
<div
style={{
backgroundColor: '#fcfcf8',
borderRadius: '1rem',
padding: '2rem',
height: '90%',
display: 'flex',
flexDirection: 'column',
justifyContent: 'flex-start',
alignItems: 'center',
}}
>
<div style={{
position: 'relative',
width: '97%',
height: '700px',
overflow: 'hidden'
}}>
<img
src="https://cdn.prod.website-files.com/61ebe5f773be1acd620f8208/61fb879dfccdca6a20c66d4a_e-commerce-marketplace.gif"
alt=""
className="main-image"
style={{
borderRadius: '0.5rem',
maxWidth: '80%',
height: 'auto',
position: 'absolute',
top: '80%',
left: '50%',
transform: 'translate(-50%, -100%)'
}}
/>
<Typography variant="h4" style={{
color: '#132a52',
position: 'absolute',
top: '60%',
width: '80%',
}}>
Вы можете посмотреть историю изменения цены конкретного товара при помощи его URL.
</Typography>
<Typography variant="h4" style={{
color: '#132a52',
position: 'absolute',
top: '80%',
width: '80%'
}}>
Введите ссылку на товар:
</Typography>
</div>
<TextField
label="Ссылка на товар"
placeholder="https://www.ozon.ru/..."
variant="outlined"
fullWidth
value={productUrl}
onChange={handleProductUrlChange}
style={{
marginTop: '-3.5rem',
color: '#132a52',
borderColor: '#4875b2',
borderRadius: '1rem',
maxWidth: '97%'
}}
/>
<Button variant="contained" color="primary" onClick={handleViewPriceHistory}
style={{
borderRadius: '0.5rem',
padding: '0.75rem 1.5rem',
backgroundColor: '#4875b2',
marginTop: '1.5rem',
minWidth: '97%'
}}>
<Typography variant="h5" gutterBottom style={{ color: '#fcfcfb', marginBottom: '0rem' }}>
Посмотреть историю цены
</Typography>
</Button>
{showPriceHistoryError && (
<Alert severity="error" style={{ marginTop: '1rem' }}>
Неверный URL товара или товар не найден.
</Alert>
)}
</div>
</Grid>
</Grid>
</div>
</ThemeProvider>
);
}
export default HomePage;

View File

@ -1,8 +0,0 @@
import { render, screen } from '@testing-library/react';
import App from './App';
test('renders learn react link', () => {
render(<App />);
const linkElement = screen.getByText(/learn react/i);
expect(linkElement).toBeInTheDocument();
});

View File

@ -1,19 +0,0 @@
import React from 'react';
import { BrowserRouter as Router, Route, Routes } from 'react-router-dom';
import App from './App';
import Result from './Result';
import ViewProduct from './ViewProduct';
const AppRouter = () => {
return (
<Router>
<Routes>
<Route path="/" element={<App />} />
<Route path="/result" element={<Result />} />
<Route path="/viewProduct" element={<ViewProduct />} />
</Routes>
</Router>
);
};
export default AppRouter;

View File

@ -1,120 +0,0 @@
import React from 'react';
import { useLocation, useNavigate } from 'react-router-dom';
import { ThemeProvider, createTheme } from '@mui/material/styles';
import { Typography, Box, Grid, Container, Card, CardContent, CardHeader, Button } from '@mui/material';
import { LineChart, Line, XAxis, YAxis, CartesianGrid, Tooltip, Legend } from 'recharts';
const Result = () => {
const location = useLocation();
const navigate = useNavigate();
const { startDate, endDate, selectedMarketplace } = location.state || {};
const generateDates = (startDate, endDate) => {
const dates = [];
let currentDate = new Date(startDate);
while (currentDate <= endDate) {
dates.push(new Date(currentDate));
currentDate.setDate(currentDate.getDate() + 1);
}
return dates;
};
const data = generateDates(startDate, endDate).map((date, index) => ({
date: date,
price: Math.floor(Math.random() * 200) + 50
}));
const theme = createTheme({
palette: {
primary: {
main: '#2a8e9e',
},
secondary: {
main: '#023247',
},
},
typography: {
fontFamily: ['Montserrat', 'sans-serif'].join(','),
h3: {
fontWeight: 700,
fontSize: '2.5rem',
},
body1: {
fontSize: '1.1rem',
},
},
});
return (
<ThemeProvider theme={theme}>
<div className="result-page" style={{ padding: '2rem', backgroundColor: '#d8eaff' }}>
<Container maxWidth="md">
<Box mt={4}>
<Typography variant="h3" gutterBottom align="center" style={{ color: '#023247' }}>
Результаты анализа
</Typography>
</Box>
<Button
variant="contained"
onClick={() => navigate(-1)}
sx={{ mb: 2, backgroundColor: '#2a8e9e', color: '#fcfcf8', '&:hover': { backgroundColor: '#023247' } }}
>
Назад
</Button>
<Grid container spacing={3} mt={2}>
<Grid item xs={12}>
<Card style={{ backgroundColor: '#fcfcf8', borderRadius: '1rem' }}>
<CardHeader title="Исходные данные" style={{ color: '#023247' }} />
<CardContent>
<Typography variant="body1" gutterBottom style={{ color: '#023247' }}>
Выбранный маркетплейс: {selectedMarketplace}
</Typography>
<Typography variant="body1" gutterBottom style={{ color: '#023247' }}>
Период: {startDate?.toLocaleDateString()} по {endDate?.toLocaleDateString()}
</Typography>
</CardContent>
</Card>
</Grid>
<Grid item xs={12}>
<Card style={{ backgroundColor: '#fcfcf8', borderRadius: '1rem' }}>
<CardHeader title="Анализ" style={{ color: '#023247' }} />
<CardContent>
<LineChart width={600} height={300} data={data}>
<XAxis
dataKey="date"
tickFormatter={(unixTime) => new Date(unixTime).toLocaleDateString()}
tickMargin={10}
stroke="#023247"
/>
<YAxis stroke="#023247" />
<CartesianGrid stroke="#f5f5f5" />
<Tooltip />
<Legend />
<Line type="monotone" dataKey="price" stroke="#2a8e9e" activeDot={{ r: 8 }} />
</LineChart>
</CardContent>
</Card>
</Grid>
<Grid item xs={12}>
<Card style={{ backgroundColor: '#fcfcf8', borderRadius: '1rem' }}>
<CardHeader title="Рекомендации" style={{ color: '#023247' }} />
<CardContent>
<Typography variant="body1" gutterBottom style={{ color: '#023247' }}>
Здесь будут отображаться рекомендации, основанные на анализе данных.
</Typography>
</CardContent>
</Card>
</Grid>
</Grid>
</Container>
</div>
</ThemeProvider>
);
};
export default Result;

View File

@ -1,176 +0,0 @@
import React, { useState, useEffect } from 'react';
import { useLocation, useNavigate } from 'react-router-dom';
import { ThemeProvider, createTheme } from '@mui/material/styles';
import { Typography, Box, Grid, Container, Card, CardContent, CardHeader, Button } from '@mui/material';
import { LineChart, Line, XAxis, YAxis, CartesianGrid, Tooltip, Legend } from 'recharts';
const ViewProduct = () => {
// const productData = {
// marketplaceName: 'OZON',
// link: 'https://www.ozon.ru/product/dt-retail-lt1-11-noutbuk-15-intel-celeron-n5095-ram-16-gb-ssd-512-gb-intel-uhd-graphics-windows-1660307910/?asb=bTlMJ%252Be%252BwTSvSMWf9FdQsDHYHE16shxbl6Wx6jH%252FrSw%253D&asb2=t80whxB1azJcjh95gA5dFGpG5UrhBd0POEAUHfzbXLnsoewHh99YxUCYki71WO2qUajSTcLDQ-A7vhxpFaEp3g&avtc=1&avte=4&avts=1731442704&keywords=%D0%BD%D0%BE%D1%83%D1%82%D0%B1%D1%83%D0%BA',
// brand: 'DT Retail',
// productName: 'DT Retail LT1-11 Ноутбук 15", Intel Celeron N5095, RAM 16 ГБ, SSD 512 ГБ, Intel UHD Graphics, Windows Pro, серый металлик, зеркальный, Русская раскладка',
// image: 'https://static.ru-mi.com/upload/resize_cache/iblock/875/440_440_1/2lip1znmyjfyay047kocswwpjqz71nhk.jpg',
// priceHistory: [
// { date: '2023-10-27', price: 100.00 },
// { date: '2023-10-26', price: 105.00 },
// { date: '2023-10-25', price: 102.00 },
// { date: '2023-10-27', price: 100.00 },
// { date: '2023-10-26', price: 105.00 },
// { date: '2023-10-25', price: 102.00 }
// ],
// };
const location = useLocation();
const navigate = useNavigate();
const date = new Date();
const zoneOffset = date.getTimezoneOffset();
const hours = Math.floor(Math.abs(zoneOffset) / 60);
const minutes = Math.abs(zoneOffset) % 60;
const sign = zoneOffset >= 0 ? '+' : '-';
const formattedZoneOffset = `${sign}${hours.toString().padStart(2, '0')}:${minutes.toString().padStart(2, '0')}`;
const { productUrl, from, to } = location.state || {};
const [productData, setProductData] = useState(null);
const [chartData, setChartData] = useState([]);
useEffect(() => {
const fetchProductData = async () => {
try {
const response = await fetch(`/api/v1/products/info?productUrl=${productUrl}`);
if (response.ok) {
const data = await response.json();
setProductData(data);
} else {
console.error("Ошибка запроса к API");
}
} catch (error) {
console.error("Ошибка при получении данных:", error);
}
};
const fetchPriceHistory = async () => {
try {
const response = await fetch(`/api/v1/products/price-history?productUrl=${productUrl}&from=${from}&to=${to}&zoneOffset=${formattedZoneOffset}`);
if (response.ok) {
const priceHistoryData = await response.json();
const priceHistory = Object.entries(priceHistoryData.priceHistory).map(([date, price]) => ({
date: new Date(date),
price: price,
}));
setChartData(priceHistory);
} else {
console.error("Ошибка запроса к API");
}
} catch (error) {
console.error("Ошибка при получении данных:", error);
}
};
if (productUrl) {
fetchProductData();
fetchPriceHistory();
}
}, [productUrl, from, to, formattedZoneOffset]);
const theme = createTheme({
palette: {
primary: {
main: '#2a8e9e',
},
secondary: {
main: '#023247',
},
},
typography: {
fontFamily: ['Montserrat', 'sans-serif'].join(','),
h3: {
fontWeight: 700,
fontSize: '2.5rem',
},
body1: {
fontSize: '1.1rem',
},
},
});
// const chartData = productData.priceHistory.map((entry) => ({
// date: new Date(entry.date).getTime(),
// price: entry.price,
// }));
return (
<ThemeProvider theme={theme}>
<div className="result-page" style={{ padding: '2rem', backgroundColor: '#d8eaff' }}>
<Container maxWidth="md">
<Box mt={4}>
<Typography variant="h3" gutterBottom align="center" style={{ color: theme.palette.secondary.main }}>
Просмотр товара
</Typography>
</Box>
<Button
variant="contained"
onClick={() => navigate(-1)}
sx={{ mb: 2, backgroundColor: theme.palette.primary.main, color: '#fcfcf8', '&:hover': { backgroundColor: theme.palette.secondary.main } }}
>
Назад
</Button>
{productData && (
<Grid container spacing={3} mt={2}>
<Grid item xs={12}>
<Card style={{ backgroundColor: '#fcfcf8', borderRadius: '1rem' }}>
<CardContent>
<Typography variant="h5" gutterBottom style={{ color: theme.palette.secondary.main, fontWeight: 'bold' }}>
{productData.marketplaceName}
</Typography>
<Typography variant="body1" gutterBottom style={{ color: theme.palette.secondary.main }}>
<a href={productData.link} target="_blank" rel="noopener noreferrer" style={{ color: theme.palette.primary.main, textDecoration: 'underline' }}>
Ссылка на товар
</a>
</Typography>
<Typography variant="body1" gutterBottom style={{ color: theme.palette.secondary.main, fontWeight: 'bold' }}>
{productData.brand}
</Typography>
<Typography variant="body1" gutterBottom style={{ color: theme.palette.secondary.main, fontWeight: 'bold' }}>
{productData.productName}
</Typography>
<img
src={productData.imageUrl}
alt={productData.productName}
style={{ maxWidth: '500px', maxHeight: '500px', margin: '1rem auto', display: 'block' }}
/>
</CardContent>
</Card>
</Grid>
<Grid item xs={12}>
<Card style={{ backgroundColor: '#fcfcf8', borderRadius: '1rem' }}>
<CardHeader title="История цен" style={{ color: theme.palette.secondary.main }} />
<CardContent>
<LineChart width={600} height={300} data={chartData}>
<XAxis
dataKey="date"
tickFormatter={(unixTime) => new Date(unixTime).toLocaleDateString()}
tickMargin={10}
stroke={theme.palette.secondary.main}
/>
<YAxis stroke={theme.palette.secondary.main} />
<CartesianGrid stroke="#f5f5f5" />
<Tooltip />
<Legend />
<Line type="monotone" dataKey="price" stroke={theme.palette.primary.main} activeDot={{ r: 8 }} />
</LineChart>
</CardContent>
</Card>
</Grid>
</Grid>
)}
</Container>
</div>
</ThemeProvider>
);
};
export default ViewProduct;

Binary file not shown.

Before

Width:  |  Height:  |  Size: 1.6 MiB

View File

@ -1,13 +0,0 @@
body {
margin: 0;
font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
}
code {
font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
monospace;
}

View File

@ -1,17 +0,0 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import AppRouter from './AppRouter';
import reportWebVitals from './reportWebVitals';
const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
<React.StrictMode>
<AppRouter />
</React.StrictMode>
);
// If you want to start measuring performance in your app, pass a function
// to log results (for example: reportWebVitals(console.log))
// or send to an analytics endpoint. Learn more: https://bit.ly/CRA-vitals
reportWebVitals();

View File

@ -1 +0,0 @@
<svg xmlns="http://www.w3.org/2000/svg" viewBox="0 0 841.9 595.3"><g fill="#61DAFB"><path d="M666.3 296.5c0-32.5-40.7-63.3-103.1-82.4 14.4-63.6 8-114.2-20.2-130.4-6.5-3.8-14.1-5.6-22.4-5.6v22.3c4.6 0 8.3.9 11.4 2.6 13.6 7.8 19.5 37.5 14.9 75.7-1.1 9.4-2.9 19.3-5.1 29.4-19.6-4.8-41-8.5-63.5-10.9-13.5-18.5-27.5-35.3-41.6-50 32.6-30.3 63.2-46.9 84-46.9V78c-27.5 0-63.5 19.6-99.9 53.6-36.4-33.8-72.4-53.2-99.9-53.2v22.3c20.7 0 51.4 16.5 84 46.6-14 14.7-28 31.4-41.3 49.9-22.6 2.4-44 6.1-63.6 11-2.3-10-4-19.7-5.2-29-4.7-38.2 1.1-67.9 14.6-75.8 3-1.8 6.9-2.6 11.5-2.6V78.5c-8.4 0-16 1.8-22.6 5.6-28.1 16.2-34.4 66.7-19.9 130.1-62.2 19.2-102.7 49.9-102.7 82.3 0 32.5 40.7 63.3 103.1 82.4-14.4 63.6-8 114.2 20.2 130.4 6.5 3.8 14.1 5.6 22.5 5.6 27.5 0 63.5-19.6 99.9-53.6 36.4 33.8 72.4 53.2 99.9 53.2 8.4 0 16-1.8 22.6-5.6 28.1-16.2 34.4-66.7 19.9-130.1 62-19.1 102.5-49.9 102.5-82.3zm-130.2-66.7c-3.7 12.9-8.3 26.2-13.5 39.5-4.1-8-8.4-16-13.1-24-4.6-8-9.5-15.8-14.4-23.4 14.2 2.1 27.9 4.7 41 7.9zm-45.8 106.5c-7.8 13.5-15.8 26.3-24.1 38.2-14.9 1.3-30 2-45.2 2-15.1 0-30.2-.7-45-1.9-8.3-11.9-16.4-24.6-24.2-38-7.6-13.1-14.5-26.4-20.8-39.8 6.2-13.4 13.2-26.8 20.7-39.9 7.8-13.5 15.8-26.3 24.1-38.2 14.9-1.3 30-2 45.2-2 15.1 0 30.2.7 45 1.9 8.3 11.9 16.4 24.6 24.2 38 7.6 13.1 14.5 26.4 20.8 39.8-6.3 13.4-13.2 26.8-20.7 39.9zm32.3-13c5.4 13.4 10 26.8 13.8 39.8-13.1 3.2-26.9 5.9-41.2 8 4.9-7.7 9.8-15.6 14.4-23.7 4.6-8 8.9-16.1 13-24.1zM421.2 430c-9.3-9.6-18.6-20.3-27.8-32 9 .4 18.2.7 27.5.7 9.4 0 18.7-.2 27.8-.7-9 11.7-18.3 22.4-27.5 32zm-74.4-58.9c-14.2-2.1-27.9-4.7-41-7.9 3.7-12.9 8.3-26.2 13.5-39.5 4.1 8 8.4 16 13.1 24 4.7 8 9.5 15.8 14.4 23.4zM420.7 163c9.3 9.6 18.6 20.3 27.8 32-9-.4-18.2-.7-27.5-.7-9.4 0-18.7.2-27.8.7 9-11.7 18.3-22.4 27.5-32zm-74 58.9c-4.9 7.7-9.8 15.6-14.4 23.7-4.6 8-8.9 16-13 24-5.4-13.4-10-26.8-13.8-39.8 13.1-3.1 26.9-5.8 41.2-7.9zm-90.5 125.2c-35.4-15.1-58.3-34.9-58.3-50.6 0-15.7 22.9-35.6 58.3-50.6 8.6-3.7 18-7 27.7-10.1 5.7 19.6 13.2 40 22.5 60.9-9.2 20.8-16.6 
41.1-22.2 60.6-9.9-3.1-19.3-6.5-28-10.2zM310 490c-13.6-7.8-19.5-37.5-14.9-75.7 1.1-9.4 2.9-19.3 5.1-29.4 19.6 4.8 41 8.5 63.5 10.9 13.5 18.5 27.5 35.3 41.6 50-32.6 30.3-63.2 46.9-84 46.9-4.5-.1-8.3-1-11.3-2.7zm237.2-76.2c4.7 38.2-1.1 67.9-14.6 75.8-3 1.8-6.9 2.6-11.5 2.6-20.7 0-51.4-16.5-84-46.6 14-14.7 28-31.4 41.3-49.9 22.6-2.4 44-6.1 63.6-11 2.3 10.1 4.1 19.8 5.2 29.1zm38.5-66.7c-8.6 3.7-18 7-27.7 10.1-5.7-19.6-13.2-40-22.5-60.9 9.2-20.8 16.6-41.1 22.2-60.6 9.9 3.1 19.3 6.5 28.1 10.2 35.4 15.1 58.3 34.9 58.3 50.6-.1 15.7-23 35.6-58.4 50.6zM320.8 78.4z"/><circle cx="420.9" cy="296.5" r="45.7"/><path d="M520.5 78.1z"/></g></svg>

Before

Width:  |  Height:  |  Size: 2.6 KiB

View File

@ -1,13 +0,0 @@
// Lazily loads the `web-vitals` package and registers `onPerfEntry` with each
// of its five metric collectors. Does nothing unless a function is passed.
const reportWebVitals = onPerfEntry => {
  const isCallback = Boolean(onPerfEntry) && onPerfEntry instanceof Function;
  if (!isCallback) {
    return;
  }
  import('web-vitals').then(vitals => {
    const collectors = [
      vitals.getCLS,
      vitals.getFID,
      vitals.getFCP,
      vitals.getLCP,
      vitals.getTTFB,
    ];
    collectors.forEach(collect => collect(onPerfEntry));
  });
};
export default reportWebVitals;

View File

@ -1,5 +0,0 @@
// jest-dom adds custom jest matchers for asserting on DOM nodes.
// allows you to do things like:
// expect(element).toHaveTextContent(/react/i)
// learn more: https://github.com/testing-library/jest-dom
import '@testing-library/jest-dom';

37
parsing-service/.gitignore vendored Normal file
View File

@ -0,0 +1,37 @@
HELP.md
.gradle
build/
!gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
!**/src/main/**/bin/
!**/src/test/**/bin/
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
out/
!**/src/main/**/out/
!**/src/test/**/out/
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
### VS Code ###
.vscode/

View File

@ -0,0 +1,17 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<envs>
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

View File

@ -0,0 +1,61 @@
// Gradle build for the parsing service: Spring Boot application on a Java 21 toolchain.
plugins {
    id 'java'
    id 'org.springframework.boot' version '3.3.4'
    id 'io.spring.dependency-management' version '1.1.6'
}

group = 'ru.pricepulse'
version = '0.0.1-SNAPSHOT'

java {
    toolchain {
        languageVersion = JavaLanguageVersion.of(21)
    }
}

configurations {
    // Make annotation processors (Lombok) visible on the compile-only classpath.
    compileOnly {
        extendsFrom annotationProcessor
    }
}

repositories {
    mavenCentral()
}

// Versions of libraries that are not managed by the Spring Boot BOM.
ext {
    jsoupVersion = '1.18.1'
    seleniumVersion = '4.25.0'
}

dependencies {
    // Spring
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.springframework.boot:spring-boot-starter-webflux'
    implementation 'org.springframework.kafka:spring-kafka'
    implementation 'org.springframework.retry:spring-retry:2.0.9'
    implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'
    // Liquibase is currently disabled.
    // implementation 'org.liquibase:liquibase-core'

    // Parsing / browser automation
    implementation "org.jsoup:jsoup:${jsoupVersion}"
    implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
    implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
    implementation 'org.apache.commons:commons-pool2:2.12.0'

    // Persistence drivers
    implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
    runtimeOnly 'org.postgresql:postgresql'

    // Lombok
    compileOnly 'org.projectlombok:lombok'
    annotationProcessor 'org.projectlombok:lombok'

    // Tests
    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation 'org.springframework.kafka:spring-kafka-test'
    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
}

tasks.named('test') {
    useJUnitPlatform()
}

Binary file not shown.

View File

@ -0,0 +1,7 @@
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

252
parsing-service/gradlew vendored Normal file
View File

@ -0,0 +1,252 @@
#!/bin/sh
#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
' "$PWD" ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
warn () {
echo "$*"
} >&2
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"

94
parsing-service/gradlew.bat vendored Normal file
View File

@ -0,0 +1,94 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

View File

@ -0,0 +1 @@
rootProject.name = 'parsing-service'

View File

@ -0,0 +1,15 @@
package ru.pricepulse.parsingservice;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.retry.annotation.EnableRetry;
/**
 * Entry point of the parsing service.
 *
 * <p>Boots the Spring application context with this class as the primary
 * source; {@code @EnableRetry} is declared here for the whole application.
 */
@EnableRetry
@SpringBootApplication
public class ParsingServiceApplication {

    /**
     * Starts the application.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        SpringApplication application = new SpringApplication(ParsingServiceApplication.class);
        application.run(args);
    }
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.config;
import java.time.format.DateTimeFormatter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Exposes the shared {@link DateTimeFormatter} bean used for year/month formatting.
 */
@Configuration
public class DateTimeFormatterConfig {

    /** Four-digit year and two-digit month separated by an underscore, e.g. {@code 2024_11}. */
    private static final String PARTITION_PATTERN = "yyyy_MM";

    /**
     * Creates the formatter bean.
     * NOTE(review): the bean name suggests it is used to build partition names - confirm against callers.
     *
     * @return a formatter for the {@code yyyy_MM} pattern
     */
    @Bean
    public DateTimeFormatter partitionDateTimeFormatter() {
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(PARTITION_PATTERN);
        return formatter;
    }
}

View File

@ -0,0 +1,36 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpRequest;
import org.springframework.http.client.ClientHttpRequestExecution;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.http.client.ClientHttpResponse;
import java.io.IOException;
import java.net.InetSocketAddress;
/**
 * Request interceptor that replaces the {@code User-Agent} header of every
 * outgoing request with a value obtained from {@link UserAgentProvider}.
 *
 * <p>Proxy rotation is currently disabled: {@code proxyProvider} is injected
 * but not used. The disabled implementation took an address from
 * {@code proxyProvider.getNextProxy()} and wrote it to the JVM-wide
 * {@code http.proxyHost} / {@code http.proxyPort} system properties.
 */
@Slf4j
@AllArgsConstructor
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {

    private final UserAgentProvider userAgentProvider;
    private final ProxyProvider proxyProvider;

    /**
     * Sets a freshly chosen user agent on the request and continues the chain.
     *
     * @param request   the outgoing request; its headers are modified in place
     * @param body      the request body, passed through unchanged
     * @param execution the remaining interceptor chain
     * @return the response produced by the rest of the chain
     * @throws IOException if the downstream execution fails
     */
    @Override
    public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
        // Pick a new user agent for each request.
        request.getHeaders().set("User-Agent", userAgentProvider.getRandomUserAgent());
        return execution.execute(request, body);
    }
}

View File

@ -0,0 +1,10 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Kafka configuration holder.
 *
 * <p>Declares no beans of its own; it only enables binding of
 * {@link KafkaProperties} as configuration properties.
 * NOTE(review): Spring Boot's Kafka auto-configuration may already register
 * {@code KafkaProperties}; confirm whether this class is still needed.
 */
@Configuration
@EnableConfigurationProperties(KafkaProperties.class)
public class KafkaConfig {
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import lombok.Getter;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
/**
 * Aggregates the per-marketplace configuration properties and enables their
 * binding. Both property objects are injected through the constructor and
 * exposed via getters.
 */
@Configuration
@EnableConfigurationProperties({
        OzonConfigProperties.class,
        WildberriesConfigProperties.class
})
public class MarketplacesConfig {

    private final WildberriesConfigProperties wildberriesConfigProperties;
    private final OzonConfigProperties ozonConfigProperties;

    /**
     * @param wildberriesConfigProperties bound Wildberries settings
     * @param ozonConfigProperties        bound Ozon settings
     */
    public MarketplacesConfig(WildberriesConfigProperties wildberriesConfigProperties,
                              OzonConfigProperties ozonConfigProperties) {
        this.wildberriesConfigProperties = wildberriesConfigProperties;
        this.ozonConfigProperties = ozonConfigProperties;
    }

    /** @return the Wildberries settings */
    public WildberriesConfigProperties getWildberriesConfigProperties() {
        return this.wildberriesConfigProperties;
    }

    /** @return the Ozon settings */
    public OzonConfigProperties getOzonConfigProperties() {
        return this.ozonConfigProperties;
    }
}

View File

@ -0,0 +1,57 @@
package ru.pricepulse.parsingservice.config;
import jakarta.annotation.PostConstruct;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.checkProxies;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.readProxiesFromFile;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.saveProxiesToFile;
/**
 * Holds the list of verified proxies and hands them out in round-robin order.
 *
 * <p>The list is populated by {@link #init()}, which is NOT invoked
 * automatically at the moment (the {@code @PostConstruct} annotation is
 * commented out). Until {@code init()} has completed successfully,
 * {@link #getNextProxy()} fails with an {@link IllegalStateException}.
 *
 * <p>{@code getNextProxy()} may be called from several threads: the list is
 * published through a volatile field and the cursor is an {@link AtomicInteger}.
 */
@Component
public class ProxyProvider {

    /** Immutable snapshot of usable "host:port" entries; {@code null} until {@link #init()} succeeds. */
    private volatile List<String> workingProxies;
    private final AtomicInteger currentProxyIndex = new AtomicInteger(0);
    private final ResourceLoader resourceLoader;

    public ProxyProvider(ResourceLoader resourceLoader) {
        this.resourceLoader = resourceLoader;
    }

    /**
     * Reads candidate proxies from {@code classpath:proxy.txt}, checks them,
     * writes the working ones to {@code classpath:ok-proxy.txt} and stores them
     * for {@link #getNextProxy()}.
     *
     * <p>NOTE(review): both resources are resolved to a filesystem {@link Path};
     * this presumably fails when the application runs from a packaged jar - confirm.
     *
     * @throws IOException      if a resource cannot be resolved or read
     * @throws RuntimeException if no working proxy was found
     */
    //@PostConstruct
    public void init() throws IOException {
        Resource proxy = resourceLoader.getResource("classpath:proxy.txt");
        Resource okProxy = resourceLoader.getResource("classpath:ok-proxy.txt");

        List<String> proxies = Files.readAllLines(Path.of(proxy.getURI()));
        System.out.println("Начата проверка проксей");
        List<String> checked = checkProxies(proxies);
        System.out.println("Закончена проверка проксей");
        saveProxiesToFile(checked, Path.of(okProxy.getURI()));

        if (checked.isEmpty()) {
            throw new RuntimeException("Нет доступных рабочих прокси.");
        }
        // Publish only a validated, immutable snapshot so readers never observe an empty list.
        workingProxies = List.copyOf(checked);
        System.out.println("Найдено рабочих прокси: " + checked.size());
    }

    /**
     * Returns the next proxy address in round-robin order.
     *
     * @return the address built from the next "host:port" entry
     * @throws IllegalStateException if no proxies are loaded or the entry is not in "host:port" form
     * @throws NumberFormatException if the port part is not a number
     */
    public InetSocketAddress getNextProxy() {
        // Work on a local snapshot so a concurrent init() cannot change the size mid-call.
        List<String> proxies = workingProxies;
        if (proxies == null || proxies.isEmpty()) {
            throw new IllegalStateException("Нет доступных рабочих прокси.");
        }
        int size = proxies.size();
        int currentIndex = currentProxyIndex.getAndUpdate(index -> (index + 1) % size);
        // floorMod keeps the index in range even if the list shrank since the cursor was last advanced.
        String entry = proxies.get(Math.floorMod(currentIndex, size));

        String[] proxy = entry.split(":");
        if (proxy.length < 2) {
            throw new IllegalStateException("Некорректный формат прокси: " + entry);
        }
        return new InetSocketAddress(proxy[0].strip(), Integer.parseInt(proxy[1].strip()));
    }
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.web.client.RestTemplate;
import java.util.Collections;
/**
 * Declares the application's {@link RestTemplate} bean, wired with a single
 * {@link DynamicProxyInterceptor} built from the injected user-agent and proxy providers.
 */
@Configuration
@AllArgsConstructor
public class RestTemplateConfig {

    private final UserAgentProvider userAgentProvider;
    private final ProxyProvider proxyProvider;

    /**
     * Builds a {@link RestTemplate} whose only interceptor is the dynamic proxy interceptor.
     *
     * @return the configured template
     */
    @Bean
    public RestTemplate restTemplate() {
        ClientHttpRequestInterceptor interceptor =
                new DynamicProxyInterceptor(userAgentProvider, proxyProvider);

        RestTemplate template = new RestTemplate();
        // Register the interceptor on the template.
        template.setInterceptors(Collections.singletonList(interceptor));
        return template;
    }
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Configuration;
import org.springframework.retry.annotation.EnableRetry;
/**
 * Marker configuration whose only purpose is to switch on Spring Retry
 * annotation processing via {@link EnableRetry}.
 */
@Configuration
@EnableRetry
public class RetryConfig {}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
/**
 * Enables scheduling support and provides the {@link ThreadPoolTaskScheduler} bean.
 */
@Configuration
@EnableScheduling
public class SchedulerConfig {

    private static final int SCHEDULER_POOL_SIZE = 10;
    private static final String SCHEDULER_THREAD_PREFIX = "ScheduledTask-";

    /**
     * Creates a scheduler with a pool of ten threads named {@code ScheduledTask-*}.
     *
     * @return the configured scheduler
     */
    @Bean
    public ThreadPoolTaskScheduler taskScheduler() {
        var scheduler = new ThreadPoolTaskScheduler();
        scheduler.setThreadNamePrefix(SCHEDULER_THREAD_PREFIX);
        scheduler.setPoolSize(SCHEDULER_POOL_SIZE);
        return scheduler;
    }
}

View File

@ -0,0 +1,10 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.SeleniumConfigProperties;
/**
 * Spring configuration class that registers {@link SeleniumConfigProperties}
 * as a configuration-properties bean. It declares no beans of its own.
 */
@Configuration
@EnableConfigurationProperties(SeleniumConfigProperties.class)
public class SeleniumConfig {
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.config;
import java.util.List;
import java.util.Random;
import org.springframework.stereotype.Component;
/**
 * Supplies a User-Agent header value picked at random from a fixed list.
 */
@Component
public class UserAgentProvider {

    /** Fixed pool of User-Agent strings to choose from. */
    private static final List<String> USER_AGENTS = List.of(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A372 Safari/604.1"
    );

    /**
     * Picks one of the known User-Agent strings uniformly at random.
     *
     * @return a User-Agent header value
     */
    public String getRandomUserAgent() {
        // ThreadLocalRandom avoids allocating and seeding a new Random on every request.
        // Fully qualified so that the file's import list does not have to change.
        int index = java.util.concurrent.ThreadLocalRandom.current().nextInt(USER_AGENTS.size());
        return USER_AGENTS.get(index);
    }
}

View File

@ -0,0 +1,58 @@
package ru.pricepulse.parsingservice.config;
import java.net.InetSocketAddress;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.HttpHeaders;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.netty.http.client.HttpClient;
import reactor.netty.transport.ProxyProvider;
/**
 * Declares the application's {@link WebClient} bean.
 *
 * <p>Every request passes through a filter that takes the next proxy from the
 * proxy provider and a random User-Agent, builds a one-off client bound to that
 * proxy and re-issues the request through it.
 */
@Slf4j
@Configuration
@AllArgsConstructor
public class WebClientConfig {

    /** Maximum number of bytes the codecs may buffer in memory (10 MiB). */
    private static final int MAX_IN_MEMORY_SIZE_BYTES = 10 * 1024 * 1024;

    private final UserAgentProvider userAgentProvider;
    private final ru.pricepulse.parsingservice.config.ProxyProvider proxyProvider;

    /**
     * Builds the proxy-rotating {@link WebClient}.
     *
     * @return the configured client
     */
    @Bean
    public WebClient webClient() {
        return WebClient.builder()
                .filter((request, next) -> {
                    // Take the next proxy for each request.
                    InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
                    // getHostString() never performs a reverse DNS lookup, unlike getHostName(),
                    // which may block when the address was created from an IP literal.
                    log.info("Используемый прокси: {}:{}", proxyAddress.getHostString(), proxyAddress.getPort());

                    HttpClient httpClient = HttpClient.create()
                            .proxy(proxy -> proxy
                                    .type(ProxyProvider.Proxy.HTTP)
                                    .address(proxyAddress));

                    String randomUserAgent = userAgentProvider.getRandomUserAgent();
                    log.info("Используемый User-Agent: {}", randomUserAgent);

                    // Build a dedicated client bound to the selected proxy. The response is
                    // produced by THIS client, so the in-memory buffer limit must be set here;
                    // otherwise the default limit applies regardless of the outer configuration.
                    WebClient webClientWithProxy = WebClient.builder()
                            .clientConnector(new ReactorClientHttpConnector(httpClient))
                            .codecs(configurer -> configurer
                                    .defaultCodecs()
                                    .maxInMemorySize(MAX_IN_MEMORY_SIZE_BYTES))
                            .build();

                    // Re-issue the request through the proxied client with the chosen User-Agent.
                    return webClientWithProxy
                            .method(request.method())
                            .uri(request.url())
                            .headers(headers -> {
                                headers.putAll(request.headers());
                                // set() replaces any User-Agent copied from the original request
                                // instead of appending a second value.
                                headers.set(HttpHeaders.USER_AGENT, randomUserAgent);
                            })
                            .body(request.body()).exchange();
                })
                .codecs(configurer -> configurer
                        .defaultCodecs()
                        .maxInMemorySize(MAX_IN_MEMORY_SIZE_BYTES))
                .build();
    }
}

View File

@ -0,0 +1,59 @@
package ru.pricepulse.parsingservice.config;
import java.util.HashMap;
import java.util.Map;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Profile;
import org.springframework.context.annotation.Scope;
/**
 * Declares the Selenium {@link WebDriver} beans. Which beans exist depends on the
 * active Spring profile: {@code visible} or {@code headless}.
 */
@Configuration
public class WebDriverConfig {

    /**
     * Prototype-scoped Chrome driver for the {@code visible} profile, with the
     * image and geolocation content-setting preferences set to 2.
     *
     * @return a new Chrome driver instance
     */
    @Bean
    @Profile("visible")
    @Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
    public WebDriver webDriverVisible() {
        var visibleOptions = new ChromeOptions();
        visibleOptions.setExperimentalOption("prefs", contentSettingPrefs(
                "profile.managed_default_content_settings.images",
                "profile.managed_default_content_settings.geolocation"));

        WebDriverManager.chromedriver().setup();
        return new ChromeDriver(visibleOptions);
    }

    /**
     * Prototype-scoped Chrome driver for the {@code headless} profile.
     *
     * @param options the shared headless {@link ChromeOptions} bean
     * @return a new Chrome driver instance
     */
    @Bean
    @Profile("headless")
    @Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
    public WebDriver webDriverHeadless(ChromeOptions options) {
        WebDriverManager.chromedriver().setup();
        return new ChromeDriver(options);
    }

    /**
     * Chrome options for the {@code headless} profile: image and stylesheet
     * content-setting preferences set to 2, a fixed User-Agent and headless flags.
     *
     * @return the configured options
     */
    @Bean
    @Profile("headless")
    public ChromeOptions chromeOptions() {
        var headlessOptions = new ChromeOptions();
        headlessOptions.setExperimentalOption("prefs", contentSettingPrefs(
                "profile.managed_default_content_settings.images",
                "profile.managed_default_content_settings.stylesheets"));
        // A fixed window size ("--window-size=1920,2000") was tried previously and is disabled.
        headlessOptions.addArguments(
                "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36",
                "--headless",
                "--disable-gpu",
                "--no-sandbox",
                "--disable-dev-shm-usage");
        return headlessOptions;
    }

    /**
     * Builds a Chrome "prefs" map that sets both given preference keys to 2.
     * NOTE(review): 2 presumably means "block" for Chrome content settings — confirm.
     */
    private static Map<String, Object> contentSettingPrefs(String firstKey, String secondKey) {
        Map<String, Object> prefs = new HashMap<>();
        prefs.put(firstKey, 2);
        prefs.put(secondKey, 2);
        return prefs;
    }
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.config.properties;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration-properties holder for the {@code application.kafka} prefix.
 * It currently declares no fields, so nothing is bound yet.
 */
@ConfigurationProperties(prefix = "application.kafka")
public class KafkaConfigProperties {
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.config.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Settings bound from the {@code marketplace.ozon} configuration prefix.
 * Getters and setters are generated by Lombok.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.ozon")
public class OzonConfigProperties {
// Bound from marketplace.ozon.max-threads.
private Integer maxThreads;
// Bound from marketplace.ozon.max-num-of-pages-on-screen.
// NOTE(review): exact meaning is not visible in this class — confirm with its consumers.
private Integer maxNumOfPagesOnScreen;
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.config.properties;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration-properties holder for the {@code selenium} prefix.
 * It currently declares no fields, so nothing is bound yet.
 */
@ConfigurationProperties("selenium")
public class SeleniumConfigProperties {
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Settings bound from the {@code marketplace.wildberries} configuration prefix.
 * Getters and setters are generated by Lombok.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.wildberries")
public class WildberriesConfigProperties {
// URL and header settings; each is bound from the property of the same (kebab-cased) name.
private String baseUrl;
private String catalogUrl;
private String userAgent;
private String catalogWbUrl;
// Retry settings.
// NOTE(review): the unit of retryDelay is not visible in this class — confirm with its consumers.
private int retryAttempts;
private long retryDelay;
private String laptopUrl;
private String shard;
}

View File

@ -0,0 +1,6 @@
package ru.pricepulse.parsingservice.enumeration;
/**
 * Product categories known to the parsing service.
 */
public enum Category {
LAPTOP,
SMARTPHONE
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.enumeration;
/**
 * Marketplaces known to the parsing service.
 */
public enum Marketplace {
WILDBERRIES,
OZON,
DNS
}

View File

@ -0,0 +1,31 @@
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
import ru.pricepulse.parsingservice.enumeration.Category;
/**
 * Ozon catalogue sections that can be parsed. Each constant carries its URL
 * suffix relative to the Ozon category root and the shared {@link Category}
 * it maps to.
 */
public enum OzonCategory {

    LAPTOP("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
    SMARTPHONE("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);

    /** Common prefix of every Ozon category URL. */
    private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";

    private final String categoryUrl;
    private final Category mappedCategory;

    OzonCategory(String categoryUrl, Category mappedCategory) {
        this.mappedCategory = mappedCategory;
        this.categoryUrl = categoryUrl;
    }

    /**
     * @return the shared category this Ozon section maps to
     */
    public Category getMappedCategory() {
        return this.mappedCategory;
    }

    /**
     * @return the absolute category URL: the base category URL followed by this constant's suffix
     */
    public String getCategoryUrl() {
        return BASE_CATEGORY_URL.concat(this.categoryUrl);
    }
}

View File

@ -0,0 +1,70 @@
package ru.pricepulse.parsingservice.ozon_parser.pool;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.WebDriver;
import org.springframework.beans.factory.ObjectFactory;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
/**
 * Fixed-size pool of Selenium {@link WebDriver} instances, active under the
 * {@code ozon} profile.
 *
 * <p>All drivers are created eagerly in the constructor. Callers take one with
 * {@link #borrowDriver()} and must hand it back with {@link #returnDriver(WebDriver)}.
 * Every driver is quit when the bean is destroyed.
 */
@Slf4j
@Component
@Profile("ozon")
public class WebDriverPool {

    /** Drivers that are free to be borrowed. */
    private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();

    /** Drivers that are currently borrowed. */
    private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();

    private final ObjectFactory<WebDriver> webDriverFactory;
    private final OzonConfigProperties ozonConfigProperties;

    /**
     * Creates the pool and eagerly starts {@code maxThreads} drivers.
     *
     * @param webDriverFactory     factory that produces a new driver on each call
     * @param ozonConfigProperties supplies the pool size through {@code getMaxThreads()}
     */
    public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
                         OzonConfigProperties ozonConfigProperties) {
        this.webDriverFactory = webDriverFactory;
        this.ozonConfigProperties = ozonConfigProperties;
        int poolSize = ozonConfigProperties.getMaxThreads();
        try {
            for (int i = 0; i < poolSize; i++) {
                availableDrivers.add(createNewDriver());
            }
        } catch (RuntimeException e) {
            // Do not leave already started browsers running if a later one fails to start.
            quitAll(availableDrivers);
            throw e;
        }
    }

    private WebDriver createNewDriver() {
        return webDriverFactory.getObject();
    }

    /**
     * Takes a free driver out of the pool.
     *
     * @return a driver that is now marked as busy
     * @throws NoSuchElementException if every driver is currently borrowed
     */
    public WebDriver borrowDriver() {
        WebDriver driver = availableDrivers.poll();
        if (driver != null) {
            busyDrivers.add(driver);
            return driver;
        }
        throw new NoSuchElementException("No available driver found");
    }

    /**
     * Hands a previously borrowed driver back to the pool.
     *
     * <p>A {@code null} argument, or a driver that is not currently marked as busy
     * (returned twice, or never borrowed from this pool), is ignored so that the
     * same driver can never appear twice in the available queue.
     *
     * @param driver the driver to return
     */
    public void returnDriver(WebDriver driver) {
        if (driver == null) {
            return;
        }
        if (busyDrivers.remove(driver)) {
            availableDrivers.add(driver);
        } else {
            log.warn("Ignoring return of a WebDriver that is not marked as borrowed");
        }
    }

    /**
     * Quits every driver, free or borrowed, and empties the pool.
     */
    @PreDestroy
    public void shutdownPool() {
        quitAll(availableDrivers);
        quitAll(busyDrivers);
    }

    /**
     * Drains the queue and quits each driver; a failure to quit one driver is
     * logged and does not prevent the remaining drivers from being quit.
     */
    private void quitAll(Queue<WebDriver> drivers) {
        WebDriver driver;
        while ((driver = drivers.poll()) != null) {
            try {
                driver.quit();
            } catch (RuntimeException e) {
                log.warn("Failed to quit WebDriver", e);
            }
        }
    }
}

View File

@ -0,0 +1,17 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
/**
 * Service exposing the Ozon categories supported by the parser.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class OzonService {
/**
 * Returns every constant of {@link OzonCategory}.
 *
 * @return a fresh array with all Ozon categories
 */
public OzonCategory[] getCategories() {
return OzonCategory.values();
}
}

View File

@ -0,0 +1,27 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
/**
 * Checks for and creates partitions of the {@code price_history} table.
 * Active only under the {@code postgres_stat} profile.
 */
@Service
@RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionService {

    /** Unquoted SQL identifier: a letter or underscore followed by letters, digits, '_' or '$'. */
    private static final String IDENTIFIER_PATTERN = "[A-Za-z_][A-Za-z0-9_$]*";

    private final JdbcTemplate postgresDataSource;

    /**
     * Tells whether a relation named {@code partitionName} exists in the {@code public} schema.
     *
     * @param partitionName the partition (table) name without schema
     * @return {@code true} if {@code to_regclass} resolves the name, {@code false} otherwise
     */
    public boolean checkPartitionExists(String partitionName) {
        // The name is passed as a bind parameter, so it can never alter the statement itself.
        String result = postgresDataSource.queryForObject(
                "SELECT to_regclass(?)", String.class, "public." + partitionName);
        return result != null;
    }

    /**
     * Creates the partition of {@code price_history} for the given range if it does not exist yet.
     *
     * @param partitionName name of the partition table; must be a plain unquoted identifier
     * @param startDate     lower bound of the range (inclusive, per {@code FOR VALUES FROM})
     * @param endDate       upper bound of the range (exclusive, per {@code TO})
     * @throws IllegalArgumentException if the name is not a plain identifier or a bound is
     *                                  {@code null} or contains a backslash
     */
    public void createPartition(String partitionName, String startDate, String endDate) {
        // DDL cannot use bind parameters, so the identifier is validated and the
        // bounds are escaped before being concatenated into the statement.
        if (partitionName == null || !partitionName.matches(IDENTIFIER_PATTERN)) {
            throw new IllegalArgumentException("Invalid partition name: " + partitionName);
        }
        String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName
                + " PARTITION OF price_history FOR VALUES FROM (" + quoteLiteral(startDate)
                + ") TO (" + quoteLiteral(endDate) + ")";
        postgresDataSource.execute(createPartitionSQL);
    }

    /** Wraps the value in single quotes, doubling any embedded single quote. */
    private static String quoteLiteral(String value) {
        if (value == null || value.indexOf('\\') >= 0) {
            throw new IllegalArgumentException("Invalid partition bound: " + value);
        }
        return "'" + value.replace("'", "''") + "'";
    }
}

View File

@ -0,0 +1,30 @@
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
import java.math.BigDecimal;
import lombok.Builder;
import lombok.Getter;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * Data extracted for a single product card.
 * Lombok generates the getters, the setters and a builder; instances are
 * created through {@code ParsedData.builder()}.
 */
@Getter
@Setter
@Builder
public class ParsedData {
// Marketplace the product was parsed from.
private Marketplace marketplace;
// Shared product category.
private Category category;
private String brand;
private String productName;
// Link to the product page.
private String url;
// Link to the product image.
private String imageUrl;
private BigDecimal price;
}

View File

@ -0,0 +1,64 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * Page object for the Ozon "access restricted" page: detects the page and
 * presses its reload button.
 */
@Slf4j
public class AccessDeniedPage implements MarketplacePage {

    private static final String RELOAD_BUTTON_ID = "reload-button";
    private static final String RELOAD_BUTTON_XPATH = "//button[contains(text(),'Обновить')]";
    private static final String WARNING_IMAGE_CSS = "img[alt='warning']";
    private static final String ACCESS_DENIED_TEXT_XPATH = "//h1[text()='Доступ ограничен']";

    private final By reloadButtonById = By.id(RELOAD_BUTTON_ID);
    private final By reloadButtonByXpath = By.xpath(RELOAD_BUTTON_XPATH);
    private final By warningImage = By.cssSelector(WARNING_IMAGE_CSS);
    private final By accessDeniedText = By.xpath(ACCESS_DENIED_TEXT_XPATH);

    private final WebDriver driver;
    private final WebDriverWait wait;

    public AccessDeniedPage(WebDriver driver,
                            WebDriverWait wait) {
        this.driver = driver;
        this.wait = wait;
    }

    /**
     * Presses the reload button, locating it by id first and by XPath as a fallback.
     * Failures are only logged at debug level; nothing is thrown if neither lookup works.
     */
    public void clickReloadButton() {
        log.debug("Пытаемся найти кнопку по id и нажать");
        if (clickIfFound(reloadButtonById)) {
            return;
        }
        log.debug("Кнопка обновления страницы не найдена по id");

        log.debug("Пытаемся найти кнопку по xpath и нажать");
        if (clickIfFound(reloadButtonByXpath)) {
            log.debug("Успешно нашли кнопку по xpath");
        } else {
            log.debug("Кнопка обновления страницы не найдена по xpath");
        }
    }

    /** Finds the element and clicks it; reports {@code false} if anything goes wrong. */
    private boolean clickIfFound(By locator) {
        try {
            driver.findElement(locator).click();
            return true;
        } catch (Exception e) {
            return false;
        }
    }

    /**
     * @return {@code true} when both the warning image and the "access restricted"
     *         heading can be found; {@code false} if either lookup fails
     */
    @Override
    public boolean isLoaded() {
        try {
            if (driver.findElement(warningImage) == null) {
                return false;
            }
            return driver.findElement(accessDeniedText) != null;
        } catch (Exception e) {
            return false;
        }
    }
}

View File

@ -0,0 +1,90 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import java.math.BigDecimal;
import java.util.ArrayList;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
/**
 * Selenium page object for an Ozon category (search results) page.
 */
@Slf4j
public class CategoryPage implements MarketplacePage {

    private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
    private static final String DIRECT_CHILD_DIVS = ":scope > div";

    private final By searchResults = By.cssSelector(SEARCH_RESULTS);
    private final By directChildDivs = By.cssSelector(DIRECT_CHILD_DIVS);

    private WebDriver driver;
    private WebDriverWait wait;

    public CategoryPage(WebDriver driver, WebDriverWait wait) {
        this.driver = driver;
        this.wait = wait;
    }

    /**
     * Waits for the product list to become visible and extracts every product card.
     *
     * <p>Only URL, brand, name, image URL and price are filled in; marketplace and
     * category are left unset because this page object does not know them.
     *
     * @return the parsed products, in page order
     * @throws org.openqa.selenium.TimeoutException if the list does not become visible in time
     */
    public ArrayList<ParsedData> getParsedProducts() {
        wait.until(visibilityOfElementLocated(searchResults));
        log.info("Нашли SearchResultsV2");
        var searchResultsElement = driver.findElement(searchResults);

        // The condition must be evaluated inside the lambda on every poll. Returning the
        // ExpectedCondition object itself is always "truthy" and would end the wait at once.
        wait.until(d -> searchResultsElement.findElement(directChildDivs).isDisplayed());
        log.info("Нашли внешний блок списка");
        var outerDiv = searchResultsElement.findElement(directChildDivs); // outer block holding the product list

        // Re-query the cards on every poll so that cards rendered later are seen too.
        wait.until(d -> visibilityOfAllElements(outerDiv.findElements(directChildDivs)).apply(d));
        log.info("Нашли элементы списка");
        var innerDivs = outerDiv.findElements(directChildDivs); // one block per product card

        var products = new ArrayList<ParsedData>();
        for (var innerDiv : innerDivs) {
            var productDataDivs = innerDiv.findElements(directChildDivs);
            var productImageUrl = productDataDivs.get(0)
                    .findElement(By.cssSelector(":scope > a > div"))
                    .findElements(directChildDivs).getFirst()
                    .findElement(By.tagName("img")).getAttribute("src");
            var productBrand = productDataDivs.get(1).findElement(directChildDivs)
                    .findElements(directChildDivs).getFirst()
                    .findElement(By.tagName("b")).getText();
            var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
            var productUrl = productNameLink.getAttribute("href");
            var productName = productNameLink.findElement(By.tagName("span")).getText();
            var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
                    .findElements(By.tagName("span")).getFirst().getText());

            // Previously the extracted values were discarded and the method always
            // returned an empty list; collect them through the Lombok builder.
            products.add(ParsedData.builder()
                    .url(productUrl)
                    .brand(productBrand)
                    .productName(productName)
                    .imageUrl(productImageUrl)
                    .price(productPrice)
                    .build());
        }
        return products;
    }

    /**
     * Strips every non-digit character and parses the remainder.
     *
     * @throws NumberFormatException if the text contains no digits
     */
    private BigDecimal parseCurrency(String currencyStr) {
        String cleanedString = currencyStr.replaceAll("[^\\d]", "");
        return new BigDecimal(cleanedString);
    }

    /**
     * @return {@code true} when the search results widget is present on the page
     */
    @Override
    public boolean isLoaded() {
        try {
            return driver.findElement(searchResults) != null;
        } catch (Exception e) {
            return false;
        }
    }
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
/**
 * Common contract of the page objects in this package.
 */
public interface MarketplacePage {
/**
 * @return {@code true} when the page represented by this object is the one currently shown
 */
boolean isLoaded();
}

View File

@ -0,0 +1,38 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
@Slf4j
public class NoContentPage {
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
private WebDriver driver;
private WebDriverWait wait;
public NoContentPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public boolean isLoaded() {
try {
return driver.findElement(searchResultsError) != null
|| driver.findElement(errorText) != null
|| driver.findElement(notFoundText) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -0,0 +1,228 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@Slf4j
public class OzonCategoryPage {
private static final String OZON_MAIN_LINK = "https://www.ozon.ru";
public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";
public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
public static final int INDEX_OF_PRODUCT_PRICE = 0;
public static final int INDEX_OF_PRODUCT_BRAND = 1;
public static final int INDEX_OF_PRODUCT_NAME = 2;
private final Document document;
public OzonCategoryPage(String pageHtml) {
this.document = Jsoup.parse(pageHtml);
}
public List<ParsedData> getProducts(Category category) {
List<ParsedData> products = new ArrayList<>();
Elements searchResultsDivs = getSearchResultsDivs();
if (searchResultsDivs.isEmpty()) {
return List.of();
}
log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
for (Element searchResultsDiv : searchResultsDivs) {
Elements productsDivs = getProductsDivs(searchResultsDiv);
List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
products.addAll(parsedProductsData);
}
/*try {
for (Element searchResultsDiv : searchResultsDivs) {
var productDivs = searchResultsDiv.select("> div > div");
for (Element productDiv : productDivs) {
Elements productDataDivs = productDivs.select("> div > *");
if (productDataDivs.select("> *").isEmpty()) {
continue;
}
productDataDivs.removeLast();
Element productUrlAndImageUrlA = productDataDivs.first();
Element productDataDiv = productDataDivs.last();
Elements productDataInnerDivs = productDataDiv.select("> *");
try {
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
.select("span").text().toLowerCase()
.contains("осталось")) {
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
}
} catch (Exception ignored) {}
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND).select("> span");
String productUrl = OZON_MAIN_LINK + productUrlAndImageUrlA.attr("href").replaceAll("\\?.*$", "");
String productImageUrl = productUrlAndImageUrlA.select("> div > div")
.first().getElementsByTag("img")
.first().attr("src");
BigDecimal productPrice;
try {
productPrice = parseOzonPriceToBigDecimal(
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
.first().text());
} catch (Exception e) {
log.error("не удалось распарсить цену");
continue;
}
String productBrand = productBrandBlockSpans.first().selectFirst("> span > b").text();
String productName = productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME).select("> div > span").text();
ParsedData parsedData = new ParsedData();
parsedData.setCategory(category);
parsedData.setMarketplace(Marketplace.OZON);
parsedData.setUrl(productUrl);
parsedData.setImageUrl(productImageUrl);
parsedData.setPrice(productPrice);
parsedData.setBrand(productBrand);
parsedData.setProductName(productName);
products.add(parsedData);
}
}
} catch (Exception e) {
log.error(e.getMessage(), e);
}*/
return products;
}
private Elements getSearchResultsDivs() {
try {
return document.select(SEARCH_RESULTS_CSS_SELECTOR);
} catch (Exception e) {
log.warn("Не удалось достать блоки searchResultsV2");
return new Elements();
}
}
private Elements getProductsDivs(Element searchResultsDiv) {
return searchResultsDiv.select("> div > div");
}
private List<Elements> getAllProductDataDivs(Elements productsDivs) {
List<Elements> allProductDataDivs = new ArrayList<>();
for (Element productDiv : productsDivs) {
Elements productDataDivs = productDiv.select("> div > *");
if (productDataDivs.select("> *").isEmpty()) {
continue;
}
removeAddInFavouriteDiv(productDataDivs);
allProductDataDivs.add(productDataDivs);
}
return allProductDataDivs;
}
private void removeAddInFavouriteDiv(Elements productDataDivs) {
productDataDivs.removeLast();
}
private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
Category category) {
List<ParsedData> parsedData = new ArrayList<>();
for (Elements productDataDivs : allProductDataDivs) {
try {
ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
parsedData.add(parsedDataItem);
} catch (Exception e) {
//log.error(e.getMessage(), e);
}
}
return parsedData;
}
private ParsedData getParsedDataItem(Elements productDataDivs,
Category category) {
removeExtraDivIfExists(productDataDivs);
return ParsedData.builder()
.category(category)
.marketplace(Marketplace.OZON)
.url(extractUrl(productDataDivs))
.imageUrl(extractImageUrl(productDataDivs))
.brand(extractBrand(productDataDivs))
.productName(extractProductName(productDataDivs))
.price(extractPrice(productDataDivs))
.build();
}
private void removeExtraDivIfExists(Elements productDataDivs) {
Element productDataDiv = productDataDivs.last();
Elements productDataInnerDivs = productDataDiv.select("> *");
try {
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
.select("span").text().toLowerCase()
.contains("осталось")) {
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
}
} catch (Exception ignored) {}
}
private String extractUrl(Elements productDataDivs) {
Element productUrlA = productDataDivs.first();
return OZON_MAIN_LINK + productUrlA
.attr("href").replaceAll("\\?.*$", "");
}
private String extractImageUrl(Elements productDataDivs) {
Element productImageUrlA = productDataDivs.first();
return productImageUrlA.select("> div > div")
.first().getElementsByTag("img")
.first().attr("src");
}
private String extractBrand(Elements productDataDivs) {
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
//log.info(productDataInnerDivs.html());
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
.select("> span");
String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
return "БРЕНД_НЕ_УКАЗАН";
}
return brand;
}
private String extractProductName(Elements productDataDivs) {
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
.select("> div > span").text();
}
private BigDecimal extractPrice(Elements productDataDivs) {
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
return parseOzonPriceToBigDecimal(
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
.first().text());
}
private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
return productDataDivs.last().select("> *");
}
private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
return new BigDecimal(cleanedString);
}
}

View File

@ -0,0 +1,110 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.context.annotation.Profile;
import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
/**
 * Loads an Ozon page in a pooled Selenium driver and returns its HTML.
 * Active under the {@code ozon} profile; failed attempts are retried by Spring Retry.
 */
@Slf4j
@Service
@Profile("ozon")
public class OzonHtmlFetcher {

    /** Used both as the page-load timeout and as the explicit wait timeout. */
    private static final Duration PAGE_TIMEOUT = Duration.of(10, ChronoUnit.SECONDS);

    private final WebDriverPool webDriverPool;
    private final PageScroller pageScroller;

    public OzonHtmlFetcher(WebDriverPool webDriverPool,
                           PageScroller pageScroller) {
        this.webDriverPool = webDriverPool;
        this.pageScroller = pageScroller;
    }

    /**
     * Opens the page, waits until one of the known page kinds is recognised, presses
     * the reload button if access is restricted, scrolls to the end of the page and
     * returns the page source.
     *
     * @param pageUrl            URL of the page to load
     * @param lastPageInCategory set to {@code true} when the "no content" page is recognised
     * @return HTML source of the loaded page
     * @throws RuntimeException wrapping any failure of a single attempt; after the last
     *                          attempt the failure is reported by {@link #recover}
     */
    @Retryable(maxAttempts = 10, recover = "recover")
    public String fetchPageHtml(String pageUrl,
                                AtomicBoolean lastPageInCategory) {
        var driver = webDriverPool.borrowDriver();
        try {
            driver.manage().timeouts().pageLoadTimeout(PAGE_TIMEOUT);
            driver.get(pageUrl);
            WebDriverWait wait = new WebDriverWait(driver, PAGE_TIMEOUT);
            var accessDeniedPage = new AccessDeniedPage(driver, wait);
            var categoryPage = new CategoryPage(driver, wait);
            var noContentPage = new NoContentPage(driver, wait);
            wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
            checkAccessDeniedAndResolve(accessDeniedPage);
            pageScroller.scrollToEndOfPage(driver);
            return driver.getPageSource();
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        } finally {
            // Always hand the driver back, even when the attempt failed.
            webDriverPool.returnDriver(driver);
        }
    }

    /**
     * Wait condition: {@code true} as soon as the page is recognised as the access
     * restricted page, a category page or the "no content" page. The stop flag is
     * raised only in the last case.
     */
    private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
                                               CategoryPage categoryPage,
                                               NoContentPage noContentPage,
                                               AtomicBoolean stopFlag) {
        log.debug("Проверка что страница 'Доступ ограничен'");
        if (checkAccessDeniedPage(accessDeniedPage)) {
            return true;
        }
        log.debug("Проверка что страница 'Страница категории'");
        if (checkCategoryPage(categoryPage)) {
            return true;
        }
        if (checkNoContentPage(noContentPage)) {
            stopFlag.set(true);
            return true;
        }
        log.debug("Проверка загрузки страницы неудачна");
        return false;
    }

    private boolean checkCategoryPage(CategoryPage categoryPage) {
        return categoryPage.isLoaded();
    }

    /** Presses the reload button when the access restricted page is shown. */
    private void checkAccessDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
        if (checkAccessDeniedPage(accessDeniedPage)) {
            log.info("Доступ ограничен, пробуем решить проблему");
            resolveAccessDeniedPage(accessDeniedPage);
            log.info("Проблема успешно решена");
        }
    }

    private boolean checkNoContentPage(NoContentPage noContentPage) {
        if (noContentPage.isLoaded()) {
            log.info("Страница не найдена");
            return true;
        }
        return false;
    }

    private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
        return accessDeniedPage.isLoaded();
    }

    private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
        accessDeniedPage.clickReloadButton();
    }

    /**
     * Recovery handler invoked by Spring Retry once every attempt of
     * {@link #fetchPageHtml} has failed.
     *
     * <p>A recovery method must declare the same return type as the retried method
     * (and may repeat its arguments after the exception); a {@code void} handler is
     * not matched, and the failure then surfaces as "Cannot locate recovery method".
     * The handler logs the failure and rethrows, so callers still see an unchecked
     * exception rather than a made-up page.
     *
     * @throws IllegalStateException always, wrapping the last failure
     */
    @Recover
    public String recover(Exception e, String pageUrl, AtomicBoolean lastPageInCategory) {
        log.error("Все ретраи провалились");
        throw new IllegalStateException("Failed to fetch page after all retries: " + pageUrl, e);
    }
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.List;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
/**
 * Thin service wrapper around {@link OzonCategoryPage}.
 */
@Service
public class OzonPageParser {

    /**
     * Parses the products out of the HTML of a category page.
     *
     * @param pageSource HTML source of the category page
     * @param category   category assigned to the parsed products
     * @return the products found on the page
     */
    public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
                                                          Category category) {
        return new OzonCategoryPage(pageSource).getProducts(category);
    }
}

View File

@ -0,0 +1,114 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.MDC;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.service.ProductService;
@Slf4j
@Service
@Profile("ozon")
public class OzonParsingService {
private final Map<String, Set<String>> urlCache;
private final ExecutorService pageExecutorService;
private final Semaphore semaphore;
private final OzonHtmlFetcher categoryPageParsingService;
private final OzonConfigProperties ozonConfigProperties;
private final OzonPageParser ozonPageParser;
private final ProductService productService;
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
ProductService productService) {
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
this.urlCache = new ConcurrentHashMap<>();
for (OzonCategory category : OzonCategory.values()) {
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
}
this.categoryPageParsingService = categoryPageParsingService;
this.ozonConfigProperties = ozonConfigProperties;
this.ozonPageParser = ozonPageParser;
this.productService = productService;
}
public void startProcessing() {
for (OzonCategory category : OzonCategory.values()) {
log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", category);
processCategory(category);
}
}
private void processCategory(OzonCategory category) {
int pageIndex = 1;
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
while (!lastPageInCategory.get()) {
try {
semaphore.acquire();
int finalPageIndex = pageIndex;
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
if (lastPageInCategory.get()) {
log.info("Достигли последней страницы категории");
}
}
private void processCategoryPage(String pageUrl,
OzonCategory category,
AtomicBoolean lastPageInCategory) {
try {
MDC.put("pageUrl", pageUrl);
String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
List<ParsedData> parsedProducts =
ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
log.info("""
КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
""", parsedProducts.size());
if (urlCache.size() > 1000000) {
urlCache.clear();
}
Set<String> categoryCachecUrl = urlCache.get(category.getCategoryUrl());
List<ParsedData> uniqueData = parsedProducts.stream()
.filter(data -> categoryCachecUrl.add(data.getUrl()))
.toList();
productService.saveBatch(uniqueData);
} finally {
MDC.clear();
semaphore.release();
}
}
}

View File

@ -0,0 +1,56 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.concurrent.atomic.AtomicLong;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.springframework.stereotype.Service;
/**
 * Scrolls a browser page to the bottom until its content stops growing.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PageScroller {
    private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
    private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";

    /**
     * Repeatedly scrolls to the bottom of the page.
     *
     * <p>The loop ends when the paginator links can be located while the page height
     * has grown since the last recorded height, or after 100 consecutive iterations
     * (one second apart) without any growth.
     *
     * @param driver browser driver; must also implement {@link JavascriptExecutor}
     * @throws InterruptedException if the thread is interrupted while waiting between attempts
     */
    public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
        JavascriptExecutor scriptRunner = (JavascriptExecutor) driver;
        long knownHeight = (long) scriptRunner.executeScript(ALL_CONTENT_PAGE_HEIGHT);
        int remainingAttempts = 100;
        log.info("Начинаем пролистывать страницу до конца");
        for (;;) {
            scriptRunner.executeScript(SCROLL_TO_PAGE_HEIGHT);
            long currentHeight = (long) scriptRunner.executeScript(ALL_CONTENT_PAGE_HEIGHT);
            boolean pageGrew = currentHeight > knownHeight;

            // The paginator lookup is performed on every iteration, before the growth check.
            if (paginatorLinksResolved(driver) && pageGrew) {
                log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
                break;
            }

            if (pageGrew) {
                // New content arrived: start counting idle attempts from scratch.
                remainingAttempts = 100;
                knownHeight = currentHeight;
                continue;
            }

            remainingAttempts--;
            Thread.sleep(1000);
            if (remainingAttempts == 0) {
                break;
            }
        }
    }

    /**
     * Tries to locate the link elements inside the second child of the paginator widget.
     *
     * @return {@code true} when the lookup completes without an exception,
     *         {@code false} when any exception is raised along the way
     */
    private boolean paginatorLinksResolved(WebDriver driver) {
        try {
            var links = driver.findElements(By.cssSelector("div[data-widget='megaPaginator'] > div"))
                .get(1)
                .findElement(By.cssSelector(":scope > div > div > div"))
                .findElements(By.tagName("a"));
            return links != null;
        } catch (Exception ignored) {
            // Paginator not rendered (yet); treated the same as "not found".
            return false;
        }
    }
}

View File

@ -0,0 +1,21 @@
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
/**
 * Periodically triggers a full Ozon parsing run (active only under the {@code ozon} profile).
 */
@Service
@Profile("ozon")
@RequiredArgsConstructor
public class OzonProductUpdater {

    /** Scheduling rate in milliseconds (two hours). */
    private static final long UPDATE_RATE_MS = 7_200_000L;

    private final OzonParsingService ozonParsingService;

    /**
     * Starts processing of all Ozon categories at a fixed rate.
     */
    @Scheduled(fixedRate = UPDATE_RATE_MS)
    public void updateOzonProducts() {
        ozonParsingService.startProcessing();
    }
}

View File

@ -0,0 +1,4 @@
package ru.pricepulse.parsingservice.ozon_parser.service.task;
/**
 * Empty placeholder type: it currently declares no fields or methods.
 */
public class OzonParsingTask {
}

View File

@ -0,0 +1,64 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.EmbeddedId;
import jakarta.persistence.Entity;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
/**
 * JPA entity for a row of the {@code price_history} table: a price recorded for a
 * product URL at a point in time (both held in the embedded {@link PriceHistoryId}).
 */
@Getter
@Setter
@Entity
@Table(name = "price_history")
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class PriceHistoryEntity {

    @EmbeddedId
    private PriceHistoryId id;

    @Column(name = "price", nullable = false, precision = 10, scale = 2)
    private BigDecimal price;

    /**
     * Two entities are equal when they are of the same persistent class
     * (Hibernate proxies are unwrapped) and have equal, non-null ids.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || persistentClassOf(this) != persistentClassOf(o)) {
            return false;
        }
        PriceHistoryEntity other = (PriceHistoryEntity) o;
        return getId() != null && Objects.equals(getId(), other.getId());
    }

    /** Hash derived from the embedded id only. */
    @Override
    public final int hashCode() {
        return Objects.hash(id);
    }

    /**
     * Before the entity is first persisted, stamps the id with the current time
     * if no date has been set.
     */
    @PrePersist
    protected void onCreate() {
        if (id.getDate() != null) {
            return;
        }
        id.setDate(ZonedDateTime.now());
    }

    /** Resolves the real entity class, looking through a Hibernate proxy if present. */
    private static Class<?> persistentClassOf(Object candidate) {
        if (candidate instanceof HibernateProxy proxy) {
            return proxy.getHibernateLazyInitializer().getPersistentClass();
        }
        return candidate.getClass();
    }
}

View File

@ -0,0 +1,51 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.io.Serializable;
import java.time.ZonedDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Embeddable;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
/**
 * Composite primary key of a price-history row: the product URL plus the
 * timestamp at which the price was recorded.
 *
 * <p>Equality and hashing cover <em>both</em> key columns. JPA providers rely on
 * the id's {@code equals}/{@code hashCode} to tell persistent instances apart,
 * so comparing only the date would treat rows of different products recorded
 * at the same instant as the same row.
 */
@Getter
@Setter
@AllArgsConstructor
@NoArgsConstructor
@Embeddable
public class PriceHistoryId implements Serializable {

    // Not marked unique: a product has many history rows; uniqueness is provided
    // by the composite key (product_url, date).
    @Column(name = "product_url", nullable = false)
    private String productUrl;

    @Column(name = "date", nullable = false)
    private ZonedDateTime date;

    /**
     * Value equality over {@code productUrl} and {@code date}.
     *
     * @param o object to compare with
     * @return {@code true} when {@code o} is a {@code PriceHistoryId} with equal
     *         product URL and date
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null) {
            return false;
        }
        Class<?> oEffectiveClass = o instanceof HibernateProxy
            ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass()
            : o.getClass();
        Class<?> thisEffectiveClass = this instanceof HibernateProxy
            ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass()
            : this.getClass();
        if (thisEffectiveClass != oEffectiveClass) {
            return false;
        }
        PriceHistoryId that = (PriceHistoryId) o;
        return Objects.equals(getProductUrl(), that.getProductUrl())
            && Objects.equals(getDate(), that.getDate());
    }

    /** Hash over the same fields that {@link #equals(Object)} compares. */
    @Override
    public final int hashCode() {
        return Objects.hash(productUrl, date);
    }
}

View File

@ -0,0 +1,81 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.time.LocalDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * JPA entity for a row of the {@code product} table: a marketplace product
 * identified by a generated id and a unique URL.
 */
@Getter
@Setter
@Entity
@Table(name = "product")
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ProductEntity {

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id", nullable = false)
    private Long id;

    @Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
    @Enumerated(EnumType.STRING)
    private Marketplace marketplace;

    @Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
    @Enumerated(EnumType.STRING)
    private Category category;

    @Column(name = "brand", nullable = false, length = Integer.MAX_VALUE)
    private String brand;

    @Column(name = "product_name", nullable = false, length = Integer.MAX_VALUE)
    private String productName;

    @Column(name = "created_at", nullable = false)
    private LocalDateTime createdAt;

    @Column(name = "url", nullable = false, unique = true)
    private String url;

    // NOTE(review): the column name uses a hyphen while the others use underscores -
    // confirm it matches the actual schema.
    @Column(name = "image-url", nullable = false)
    private String imageUrl;

    /**
     * Two entities are equal when they are of the same persistent class
     * (Hibernate proxies are unwrapped) and have equal, non-null ids.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || persistentClassOf(this) != persistentClassOf(o)) {
            return false;
        }
        ProductEntity other = (ProductEntity) o;
        return getId() != null && Objects.equals(getId(), other.getId());
    }

    /** Hash of the persistent class, so it does not change when the id is generated. */
    @Override
    public final int hashCode() {
        return persistentClassOf(this).hashCode();
    }

    /** Sets {@code createdAt} to the current time right before the first persist. */
    @PrePersist
    protected void onCreate() {
        createdAt = LocalDateTime.now();
    }

    /** Resolves the real entity class, looking through a Hibernate proxy if present. */
    private static Class<?> persistentClassOf(Object candidate) {
        if (candidate instanceof HibernateProxy proxy) {
            return proxy.getHibernateLazyInitializer().getPersistentClass();
        }
        return candidate.getClass();
    }
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.persistence.repository;
import java.time.ZonedDateTime;
import java.util.List;
import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
/**
 * Spring Data repository for {@link PriceHistoryEntity} rows keyed by {@link PriceHistoryId}.
 */
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
/**
 * Derived query: price-history rows of one product whose {@code id.date} lies
 * after {@code from} and before {@code to}, ordered by {@code id.date} ascending.
 *
 * <p>The method name is the query definition; the mixed {@code Id_Date} / {@code IdDate}
 * spelling both refer to the nested {@code id.date} property.
 *
 * @param productUrl value matched against {@code id.productUrl}
 * @param from       lower bound for {@code id.date} ({@code After} keyword)
 * @param to         upper bound for {@code id.date} ({@code Before} keyword)
 * @return matching rows in ascending date order
 */
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
ZonedDateTime from,
ZonedDateTime to);
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.persistence.repository;
import java.util.List;
import java.util.Optional;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
/**
 * Spring Data repository for {@link ProductEntity}.
 */
@Repository
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
/** Returns the products whose URL is contained in {@code urls}. */
List<ProductEntity> findAllByUrlIn(List<String> urls);
/**
 * Returns only the URLs (not whole entities) of products whose URL is contained in {@code urls}.
 *
 * <p>NOTE(review): the {@code :urls} parameter is bound by method-parameter name without
 * {@code @Param}; presumably the build compiles with {@code -parameters} - confirm.
 */
@Query("""
select p.url from ProductEntity p where p.url in :urls
""")
List<String> findSavedUrl(List<String> urls);
/** Looks up a single product by its URL. */
Optional<ProductEntity> findByUrl(String url);
/** Returns one page of the products that belong to the given marketplace and category. */
Page<ProductEntity> findAllByMarketplaceAndCategory(Marketplace marketplace, Category category, Pageable pageable);
}

View File

@ -0,0 +1,105 @@
package ru.pricepulse.parsingservice.service;
import java.time.ZonedDateTime;
import java.util.List;
import jakarta.persistence.EntityNotFoundException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.Pageable;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
/**
 * Persists parsed products with their prices and serves read queries for
 * products and price history.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ProductService {
    private final ProductRepository productRepository;
    private final ProductPriceRepository productPriceRepository;
    private final ProductMapper productMapper;
    private final PriceHistoryMapper priceHistoryMapper;

    /**
     * Stores a batch of parsed products.
     *
     * <p>A product row is created only for URLs that are neither stored yet nor
     * already seen earlier in the same batch; a price-history row is created for
     * every element of the batch. A {@code null} or empty batch is a no-op, which
     * also avoids running the {@code in :urls} lookup with an empty list.
     *
     * @param parsedData parsed products; may be empty
     */
    @Transactional
    @Retryable
    public void saveBatch(List<ParsedData> parsedData) {
        if (parsedData == null || parsedData.isEmpty()) {
            return;
        }
        List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
        // Fully qualified to keep the file's import list untouched. A hash set gives O(1)
        // membership checks, and add() returning false also filters duplicates inside the
        // batch, which would otherwise violate the unique constraint on product.url.
        var knownUrls = new java.util.HashSet<>(productRepository.findSavedUrl(productsUrls));
        List<ProductEntity> products = parsedData.stream()
            .filter(data -> knownUrls.add(data.getUrl()))
            .map(this::getProduct)
            .toList();
        List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();
        productRepository.saveAll(products);
        log.info("Сохранили пачку товаров {}", products.size());
        productPriceRepository.saveAll(prices);
        log.info("Сохранили историю цен {}", prices.size());
    }

    /**
     * Finds a product by URL.
     *
     * @param productUrl URL of the product
     * @return the product as a DTO
     * @throws EntityNotFoundException if no product with this URL exists
     */
    @Transactional(readOnly = true)
    public ProductDto findByUrl(String productUrl) {
        var product = productRepository.findByUrl(productUrl).orElseThrow(EntityNotFoundException::new);
        return productMapper.toProductDto(product);
    }

    /**
     * Returns the price history of a product between two timestamps, as produced by
     * the repository's {@code After}/{@code Before} query ordered by date ascending.
     *
     * @param productUrl URL of the product
     * @param from       lower bound of the range
     * @param to         upper bound of the range
     * @return the mapped price history
     */
    @Transactional(readOnly = true)
    public PriceHistoryDto findPriceHistoryByRange(String productUrl,
                                                   ZonedDateTime from,
                                                   ZonedDateTime to) {
        var priceHistory = productPriceRepository
            .findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
        return priceHistoryMapper.toPriceHistoryDto(priceHistory);
    }

    /**
     * Returns one page of products of the given marketplace and category.
     *
     * @param marketplace marketplace filter
     * @param category    category filter
     * @param pageable    paging parameters
     * @return page DTO carrying the total number of matching products, the total
     *         number of pages, the current page index and the page content
     */
    @Transactional(readOnly = true)
    public ProductsPageDto findAllProductsByPage(Marketplace marketplace,
                                                 Category category,
                                                 Pageable pageable) {
        var page = productRepository.findAllByMarketplaceAndCategory(marketplace, category, pageable);
        // getTotalElements() is the count across all pages; getNumberOfElements() would only
        // repeat the size of the current page. Clamped because the DTO field is an int.
        int totalItems = (int) Math.min(page.getTotalElements(), Integer.MAX_VALUE);
        return new ProductsPageDto(
            totalItems,
            page.getTotalPages(),
            page.getNumber(),
            page.getContent().stream().map(productMapper::toProductDto).toList()
        );
    }

    /** Builds a price-history row for the product, stamped with the current time. */
    private PriceHistoryEntity getPriceHistory(ParsedData product) {
        var priceHistoryId = new PriceHistoryId();
        priceHistoryId.setProductUrl(product.getUrl());
        priceHistoryId.setDate(ZonedDateTime.now());
        var priceHistory = new PriceHistoryEntity();
        priceHistory.setId(priceHistoryId);
        priceHistory.setPrice(product.getPrice());
        return priceHistory;
    }

    /** Copies the descriptive fields of a parsed product into a new, unsaved entity. */
    private ProductEntity getProduct(ParsedData product) {
        var productEntity = new ProductEntity();
        productEntity.setCategory(product.getCategory());
        productEntity.setBrand(product.getBrand());
        productEntity.setProductName(product.getProductName());
        productEntity.setUrl(product.getUrl());
        productEntity.setMarketplace(product.getMarketplace());
        productEntity.setImageUrl(product.getImageUrl());
        return productEntity;
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.service.dto;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
import lombok.Setter;
/**
 * Price history of a product: a map from timestamp to the price recorded at that time.
 *
 * <p>The map keeps insertion order, so entries added in chronological order
 * are also iterated (and serialized) in chronological order.
 */
@Getter
@Setter
public class PriceHistoryDto {
    private final Map<ZonedDateTime, BigDecimal> priceHistory;

    /** Creates an empty, insertion-ordered price history. */
    public PriceHistoryDto() {
        // Fully qualified to keep the file's import list untouched. A plain HashMap
        // would discard the order in which the entries were added.
        this.priceHistory = new java.util.LinkedHashMap<>();
    }
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.service.dto;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * Immutable data-transfer view of a product. All fields are final and are set
 * through the constructor generated by {@code @RequiredArgsConstructor}.
 */
@Getter
@Setter
@RequiredArgsConstructor
public class ProductDto {
// Identifier of the product.
private final Long id;
// Marketplace the product belongs to.
private final Marketplace marketplace;
// Category of the product.
private final Category category;
// Brand name.
private final String brand;
// Display name of the product.
private final String productName;
// URL of the product.
private final String url;
// URL of the product image.
private final String imageUrl;
}

View File

@ -0,0 +1,22 @@
package ru.pricepulse.parsingservice.service.dto;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
/**
 * One page of products together with paging metadata. All fields are final and
 * are set through the all-args constructor.
 */
@Getter
@Setter
@AllArgsConstructor
public class ProductsPageDto {
// Item count supplied by the caller.
private final int totalItems;
// Number of pages available for the query.
private final int totalPages;
// Index of the page represented by this DTO.
private final int currentPage;
// Products contained in this page.
private final List<ProductDto> products;
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.service.mapper;
import java.util.List;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
/**
 * Converts price-history entities into a {@link PriceHistoryDto}.
 */
@Component
public class PriceHistoryMapper {

    /**
     * Copies every entity into the DTO's map, keyed by the entity's timestamp
     * with the nanosecond part set to zero.
     *
     * <p>Because keys are truncated, a later entity whose timestamp truncates to
     * an already present key replaces the earlier price.
     *
     * @param priceHistory entities to convert
     * @return DTO holding one map entry per distinct truncated timestamp
     */
    public PriceHistoryDto toPriceHistoryDto (List<PriceHistoryEntity> priceHistory) {
        var result = new PriceHistoryDto();
        var pricesByTimestamp = result.getPriceHistory();
        for (PriceHistoryEntity record : priceHistory) {
            var timestamp = record.getId().getDate().withNano(0);
            pricesByTimestamp.put(timestamp, record.getPrice());
        }
        return result;
    }
}

View File

@ -0,0 +1,22 @@
package ru.pricepulse.parsingservice.service.mapper;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
/**
 * Converts {@link ProductEntity} instances into {@link ProductDto} objects.
 */
@Component
public class ProductMapper {
/**
 * Builds a DTO from the entity's id, marketplace, category, brand, product name,
 * URL and image URL.
 *
 * @param product entity to convert
 * @return DTO carrying the listed fields of {@code product}
 */
public ProductDto toProductDto(ProductEntity product) {
return new ProductDto(
product.getId(),
product.getMarketplace(),
product.getCategory(),
product.getBrand(),
product.getProductName(),
product.getUrl(),
product.getImageUrl()
);
}
}

View File

@ -0,0 +1,58 @@
package ru.pricepulse.parsingservice.service.scheduler;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
/**
 * Makes sure the {@code price_history_*} partitions for the current and the next
 * month exist: once at startup and then on a monthly schedule.
 */
@Slf4j
@Service
@RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionScheduler {
    private final PartitionService partitionService;
    private final DateTimeFormatter partitionDateTimeFormatter;

    /** Runs the partition check right after the bean has been constructed. */
    @PostConstruct
    public void init() {
        checkAndCreateMonthlyPartitions();
    }

    /** Scheduled entry point, triggered by the {@code @monthly} cron macro. */
    @Scheduled(cron = "@monthly")
    public void checkAndCreatePartitionsMonthly() {
        checkAndCreateMonthlyPartitions();
    }

    /**
     * Checks, and creates when missing, the partitions that start on the first day
     * of the current month and of the following month.
     */
    public void checkAndCreateMonthlyPartitions() {
        LocalDate firstDayOfCurrentMonth = LocalDate.now().withDayOfMonth(1);
        for (int monthOffset = 0; monthOffset <= 1; monthOffset++) {
            LocalDate monthStart = firstDayOfCurrentMonth.plusMonths(monthOffset);
            checkAndCreatePartition(getPartitionName(monthStart), monthStart);
        }
    }

    /** Partition name: {@code price_history_} followed by the formatted date. */
    private String getPartitionName(LocalDate date) {
        String suffix = partitionDateTimeFormatter.format(date);
        return "price_history_" + suffix;
    }

    /**
     * Creates the partition covering one month from {@code startDate} unless it already exists.
     *
     * @param partitionName name of the partition to check or create
     * @param startDate     first day covered by the partition
     */
    private void checkAndCreatePartition(String partitionName, LocalDate startDate) {
        if (partitionService.checkPartitionExists(partitionName)) {
            log.info("Партиция {} уже существует.", partitionName);
            return;
        }
        LocalDate endDate = startDate.plusMonths(1);
        partitionService.createPartition(partitionName, startDate.toString(), endDate.toString());
        log.info("Партиция {} создана для диапазона: {} - {} ", partitionName, startDate, endDate);
    }
}

View File

@ -0,0 +1,60 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import jakarta.persistence.EntityNotFoundException;
import jakarta.servlet.http.HttpServletRequest;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ControllerAdvice;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseStatus;
/**
 * Translates exceptions raised by controllers into {@link ErrorResponse} bodies
 * with a matching HTTP status.
 */
@ControllerAdvice
public class CommonExceptionHandler {

    /** Maps {@link IllegalArgumentException} to 400 Bad Request. */
    @ExceptionHandler(IllegalArgumentException.class)
    @ResponseStatus(HttpStatus.BAD_REQUEST)
    public ResponseEntity<ErrorResponse> exceptionHandler(Exception ex,
                                                          HttpServletRequest request) {
        return toErrorResponse(ex, request, HttpStatus.BAD_REQUEST);
    }

    /** Maps {@link EntityNotFoundException} to 404 Not Found. */
    @ExceptionHandler(EntityNotFoundException.class)
    @ResponseStatus(HttpStatus.NOT_FOUND)
    public ResponseEntity<ErrorResponse> handleNotFoundException(Exception ex,
                                                                 HttpServletRequest request) {
        return toErrorResponse(ex, request, HttpStatus.NOT_FOUND);
    }

    /** Fallback: maps any other exception to 500 Internal Server Error. */
    @ExceptionHandler(Exception.class)
    @ResponseStatus(HttpStatus.INTERNAL_SERVER_ERROR)
    public ResponseEntity<ErrorResponse> handleInternalServerErrorException(Exception ex,
                                                                            HttpServletRequest request) {
        return toErrorResponse(ex, request, HttpStatus.INTERNAL_SERVER_ERROR);
    }

    // Handlers for 403 / 401 are currently disabled.
    /*@ExceptionHandler(AccessDeniedException.class)
    @ResponseStatus(HttpStatus.FORBIDDEN)
    public ResponseEntity<ErrorResponse> handleForbiddenException(Exception ex,
    HttpServletRequest request) {
    return handleException(HttpStatus.FORBIDDEN, request, ex);
    }
    @ExceptionHandler(AuthenticationException.class)
    @ResponseStatus(HttpStatus.UNAUTHORIZED)
    public ResponseEntity<ErrorResponse> handleUnauthorizedException(Exception ex,
    HttpServletRequest request) {
    return handleException(HttpStatus.UNAUTHORIZED, request, ex);
    }*/

    /**
     * Builds the response: status code, status, request URI and the exception message.
     *
     * @param ex      exception being reported
     * @param request request during which the exception occurred
     * @param status  HTTP status to answer with
     * @return response entity carrying the {@link ErrorResponse}
     */
    private ResponseEntity<ErrorResponse> toErrorResponse(Exception ex,
                                                          HttpServletRequest request,
                                                          HttpStatus status) {
        URI requestUri = URI.create(request.getRequestURI());
        ErrorResponse body = new ErrorResponse(status.value(), status, requestUri, ex.getMessage());
        return ResponseEntity.status(status).body(body);
    }
}

View File

@ -0,0 +1,13 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import org.springframework.http.HttpStatus;
/**
 * Error payload returned to API clients.
 *
 * @param statusCode numeric status code
 * @param status     HTTP status
 * @param requestURI URI of the request the error relates to
 * @param message    error message
 */
public record ErrorResponse (
Integer statusCode,
HttpStatus status,
URI requestURI,
String message
){
}

View File

@ -0,0 +1,27 @@
package ru.pricepulse.parsingservice.web.rest;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
/**
 * REST endpoint listing the available categories.
 */
@RestController
@RequestMapping("/api/v1/categories")
@RequiredArgsConstructor
public class CategoryApi {
    private final OzonService ozonService;

    /**
     * Returns the categories for a marketplace.
     *
     * @param marketplace marketplace to list categories for; may be {@code null}
     * @return the Ozon service's categories when {@code marketplace} is {@code OZON},
     *         otherwise all {@link Category} values
     */
    @GetMapping
    public ResponseEntity<?> getCategories(Marketplace marketplace) {
        // Identity comparison on enum constants is null-safe.
        if (marketplace != Marketplace.OZON) {
            return ResponseEntity.ok(Category.values());
        }
        return ResponseEntity.ok(ozonService.getCategories());
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.web.rest;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * REST endpoint listing the supported marketplaces.
 */
@RestController
@RequestMapping("/api/v1/marketplaces")
@RequiredArgsConstructor
public class MarketplaceApi {
/**
 * Returns every constant of the {@link Marketplace} enum.
 *
 * @return 200 OK with all marketplace values
 */
@GetMapping
public ResponseEntity<Marketplace[]> getMarketplace() {
return ResponseEntity.ok(Marketplace.values());
}
}

View File

@ -0,0 +1,52 @@
package ru.pricepulse.parsingservice.web.rest;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import lombok.RequiredArgsConstructor;
import org.springframework.data.domain.Pageable;
import org.springframework.format.annotation.DateTimeFormat;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.service.ProductService;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
/**
 * REST endpoints for product details, price history and paged product listings.
 */
@RestController
@RequestMapping("/api/v1/products")
@RequiredArgsConstructor
public class ProductApi {
    private final ProductService productService;

    /**
     * Returns the product stored under the given URL.
     *
     * @param productUrl URL of the product
     * @return 200 OK with the product
     */
    @GetMapping("/info")
    public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
        return ResponseEntity.ok(productService.findByUrl(productUrl));
    }

    /**
     * Returns the price history of a product between the start of day {@code from}
     * and the start of day {@code to}, both taken in the requested zone offset.
     *
     * @param productUrl URL of the product
     * @param from       first day of the range (ISO date)
     * @param to         day at whose start the range ends (ISO date)
     * @param zoneOffset optional offset such as {@code +03:00} or {@code Z};
     *                   UTC is used when it is absent or blank
     * @return 200 OK with the price history
     * @throws IllegalArgumentException if {@code zoneOffset} is not a valid offset
     */
    @GetMapping("/price-history")
    public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(@RequestParam String productUrl,
            @RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
            @RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
            String zoneOffset) {
        ZoneOffset zone = resolveZoneOffset(zoneOffset);
        ZonedDateTime fromDateTime = from.atStartOfDay(zone);
        ZonedDateTime toDateTime = to.atStartOfDay(zone);
        return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
    }

    /**
     * Returns one page of products of the given marketplace and category.
     *
     * @param marketplace marketplace filter
     * @param category    category filter
     * @param pageable    paging parameters
     * @return 200 OK with the requested page
     */
    @GetMapping
    public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Marketplace marketplace,
                                                                           Category category,
                                                                           Pageable pageable) {
        return ResponseEntity.ok(productService.findAllProductsByPage(marketplace, category, pageable));
    }

    /**
     * Parses the optional offset parameter.
     *
     * <p>A missing or blank value falls back to UTC instead of failing with a
     * {@code NullPointerException}. A malformed value is reported as an
     * {@link IllegalArgumentException} so it surfaces as a client error.
     */
    private static ZoneOffset resolveZoneOffset(String zoneOffset) {
        if (zoneOffset == null || zoneOffset.isBlank()) {
            return ZoneOffset.UTC;
        }
        String normalized = zoneOffset.strip();
        // An unencoded '+' in a query string is decoded as a space, so "+03:00" arrives
        // as " 03:00"; an unsigned offset is therefore read as a positive one.
        if (Character.isDigit(normalized.charAt(0))) {
            normalized = "+" + normalized;
        }
        try {
            return ZoneOffset.of(normalized);
        } catch (java.time.DateTimeException e) {
            throw new IllegalArgumentException("Invalid zoneOffset: " + zoneOffset, e);
        }
    }
}

View File

@ -0,0 +1,26 @@
package ru.pricepulse.parsingservice.wildberries_parser.converter;
import java.time.LocalDateTime;
import org.springframework.core.convert.converter.Converter;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
/**
 * Converts a Wildberries {@link ProductInfoDto} into a {@link ProductEntity}.
 */
@Component
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {

    /**
     * Builds an entity with a fixed marketplace ({@code WILDBERRIES}), a fixed
     * category ({@code LAPTOP}), the brand and name of the source, the current
     * time as creation time and an empty image URL.
     *
     * <p>NOTE(review): the entity's {@code url} is not populated here - confirm the
     * caller sets it before the entity is saved.
     *
     * @param source parsed Wildberries product info
     * @return a new, unsaved entity
     */
    @Override
    public ProductEntity convert(ProductInfoDto source) {
        var entityBuilder = ProductEntity.builder();
        entityBuilder.marketplace(Marketplace.WILDBERRIES);
        entityBuilder.category(Category.LAPTOP);
        entityBuilder.brand(source.getBrand());
        entityBuilder.productName(source.getName());
        entityBuilder.createdAt(LocalDateTime.now());
        entityBuilder.imageUrl("");
        return entityBuilder.build();
    }
}

View File

@ -0,0 +1,115 @@
package ru.pricepulse.parsingservice.wildberries_parser.proxy;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
/**
 * Utility for reading a list of HTTP proxies ({@code host:port} per line), probing
 * them concurrently and writing the working ones back to a file.
 */
public class ProxyChecker {
    /** Connect and read timeout for a probe request, in milliseconds. */
    private static final int TIMEOUT = 2000;
    /** Number of proxies probed in parallel. */
    private static final int THREAD_COUNT = 30;
    /** Highest valid TCP port number. */
    private static final int MAX_PORT = 65535;

    /**
     * Reads proxy addresses from a text file, one per line.
     *
     * <p>Surrounding whitespace is removed and blank lines are skipped.
     *
     * @param filePath path of the file to read
     * @return the non-blank lines of the file, or an empty mutable list if the file cannot be read
     */
    public static List<String> readProxiesFromFile(String filePath) {
        try {
            return Files.readAllLines(Paths.get(filePath)).stream()
                .map(String::strip)
                .filter(line -> !line.isEmpty())
                .collect(Collectors.toList());
        } catch (IOException e) {
            System.err.println("Ошибка при чтении файла: " + e.getMessage());
            return new ArrayList<>();
        }
    }

    /**
     * Probes all proxies concurrently and returns the ones that answered successfully.
     *
     * <p>The thread pool is always shut down, also when probing fails unexpectedly.
     * If the calling thread is interrupted while waiting, the interrupt flag is
     * restored, outstanding probes are cancelled and the results collected so far
     * are returned.
     *
     * @param proxies proxy addresses in {@code host:port} form
     * @return working proxies, in the order of the input list
     */
    public static List<String> checkProxies(List<String> proxies) {
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        try {
            List<Future<String>> futures = new ArrayList<>();
            // Submit one probe per proxy to the thread pool.
            for (String proxyAddress : proxies) {
                futures.add(executor.submit(() -> isProxyWorking(proxyAddress) ? proxyAddress : null));
            }

            // Collect the results in submission order.
            List<String> workingProxies = new ArrayList<>();
            for (Future<String> future : futures) {
                try {
                    String workingProxy = future.get();
                    if (workingProxy != null) {
                        workingProxies.add(workingProxy);
                    }
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                    executor.shutdownNow();
                    break;
                } catch (Exception e) {
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                }
            }
            return workingProxies;
        } finally {
            executor.shutdown();
        }
    }

    /**
     * Checks a single proxy by requesting {@code http://www.google.com} through it.
     *
     * @param proxyAddress proxy in {@code host:port} form
     * @return {@code true} only when the request through the proxy returns HTTP 200
     */
    private static boolean isProxyWorking(String proxyAddress) {
        if (proxyAddress == null) {
            System.err.println("Некорректный формат прокси: " + proxyAddress);
            return false;
        }
        String[] parts = proxyAddress.split(":");
        if (parts.length != 2) {
            System.err.println("Некорректный формат прокси: " + proxyAddress);
            return false;
        }

        String ip = parts[0];
        int port;
        try {
            port = Integer.parseInt(parts[1]);
        } catch (NumberFormatException e) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }
        // InetSocketAddress rejects out-of-range ports with an unchecked exception;
        // report them like any other malformed port instead.
        if (port < 0 || port > MAX_PORT) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }

        HttpURLConnection connection = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            URL url = new URL("http://www.google.com");
            connection = (HttpURLConnection) url.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT);
            connection.setReadTimeout(TIMEOUT);
            connection.setRequestMethod("GET");
            connection.connect();

            int responseCode = connection.getResponseCode();
            // Only a plain 200 counts; codes such as 403, 407, 500 or 501 are treated as failures.
            if (responseCode == HttpURLConnection.HTTP_OK) {
                System.out.println("Прокси работает (код ответа " + responseCode + "): " + proxyAddress);
                return true;
            }
            System.out.println("Прокси не отвечает (код ответа " + responseCode + "): " + proxyAddress);
            return false;
        } catch (IOException e) {
            System.out.println("Прокси не отвечает: " + proxyAddress);
            return false;
        } finally {
            // Release the underlying socket; otherwise every probe leaks a connection.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Writes the proxies to a file, one per line, replacing existing content.
     *
     * @param proxies  proxy addresses to write
     * @param filePath target file
     */
    public static void saveProxiesToFile(List<String> proxies, Path filePath) {
        try (BufferedWriter writer = Files.newBufferedWriter(filePath)) {
            for (String proxy : proxies) {
                writer.write(proxy);
                writer.newLine();
            }
        } catch (IOException e) {
            System.err.println("Ошибка при записи в файл: " + e.getMessage());
        }
    }
}

View File

@ -0,0 +1,24 @@
package ru.pricepulse.parsingservice.wildberries_parser.scheduler;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
/**
 * Periodically triggers Wildberries parsing; the bean exists only when the
 * {@code marketplace.wildberries.status} property is {@code true}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
@ConditionalOnProperty(prefix = "marketplace.wildberries", name = "status", havingValue = "true")
public class WildberriesProductUpdater {

    /** Scheduling rate in milliseconds (one hour). */
    private static final long UPDATE_RATE_MS = 3_600_000L;

    private final ParsingService parsingService;

    /**
     * Runs the parsing service at a fixed rate, logging before and after the run.
     */
    @Scheduled(fixedRate = UPDATE_RATE_MS)
    public void updateWildberriesProducts() {
        log.info("Начинаем отладку...");
        parsingService.parse();
        log.info("Заканчиваем отладку...");
    }
}

Some files were not shown because too many files have changed in this diff Show More