Compare commits

...

21 Commits

Author SHA1 Message Date
danil.markov
8afc758987 Feature/parsing-service: save 2024-11-13 14:20:24 +04:00
danil.markov
4f5dda4dbf Merge branch 'feature/ozon-parser-v0.1' into feature/parsing-service
# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/config/DynamicProxyInterceptor.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/config/ProxyProvider.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/wildberries_parser/service/client/ClientImpl.java
2024-11-13 14:00:53 +04:00
danil.markov
59c41a4912 Feature/parsing-service: save 2024-11-13 13:59:52 +04:00
c4bb7a5ffa features: add proxy checking 2024-11-13 13:28:14 +04:00
danil.markov
fd71513bbf Feature/ozon-parser-v0.1: правка апишки 2024-11-12 22:40:34 +04:00
danil.markov
171cc650f1 Feature/ozon-parser-v0.1: Версия рабочая, парсит быстро + api 2024-11-12 15:40:00 +04:00
1df7dc94b8 features: change parse logic 2024-10-15 12:13:46 +04:00
danil.markov
42d947440c Feature/parsing-service: fix after pull 2024-10-15 11:34:24 +04:00
danil.markov
83b1c5d72c Merge remote-tracking branch 'origin/feature/parsing-service' into feature/parsing-service
# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/pool/WebDriverPool.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/parsing/ParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/parsing/OzonCategoryPageParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/scheduler/OzonProductUpdater.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/persistence/repository/ProductRepository.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/wildberries_parser/DebugRunner.java
2024-10-15 10:12:04 +04:00
danil.markov
82f648e16c Feature/parsing-service: save commit 2024-10-15 10:10:11 +04:00
84e0af60c9 features: add proxy, change webClient to restTemplate, add checking 2024-10-15 09:55:32 +04:00
danil.markov
9895aaff33 Feature/parsing-service: save commit 2024-10-14 21:43:57 +04:00
danil.markov
ae8ac061bc Merge remote-tracking branch 'origin/feature/parsing-service' into feature/parsing-service
# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/pool/WebDriverPool.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/page/CategoryPage.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/parsing/CategoryPageParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/marketplace/ozon/parsing/ParsingService.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/ozon_parser/service/scheduler/PartitionScheduler.java
2024-10-14 12:47:34 +04:00
danil.markov
a0271125a1 Feature/parsing-service: package refactor 2024-10-14 12:46:43 +04:00
84f344084c merge branches 2024-10-13 19:05:30 +04:00
5ae300389c feature: wb parser 2024-10-13 18:34:11 +04:00
danil.markov
ef2240e8ab Feature/parsing-service: Add parsing Ozon, need test on another system 2024-10-13 17:44:52 +04:00
f58b0a4a02 Feature/parsing-service intermediate commit 2024-10-12 13:51:32 +04:00
ffe6920b29 feature: price history and model 2024-10-03 17:26:35 +04:00
30ca5acc34 Feature/parsing-service intermediate commit, not final migration + run config 2024-10-02 11:51:30 +04:00
a24bf08f52 Feature/parsing-service init project 2024-09-26 21:37:10 +04:00
82 changed files with 3420 additions and 0 deletions

View File

@ -0,0 +1,18 @@
<!-- IntelliJ IDEA run configuration "ParsingService [local]": starts ParsingServiceApplication with the "dev" Spring profile. -->
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<!-- Environment variables handed to the application; local-only credentials (postgres/postgres on localhost:5432, database parsed_data). -->
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
<env name="JDBC_USERNAME" value="postgres" />
<env name="SERVER_PORT" value="8080" />
<env name="WEBDRIVER_CHROME_PATH" value="$PROJECT_DIR$/parsing-service/web-driver/chromedriver" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<!-- Build the project ("Make") before launching. -->
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

80
docker/docker-compose.yml Normal file
View File

@ -0,0 +1,80 @@
# Local development infrastructure for price-pulse:
# PostgreSQL, ClickHouse, ZooKeeper and Kafka.
# Credentials below are throw-away values for local use only.
version: "3.8"
name: price-pulse

services:
  postgres:
    image: postgres:16
    ports:
      - "5432:5432"
    environment:
      POSTGRES_DB: parsed_data
      POSTGRES_USER: postgres
      POSTGRES_PASSWORD: postgres
    deploy:
      resources:
        limits:
          memory: 1024M
        reservations:
          memory: 256M

  clickhouse:
    # NOTE(review): consider pinning a concrete tag instead of "latest".
    image: clickhouse/clickhouse-server:latest
    ports:
      # HTTP interface; /play offers a query playground, though a
      # non-browser client is preferable for real work.
      - "8123:8123"
      # Native TCP protocol used by clickhouse-client and native drivers.
      - "9000:9000"
      # Inter-server HTTP port (replication traffic between servers).
      - "9009:9009"
    volumes:
      - clickhouse_data:/var/lib/clickhouse
      # The server writes its logs to /var/log/clickhouse-server.
      - clickhouse_logs:/var/log/clickhouse-server
    environment:
      CLICKHOUSE_DB: parsed_data
      CLICKHOUSE_USER: user
      CLICKHOUSE_PASSWORD: password
    deploy:
      resources:
        limits:
          memory: 1024M
        reservations:
          memory: 256M

  zookeeper:
    # Pinned together with kafka: this setup relies on ZooKeeper mode,
    # which newer Confluent Platform images no longer support.
    image: confluentinc/cp-zookeeper:7.7.1
    environment:
      ZOOKEEPER_CLIENT_PORT: "2181"
      ZOOKEEPER_TICK_TIME: "2000"
    ports:
      - "2181:2181"
    deploy:
      resources:
        limits:
          memory: 1024M
        reservations:
          memory: 256M

  kafka:
    image: confluentinc/cp-kafka:7.7.1
    depends_on:
      - zookeeper
    ports:
      - "9092:9092"
    environment:
      KAFKA_BROKER_ID: "1"
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      # Advertised as localhost, so the broker is reachable from the host
      # machine but not by name from other containers.
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: "1"
    volumes:
      # The image keeps broker data in /var/lib/kafka/data; mounting the
      # parent directory would leave the data in an anonymous volume.
      - kafka_data:/var/lib/kafka/data
    deploy:
      resources:
        limits:
          memory: 1024M
        reservations:
          memory: 256M

volumes:
  clickhouse_data:
  clickhouse_logs:
  kafka_data:

37
parsing-service/.gitignore vendored Normal file
View File

@ -0,0 +1,37 @@
HELP.md
### Gradle ###
.gradle
build/
# Re-include the wrapper JAR and any source directories that happen to be named "build".
!gradle/wrapper/gradle-wrapper.jar
!**/src/main/**/build/
!**/src/test/**/build/
### STS ###
.apt_generated
.classpath
.factorypath
.project
.settings
.springBeans
.sts4-cache
bin/
# Re-include source directories that happen to be named "bin".
!**/src/main/**/bin/
!**/src/test/**/bin/
### IntelliJ IDEA ###
.idea
*.iws
*.iml
*.ipr
out/
# Re-include source directories that happen to be named "out".
!**/src/main/**/out/
!**/src/test/**/out/
### NetBeans ###
/nbproject/private/
/nbbuild/
/dist/
/nbdist/
/.nb-gradle/
### VS Code ###
.vscode/

View File

@ -0,0 +1,17 @@
<!-- IntelliJ IDEA run configuration "ParsingService [local]": starts ParsingServiceApplication with the Spring profiles dev, ozon, headless and postgres_stat. -->
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<!-- Environment variables handed to the application; local-only PostgreSQL credentials. -->
<envs>
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<!-- Build the project ("Make") before launching. -->
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

View File

@ -0,0 +1,61 @@
// Gradle build for the parsing-service Spring Boot application.
plugins {
    id 'java'
    id 'org.springframework.boot' version '3.3.4'
    id 'io.spring.dependency-management' version '1.1.6'
}

group = 'ru.pricepulse'
version = '0.0.1-SNAPSHOT'

java {
    toolchain {
        languageVersion = JavaLanguageVersion.of(21)
    }
}

configurations {
    // Make annotation processors (Lombok) visible on the compile-only classpath as well.
    compileOnly {
        extendsFrom annotationProcessor
    }
}

repositories {
    mavenCentral()
}

// Versions of libraries whose coordinates below spell out an explicit version.
ext {
    jsoupVersion = '1.18.1'
    seleniumVersion = '4.25.0'
}

dependencies {
    // Spring
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.springframework.boot:spring-boot-starter-webflux'
    implementation 'org.springframework.kafka:spring-kafka'
    implementation 'org.springframework.retry:spring-retry:2.0.9'
    implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'
    // implementation 'org.liquibase:liquibase-core'

    // Parsing and browser automation
    implementation "org.jsoup:jsoup:${jsoupVersion}"
    implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
    implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
    implementation 'org.apache.commons:commons-pool2:2.12.0'

    // Databases
    implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
    runtimeOnly 'org.postgresql:postgresql'

    // Lombok
    compileOnly 'org.projectlombok:lombok'
    annotationProcessor 'org.projectlombok:lombok'

    // Tests
    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation 'org.springframework.kafka:spring-kafka-test'
    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
}

tasks.named('test') {
    useJUnitPlatform()
}

Binary file not shown.

View File

@ -0,0 +1,7 @@
# Gradle wrapper settings: which Gradle distribution to download and where to store it.
distributionBase=GRADLE_USER_HOME
distributionPath=wrapper/dists
# Binary-only distribution of Gradle 8.10.2.
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
networkTimeout=10000
validateDistributionUrl=true
zipStoreBase=GRADLE_USER_HOME
zipStorePath=wrapper/dists

252
parsing-service/gradlew vendored Normal file
View File

@ -0,0 +1,252 @@
#!/bin/sh
#
# Copyright © 2015-2021 the original authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# SPDX-License-Identifier: Apache-2.0
#
##############################################################################
#
# Gradle start up script for POSIX generated by Gradle.
#
# Important for running:
#
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
# noncompliant, but you have some other compliant shell such as ksh or
# bash, then to run this script, type that shell name before the whole
# command line, like:
#
# ksh Gradle
#
# Busybox and similar reduced shells will NOT work, because this script
# requires all of these POSIX shell features:
# * functions;
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
# * compound commands having a testable exit status, especially «case»;
# * various built-in commands including «command», «set», and «ulimit».
#
# Important for patching:
#
# (2) This script targets any POSIX shell, so it avoids extensions provided
# by Bash, Ksh, etc; in particular arrays are avoided.
#
# The "traditional" practice of packing multiple parameters into a
# space-separated string is a well documented source of bugs and security
# problems, so this is (mostly) avoided, by progressively accumulating
# options in "$@", and eventually passing that to Java.
#
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
# see the in-line comments for details.
#
# There are tweaks for specific operating systems such as AIX, CygWin,
# Darwin, MinGW, and NonStop.
#
# (3) This script is generated from the Groovy template
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
# within the Gradle project.
#
# You can find Gradle at https://github.com/gradle/gradle/.
#
##############################################################################
# Attempt to set APP_HOME
# Resolve links: $0 may be a link
app_path=$0
# Need this for daisy-chained symlinks.
while
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
[ -h "$app_path" ]
do
ls=$( ls -ld "$app_path" )
link=${ls#*' -> '}
case $link in #(
/*) app_path=$link ;; #(
*) app_path=$APP_HOME$link ;;
esac
done
# This is normally unused
# shellcheck disable=SC2034
APP_BASE_NAME=${0##*/}
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
' "$PWD" ) || exit
# Use the maximum available, or set MAX_FD != -1 to use that value.
MAX_FD=maximum
# Print all arguments as one message on standard error.
warn () {
echo "$*"
} >&2
# Print all arguments as one message on standard error, framed by blank
# lines, then terminate the script with exit status 1.
die () {
echo
echo "$*"
echo
exit 1
} >&2
# OS specific support (must be 'true' or 'false').
cygwin=false
msys=false
darwin=false
nonstop=false
case "$( uname )" in #(
CYGWIN* ) cygwin=true ;; #(
Darwin* ) darwin=true ;; #(
MSYS* | MINGW* ) msys=true ;; #(
NONSTOP* ) nonstop=true ;;
esac
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
# Determine the Java command to use to start the JVM.
if [ -n "$JAVA_HOME" ] ; then
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
# IBM's JDK on AIX uses strange locations for the executables
JAVACMD=$JAVA_HOME/jre/sh/java
else
JAVACMD=$JAVA_HOME/bin/java
fi
if [ ! -x "$JAVACMD" ] ; then
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
else
JAVACMD=java
if ! command -v java >/dev/null 2>&1
then
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
Please set the JAVA_HOME variable in your environment to match the
location of your Java installation."
fi
fi
# Increase the maximum file descriptors if we can.
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
case $MAX_FD in #(
max*)
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
MAX_FD=$( ulimit -H -n ) ||
warn "Could not query maximum file descriptor limit"
esac
case $MAX_FD in #(
'' | soft) :;; #(
*)
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
# shellcheck disable=SC2039,SC3045
ulimit -n "$MAX_FD" ||
warn "Could not set maximum file descriptor limit to $MAX_FD"
esac
fi
# Collect all arguments for the java command, stacking in reverse order:
# * args from the command line
# * the main class name
# * -classpath
# * -D...appname settings
# * --module-path (only if needed)
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
# For Cygwin or MSYS, switch paths to Windows format before running java
if "$cygwin" || "$msys" ; then
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
JAVACMD=$( cygpath --unix "$JAVACMD" )
# Now convert the arguments - kludge to limit ourselves to /bin/sh
for arg do
if
case $arg in #(
-*) false ;; # don't mess with options #(
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
[ -e "$t" ] ;; #(
*) false ;;
esac
then
arg=$( cygpath --path --ignore --mixed "$arg" )
fi
# Roll the args list around exactly as many times as the number of
# args, so each arg winds up back in the position where it started, but
# possibly modified.
#
# NB: a `for` loop captures its iteration list before it begins, so
# changing the positional parameters here affects neither the number of
# iterations, nor the values presented in `arg`.
shift # remove old arg
set -- "$@" "$arg" # push replacement arg
done
fi
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
# Collect all arguments for the java command:
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
# and any embedded shellness will be escaped.
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
# treated as '${Hostname}' itself on the command line.
set -- \
"-Dorg.gradle.appname=$APP_BASE_NAME" \
-classpath "$CLASSPATH" \
org.gradle.wrapper.GradleWrapperMain \
"$@"
# Stop when "xargs" is not available.
if ! command -v xargs >/dev/null 2>&1
then
die "xargs is not available"
fi
# Use "xargs" to parse quoted args.
#
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
#
# In Bash we could simply go:
#
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
# set -- "${ARGS[@]}" "$@"
#
# but POSIX shell has neither arrays nor command substitution, so instead we
# post-process each arg (as a line of input to sed) to backslash-escape any
# character that might be a shell metacharacter, then use eval to reverse
# that process (while maintaining the separation between arguments), and wrap
# the whole thing up as a single "set" statement.
#
# This will of course break if any of these variables contains a newline or
# an unmatched quote.
#
eval "set -- $(
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
xargs -n1 |
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
tr '\n' ' '
)" '"$@"'
exec "$JAVACMD" "$@"

94
parsing-service/gradlew.bat vendored Normal file
View File

@ -0,0 +1,94 @@
@rem
@rem Copyright 2015 the original author or authors.
@rem
@rem Licensed under the Apache License, Version 2.0 (the "License");
@rem you may not use this file except in compliance with the License.
@rem You may obtain a copy of the License at
@rem
@rem https://www.apache.org/licenses/LICENSE-2.0
@rem
@rem Unless required by applicable law or agreed to in writing, software
@rem distributed under the License is distributed on an "AS IS" BASIS,
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@rem See the License for the specific language governing permissions and
@rem limitations under the License.
@rem
@rem SPDX-License-Identifier: Apache-2.0
@rem
@if "%DEBUG%"=="" @echo off
@rem ##########################################################################
@rem
@rem Gradle startup script for Windows
@rem
@rem ##########################################################################
@rem Set local scope for the variables with windows NT shell
if "%OS%"=="Windows_NT" setlocal
set DIRNAME=%~dp0
if "%DIRNAME%"=="" set DIRNAME=.
@rem This is normally unused
set APP_BASE_NAME=%~n0
set APP_HOME=%DIRNAME%
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
@rem Find java.exe
if defined JAVA_HOME goto findJavaFromJavaHome
set JAVA_EXE=java.exe
%JAVA_EXE% -version >NUL 2>&1
if %ERRORLEVEL% equ 0 goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:findJavaFromJavaHome
set JAVA_HOME=%JAVA_HOME:"=%
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
if exist "%JAVA_EXE%" goto execute
echo. 1>&2
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
echo. 1>&2
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
echo location of your Java installation. 1>&2
goto fail
:execute
@rem Setup the command line
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
@rem Execute Gradle
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
:end
@rem End local scope for the variables with windows NT shell
if %ERRORLEVEL% equ 0 goto mainEnd
:fail
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
rem the _cmd.exe /c_ return code!
set EXIT_CODE=%ERRORLEVEL%
if %EXIT_CODE% equ 0 set EXIT_CODE=1
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
exit /b %EXIT_CODE%
:mainEnd
if "%OS%"=="Windows_NT" endlocal
:omega

View File

@ -0,0 +1 @@
// Name of the root Gradle project.
rootProject.name = 'parsing-service'

View File

@ -0,0 +1,15 @@
package ru.pricepulse.parsingservice;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.retry.annotation.EnableRetry;
/**
 * Entry point of the parsing service.
 *
 * <p>Declares the Spring Boot application and switches on Spring Retry.
 */
@EnableRetry
@SpringBootApplication
public class ParsingServiceApplication {

    /**
     * Boots the Spring application context.
     *
     * @param args command-line arguments forwarded to Spring Boot
     */
    public static void main(String[] args) {
        new SpringApplication(ParsingServiceApplication.class).run(args);
    }
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.config;
import java.time.format.DateTimeFormatter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Provides the {@link DateTimeFormatter} bean named {@code partitionDateTimeFormatter}.
 */
@Configuration
public class DateTimeFormatterConfig {

    /** Year and two-digit month joined by an underscore, e.g. {@code 2024_11}. */
    private static final String PARTITION_PATTERN = "yyyy_MM";

    /**
     * @return a formatter for the {@code yyyy_MM} pattern
     */
    @Bean
    public DateTimeFormatter partitionDateTimeFormatter() {
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(PARTITION_PATTERN);
        return formatter;
    }
}

View File

@ -0,0 +1,36 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.http.HttpRequest;
import org.springframework.http.client.ClientHttpRequestExecution;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.http.client.ClientHttpResponse;
import java.io.IOException;
import java.net.InetSocketAddress;
/**
 * {@link ClientHttpRequestInterceptor} that stamps every outgoing request with a
 * User-Agent obtained from {@link UserAgentProvider}.
 *
 * <p>Routing through a proxy from {@link ProxyProvider} is currently switched
 * off; the disabled statements are kept as comments inside {@link #intercept}.
 */
@Slf4j
@AllArgsConstructor
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {

    private final UserAgentProvider userAgentProvider;
    private final ProxyProvider proxyProvider;

    /**
     * Replaces the request's User-Agent header and passes the request on.
     *
     * @param request   the outgoing request
     * @param body      the request body
     * @param execution the remaining interceptor chain
     * @return the response produced by the rest of the chain
     * @throws IOException if executing the request fails
     */
    @Override
    public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
        // Disabled: take the next proxy ...
        //InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
        //log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());

        // Disabled: ... and install it through JVM-wide system properties.
        //System.setProperty("http.proxyHost", proxyAddress.getHostName());
        //System.setProperty("http.proxyPort", String.valueOf(proxyAddress.getPort()));

        // Overwrite the User-Agent with a randomly chosen value.
        request.getHeaders().set("User-Agent", userAgentProvider.getRandomUserAgent());

        return execution.execute(request, body);
    }
}

View File

@ -0,0 +1,10 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Marker configuration that enables binding of Spring Boot's {@link KafkaProperties}.
 *
 * <p>NOTE(review): Spring Boot's Kafka auto-configuration presumably registers
 * {@code KafkaProperties} already — confirm whether this class is still needed.
 */
@Configuration
@EnableConfigurationProperties(KafkaProperties.class)
public class KafkaConfig {
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import lombok.Getter;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
/**
 * Registers the marketplace-specific {@code @ConfigurationProperties} classes and
 * exposes the bound instances through Lombok-generated getters.
 */
@Configuration
@EnableConfigurationProperties({OzonConfigProperties.class, WildberriesConfigProperties.class})
@Getter
@AllArgsConstructor
public class MarketplacesConfig {

    /** Settings bound from the {@code marketplace.wildberries} prefix. */
    private final WildberriesConfigProperties wildberriesConfigProperties;

    /** Settings bound from the {@code marketplace.ozon} prefix. */
    private final OzonConfigProperties ozonConfigProperties;
}

View File

@ -0,0 +1,57 @@
package ru.pricepulse.parsingservice.config;
import jakarta.annotation.PostConstruct;
import org.springframework.core.io.Resource;
import org.springframework.core.io.ResourceLoader;
import org.springframework.stereotype.Component;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.checkProxies;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.readProxiesFromFile;
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.saveProxiesToFile;
/**
 * Round-robin provider of proxy addresses.
 *
 * <p>{@link #init()} reads candidate proxies from {@code classpath:proxy.txt},
 * keeps the ones returned by {@code ProxyChecker.checkProxies} and writes them
 * to {@code classpath:ok-proxy.txt}. {@link #getNextProxy()} then cycles
 * through the working proxies.
 *
 * <p>NOTE(review): the {@code @PostConstruct} annotation on {@link #init()} is
 * commented out, so nothing in this class triggers it. Until it has run,
 * {@link #getNextProxy()} fails with an {@link IllegalStateException}.
 */
@Component
public class ProxyProvider {

    // volatile: assigned by init() and read by getNextProxy(), possibly on different threads.
    private volatile List<String> workingProxies;
    private final AtomicInteger currentProxyIndex = new AtomicInteger(0);
    private final ResourceLoader resourceLoader;

    public ProxyProvider(ResourceLoader resourceLoader) {
        this.resourceLoader = resourceLoader;
    }

    /**
     * Loads the proxy list, checks it and stores the working proxies.
     *
     * <p>NOTE(review): {@code Path.of(resource.getURI())} only works while the
     * classpath resources are plain files; it presumably fails once the
     * application runs from a packaged jar — confirm.
     *
     * @throws IOException      if the proxy list cannot be read
     * @throws RuntimeException if no working proxy was found
     */
    //@PostConstruct
    public void init() throws IOException {
        Resource proxy = resourceLoader.getResource("classpath:proxy.txt");
        Resource okProxy = resourceLoader.getResource("classpath:ok-proxy.txt");

        List<String> proxies = Files.readAllLines(Path.of(proxy.getURI()));

        System.out.println("Начата проверка проксей");
        List<String> checked = checkProxies(proxies);
        System.out.println("Закончена проверка проксей");
        saveProxiesToFile(checked, Path.of(okProxy.getURI()));

        if (checked.isEmpty()) {
            throw new RuntimeException("Нет доступных рабочих прокси.");
        }

        // Publish the list only after it is known to be usable.
        workingProxies = checked;
        System.out.println("Найдено рабочих прокси: " + checked.size());
    }

    /**
     * Returns the next working proxy in round-robin order.
     *
     * @return the address of the next proxy
     * @throws IllegalStateException if no working proxies are loaded or the
     *                               selected entry is not of the form {@code host:port}
     */
    public synchronized InetSocketAddress getNextProxy() {
        // Read the field once so the size check and the lookup see the same list.
        List<String> proxies = workingProxies;
        if (proxies == null || proxies.isEmpty()) {
            throw new IllegalStateException("Нет доступных рабочих прокси.");
        }

        // Take the current index and advance it, wrapping around at the end of the list.
        int currentIndex = currentProxyIndex.getAndUpdate(index -> (index + 1) % proxies.size());

        // The extra modulo keeps the index valid if the list was replaced by a shorter one.
        String entry = proxies.get(currentIndex % proxies.size());
        String[] proxy = entry.split(":");
        if (proxy.length != 2) {
            throw new IllegalStateException("Malformed proxy entry (expected host:port): " + entry);
        }

        try {
            return new InetSocketAddress(proxy[0].trim(), Integer.parseInt(proxy[1].trim()));
        } catch (IllegalArgumentException e) {
            // Covers a non-numeric port (NumberFormatException) and an out-of-range port.
            throw new IllegalStateException("Malformed proxy entry (expected host:port): " + entry, e);
        }
    }
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.config;
import lombok.AllArgsConstructor;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.ClientHttpRequestInterceptor;
import org.springframework.web.client.RestTemplate;
import java.util.Collections;
/**
 * Declares the application's {@link RestTemplate} bean.
 */
@Configuration
@AllArgsConstructor
public class RestTemplateConfig {

    private final UserAgentProvider userAgentProvider;
    private final ProxyProvider proxyProvider;

    /**
     * Builds a {@link RestTemplate} whose only interceptor is a
     * {@link DynamicProxyInterceptor}.
     *
     * @return the configured template
     */
    @Bean
    public RestTemplate restTemplate() {
        ClientHttpRequestInterceptor interceptor =
                new DynamicProxyInterceptor(userAgentProvider, proxyProvider);

        RestTemplate template = new RestTemplate();
        // Install the interceptor as the template's single interceptor.
        template.setInterceptors(Collections.singletonList(interceptor));
        return template;
    }
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Configuration;
import org.springframework.retry.annotation.EnableRetry;
/**
 * Marker configuration that switches on Spring Retry via {@code @EnableRetry}.
 */
@Configuration
@EnableRetry
public class RetryConfig {}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
/**
 * Enables scheduling and supplies the scheduler that runs the scheduled tasks.
 */
@Configuration
@EnableScheduling
public class SchedulerConfig {

    private static final int POOL_SIZE = 10;
    private static final String THREAD_NAME_PREFIX = "ScheduledTask-";

    /**
     * @return a scheduler with a pool of {@value #POOL_SIZE} threads whose
     *         names start with {@value #THREAD_NAME_PREFIX}
     */
    @Bean
    public ThreadPoolTaskScheduler taskScheduler() {
        var scheduler = new ThreadPoolTaskScheduler();
        scheduler.setThreadNamePrefix(THREAD_NAME_PREFIX);
        scheduler.setPoolSize(POOL_SIZE);
        return scheduler;
    }
}

View File

@ -0,0 +1,10 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.SeleniumConfigProperties;
/**
 * Marker configuration that enables binding of {@link SeleniumConfigProperties}.
 */
@Configuration
@EnableConfigurationProperties(SeleniumConfigProperties.class)
public class SeleniumConfig {
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.config;
import java.util.List;
import java.util.Random;
import org.springframework.stereotype.Component;
/**
 * Supplies User-Agent strings picked at random from a fixed list.
 */
@Component
public class UserAgentProvider {

    private static final List<String> USER_AGENTS = List.of(
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A372 Safari/604.1"
    );

    // One shared generator instead of a new Random per call; java.util.Random is thread-safe.
    private static final Random RANDOM = new Random();

    /**
     * @return one of the predefined User-Agent strings, chosen at random
     */
    public String getRandomUserAgent() {
        return USER_AGENTS.get(RANDOM.nextInt(USER_AGENTS.size()));
    }
}

View File

@ -0,0 +1,58 @@
package ru.pricepulse.parsingservice.config;
import java.net.InetSocketAddress;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.HttpHeaders;
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
import org.springframework.web.reactive.function.client.WebClient;
import reactor.netty.http.client.HttpClient;
import reactor.netty.transport.ProxyProvider;
/**
 * Declares the application's {@link WebClient} bean.
 *
 * <p>Every request is re-issued through a freshly built client that uses the
 * next proxy from the project's {@code ProxyProvider} and a random User-Agent
 * from {@link UserAgentProvider}.
 */
@Slf4j
@Configuration
@AllArgsConstructor
public class WebClientConfig {

    /** Upper bound for response bodies buffered in memory: 10 MiB. */
    private static final int MAX_IN_MEMORY_SIZE = 10 * 1024 * 1024;

    private final UserAgentProvider userAgentProvider;
    private final ru.pricepulse.parsingservice.config.ProxyProvider proxyProvider;

    /**
     * Builds the proxy-rotating {@link WebClient}.
     *
     * <p>NOTE(review): a new Reactor Netty {@code HttpClient} and
     * {@code WebClient} are created for every request; consider caching one
     * client per proxy if request volume grows.
     *
     * @return the configured client
     */
    @Bean
    public WebClient webClient() {
        return WebClient.builder()
                .filter((request, next) -> {
                    // Take the next proxy for this request.
                    InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
                    // getHostString() avoids the reverse DNS lookup getHostName() may perform.
                    log.info("Используемый прокси: {}:{}", proxyAddress.getHostString(), proxyAddress.getPort());

                    HttpClient httpClient = HttpClient.create()
                            .proxy(proxy -> proxy
                                    .type(ProxyProvider.Proxy.HTTP)
                                    .address(proxyAddress));

                    String randomUserAgent = userAgentProvider.getRandomUserAgent();
                    log.info("Используемый User-Agent: {}", randomUserAgent);

                    // The response is produced by this inner client, so the
                    // buffer limit has to be configured here as well.
                    WebClient webClientWithProxy = WebClient.builder()
                            .clientConnector(new ReactorClientHttpConnector(httpClient))
                            .codecs(configurer -> configurer
                                    .defaultCodecs()
                                    .maxInMemorySize(MAX_IN_MEMORY_SIZE))
                            .build();

                    // Replay the original request through the proxy-backed client.
                    // exchange() is deprecated but kept: the filter has to hand
                    // back the raw ClientResponse for the caller to consume.
                    return webClientWithProxy
                            .method(request.method())
                            .uri(request.url())
                            .headers(headers -> {
                                headers.putAll(request.headers());
                                // set() replaces any User-Agent copied from the request;
                                // header() would append a second value.
                                headers.set(HttpHeaders.USER_AGENT, randomUserAgent);
                            })
                            .cookies(cookies -> cookies.putAll(request.cookies()))
                            .body(request.body())
                            .exchange();
                })
                .codecs(configurer -> configurer
                        .defaultCodecs()
                        .maxInMemorySize(MAX_IN_MEMORY_SIZE))
                .build();
    }
}

View File

@ -0,0 +1,59 @@
package ru.pricepulse.parsingservice.config;
import java.util.HashMap;
import java.util.Map;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Profile;
import org.springframework.context.annotation.Scope;
/**
 * Declares the Selenium {@link WebDriver} beans. Which driver bean exists
 * depends on the active Spring profile: {@code visible} or {@code headless}.
 */
@Configuration
public class WebDriverConfig {

    /**
     * Builds a Chrome preference map that assigns the value {@code 2} to each
     * of the given content-setting keys.
     */
    private static Map<String, Object> contentSettings(String... settingKeys) {
        Map<String, Object> prefs = new HashMap<>();
        for (String key : settingKeys) {
            prefs.put(key, 2);
        }
        return prefs;
    }

    /**
     * Chrome options for the {@code headless} profile: content preferences,
     * a fixed user agent and the headless/sandbox command-line switches.
     */
    @Bean
    @Profile("headless")
    public ChromeOptions chromeOptions() {
        var options = new ChromeOptions();
        options.setExperimentalOption("prefs", contentSettings(
                "profile.managed_default_content_settings.images",
                "profile.managed_default_content_settings.stylesheets"));
        options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36");
        //options.addArguments("--window-size=1920,2000");
        options.addArguments(
                "--headless",
                "--disable-gpu",
                "--no-sandbox",
                "--disable-dev-shm-usage");
        return options;
    }

    /**
     * Prototype-scoped Chrome driver for the {@code headless} profile.
     *
     * @param options the options from {@link #chromeOptions()}
     * @return a new driver instance on every lookup
     */
    @Bean
    @Profile("headless")
    @Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
    public WebDriver webDriverHeadless(ChromeOptions options) {
        WebDriverManager.chromedriver().setup();
        return new ChromeDriver(options);
    }

    /**
     * Prototype-scoped Chrome driver with a visible window for the
     * {@code visible} profile.
     *
     * @return a new driver instance on every lookup
     */
    @Bean
    @Profile("visible")
    @Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
    public WebDriver webDriverVisible() {
        var options = new ChromeOptions();
        options.setExperimentalOption("prefs", contentSettings(
                "profile.managed_default_content_settings.images",
                "profile.managed_default_content_settings.geolocation"));
        WebDriverManager.chromedriver().setup();
        return new ChromeDriver(options);
    }
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.config.properties;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Holder for settings bound from the {@code application.kafka} configuration prefix.
 * No properties are declared yet, so nothing is bound at the moment.
 */
@ConfigurationProperties(prefix = "application.kafka")
public class KafkaConfigProperties {
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.config.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Settings bound from the {@code marketplace.ozon} configuration prefix.
 * Accessors are generated by Lombok.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.ozon")
public class OzonConfigProperties {

    // Read by WebDriverPool as the number of drivers to create and by OzonParsingService
    // as the size of its thread pool and semaphore. Boxed, so it is null when not configured.
    private Integer maxThreads;

    // Step by which OzonParsingService advances the page index between submitted pages.
    private Integer maxNumOfPagesOnScreen;
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.config.properties;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Holder for settings bound from the {@code selenium} configuration prefix.
 * No properties are declared yet.
 */
@ConfigurationProperties("selenium")
public class SeleniumConfigProperties {
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Settings bound from the {@code marketplace.wildberries} configuration prefix.
 * Accessors are generated by Lombok.
 * NOTE(review): the consumers of these values are not visible here; the per-field notes
 * below are inferred from the field names only and should be confirmed.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.wildberries")
public class WildberriesConfigProperties {

    // Presumably the root URL of the marketplace site/API - confirm.
    private String baseUrl;

    // Presumably the catalog endpoint - confirm.
    private String catalogUrl;

    // Presumably the User-Agent header value sent with requests - confirm.
    private String userAgent;

    // Presumably a second, Wildberries-specific catalog endpoint - confirm.
    private String catalogWbUrl;

    // Primitive, so it defaults to 0 when not configured.
    private int retryAttempts;

    // Primitive, so it defaults to 0 when not configured; unit is not visible here.
    private long retryDelay;

    // Presumably the URL of the laptop category - confirm.
    private String laptopUrl;

    // Meaning not visible here - confirm against the client code.
    private String shard;
}

View File

@ -0,0 +1,6 @@
package ru.pricepulse.parsingservice.enumeration;
/**
 * Marketplace-independent product categories.
 * {@code OzonCategory} maps each of its entries to one of these values.
 */
public enum Category {
    LAPTOP,
    SMARTPHONE
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.enumeration;
/**
 * Marketplaces a parsed product can originate from.
 */
public enum Marketplace {
    WILDBERRIES,
    OZON,
    DNS
}

View File

@ -0,0 +1,31 @@
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
import ru.pricepulse.parsingservice.enumeration.Category;
/**
 * Ozon catalog categories that are parsed, each paired with the relative category path
 * (including its query string) and the marketplace-independent {@link Category}.
 */
public enum OzonCategory {

    LAPTOP("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
    SMARTPHONE("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);

    /** Prefix prepended to every relative category path. */
    private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";

    private final String relativePath;
    private final Category category;

    OzonCategory(String relativePath, Category category) {
        this.relativePath = relativePath;
        this.category = category;
    }

    /**
     * @return the absolute category URL, i.e. the base category URL followed by the relative path
     */
    public String getCategoryUrl() {
        return BASE_CATEGORY_URL.concat(relativePath);
    }

    /**
     * @return the marketplace-independent category this entry corresponds to
     */
    public Category getMappedCategory() {
        return category;
    }
}

View File

@ -0,0 +1,70 @@
package ru.pricepulse.parsingservice.ozon_parser.pool;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.WebDriver;
import org.springframework.beans.factory.ObjectFactory;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
/**
 * Fixed-size pool of {@link WebDriver} instances for the {@code ozon} profile.
 * All drivers are created eagerly in the constructor; {@link #borrowDriver()} never creates
 * additional ones and fails when the pool is exhausted.
 */
@Slf4j
@Component
@Profile("ozon")
public class WebDriverPool {

    private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();
    private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();
    private final ObjectFactory<WebDriver> webDriverFactory;
    private final OzonConfigProperties ozonConfigProperties;

    /**
     * Creates {@code maxThreads} drivers up front.
     *
     * @param webDriverFactory     factory asked for one driver per pool slot
     * @param ozonConfigProperties supplies {@code maxThreads}, the pool size
     * @throws IllegalStateException if {@code maxThreads} is not configured
     */
    public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
                         OzonConfigProperties ozonConfigProperties) {
        this.webDriverFactory = webDriverFactory;
        this.ozonConfigProperties = ozonConfigProperties;
        Integer configuredSize = ozonConfigProperties.getMaxThreads();
        if (configuredSize == null) {
            // Fail with a readable message instead of an unboxing NullPointerException.
            throw new IllegalStateException("marketplace.ozon maxThreads must be configured");
        }
        int poolSize = configuredSize;
        try {
            for (int i = 0; i < poolSize; i++) {
                availableDrivers.add(createNewDriver());
            }
        } catch (RuntimeException e) {
            // Quit the drivers that did start, otherwise they would be leaked:
            // @PreDestroy is not invoked for a bean whose constructor failed.
            shutdownPool();
            throw e;
        }
    }

    private WebDriver createNewDriver() {
        return webDriverFactory.getObject();
    }

    /**
     * Takes a driver out of the pool and marks it as busy.
     *
     * @return a driver that must be handed back through {@link #returnDriver(WebDriver)}
     * @throws NoSuchElementException if every driver is currently borrowed
     */
    public WebDriver borrowDriver() {
        WebDriver driver = availableDrivers.poll();
        if (driver != null) {
            busyDrivers.add(driver);
            return driver;
        }
        throw new NoSuchElementException("No available driver found");
    }

    /**
     * Hands a borrowed driver back to the pool.
     * A {@code null} argument, a driver that was never borrowed, or a driver that was already
     * returned is ignored, so the same instance can never be queued twice.
     *
     * @param driver the driver previously obtained from {@link #borrowDriver()}
     */
    public void returnDriver(WebDriver driver) {
        if (driver == null) {
            return;
        }
        if (busyDrivers.remove(driver)) {
            availableDrivers.add(driver);
        } else {
            log.warn("Ignoring return of a driver that is not marked as borrowed");
        }
    }

    /**
     * Quits every pooled driver, idle or borrowed, and empties the pool.
     * A failure to quit one driver does not prevent the remaining ones from being quit.
     */
    @PreDestroy
    public void shutdownPool() {
        quitAll(availableDrivers);
        quitAll(busyDrivers);
    }

    /** Drains the queue, quitting each driver and logging (not propagating) quit failures. */
    private void quitAll(Queue<WebDriver> drivers) {
        WebDriver driver;
        while ((driver = drivers.poll()) != null) {
            try {
                driver.quit();
            } catch (Exception e) {
                log.warn("Failed to quit WebDriver", e);
            }
        }
    }
}

View File

@ -0,0 +1,17 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
/**
 * Read-only facade over the {@link OzonCategory} enum.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class OzonService {

    /**
     * @return every {@link OzonCategory} constant, in declaration order
     */
    public OzonCategory[] getCategories() {
        return OzonCategory.values();
    }
}

View File

@ -0,0 +1,27 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
/**
 * Helper for checking and creating partitions of the {@code price_history} table.
 * Active only under the {@code postgres_stat} profile.
 */
@Service
@RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionService {

    /** An unquoted SQL identifier, optionally qualified with a single schema name. */
    private static final String TABLE_NAME_REGEX =
            "[A-Za-z_][A-Za-z0-9_$]*(\\.[A-Za-z_][A-Za-z0-9_$]*)?";

    private final JdbcTemplate postgresDataSource;

    /**
     * Checks whether a relation with the given name exists in the {@code public} schema.
     * The name is passed as a bind parameter, so it cannot alter the statement.
     *
     * @param partitionName unqualified relation name
     * @return {@code true} if {@code to_regclass} resolves {@code public.<partitionName>}
     */
    public boolean checkPartitionExists(String partitionName) {
        String result = postgresDataSource.queryForObject(
                "SELECT to_regclass(?)", String.class, "public." + partitionName);
        return result != null;
    }

    /**
     * Creates the partition of {@code price_history} covering {@code [startDate, endDate)}
     * unless a table with that name already exists.
     * DDL cannot take bind parameters, so the table name is validated and the bounds are
     * embedded as escaped string literals.
     *
     * @param partitionName name of the partition table
     * @param startDate     lower bound, embedded as a string literal
     * @param endDate       upper bound, embedded as a string literal
     * @throws IllegalArgumentException if {@code partitionName} is not a plain SQL identifier
     */
    public void createPartition(String partitionName, String startDate, String endDate) {
        if (partitionName == null || !partitionName.matches(TABLE_NAME_REGEX)) {
            throw new IllegalArgumentException("Illegal partition name: " + partitionName);
        }
        String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
                " PARTITION OF price_history FOR VALUES FROM (" + quoteLiteral(startDate) +
                ") TO (" + quoteLiteral(endDate) + ")";
        postgresDataSource.execute(createPartitionSQL);
    }

    /**
     * Wraps the value in single quotes, doubling embedded single quotes.
     * Assumes the server runs with standard_conforming_strings=on (backslash is not an
     * escape character) - TODO confirm for the target database.
     */
    private static String quoteLiteral(String value) {
        // String.valueOf keeps the previous rendering of a null argument ('null').
        return "'" + String.valueOf(value).replace("'", "''") + "'";
    }
}

View File

@ -0,0 +1,30 @@
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
import java.math.BigDecimal;
import lombok.Builder;
import lombok.Getter;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * Product data extracted from one product card of a category page.
 * Instances are created through the Lombok-generated builder.
 */
@Getter
@Setter
@Builder
public class ParsedData {

    // Marketplace the product was parsed from.
    private Marketplace marketplace;

    // Marketplace-independent category of the product.
    private Category category;

    private String brand;

    private String productName;

    // Product page URL; OzonParsingService uses it as the de-duplication key.
    private String url;

    private String imageUrl;

    private BigDecimal price;
}

View File

@ -0,0 +1,64 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
/**
 * Page object for the page recognised by a warning image together with the
 * "Доступ ограничен" ("access restricted") heading.
 */
@Slf4j
public class AccessDeniedPage implements MarketplacePage {

    private static final String RELOAD_BUTTON_ID = "reload-button";
    private static final String RELOAD_BUTTON_XPATH = "//button[contains(text(),'Обновить')]";
    private static final String WARNING_IMAGE_CSS = "img[alt='warning']";
    private static final String ACCESS_DENIED_TEXT_XPATH = "//h1[text()='Доступ ограничен']";

    private final By reloadButtonById = By.id(RELOAD_BUTTON_ID);
    private final By reloadButtonByXpath = By.xpath(RELOAD_BUTTON_XPATH);
    private final By warningImage = By.cssSelector(WARNING_IMAGE_CSS);
    private final By accessDeniedText = By.xpath(ACCESS_DENIED_TEXT_XPATH);

    private final WebDriver driver;
    private final WebDriverWait wait;

    /**
     * @param driver driver showing the page under inspection
     * @param wait   wait bound to that driver (stored, currently not used by this class)
     */
    public AccessDeniedPage(WebDriver driver,
                            WebDriverWait wait) {
        this.driver = driver;
        this.wait = wait;
    }

    /**
     * Clicks the reload button, locating it by id first and by XPath as a fallback.
     * Does nothing except logging when neither lookup succeeds.
     */
    public void clickReloadButton() {
        if (clickedById()) {
            return;
        }
        clickByXpath();
    }

    /** First attempt: locate the button by its id. Returns whether the click went through. */
    private boolean clickedById() {
        try {
            log.debug("Пытаемся найти кнопку по id и нажать");
            driver.findElement(reloadButtonById).click();
            return true;
        } catch (Exception e) {
            log.debug("Кнопка обновления страницы не найдена по id");
            return false;
        }
    }

    /** Fallback attempt: locate the button by its visible text. */
    private void clickByXpath() {
        try {
            log.debug("Пытаемся найти кнопку по xpath и нажать");
            driver.findElement(reloadButtonByXpath).click();
            log.debug("Успешно нашли кнопку по xpath");
        } catch (Exception e) {
            log.debug("Кнопка обновления страницы не найдена по xpath");
        }
    }

    /**
     * @return {@code true} when both the warning image and the access-denied heading can be
     *         located; any lookup failure yields {@code false}
     */
    @Override
    public boolean isLoaded() {
        try {
            // The heading is only looked up when the image lookup succeeded.
            if (driver.findElement(warningImage) == null) {
                return false;
            }
            return driver.findElement(accessDeniedText) != null;
        } catch (Exception e) {
            return false;
        }
    }
}

View File

@ -0,0 +1,90 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import java.math.BigDecimal;
import java.util.ArrayList;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
/**
 * Selenium page object for a category page, recognised by the
 * {@code searchResultsV2} widget.
 */
@Slf4j
public class CategoryPage implements MarketplacePage {

    private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";

    /** Direct {@code div} children of the element the lookup starts from. */
    private static final By DIRECT_CHILD_DIVS = By.cssSelector(":scope > div");

    private final By searchResults = By.cssSelector(SEARCH_RESULTS);

    private WebDriver driver;
    private WebDriverWait wait;

    /**
     * @param driver driver showing the page under inspection
     * @param wait   wait used while the product list is being rendered
     */
    public CategoryPage(WebDriver driver, WebDriverWait wait) {
        this.driver = driver;
        this.wait = wait;
    }

    /**
     * Reads every product card inside the search-results widget.
     * Each returned item carries url, brand, product name, image url and price;
     * marketplace and category are left unset because this class does not know them.
     *
     * @return the parsed products, in page order
     * @throws org.openqa.selenium.TimeoutException if the list does not become visible in time
     */
    public ArrayList<ParsedData> getParsedProducts() {
        wait.until(visibilityOfElementLocated(searchResults));
        log.info("Нашли SearchResultsV2");
        var searchResultsElement = driver.findElement(searchResults);

        // The condition has to be evaluated inside the lambda. Returning an ExpectedCondition
        // object from it (as before) is always non-null, which ends the wait immediately.
        wait.until(d -> {
            var outerBlocks = searchResultsElement.findElements(DIRECT_CHILD_DIVS);
            return !outerBlocks.isEmpty() && outerBlocks.get(0).isDisplayed();
        });
        log.info("Нашли внешний блок списка");
        var outerDiv = searchResultsElement.findElement(DIRECT_CHILD_DIVS); // outer block holding the product list

        wait.until(d -> {
            var cards = outerDiv.findElements(DIRECT_CHILD_DIVS);
            return !cards.isEmpty() && cards.stream().allMatch(card -> card.isDisplayed());
        });
        log.info("Нашли элементы списка");
        var innerDivs = outerDiv.findElements(DIRECT_CHILD_DIVS); // one block per product card

        var products = new ArrayList<ParsedData>();
        for (var innerDiv : innerDivs) {
            var productDataDivs = innerDiv.findElements(DIRECT_CHILD_DIVS);

            var productImageUrl = productDataDivs.get(0)
                    .findElement(By.cssSelector(":scope > a > div"))
                    .findElements(DIRECT_CHILD_DIVS).getFirst()
                    .findElement(By.tagName("img")).getAttribute("src");

            var productBrand = productDataDivs.get(1).findElement(DIRECT_CHILD_DIVS)
                    .findElements(DIRECT_CHILD_DIVS).getFirst()
                    .findElement(By.tagName("b")).getText();

            var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
            var productUrl = productNameLink.getAttribute("href");
            var productName = productNameLink.findElement(By.tagName("span")).getText();

            var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
                    .findElements(By.tagName("span")).getFirst().getText());

            // Previously the extracted values were dropped and the method always
            // returned an empty list.
            products.add(ParsedData.builder()
                    .url(productUrl)
                    .brand(productBrand)
                    .productName(productName)
                    .imageUrl(productImageUrl)
                    .price(productPrice)
                    .build());
        }
        return products;
    }

    /**
     * Strips every non-digit character and parses the remainder.
     *
     * @throws NumberFormatException if the text contains no digits
     */
    private BigDecimal parseCurrency(String currencyStr) {
        String cleanedString = currencyStr.replaceAll("[^\\d]", "");
        return new BigDecimal(cleanedString);
    }

    /**
     * @return {@code true} when the search-results widget can be located; any lookup
     *         failure yields {@code false}
     */
    @Override
    public boolean isLoaded() {
        try {
            return driver.findElement(searchResults) != null;
        } catch (Exception e) {
            return false;
        }
    }
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
/**
 * A page object that can tell whether the page it models is the one currently displayed.
 */
public interface MarketplacePage {

    /**
     * @return {@code true} if the modelled page is currently shown
     */
    boolean isLoaded();
}

View File

@ -0,0 +1,38 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
@Slf4j
public class NoContentPage {
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
private WebDriver driver;
private WebDriverWait wait;
public NoContentPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public boolean isLoaded() {
try {
return driver.findElement(searchResultsError) != null
|| driver.findElement(errorText) != null
|| driver.findElement(notFoundText) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -0,0 +1,228 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
/**
 * Jsoup-based reader for the HTML of a category page.
 * Product cards are located purely by their position inside the
 * {@code searchResultsV2} widget, so the extraction depends on the page's DOM layout.
 */
@Slf4j
public class OzonCategoryPage {

    private static final String OZON_MAIN_LINK = "https://www.ozon.ru";

    public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";

    /** Position, inside a card's data block, of the optional block whose text contains "осталось". */
    public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
    /** Position of the price block inside a card's data block. */
    public static final int INDEX_OF_PRODUCT_PRICE = 0;
    /** Position of the brand block inside a card's data block. */
    public static final int INDEX_OF_PRODUCT_BRAND = 1;
    /** Position of the product-name block inside a card's data block. */
    public static final int INDEX_OF_PRODUCT_NAME = 2;

    private final Document document;

    /**
     * @param pageHtml full HTML source of the category page
     */
    public OzonCategoryPage(String pageHtml) {
        this.document = Jsoup.parse(pageHtml);
    }

    /**
     * Extracts every product card that can be parsed; cards that fail to parse are skipped.
     *
     * @param category category assigned to every returned item
     * @return the parsed products; an empty (mutable) list if the page has no search-results widget
     */
    public List<ParsedData> getProducts(Category category) {
        List<ParsedData> products = new ArrayList<>();
        Elements searchResultsDivs = getSearchResultsDivs();
        if (searchResultsDivs.isEmpty()) {
            return products;
        }
        log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
        for (Element searchResultsDiv : searchResultsDivs) {
            Elements productsDivs = getProductsDivs(searchResultsDiv);
            List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
            List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
            products.addAll(parsedProductsData);
        }
        return products;
    }

    /** Selects every search-results widget of the document; empty when the selection fails. */
    private Elements getSearchResultsDivs() {
        try {
            return document.select(SEARCH_RESULTS_CSS_SELECTOR);
        } catch (Exception e) {
            log.warn("Не удалось достать блоки searchResultsV2");
            return new Elements();
        }
    }

    /** Selects the product cards: grandchildren {@code div}s of the widget. */
    private Elements getProductsDivs(Element searchResultsDiv) {
        return searchResultsDiv.select("> div > div");
    }

    /**
     * Collects, per card, the children of the card's inner {@code div}, skipping cards whose
     * children are all empty and dropping the trailing child of every kept card.
     */
    private List<Elements> getAllProductDataDivs(Elements productsDivs) {
        List<Elements> allProductDataDivs = new ArrayList<>();
        for (Element productDiv : productsDivs) {
            Elements productDataDivs = productDiv.select("> div > *");
            if (productDataDivs.select("> *").isEmpty()) {
                continue;
            }
            removeAddInFavouriteDiv(productDataDivs);
            allProductDataDivs.add(productDataDivs);
        }
        return allProductDataDivs;
    }

    /** Drops the last child of the card, which by its name is the "add to favourites" block. */
    private void removeAddInFavouriteDiv(Elements productDataDivs) {
        productDataDivs.removeLast();
    }

    /**
     * Parses every card; a card that cannot be parsed is skipped and reported at debug level
     * (it was previously dropped without any trace).
     */
    private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
                                               Category category) {
        List<ParsedData> parsedData = new ArrayList<>();
        for (Elements productDataDivs : allProductDataDivs) {
            try {
                ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
                parsedData.add(parsedDataItem);
            } catch (Exception e) {
                log.debug("Skipping product card that could not be parsed", e);
            }
        }
        return parsedData;
    }

    /** Builds one item from a card; throws if any expected element is missing. */
    private ParsedData getParsedDataItem(Elements productDataDivs,
                                         Category category) {
        removeExtraDivIfExists(productDataDivs);
        return ParsedData.builder()
                .category(category)
                .marketplace(Marketplace.OZON)
                .url(extractUrl(productDataDivs))
                .imageUrl(extractImageUrl(productDataDivs))
                .brand(extractBrand(productDataDivs))
                .productName(extractProductName(productDataDivs))
                .price(extractPrice(productDataDivs))
                .build();
    }

    /**
     * Removes the block at {@link #INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT} from the selected inner
     * elements when its text contains "осталось" (Russian for "remaining").
     * NOTE(review): the extractors re-select the inner elements through
     * {@code getProductMainDataInnerDivs}, so this removal only affects them if
     * {@code Elements.remove} also detaches the node from the DOM - confirm against the
     * jsoup version in use.
     */
    private void removeExtraDivIfExists(Elements productDataDivs) {
        Element productDataDiv = productDataDivs.last();
        Elements productDataInnerDivs = productDataDiv.select("> *");
        if (productDataInnerDivs.size() <= INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT) {
            // Nothing at that position; avoids using an exception for the common case.
            return;
        }
        try {
            if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
                    .select("span").text().toLowerCase()
                    .contains("осталось")) {
                productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
            }
        } catch (Exception e) {
            // Best effort, as before: the card is still parsed with whatever layout it has.
            log.debug("Could not inspect the optional extra block of a product card", e);
        }
    }

    /** Product URL: the site root plus the first child's {@code href} without its query string. */
    private String extractUrl(Elements productDataDivs) {
        Element productUrlA = productDataDivs.first();
        return OZON_MAIN_LINK + productUrlA
                .attr("href").replaceAll("\\?.*$", "");
    }

    /** Image URL: {@code src} of the first {@code img} under the first child's nested {@code div}. */
    private String extractImageUrl(Elements productDataDivs) {
        Element productImageUrlA = productDataDivs.first();
        return productImageUrlA.select("> div > div")
                .first().getElementsByTag("img")
                .first().attr("src");
    }

    /**
     * Brand text of the card. When the brand block holds a single span whose text is
     * "Оригинал", the placeholder "БРЕНД_НЕ_УКАЗАН" is returned instead.
     */
    private String extractBrand(Elements productDataDivs) {
        Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
        Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
                .select("> span");
        String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
        if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
            return "БРЕНД_НЕ_УКАЗАН";
        }
        return brand;
    }

    /** Product name: text of the spans inside the name block. */
    private String extractProductName(Elements productDataDivs) {
        Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
        return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
                .select("> div > span").text();
    }

    /** Price: digits of the first span inside the price block. */
    private BigDecimal extractPrice(Elements productDataDivs) {
        Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
        return parseOzonPriceToBigDecimal(
                productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
                        .first().text());
    }

    /** Children of the card's last remaining child, i.e. the card's data block. */
    private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
        return productDataDivs.last().select("> *");
    }

    /**
     * Strips every non-digit character and parses the remainder.
     *
     * @throws NumberFormatException if the text contains no digits
     */
    private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
        String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
        return new BigDecimal(cleanedString);
    }
}

View File

@ -0,0 +1,110 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.springframework.context.annotation.Profile;
import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
/**
 * Loads a category page in a pooled browser and returns its HTML after scrolling it
 * to the end. Every call borrows one driver from {@link WebDriverPool} and returns it
 * afterwards, whether the call succeeded or not.
 */
@Slf4j
@Service
@Profile("ozon")
public class OzonHtmlFetcher {

    private final WebDriverPool webDriverPool;
    private final PageScroller pageScroller;

    public OzonHtmlFetcher(WebDriverPool webDriverPool,
                           PageScroller pageScroller) {
        this.webDriverPool = webDriverPool;
        this.pageScroller = pageScroller;
    }

    /**
     * Opens the page, waits until it is recognised as one of the known page kinds,
     * clicks the reload button if access was denied, scrolls to the end and returns the source.
     * The method is retried up to 10 times; when all attempts fail the recovery method runs.
     *
     * @param pageUrl            URL of the category page
     * @param lastPageInCategory set to {@code true} when the "no content" page is detected
     * @return the page source after scrolling
     * @throws RuntimeException wrapping any failure of a single attempt
     */
    @Retryable(maxAttempts = 10, recover = "recover")
    public String fetchPageHtml(String pageUrl,
                                AtomicBoolean lastPageInCategory) {
        var driver = webDriverPool.borrowDriver();
        try {
            driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
            driver.get(pageUrl);
            WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
            var accessDeniedPage = new AccessDeniedPage(driver, wait);
            var categoryPage = new CategoryPage(driver, wait);
            var noContentPage = new NoContentPage(driver, wait);
            wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
            // NOTE(review): after the reload click nothing waits for the category page to
            // appear before scrolling - confirm this is intended.
            checkAccessDeniedAndResolve(accessDeniedPage);
            pageScroller.scrollToEndOfPage(driver);
            return driver.getPageSource();
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the executor instead of losing it in the wrapper.
            Thread.currentThread().interrupt();
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        } catch (Exception e) {
            log.error(e.getMessage(), e);
            throw new RuntimeException(e);
        } finally {
            webDriverPool.returnDriver(driver);
        }
    }

    /**
     * Wait condition: true as soon as the page is recognised as access-denied, category or
     * no-content. Detecting the no-content page also raises the stop flag.
     */
    private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
                                               CategoryPage categoryPage,
                                               NoContentPage noContentPage,
                                               AtomicBoolean stopFlag) {
        log.debug("Проверка что страница 'Доступ ограничен'");
        if (checkAccessDeniedPage(accessDeniedPage)) {
            return true;
        }
        log.debug("Проверка что страница 'Страница категории'");
        if (checkCategoryPage(categoryPage)) {
            return true;
        }
        if (checkNoContentPage(noContentPage)) {
            stopFlag.set(true);
            return true;
        }
        log.debug("Проверка загрузки страницы неудачна");
        return false;
    }

    private boolean checkCategoryPage(CategoryPage categoryPage) {
        return categoryPage.isLoaded();
    }

    /** Clicks the reload button when the access-denied page is currently shown. */
    private void checkAccessDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
        if (checkAccessDeniedPage(accessDeniedPage)) {
            log.info("Доступ ограничен, пробуем решить проблему");
            resolveAccessDeniedPage(accessDeniedPage);
            log.info("Проблема успешно решена");
        }
    }

    private boolean checkNoContentPage(NoContentPage noContentPage) {
        if (noContentPage.isLoaded()) {
            log.info("Страница не найдена");
            return true;
        }
        return false;
    }

    private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
        return accessDeniedPage.isLoaded();
    }

    private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
        accessDeniedPage.clickReloadButton();
    }

    /**
     * Recovery handler invoked once all retries of {@link #fetchPageHtml} are exhausted.
     * Its return type and trailing parameters mirror the retried method; a {@code void}
     * handler cannot be matched to a method returning {@code String}, so the message below
     * was never logged. The failure is rethrown so callers still see the fetch as failed.
     *
     * @param e                  the exception of the last attempt
     * @param pageUrl            URL that could not be fetched
     * @param lastPageInCategory flag passed to the failed call (not modified here)
     * @return never returns normally
     */
    @Recover
    private String recover(Exception e,
                           String pageUrl,
                           AtomicBoolean lastPageInCategory) {
        log.error("Все ретраи провалились");
        throw new IllegalStateException("Failed to fetch page " + pageUrl, e);
    }
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.List;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
/**
 * Spring-managed entry point that turns category page HTML into parsed products.
 */
@Service
public class OzonPageParser {

    /**
     * Parses the given HTML with a fresh {@link OzonCategoryPage}.
     *
     * @param pageSource HTML source of a category page
     * @param category   category assigned to every parsed product
     * @return the products found on the page
     */
    public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
                                                          Category category) {
        return new OzonCategoryPage(pageSource).getProducts(category);
    }
}

View File

@ -0,0 +1,114 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.MDC;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.service.ProductService;
@Slf4j
@Service
@Profile("ozon")
public class OzonParsingService {
private final Map<String, Set<String>> urlCache;
private final ExecutorService pageExecutorService;
private final Semaphore semaphore;
private final OzonHtmlFetcher categoryPageParsingService;
private final OzonConfigProperties ozonConfigProperties;
private final OzonPageParser ozonPageParser;
private final ProductService productService;
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
ProductService productService) {
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
this.urlCache = new ConcurrentHashMap<>();
for (OzonCategory category : OzonCategory.values()) {
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
}
this.categoryPageParsingService = categoryPageParsingService;
this.ozonConfigProperties = ozonConfigProperties;
this.ozonPageParser = ozonPageParser;
this.productService = productService;
}
public void startProcessing() {
for (OzonCategory category : OzonCategory.values()) {
log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", category);
processCategory(category);
}
}
private void processCategory(OzonCategory category) {
int pageIndex = 1;
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
while (!lastPageInCategory.get()) {
try {
semaphore.acquire();
int finalPageIndex = pageIndex;
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
if (lastPageInCategory.get()) {
log.info("Достигли последней страницы категории");
}
}
private void processCategoryPage(String pageUrl,
OzonCategory category,
AtomicBoolean lastPageInCategory) {
try {
MDC.put("pageUrl", pageUrl);
String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
List<ParsedData> parsedProducts =
ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
log.info("""
КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
""", parsedProducts.size());
if (urlCache.size() > 1000000) {
urlCache.clear();
}
Set<String> categoryCachecUrl = urlCache.get(category.getCategoryUrl());
List<ParsedData> uniqueData = parsedProducts.stream()
.filter(data -> categoryCachecUrl.add(data.getUrl()))
.toList();
productService.saveBatch(uniqueData);
} finally {
MDC.clear();
semaphore.release();
}
}
}

View File

@ -0,0 +1,56 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.concurrent.atomic.AtomicLong;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.springframework.stereotype.Service;
/**
 * Scrolls a page to its bottom repeatedly so that content appended on scroll gets loaded.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PageScroller {

    private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
    private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";

    /**
     * Keeps scrolling to the bottom of the page until either the paginator links can be
     * resolved in a round where the page height grew, or the height stayed unchanged for
     * 100 consecutive rounds (one second apart).
     *
     * @param driver driver showing the page; must support JavaScript execution
     * @throws InterruptedException if the thread is interrupted while pausing between rounds
     */
    public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
        JavascriptExecutor scripts = (JavascriptExecutor) driver;
        long knownHeight = (long) scripts.executeScript(ALL_CONTENT_PAGE_HEIGHT);
        int idleRoundsLeft = 100;
        log.info("Начинаем пролистывать страницу до конца");
        for (;;) {
            scripts.executeScript(SCROLL_TO_PAGE_HEIGHT);
            long currentHeight = (long) scripts.executeScript(ALL_CONTENT_PAGE_HEIGHT);
            boolean pageGrew = currentHeight > knownHeight;

            // The paginator is probed on every round, before the height comparison is used.
            if (paginatorLinksResolved(driver) && pageGrew) {
                log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
                return;
            }

            if (pageGrew) {
                // New content arrived: start counting idle rounds from scratch.
                idleRoundsLeft = 100;
                knownHeight = currentHeight;
                continue;
            }

            idleRoundsLeft--;
            Thread.sleep(1000);
            if (idleRoundsLeft == 0) {
                return;
            }
        }
    }

    /**
     * Tries to reach the link list nested in the second child of the {@code megaPaginator}
     * widget.
     *
     * @return {@code true} if the lookup chain completed, {@code false} if any step failed
     */
    private boolean paginatorLinksResolved(WebDriver driver) {
        try {
            var paginatorLinks = driver.findElements(By.cssSelector("div[data-widget='megaPaginator'] > div")).get(1)
                    .findElement(By.cssSelector(":scope > div > div > div"))
                    .findElements(By.tagName("a"));
            return paginatorLinks != null;
        } catch (Exception ignored) {
            return false;
        }
    }
}

View File

@ -0,0 +1,21 @@
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
/**
 * Scheduler that periodically triggers a full parsing run; active under the
 * {@code ozon} profile.
 */
@Service
@Profile("ozon")
public class OzonProductUpdater {

    /** Interval between the starts of two runs, in milliseconds (two hours). */
    private static final long UPDATE_PERIOD_MILLIS = 7200000;

    private final OzonParsingService ozonParsingService;

    public OzonProductUpdater(OzonParsingService ozonParsingService) {
        this.ozonParsingService = ozonParsingService;
    }

    /** Starts processing of all categories at a fixed rate. */
    @Scheduled(fixedRate = UPDATE_PERIOD_MILLIS)
    public void updateOzonProducts() {
        ozonParsingService.startProcessing();
    }
}

View File

@ -0,0 +1,4 @@
package ru.pricepulse.parsingservice.ozon_parser.service.task;
/**
 * Empty placeholder class; it declares no state or behavior yet.
 */
public class OzonParsingTask {
}

View File

@ -0,0 +1,64 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.EmbeddedId;
import jakarta.persistence.Entity;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
/**
 * JPA entity for one row of the {@code price_history} table: a price recorded for the
 * product and moment identified by the embedded {@link PriceHistoryId}.
 */
@Getter
@Setter
@Entity
@Table(name = "price_history")
@Builder
@AllArgsConstructor
@NoArgsConstructor
public class PriceHistoryEntity {

    @EmbeddedId
    private PriceHistoryId id;

    @Column(name = "price", nullable = false, precision = 10, scale = 2)
    private BigDecimal price;

    /**
     * Two entities are equal when they are of the same persistent class (Hibernate proxies
     * are unwrapped first) and share the same non-null id.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null) {
            return false;
        }
        Class<?> otherClass = persistentClassOf(o);
        Class<?> ownClass = persistentClassOf(this);
        if (ownClass != otherClass) {
            return false;
        }
        PriceHistoryEntity other = (PriceHistoryEntity) o;
        return getId() != null && Objects.equals(getId(), other.getId());
    }

    /** Hash derived from the id only. */
    @Override
    public final int hashCode() {
        return Objects.hash(id);
    }

    /** Stamps the id with the current time before the first insert if no date was set. */
    @PrePersist
    protected void onCreate() {
        if (id.getDate() != null) {
            return;
        }
        id.setDate(ZonedDateTime.now());
    }

    /** Resolves the persistent class behind a Hibernate proxy, or the plain runtime class. */
    private static Class<?> persistentClassOf(Object candidate) {
        if (candidate instanceof HibernateProxy) {
            return ((HibernateProxy) candidate).getHibernateLazyInitializer().getPersistentClass();
        }
        return candidate.getClass();
    }
}

View File

@ -0,0 +1,51 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.io.Serializable;
import java.time.ZonedDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Embeddable;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
/**
 * Composite key of {@code price_history}: the product URL together with the moment the
 * price was recorded.
 */
@Getter
@Setter
@AllArgsConstructor
@NoArgsConstructor
@Embeddable
public class PriceHistoryId implements Serializable {

    // NOTE(review): unique = true on one part of a composite key would allow only a single
    // history row per product if the schema is generated from this mapping - confirm.
    @Column(name = "product_url", nullable = false, unique = true)
    private String productUrl;

    @Column(name = "date", nullable = false)
    private ZonedDateTime date;

    /**
     * Two ids are equal when both the product URL and the (non-null) date match.
     * Comparing the date alone, as before, made ids of different products recorded at the
     * same instant indistinguishable.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null) {
            return false;
        }
        Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
        Class<?> thisEffectiveClass =
                this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
        if (thisEffectiveClass != oEffectiveClass) {
            return false;
        }
        PriceHistoryId that = (PriceHistoryId) o;
        // An id without a date is still treated as equal only to itself, as before.
        return getDate() != null
                && Objects.equals(getDate(), that.getDate())
                && Objects.equals(getProductUrl(), that.getProductUrl());
    }

    /** Hash over both key parts, consistent with {@link #equals(Object)}. */
    @Override
    public final int hashCode() {
        return Objects.hash(productUrl, date);
    }
}

View File

@ -0,0 +1,81 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.time.LocalDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * JPA entity for the {@code product} table: one marketplace product identified
 * by a generated id and carrying a unique URL.
 */
@Getter
@Setter
@Entity
@Table(name = "product")
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ProductEntity {

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id", nullable = false)
    private Long id;

    @Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
    @Enumerated(EnumType.STRING)
    private Marketplace marketplace;

    @Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
    @Enumerated(EnumType.STRING)
    private Category category;

    @Column(name = "brand", nullable = false, length = Integer.MAX_VALUE)
    private String brand;

    @Column(name = "product_name", nullable = false, length = Integer.MAX_VALUE)
    private String productName;

    @Column(name = "created_at", nullable = false)
    private LocalDateTime createdAt;

    @Column(name = "url", nullable = false, unique = true)
    private String url;

    // NOTE(review): hyphenated column name, unlike the snake_case used elsewhere - confirm against the schema.
    @Column(name = "image-url", nullable = false)
    private String imageUrl;

    /**
     * Entities are equal when their persistent classes match (Hibernate proxies
     * are unwrapped first) and this entity has a non-null id equal to the other's.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || persistentClassOf(this) != persistentClassOf(o)) {
            return false;
        }
        Long ownId = getId();
        return ownId != null && Objects.equals(ownId, ((ProductEntity) o).getId());
    }

    /** Hash of the persistent class only, so it does not change when the id is generated. */
    @Override
    public final int hashCode() {
        return persistentClassOf(this).hashCode();
    }

    /** Sets the creation timestamp to the current time right before insert. */
    @PrePersist
    protected void onCreate() {
        createdAt = LocalDateTime.now();
    }

    /** Returns the mapped class behind a Hibernate proxy, or the runtime class otherwise. */
    private static Class<?> persistentClassOf(Object candidate) {
        return candidate instanceof HibernateProxy proxy
            ? proxy.getHibernateLazyInitializer().getPersistentClass()
            : candidate.getClass();
    }
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.persistence.repository;
import java.time.ZonedDateTime;
import java.util.List;
import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
/**
 * Spring Data repository for {@link PriceHistoryEntity} rows keyed by {@link PriceHistoryId}.
 */
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
/**
 * Derived query: price-history rows of one product URL whose id date lies after
 * {@code from} and before {@code to}, ordered by that date ascending.
 *
 * NOTE(review): the name mixes "IdDate" with "Id_Date"; both presumably resolve to the
 * same nested property - unify only together with the caller.
 *
 * @param productUrl product URL stored in the embedded id
 * @param from lower bound of the date range
 * @param to upper bound of the date range
 * @return matching rows in ascending date order
 */
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
ZonedDateTime from,
ZonedDateTime to);
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.persistence.repository;
import java.util.List;
import java.util.Optional;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
/**
 * Spring Data repository for {@link ProductEntity}.
 */
@Repository
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
/** Loads the full entities whose URL is contained in {@code urls}. */
List<ProductEntity> findAllByUrlIn(List<String> urls);
/**
 * Returns only the URLs (not the entities) of products whose URL is contained in {@code urls}.
 *
 * NOTE(review): the named parameter :urls is bound by method-parameter name with no explicit
 * binding annotation; this presumably relies on parameter names being kept at compile time.
 */
@Query("""
select p.url from ProductEntity p where p.url in :urls
""")
List<String> findSavedUrl(List<String> urls);
/** Looks up a single product by its URL. */
Optional<ProductEntity> findByUrl(String url);
/** Returns one page of products for the given marketplace and category. */
Page<ProductEntity> findAllByMarketplaceAndCategory(Marketplace marketplace, Category category, Pageable pageable);
}

View File

@ -0,0 +1,105 @@
package ru.pricepulse.parsingservice.service;
import java.time.ZonedDateTime;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import jakarta.persistence.EntityNotFoundException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.Pageable;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
/**
 * Persists parsed products with their prices and serves product / price-history reads.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class ProductService {

    private final ProductRepository productRepository;
    private final ProductPriceRepository productPriceRepository;
    private final ProductMapper productMapper;
    private final PriceHistoryMapper priceHistoryMapper;

    /**
     * Saves a batch of parsed items: products whose URL is not stored yet are inserted,
     * and a price-history row is written for every item in the batch.
     *
     * <p>An empty batch is a no-op. A URL repeated inside the batch yields a single
     * product (first occurrence wins), so the unique URL constraint is not violated.
     *
     * @param parsedData parsed items; must not be {@code null}
     */
    @Transactional
    @Retryable
    public void saveBatch(List<ParsedData> parsedData) {
        if (parsedData.isEmpty()) {
            return;
        }
        List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
        // Seeded with URLs already in the database; Set.add() returning false also
        // filters out URLs seen earlier in this very batch.
        Set<String> knownUrls = new HashSet<>(productRepository.findSavedUrl(productsUrls));
        List<ProductEntity> products = parsedData.stream()
            .filter(data -> knownUrls.add(data.getUrl()))
            .map(this::getProduct)
            .toList();
        List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();

        productRepository.saveAll(products);
        log.info("Сохранили пачку товаров {}", products.size());
        productPriceRepository.saveAll(prices);
        log.info("Сохранили историю цен {}", prices.size());
    }

    /**
     * Finds a product by URL.
     *
     * @throws EntityNotFoundException when no product has that URL
     */
    @Transactional(readOnly = true)
    public ProductDto findByUrl(String productUrl) {
        var product = productRepository.findByUrl(productUrl).orElseThrow(EntityNotFoundException::new);
        return productMapper.toProductDto(product);
    }

    /** Returns the price history of a product between {@code from} and {@code to}. */
    @Transactional(readOnly = true)
    public PriceHistoryDto findPriceHistoryByRange(String productUrl,
                                                   ZonedDateTime from,
                                                   ZonedDateTime to) {
        var priceHistory = productPriceRepository
            .findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
        return priceHistoryMapper.toPriceHistoryDto(priceHistory);
    }

    /**
     * Returns one page of products for a marketplace and category.
     *
     * <p>{@code totalItems} is the total number of matching products across all pages,
     * not the number of elements on the returned page.
     */
    @Transactional(readOnly = true)
    public ProductsPageDto findAllProductsByPage(Marketplace marketplace,
                                                 Category category,
                                                 Pageable pageable) {
        var page = productRepository.findAllByMarketplaceAndCategory(marketplace, category, pageable);
        return new ProductsPageDto(
            Math.toIntExact(page.getTotalElements()),
            page.getTotalPages(),
            page.getNumber(),
            page.getContent().stream().map(productMapper::toProductDto).toList()
        );
    }

    /** Builds a price-history row for the item, timestamped with the current time. */
    private PriceHistoryEntity getPriceHistory(ParsedData product) {
        var priceHistoryId = new PriceHistoryId();
        priceHistoryId.setProductUrl(product.getUrl());
        priceHistoryId.setDate(ZonedDateTime.now());
        var priceHistory = new PriceHistoryEntity();
        priceHistory.setId(priceHistoryId);
        priceHistory.setPrice(product.getPrice());
        return priceHistory;
    }

    /** Copies the descriptive fields of a parsed item into a new product entity. */
    private ProductEntity getProduct(ParsedData product) {
        var productEntity = new ProductEntity();
        productEntity.setCategory(product.getCategory());
        productEntity.setBrand(product.getBrand());
        productEntity.setProductName(product.getProductName());
        productEntity.setUrl(product.getUrl());
        productEntity.setMarketplace(product.getMarketplace());
        productEntity.setImageUrl(product.getImageUrl());
        return productEntity;
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.service.dto;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
import lombok.Setter;
/**
 * Price history of a product as a map from timestamp to price.
 *
 * <p>The map keeps insertion order, so a caller that inserts entries in
 * chronological order gets them back (and serialized) in that order.
 */
@Getter
@Setter
public class PriceHistoryDto {

    private final Map<ZonedDateTime, BigDecimal> priceHistory;

    public PriceHistoryDto() {
        // Fully qualified to avoid touching the import block; a plain HashMap would
        // discard the order in which entries are added.
        this.priceHistory = new java.util.LinkedHashMap<>();
    }
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.service.dto;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * Read-only view of a product returned by the API.
 * All fields are final and assigned through the generated required-args constructor.
 */
@Getter
@Setter
@RequiredArgsConstructor
public class ProductDto {
// Product identifier.
private final Long id;
private final Marketplace marketplace;
private final Category category;
private final String brand;
private final String productName;
// URL of the product.
private final String url;
// URL of the product image.
private final String imageUrl;
}

View File

@ -0,0 +1,22 @@
package ru.pricepulse.parsingservice.service.dto;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
/**
 * One page of products plus paging metadata.
 * All fields are final and assigned through the generated all-args constructor.
 */
@Getter
@Setter
@AllArgsConstructor
public class ProductsPageDto {
// Item count reported for the query; the value is supplied by the caller of the constructor.
private final int totalItems;
// Number of pages available.
private final int totalPages;
// Index of this page.
private final int currentPage;
// Products on this page.
private final List<ProductDto> products;
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.service.mapper;
import java.util.List;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
/**
 * Converts price-history entities into the API DTO.
 */
@Component
public class PriceHistoryMapper {

    /**
     * Builds a DTO mapping each entry's timestamp, with the nanosecond part zeroed,
     * to its price. Entries whose truncated timestamps collide overwrite one another.
     */
    public PriceHistoryDto toPriceHistoryDto(List<PriceHistoryEntity> priceHistory) {
        var result = new PriceHistoryDto();
        var pricesByMoment = result.getPriceHistory();
        for (PriceHistoryEntity entry : priceHistory) {
            var moment = entry.getId().getDate().withNano(0);
            pricesByMoment.put(moment, entry.getPrice());
        }
        return result;
    }
}

View File

@ -0,0 +1,22 @@
package ru.pricepulse.parsingservice.service.mapper;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
/**
 * Converts {@link ProductEntity} instances into {@link ProductDto}.
 */
@Component
public class ProductMapper {
/**
 * Copies id, marketplace, category, brand, name, URL and image URL of the entity into a new DTO.
 *
 * @param product source entity; dereferenced directly, so it must not be null
 * @return DTO carrying the same field values
 */
public ProductDto toProductDto(ProductEntity product) {
return new ProductDto(
product.getId(),
product.getMarketplace(),
product.getCategory(),
product.getBrand(),
product.getProductName(),
product.getUrl(),
product.getImageUrl()
);
}
}

View File

@ -0,0 +1,58 @@
package ru.pricepulse.parsingservice.service.scheduler;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
/**
 * Ensures that the monthly {@code price_history_*} partitions for the current and the
 * next month exist. Active only under the {@code postgres_stat} profile.
 */
@Slf4j
@Service
@RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionScheduler {

    private final PartitionService partitionService;
    private final DateTimeFormatter partitionDateTimeFormatter;

    /** Runs the partition check once after the bean is constructed. */
    @PostConstruct
    public void init() {
        checkAndCreateMonthlyPartitions();
    }

    /** Scheduled entry point using the {@code @monthly} cron macro. */
    @Scheduled(cron = "@monthly")
    public void checkAndCreatePartitionsMonthly() {
        checkAndCreateMonthlyPartitions();
    }

    /** Checks, and creates when missing, the partitions for this month and the next. */
    public void checkAndCreateMonthlyPartitions() {
        LocalDate thisMonthStart = LocalDate.now().withDayOfMonth(1);
        LocalDate nextMonthStart = thisMonthStart.plusMonths(1);

        String thisMonthName = getPartitionName(thisMonthStart);
        String nextMonthName = getPartitionName(nextMonthStart);

        checkAndCreatePartition(thisMonthName, thisMonthStart);
        checkAndCreatePartition(nextMonthName, nextMonthStart);
    }

    /** Partition name: fixed prefix plus the date rendered by the injected formatter. */
    private String getPartitionName(LocalDate date) {
        String suffix = partitionDateTimeFormatter.format(date);
        return "price_history_" + suffix;
    }

    /** Creates the partition covering one month from {@code startDate} unless it already exists. */
    private void checkAndCreatePartition(String partitionName, LocalDate startDate) {
        if (partitionService.checkPartitionExists(partitionName)) {
            log.info("Партиция {} уже существует.", partitionName);
            return;
        }
        LocalDate endDate = startDate.plusMonths(1);
        partitionService.createPartition(partitionName, startDate.toString(), endDate.toString());
        log.info("Партиция {} создана для диапазона: {} - {} ", partitionName, startDate, endDate);
    }
}

View File

@ -0,0 +1,60 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import jakarta.persistence.EntityNotFoundException;
import jakarta.servlet.http.HttpServletRequest;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ControllerAdvice;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseStatus;
/**
 * Translates exceptions thrown by controllers into {@link ErrorResponse} bodies:
 * {@link IllegalArgumentException} to 400, {@link EntityNotFoundException} to 404,
 * any other {@link Exception} to 500.
 */
@ControllerAdvice
public class CommonExceptionHandler {

    /** Maps {@link IllegalArgumentException} to HTTP 400. */
    @ExceptionHandler(IllegalArgumentException.class)
    @ResponseStatus(HttpStatus.BAD_REQUEST)
    public ResponseEntity<ErrorResponse> exceptionHandler(Exception ex,
                                                          HttpServletRequest request) {
        return handleException(HttpStatus.BAD_REQUEST, request, ex);
    }

    /** Maps {@link EntityNotFoundException} to HTTP 404. */
    @ExceptionHandler(EntityNotFoundException.class)
    @ResponseStatus(HttpStatus.NOT_FOUND)
    public ResponseEntity<ErrorResponse> handleNotFoundException(Exception ex,
                                                                 HttpServletRequest request) {
        return handleException(HttpStatus.NOT_FOUND, request, ex);
    }

    /** Fallback: every other exception becomes HTTP 500. */
    @ExceptionHandler(Exception.class)
    @ResponseStatus(HttpStatus.INTERNAL_SERVER_ERROR)
    public ResponseEntity<ErrorResponse> handleInternalServerErrorException(Exception ex,
                                                                            HttpServletRequest request) {
        return handleException(HttpStatus.INTERNAL_SERVER_ERROR, request, ex);
    }

    /*@ExceptionHandler(AccessDeniedException.class)
    @ResponseStatus(HttpStatus.FORBIDDEN)
    public ResponseEntity<ErrorResponse> handleForbiddenException(Exception ex,
    HttpServletRequest request) {
    return handleException(HttpStatus.FORBIDDEN, request, ex);
    }
    @ExceptionHandler(AuthenticationException.class)
    @ResponseStatus(HttpStatus.UNAUTHORIZED)
    public ResponseEntity<ErrorResponse> handleUnauthorizedException(Exception ex,
    HttpServletRequest request) {
    return handleException(HttpStatus.UNAUTHORIZED, request, ex);
    }*/

    /** Builds the response body from the status, the request URI and the exception message. */
    private ResponseEntity<ErrorResponse> handleException(HttpStatus status, HttpServletRequest request, Exception ex) {
        int code = status.value();
        URI requestUri = URI.create(request.getRequestURI());
        String message = ex.getMessage();
        return ResponseEntity.status(status).body(new ErrorResponse(code, status, requestUri, message));
    }
}

View File

@ -0,0 +1,13 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import org.springframework.http.HttpStatus;
/**
 * Error payload returned by the exception handler.
 *
 * @param statusCode numeric HTTP status code
 * @param status HTTP status the code belongs to
 * @param requestURI URI of the request that failed
 * @param message message of the exception that was handled
 */
public record ErrorResponse (
Integer statusCode,
HttpStatus status,
URI requestURI,
String message
){
}

View File

@ -0,0 +1,27 @@
package ru.pricepulse.parsingservice.web.rest;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
/**
 * REST endpoint that lists product categories.
 */
@RestController
@RequestMapping("/api/v1/categories")
@RequiredArgsConstructor
public class CategoryApi {

    private final OzonService ozonService;

    /**
     * Returns the categories reported by the Ozon service when the marketplace is
     * {@code OZON}; for any other value (including none) returns all {@link Category} constants.
     */
    @GetMapping
    public ResponseEntity<?> getCategories(Marketplace marketplace) {
        if (marketplace != Marketplace.OZON) {
            return ResponseEntity.ok(Category.values());
        }
        return ResponseEntity.ok(ozonService.getCategories());
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.web.rest;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * REST endpoint that lists the supported marketplaces.
 */
@RestController
@RequestMapping("/api/v1/marketplaces")
@RequiredArgsConstructor
public class MarketplaceApi {
/** Returns every constant of the {@link Marketplace} enum. */
@GetMapping
public ResponseEntity<Marketplace[]> getMarketplace() {
return ResponseEntity.ok(Marketplace.values());
}
}

View File

@ -0,0 +1,52 @@
package ru.pricepulse.parsingservice.web.rest;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import lombok.RequiredArgsConstructor;
import org.springframework.data.domain.Pageable;
import org.springframework.format.annotation.DateTimeFormat;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.service.ProductService;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
/**
 * REST endpoints for product details, price history and paged product listings.
 */
@RestController
@RequestMapping("/api/v1/products")
@RequiredArgsConstructor
public class ProductApi {

    private final ProductService productService;

    /** Returns the product stored under the given URL. */
    @GetMapping("/info")
    public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
        return ResponseEntity.ok(productService.findByUrl(productUrl));
    }

    /**
     * Returns the price history of a product between the start of day {@code from}
     * and the start of day {@code to}, both taken in the requested zone offset.
     *
     * @param productUrl URL of the product
     * @param from first day of the range (ISO date)
     * @param to day whose start ends the range (ISO date)
     * @param zoneOffset optional offset id such as {@code +03:00}; UTC is used when it
     *                   is missing or blank
     */
    @GetMapping("/price-history")
    public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(@RequestParam String productUrl,
                                                                         @RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
                                                                         @RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
                                                                         String zoneOffset) {
        // The parameter is not marked as required, so it may arrive as null;
        // ZoneOffset.of(null) would fail with a NullPointerException.
        ZoneOffset zone = (zoneOffset == null || zoneOffset.isBlank())
            ? ZoneOffset.UTC
            : ZoneOffset.of(zoneOffset);
        ZonedDateTime fromDateTime = from.atStartOfDay(zone);
        ZonedDateTime toDateTime = to.atStartOfDay(zone);
        return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
    }

    /** Returns one page of products for the given marketplace and category. */
    @GetMapping
    public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Marketplace marketplace,
                                                                           Category category,
                                                                           Pageable pageable) {
        return ResponseEntity.ok(productService.findAllProductsByPage(marketplace, category, pageable));
    }
}

View File

@ -0,0 +1,26 @@
package ru.pricepulse.parsingservice.wildberries_parser.converter;
import java.time.LocalDateTime;
import org.springframework.core.convert.converter.Converter;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
/**
 * Spring converter from the Wildberries {@link ProductInfoDto} to {@link ProductEntity}.
 */
@Component
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {

    /**
     * Creates an entity with marketplace WILDBERRIES, category LAPTOP, the DTO's brand and
     * name, the current time as creation time and an empty image URL. Id and URL are left unset.
     */
    @Override
    public ProductEntity convert(ProductInfoDto source) {
        var entity = new ProductEntity();
        entity.setMarketplace(Marketplace.WILDBERRIES);
        entity.setCategory(Category.LAPTOP);
        entity.setBrand(source.getBrand());
        entity.setProductName(source.getName());
        entity.setCreatedAt(LocalDateTime.now());
        entity.setImageUrl("");
        return entity;
    }
}

View File

@ -0,0 +1,115 @@
package ru.pricepulse.parsingservice.wildberries_parser.proxy;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.InetSocketAddress;
import java.net.Proxy;
import java.net.URL;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.stream.Collectors;
public class ProxyChecker {
private static final int TIMEOUT = 2000;
private static final int THREAD_COUNT = 30;
public static List<String> readProxiesFromFile(String filePath) {
try {
return Files.readAllLines(Paths.get(filePath));
} catch (IOException e) {
System.err.println("Ошибка при чтении файла: " + e.getMessage());
return new ArrayList<>();
}
}
public static List<String> checkProxies(List<String> proxies) {
ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
List<Future<String>> futures = new ArrayList<>();
// Отправляем задачи проверки прокси в пул потоков
for (String proxyAddress : proxies) {
futures.add(executor.submit(() -> isProxyWorking(proxyAddress) ? proxyAddress : null));
}
// Получаем результаты выполнения
List<String> workingProxies = futures.stream()
.map(future -> {
try {
return future.get();
} catch (Exception e) {
System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
return null;
}
})
.filter(proxy -> proxy != null)
.collect(Collectors.toList());
executor.shutdown(); // Завершаем работу пула потоков
return workingProxies;
}
private static boolean isProxyWorking(String proxyAddress) {
String[] parts = proxyAddress.split(":");
if (parts.length != 2) {
System.err.println("Некорректный формат прокси: " + proxyAddress);
return false;
}
String ip = parts[0];
int port;
try {
port = Integer.parseInt(parts[1]);
} catch (NumberFormatException e) {
System.err.println("Некорректный порт у прокси: " + proxyAddress);
return false;
}
try {
Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
URL url = new URL("http://www.google.com");
HttpURLConnection connection = (HttpURLConnection) url.openConnection(proxy);
connection.setConnectTimeout(TIMEOUT);
connection.setReadTimeout(TIMEOUT);
connection.setRequestMethod("GET");
connection.connect();
int responseCode = connection.getResponseCode();
if (
responseCode == 200
// responseCode == 403 ||
// responseCode == 500 ||
// responseCode == 407 ||
// responseCode == 501
) {
System.out.println("Прокси работает (код ответа " + responseCode + "): " + proxyAddress);
return true;
} else {
System.out.println("Прокси не отвечает (код ответа " + responseCode + "): " + proxyAddress);
return false;
}
} catch (IOException e) {
System.out.println("Прокси не отвечает: " + proxyAddress);
return false;
}
}
public static void saveProxiesToFile(List<String> proxies, Path filePath) {
try (BufferedWriter writer = Files.newBufferedWriter(filePath)) {
for (String proxy : proxies) {
writer.write(proxy);
writer.newLine();
}
} catch (IOException e) {
System.err.println("Ошибка при записи в файл: " + e.getMessage());
}
}
}

View File

@ -0,0 +1,24 @@
package ru.pricepulse.parsingservice.wildberries_parser.scheduler;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
/**
 * Scheduled job that triggers Wildberries parsing.
 * The bean is created only when the property marketplace.wildberries.status equals "true".
 */
@Slf4j
@Service
@RequiredArgsConstructor
@ConditionalOnProperty(prefix = "marketplace.wildberries", name = "status", havingValue = "true")
public class WildberriesProductUpdater {
private final ParsingService parsingService;
/**
 * Runs one full parsing pass at a fixed rate of 3,600,000 ms (one hour).
 * The log messages literally read "starting debugging" / "finishing debugging".
 */
@Scheduled(fixedRate = 3600000)
public void updateWildberriesProducts() {
log.info("Начинаем отладку...");
parsingService.parse();
log.info("Заканчиваем отладку...");
}
}

View File

@ -0,0 +1,79 @@
package ru.pricepulse.parsingservice.wildberries_parser.service;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.AllArgsConstructor;
import org.springframework.core.convert.ConversionService;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
/**
 * Walks the Wildberries catalog page by page and hands the products and their
 * current prices of each page to the product service.
 */
@Service("wildberriesParsingService")
@AllArgsConstructor
public class ParsingService {

    private final Client client;
    private final ObjectMapper objectMapper;
    private final ConversionService conversionService;
    private final MarketplacesConfig marketplacesConfig;
    private final ProductService productService;

    /**
     * Fetches pages starting at 1. The page count is derived from the "total" value of
     * the first response, assuming 100 elements per page; each page is saved before
     * the next one is requested.
     */
    @SuppressWarnings("unchecked")
    public void parse() {
        final int elementsInPage = 100;
        Integer totalPages = null;
        int page = 1;
        do {
            var wildberries = marketplacesConfig.getWildberriesConfigProperties();
            var pageData = client.scrapPage(page, wildberries.getShard(), marketplacesConfig.getWildberriesConfigProperties().getLaptopUrl());
            System.out.println("Получена страница: " + page);

            if (totalPages == null) {
                Map<String, Object> dataMap = (Map<String, Object>) pageData.get("data");
                int totalElements = (int) dataMap.get("total");
                totalPages = (int) Math.ceil((double) totalElements / elementsInPage);
            }

            List<ProductEntity> productEntities = new ArrayList<>();
            List<PriceHistoryEntity> priceHistories = new ArrayList<>();
            for (ProductInfoDto dto : convertMapObjectToListProductInfoDto(pageData)) {
                ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
                productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
                // salePriceU is divided by 100 to obtain the stored price.
                BigDecimal price = BigDecimal.valueOf(dto.getSalePriceU() / 100.0);
                PriceHistoryId historyId = new PriceHistoryId(productEntity.getUrl(), ZonedDateTime.now());
                productEntities.add(productEntity);
                priceHistories.add(PriceHistoryEntity.builder().id(historyId).price(price).build());
            }

            productService.saveData(productEntities, priceHistories);
            page++;
        } while (page <= totalPages);
    }

    /** Extracts the "data" section of a page response and converts its products. */
    @SuppressWarnings("unchecked")
    private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
        var dataSection = (Map<String, ArrayList<Object>>) map.get("data");
        return getProductInfoDtos(dataSection);
    }

    /** Converts the raw "products" list into typed DTOs with the object mapper. */
    private List<ProductInfoDto> getProductInfoDtos(Map<String, ArrayList<Object>> dataMap) {
        TypeReference<List<ProductInfoDto>> listOfProducts = new TypeReference<>() {
        };
        return objectMapper.convertValue(dataMap.get("products"), listOfProducts);
    }
}

View File

@ -0,0 +1,57 @@
package ru.pricepulse.parsingservice.wildberries_parser.service;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
/**
 * Persists products and price history parsed from Wildberries.
 */
@Service("wildberriesProductService")
@AllArgsConstructor
public class ProductService {

    private final ProductRepository productRepository;
    private final ProductPriceRepository productPriceRepository;

    /**
     * Inserts the products whose URL is not stored yet and then all given price-history rows.
     *
     * <p>A URL repeated inside the batch yields a single product (first occurrence wins).
     * Every price-history row must refer to a URL that is either already stored or part of
     * this batch; otherwise nothing is written.
     *
     * @param productEntities products parsed from one page
     * @param priceHistoryEntities price rows for those products
     * @throws IllegalArgumentException when a price row refers to an unknown product URL
     */
    @Transactional
    public void saveData(List<ProductEntity> productEntities, List<PriceHistoryEntity> priceHistoryEntities) {
        if (productEntities.isEmpty() && priceHistoryEntities.isEmpty()) {
            return;
        }

        List<String> urls = productEntities.stream()
            .map(ProductEntity::getUrl)
            .collect(Collectors.toList());

        // Only the URLs are needed to find out what is already stored, so the entities
        // are not loaded. Fully qualified to avoid touching the import block.
        var knownUrls = new java.util.HashSet<>(productRepository.findSavedUrl(urls));

        // Set.add() returns false for URLs already stored or seen earlier in this batch.
        List<ProductEntity> uniqueProducts = productEntities.stream()
            .filter(product -> knownUrls.add(product.getUrl()))
            .collect(Collectors.toList());

        // Fail before any write if a price row has no matching product.
        for (PriceHistoryEntity priceHistory : priceHistoryEntities) {
            String productUrl = priceHistory.getId().getProductUrl();
            if (!knownUrls.contains(productUrl)) {
                throw new IllegalArgumentException("No product found for price history URL: " + productUrl);
            }
        }

        productRepository.saveAll(uniqueProducts);
        productPriceRepository.saveAll(priceHistoryEntities);
    }
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
import java.util.Map;
/**
 * Client that fetches catalog pages from the marketplace.
 */
public interface Client {
/**
 * Fetches one catalog page.
 *
 * @param page page number to request
 * @param shard catalog shard segment used to build the request URL
 * @param query path segment appended after the shard
 * @return the response body as a generic map
 */
Map<String, Object> scrapPage(int page, String shard, String query);
}

View File

@ -0,0 +1,42 @@
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.http.HttpEntity;
import org.springframework.http.HttpMethod;
import org.springframework.http.ResponseEntity;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
import java.util.Map;
/**
 * {@link Client} implementation that requests catalog pages through a {@link RestTemplate}.
 */
@AllArgsConstructor
@Service
@Slf4j
public class ClientImpl implements Client {

    private final RestTemplate restTemplate;
    private final MarketplacesConfig marketplacesConfig;

    /**
     * Requests one catalog page with a GET and returns the body as a map.
     * The URL is the configured catalog base followed by shard, query and fixed
     * {@code dest}/{@code subject} parameters. Retried up to 50 attempts on {@link RuntimeException}.
     */
    @Override
    @Retryable(maxAttempts = 50, value = RuntimeException.class)
    public Map<String, Object> scrapPage(int page, String shard, String query) {
        var catalogRoot = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl();
        String url = catalogRoot + shard + query + "?dest=-1257786&page=" + page + "&subject=2290";

        ParameterizedTypeReference<Map<String, Object>> bodyType = new ParameterizedTypeReference<>() {
        };
        ResponseEntity<Map<String, Object>> response =
            restTemplate.exchange(url, HttpMethod.GET, HttpEntity.EMPTY, bodyType);
        return response.getBody();
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.wildberries_parser.service.dto;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * Product data as parsed from a Wildberries catalog response.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ProductInfoDto {
// Marketplace product id; used to build the product page URL.
private Long id;
private String brand;
private String name;
private String supplier;
private Double supplierRating;
// Sale price; the parser divides this value by 100 before storing it.
private Integer salePriceU;
private Integer reviewRating;
}

View File

@ -0,0 +1,47 @@
# Application configuration for parsing-service.
# Values written as ${NAME} come from the environment; ${NAME:default} falls back
# to the value after the colon when the variable is not set.
server:
# No default is given, so SERVER_PORT has to be provided.
port: ${SERVER_PORT}
spring:
application:
name: parsing-service
jpa:
hibernate:
# The schema is only validated against the entities, never generated or altered.
ddl-auto: validate
database: postgresql
# PostgreSQL connection.
datasource:
driver-class-name: org.postgresql.Driver
url: jdbc:postgresql://${POSTGRES_JDBC_URL}
username: ${POSTGRES_JDBC_USERNAME}
password: ${POSTGRES_JDBC_PASSWORD}
# ClickHouse connection.
clickhouse:
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
username: ${CLICKHOUSE_JDBC_USERNAME}
password: ${CLICKHOUSE_JDBC_PASSWORD}
# Liquibase master changelog location.
liquibase:
change-log: classpath:/db/changelog/master.yml
# Per-marketplace parser settings.
marketplace:
ozon:
max-threads: ${OZON_MAX_PROCESSING_THREADS:5}
max-num-of-pages-on-screen: ${OZON_MAX_NUM_OF_PAGES_ON_SCREEN:100}
wildberries:
# The scheduled Wildberries updater bean is created only when this equals "true".
status: true
base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
catalog-wb-url: "https://catalog.wb.ru/catalog/"
# NOTE(review): the HTTP client declares its own retry attempt count in code;
# confirm whether these two values are actually read anywhere.
retry-attempts: 5
retry-delay: 1000
shard: "electronic15"
laptop-url: "/catalog"
logging:
pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
# Disabled log-level overrides, kept for reference.
# level:
# sql: debug
# level:
# org:
# springframework:
# boot:
# autoconfigure: DEBUG

View File

@ -0,0 +1,29 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Liquibase changelog: creates the "product" table. -->
<databaseChangeLog
        xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
                            http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">

    <changeSet id="20240926_create_product_table.xml" author="danil">
        <createTable tableName="product">
            <!-- Product identifier: auto-incremented primary key. -->
            <column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор товара">
                <constraints primaryKey="true"/>
            </column>

            <!-- Marketplace name (enum), required. -->
            <column name="marketplace" type="varchar" remarks="Название маркетплейса (enum)">
                <constraints nullable="false"/>
            </column>

            <!-- Product category, required. -->
            <column name="category" type="varchar" remarks="Категория товара">
                <constraints nullable="false"/>
            </column>

            <!-- Product brand, required. -->
            <column name="brand" type="varchar" remarks="Бренд товара">
                <constraints nullable="false"/>
            </column>

            <!-- Product name, required. -->
            <column name="product_name" type="varchar" remarks="Название товара">
                <constraints nullable="false"/>
            </column>

            <!-- Time the product was added to the database, required. -->
            <column name="created_at" type="timestamptz" remarks="Время добавления товара в базу">
                <constraints nullable="false"/>
            </column>
        </createTable>
    </changeSet>

</databaseChangeLog>

View File

@ -0,0 +1,30 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Liquibase changelog: creates the "price_history" table and links it to "product". -->
<databaseChangeLog
        xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
                            http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">

    <changeSet id="20240926_create_price_history_table.xml" author="Emelyanov535">
        <createTable tableName="price_history">
            <!-- Row identifier: auto-incremented primary key. -->
            <column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор">
                <constraints primaryKey="true"/>
            </column>

            <!-- Product id, required; bound to product.id by the foreign key below. -->
            <column name="product_id" type="bigint" remarks="ID товара">
                <constraints nullable="false"/>
            </column>

            <!-- Product price, required. -->
            <column name="price" type="numeric(10,2)" remarks="Цена товара">
                <constraints nullable="false"/>
            </column>

            <!-- Date the price was saved, required. -->
            <column name="date" type="timestamptz" remarks="Дата сохранения">
                <constraints nullable="false"/>
            </column>
        </createTable>

        <!-- Deleting a product cascades to its price history rows. -->
        <addForeignKeyConstraint
                baseTableName="price_history"
                baseColumnNames="product_id"
                constraintName="fk_product_price_history"
                referencedTableName="product"
                referencedColumnNames="id"
                onDelete="CASCADE"/>
    </changeSet>

</databaseChangeLog>

View File

@ -0,0 +1,7 @@
---
# Changelogs of the 20240926 batch; paths are resolved relative to this file.
databaseChangeLog:
  # Creates the product table.
  - include:
      file: 20240926_001_create_product_table.xml
      relativeToChangelogFile: true
  # Creates the price_history table.
  - include:
      file: 20240926_002_create_price_history_table.xml
      relativeToChangelogFile: true

View File

@ -0,0 +1,28 @@
<?xml version="1.0" encoding="UTF-8"?>
<databaseChangeLog
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
<!-- Adds the url and image-url columns to "product", then drops "price_history" and
     recreates it as a table partitioned by date and keyed by product URL. -->
<!-- NOTE(review): this id/author pair is identical to the one used by the change set that
     creates the product table. Liquibase also keys change sets on the file path, so the two
     presumably do not collide, but the id does not describe this change - confirm. -->
<changeSet id="20240926_create_product_table.xml" author="danil">
<!-- Product link: required and unique. -->
<!-- NOTE(review): NOT NULL with no default value; adding it presumably fails when "product"
     already contains rows - confirm. -->
<addColumn tableName="product">
<column name="url" type="varchar" remarks="Ссылка на товар">
<constraints nullable="false" unique="true" />
</column>
</addColumn>
<!-- Product image link: required. -->
<!-- NOTE(review): the column name contains a hyphen, unlike the snake_case names of the other
     product columns (product_name, created_at); such a name has to be quoted in SQL. Verify
     that this matches the entity mapping. -->
<addColumn tableName="product">
<column name="image-url" type="varchar" remarks="Ссылка на изображение товара">
<constraints nullable="false" />
</column>
</addColumn>
<!-- Drops the existing price_history table; any rows stored in it are discarded. -->
<dropTable tableName="price_history" cascadeConstraints="true" />
<!-- Recreates price_history with a composite primary key (product_url, date), range-partitioned
     on date. The new table declares no foreign key to product. -->
<!-- NOTE(review): no partitions are created in this change set; PostgreSQL rejects inserts into
     a partitioned table that has no matching partition - confirm partitions are created
     elsewhere. -->
<sql>
CREATE TABLE if not exists price_history(
product_url varchar NOT NULL,
price numeric(10, 2) NOT NULL,
date timestamptz NOT NULL,
PRIMARY KEY (product_url, date)
) PARTITION BY RANGE (date);
</sql>
</changeSet>
</databaseChangeLog>

View File

@ -0,0 +1,4 @@
---
# Changelogs of the 20241006 batch; the path is resolved relative to this file.
databaseChangeLog:
  # Adds columns to product and recreates price_history.
  - include:
      file: 20241006_001_add_columns_in_tables.xml
      relativeToChangelogFile: true

View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="UTF-8"?>
<!-- Liquibase changelog: adds a unique constraint on product.url. -->
<databaseChangeLog
        xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
        xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
        xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
                            http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">

    <!-- No constraintName is given, so the constraint name is left to the database/Liquibase. -->
    <!-- NOTE(review): the url column is already declared unique="true" in the change set that
         adds it, so this presumably creates a second, redundant unique constraint - confirm. -->
    <changeSet id="20241014_add_constraint_on_product_url.xml" author="Emelyanov535">
        <addUniqueConstraint tableName="product" columnNames="url"/>
    </changeSet>

</databaseChangeLog>

View File

@ -0,0 +1,4 @@
---
# Changelogs of the 20241014 batch; the path is resolved relative to this file.
databaseChangeLog:
  # Adds the unique constraint on product.url.
  - include:
      file: 20241014_add_constraint_on_product_url.xml
      relativeToChangelogFile: true

View File

@ -0,0 +1,10 @@
---
# Root changelog: pulls in the per-date batch changelogs in the order listed.
# Paths are resolved relative to this file.
databaseChangeLog:
  - include:
      file: 20240926/master.yml
      relativeToChangelogFile: true
  - include:
      file: 20241006/master.yml
      relativeToChangelogFile: true
  - include:
      file: 20241014/master.yml
      relativeToChangelogFile: true

View File

@ -0,0 +1,135 @@
97.74.87.226:80
54.248.238.110:80
3.141.217.225:80
127.0.0.7:80
49.12.235.70:8081
13.38.176.104:3128
46.51.249.135:3128
162.223.90.130:80
133.186.144.112:8080
51.210.54.186:80
101.108.123.39:8080
3.130.65.162:3128
80.249.112.162:80
3.126.147.182:3128
110.164.191.211:80
13.208.56.180:80
31.207.38.66:80
116.203.27.109:80
13.36.104.85:80
18.228.198.164:3128
3.123.150.192:3128
8.219.97.248:80
149.102.233.167:8081
202.162.105.202:80
165.22.77.86:80
154.205.128.153:8888
154.65.39.8:80
3.124.133.93:3128
31.40.248.2:8080
188.253.112.218:80
3.71.239.218:80
159.223.92.147:8888
3.78.92.159:3128
54.92.168.145:8080
50.62.183.223:80
123.30.154.171:7777
43.200.77.128:3128
35.76.62.196:80
204.57.112.5:80
15.235.153.57:8089
54.152.3.36:80
47.74.152.29:8888
0.0.0.0:80
13.59.156.167:80
3.127.62.252:80
35.79.120.242:3128
3.212.148.199:80
3.122.84.99:3128
45.92.177.60:8080
23.95.216.78:34561
82.180.146.116:3128
52.67.10.183:80
172.191.74.198:8080
13.37.59.99:3128
148.66.6.213:80
18.134.236.231:80
3.130.65.162:80
103.153.154.6:80
109.236.83.153:8888
78.32.2.82:8080
3.9.71.167:1080
35.72.118.126:80
46.47.197.210:3128
13.37.73.214:80
13.37.89.201:80
110.12.211.140:80
154.90.55.37:80
152.89.246.197:8080
3.37.125.76:3128
44.218.183.55:80
18.135.133.116:3128
52.196.1.182:80
94.72.152.254:80
3.123.150.192:80
196.11.183.160:8080
18.133.16.21:80
3.12.144.146:80
49.13.173.87:80
13.56.192.187:80
161.35.49.68:80
13.37.59.99:80
3.122.84.99:80
158.140.139.11:58100
148.66.6.210:80
153.19.91.77:80
189.22.234.41:80
52.67.10.183:3128
41.59.90.171:80
43.132.219.102:80
13.40.46.249:1088
16.163.149.249:80
3.71.239.218:3128
13.36.113.81:3128
60.242.169.3:80
49.13.173.87:8081
35.176.148.8:1080
18.135.133.116:80
13.37.89.201:3128
3.127.121.101:80
35.178.104.4:80
182.72.203.246:80
13.40.239.130:1080
65.108.207.6:80
18.223.25.15:80
54.233.119.172:3128
66.97.37.164:80
3.78.92.159:80
110.168.213.172:8080
49.12.235.70:80
94.156.250.169:20128
15.236.106.236:3128
13.38.153.36:80
178.128.199.145:80
156.67.217.159:80
148.66.6.211:80
13.36.87.105:3128
3.126.147.182:80
51.222.155.142:80
141.145.214.176:80
184.169.154.119:80
5.255.113.61:80
3.124.133.93:80
3.127.121.101:3128
148.66.6.212:80
176.9.239.181:80
63.35.64.177:3128
18.169.83.87:1080
148.66.6.214:80
18.228.149.161:80
18.228.198.164:80
106.105.118.250:80
103.174.102.127:80
162.0.238.147:80
103.127.1.130:80
185.233.187.103:80

View File

@ -0,0 +1,135 @@
97.74.87.226:80
54.248.238.110:80
3.141.217.225:80
127.0.0.7:80
49.12.235.70:8081
13.38.176.104:3128
46.51.249.135:3128
162.223.90.130:80
133.186.144.112:8080
51.210.54.186:80
101.108.123.39:8080
3.130.65.162:3128
80.249.112.162:80
3.126.147.182:3128
110.164.191.211:80
13.208.56.180:80
31.207.38.66:80
116.203.27.109:80
13.36.104.85:80
18.228.198.164:3128
3.123.150.192:3128
8.219.97.248:80
149.102.233.167:8081
202.162.105.202:80
165.22.77.86:80
154.205.128.153:8888
154.65.39.8:80
3.124.133.93:3128
31.40.248.2:8080
188.253.112.218:80
3.71.239.218:80
159.223.92.147:8888
3.78.92.159:3128
54.92.168.145:8080
50.62.183.223:80
123.30.154.171:7777
43.200.77.128:3128
35.76.62.196:80
204.57.112.5:80
15.235.153.57:8089
54.152.3.36:80
47.74.152.29:8888
0.0.0.0:80
13.59.156.167:80
3.127.62.252:80
35.79.120.242:3128
3.212.148.199:80
3.122.84.99:3128
45.92.177.60:8080
23.95.216.78:34561
82.180.146.116:3128
52.67.10.183:80
172.191.74.198:8080
13.37.59.99:3128
148.66.6.213:80
18.134.236.231:80
3.130.65.162:80
103.153.154.6:80
109.236.83.153:8888
78.32.2.82:8080
3.9.71.167:1080
35.72.118.126:80
46.47.197.210:3128
13.37.73.214:80
13.37.89.201:80
110.12.211.140:80
154.90.55.37:80
152.89.246.197:8080
3.37.125.76:3128
44.218.183.55:80
18.135.133.116:3128
52.196.1.182:80
94.72.152.254:80
3.123.150.192:80
196.11.183.160:8080
18.133.16.21:80
3.12.144.146:80
49.13.173.87:80
13.56.192.187:80
161.35.49.68:80
13.37.59.99:80
3.122.84.99:80
158.140.139.11:58100
148.66.6.210:80
153.19.91.77:80
189.22.234.41:80
52.67.10.183:3128
41.59.90.171:80
43.132.219.102:80
13.40.46.249:1088
16.163.149.249:80
3.71.239.218:3128
13.36.113.81:3128
60.242.169.3:80
49.13.173.87:8081
35.176.148.8:1080
18.135.133.116:80
13.37.89.201:3128
3.127.121.101:80
35.178.104.4:80
182.72.203.246:80
13.40.239.130:1080
65.108.207.6:80
18.223.25.15:80
54.233.119.172:3128
66.97.37.164:80
3.78.92.159:80
110.168.213.172:8080
49.12.235.70:80
94.156.250.169:20128
15.236.106.236:3128
13.38.153.36:80
178.128.199.145:80
156.67.217.159:80
148.66.6.211:80
13.36.87.105:3128
3.126.147.182:80
51.222.155.142:80
141.145.214.176:80
184.169.154.119:80
5.255.113.61:80
3.124.133.93:80
3.127.121.101:3128
148.66.6.212:80
176.9.239.181:80
63.35.64.177:3128
18.169.83.87:1080
148.66.6.214:80
18.228.149.161:80
18.228.198.164:80
106.105.118.250:80
103.174.102.127:80
162.0.238.147:80
103.127.1.130:80
185.233.187.103:80

View File

@ -0,0 +1,11 @@
package ru.pricepulse.parsingservice;
import org.junit.jupiter.api.Test;
/**
 * Placeholder test class for the parsing service.
 *
 * <p>The class carries no Spring test annotation, so the single test below runs as a
 * plain JUnit test and does not start an application context.
 */
class ParsingServiceApplicationTests {

    /** Empty test body: passes as long as the class can be loaded and the method invoked. */
    @Test
    void contextLoads() {
        // intentionally empty
    }

}