Compare commits
21 Commits
master
...
feature/pa
Author | SHA1 | Date | |
---|---|---|---|
|
8afc758987 | ||
|
4f5dda4dbf | ||
|
59c41a4912 | ||
c4bb7a5ffa | |||
|
fd71513bbf | ||
|
171cc650f1 | ||
1df7dc94b8 | |||
|
42d947440c | ||
|
83b1c5d72c | ||
|
82f648e16c | ||
84e0af60c9 | |||
|
9895aaff33 | ||
|
ae8ac061bc | ||
|
a0271125a1 | ||
84f344084c | |||
5ae300389c | |||
|
ef2240e8ab | ||
f58b0a4a02 | |||
ffe6920b29 | |||
30ca5acc34 | |||
a24bf08f52 |
18
.run/ParsingService [local].run.xml
Normal file
18
.run/ParsingService [local].run.xml
Normal file
@ -0,0 +1,18 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<option name="ACTIVE_PROFILES" value="dev" />
|
||||
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||
<envs>
|
||||
<env name="JDBC_PASSWORD" value="postgres" />
|
||||
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
||||
<env name="JDBC_USERNAME" value="postgres" />
|
||||
<env name="SERVER_PORT" value="8080" />
|
||||
<env name="WEBDRIVER_CHROME_PATH" value="$PROJECT_DIR$/parsing-service/web-driver/chromedriver" />
|
||||
</envs>
|
||||
<module name="parsing-service.main" />
|
||||
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
||||
<method v="2">
|
||||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
</component>
|
80
docker/docker-compose.yml
Normal file
80
docker/docker-compose.yml
Normal file
@ -0,0 +1,80 @@
|
||||
version: "3.8"
|
||||
name: price-pulse
|
||||
services:
|
||||
|
||||
postgres:
|
||||
image: postgres:16
|
||||
ports:
|
||||
- "5432:5432"
|
||||
environment:
|
||||
POSTGRES_DB: parsed_data
|
||||
POSTGRES_USER: postgres
|
||||
POSTGRES_PASSWORD: postgres
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1024M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
clickhouse:
  image: clickhouse/clickhouse-server:latest
  ports:
    - "8123:8123" # HTTP interface; /play offers an in-browser query console, but a real client is preferable
    - "9000:9000" # Native TCP protocol used by clickhouse-client and native drivers
    - "9009:9009" # Inter-server communication port (replication between ClickHouse nodes)
  volumes:
    - clickhouse_data:/var/lib/clickhouse
    # The server image writes its logs to /var/log/clickhouse-server, so the
    # named volume has to be mounted there to actually persist them.
    - clickhouse_logs:/var/log/clickhouse-server
  environment:
    CLICKHOUSE_DB: parsed_data
    CLICKHOUSE_USER: user
    CLICKHOUSE_PASSWORD: password
  deploy:
    resources:
      limits:
        memory: 1024M
      reservations:
        memory: 256M
|
||||
|
||||
zookeeper:
|
||||
image: confluentinc/cp-zookeeper:latest
|
||||
environment:
|
||||
ZOOKEEPER_CLIENT_PORT: 2181
|
||||
ZOOKEEPER_TICK_TIME: 2000
|
||||
ports:
|
||||
- "2181:2181"
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1024M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
kafka:
|
||||
image: confluentinc/cp-kafka:latest
|
||||
depends_on:
|
||||
- zookeeper
|
||||
ports:
|
||||
- "9092:9092"
|
||||
environment:
|
||||
KAFKA_BROKER_ID: 1
|
||||
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
|
||||
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
|
||||
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT
|
||||
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
|
||||
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||
volumes:
|
||||
- kafka_data:/var/lib/kafka
|
||||
deploy:
|
||||
resources:
|
||||
limits:
|
||||
memory: 1024M
|
||||
reservations:
|
||||
memory: 256M
|
||||
|
||||
volumes:
|
||||
clickhouse_data:
|
||||
clickhouse_logs:
|
||||
kafka_data:
|
||||
|
37
parsing-service/.gitignore
vendored
Normal file
37
parsing-service/.gitignore
vendored
Normal file
@ -0,0 +1,37 @@
|
||||
HELP.md
|
||||
.gradle
|
||||
build/
|
||||
!gradle/wrapper/gradle-wrapper.jar
|
||||
!**/src/main/**/build/
|
||||
!**/src/test/**/build/
|
||||
|
||||
### STS ###
|
||||
.apt_generated
|
||||
.classpath
|
||||
.factorypath
|
||||
.project
|
||||
.settings
|
||||
.springBeans
|
||||
.sts4-cache
|
||||
bin/
|
||||
!**/src/main/**/bin/
|
||||
!**/src/test/**/bin/
|
||||
|
||||
### IntelliJ IDEA ###
|
||||
.idea
|
||||
*.iws
|
||||
*.iml
|
||||
*.ipr
|
||||
out/
|
||||
!**/src/main/**/out/
|
||||
!**/src/test/**/out/
|
||||
|
||||
### NetBeans ###
|
||||
/nbproject/private/
|
||||
/nbbuild/
|
||||
/dist/
|
||||
/nbdist/
|
||||
/.nb-gradle/
|
||||
|
||||
### VS Code ###
|
||||
.vscode/
|
17
parsing-service/.run/ParsingService [local].run.xml
Normal file
17
parsing-service/.run/ParsingService [local].run.xml
Normal file
@ -0,0 +1,17 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
|
||||
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||
<envs>
|
||||
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
||||
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
|
||||
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
|
||||
<env name="SERVER_PORT" value="8080" />
|
||||
</envs>
|
||||
<module name="parsing-service.main" />
|
||||
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
||||
<method v="2">
|
||||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
</component>
|
61
parsing-service/build.gradle
Normal file
61
parsing-service/build.gradle
Normal file
@ -0,0 +1,61 @@
|
||||
plugins {
|
||||
id 'java'
|
||||
id 'org.springframework.boot' version '3.3.4'
|
||||
id 'io.spring.dependency-management' version '1.1.6'
|
||||
}
|
||||
|
||||
group = 'ru.pricepulse'
|
||||
version = '0.0.1-SNAPSHOT'
|
||||
|
||||
java {
|
||||
toolchain {
|
||||
languageVersion = JavaLanguageVersion.of(21)
|
||||
}
|
||||
}
|
||||
|
||||
configurations {
|
||||
compileOnly {
|
||||
extendsFrom annotationProcessor
|
||||
}
|
||||
}
|
||||
|
||||
repositories {
|
||||
mavenCentral()
|
||||
}
|
||||
|
||||
// Versions of libraries that are not managed by the Spring Boot BOM.
ext {
    jsoupVersion = '1.18.1' // was misspelled "jsoupVesion"; renamed at the definition and at its only use below
    seleniumVersion = '4.25.0'
}

dependencies {
    // Spring starters
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.springframework.boot:spring-boot-starter-webflux'
    // implementation 'org.liquibase:liquibase-core'
    implementation 'org.springframework.kafka:spring-kafka'
    implementation 'org.springframework.retry:spring-retry:2.0.9'

    // Scraping / browser automation
    implementation "org.jsoup:jsoup:${jsoupVersion}"
    implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
    implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
    implementation 'org.apache.commons:commons-pool2:2.12.0'

    // Storage and API documentation
    implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
    implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'

    compileOnly 'org.projectlombok:lombok'

    runtimeOnly 'org.postgresql:postgresql'

    annotationProcessor 'org.projectlombok:lombok'

    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation 'org.springframework.kafka:spring-kafka-test'

    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
}
|
||||
|
||||
tasks.named('test') {
|
||||
useJUnitPlatform()
|
||||
}
|
BIN
parsing-service/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
parsing-service/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
7
parsing-service/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
7
parsing-service/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
@ -0,0 +1,7 @@
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
|
||||
networkTimeout=10000
|
||||
validateDistributionUrl=true
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
252
parsing-service/gradlew
vendored
Normal file
252
parsing-service/gradlew
vendored
Normal file
@ -0,0 +1,252 @@
|
||||
#!/bin/sh
|
||||
|
||||
#
|
||||
# Copyright © 2015-2021 the original authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
#
|
||||
|
||||
##############################################################################
|
||||
#
|
||||
# Gradle start up script for POSIX generated by Gradle.
|
||||
#
|
||||
# Important for running:
|
||||
#
|
||||
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||
# noncompliant, but you have some other compliant shell such as ksh or
|
||||
# bash, then to run this script, type that shell name before the whole
|
||||
# command line, like:
|
||||
#
|
||||
# ksh Gradle
|
||||
#
|
||||
# Busybox and similar reduced shells will NOT work, because this script
|
||||
# requires all of these POSIX shell features:
|
||||
# * functions;
|
||||
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||
# * compound commands having a testable exit status, especially «case»;
|
||||
# * various built-in commands including «command», «set», and «ulimit».
|
||||
#
|
||||
# Important for patching:
|
||||
#
|
||||
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||
#
|
||||
# The "traditional" practice of packing multiple parameters into a
|
||||
# space-separated string is a well documented source of bugs and security
|
||||
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||
# options in "$@", and eventually passing that to Java.
|
||||
#
|
||||
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||
# see the in-line comments for details.
|
||||
#
|
||||
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||
# Darwin, MinGW, and NonStop.
|
||||
#
|
||||
# (3) This script is generated from the Groovy template
|
||||
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||
# within the Gradle project.
|
||||
#
|
||||
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||
#
|
||||
##############################################################################
|
||||
|
||||
# Attempt to set APP_HOME
|
||||
|
||||
# Resolve links: $0 may be a link
|
||||
app_path=$0
|
||||
|
||||
# Need this for daisy-chained symlinks.
|
||||
while
|
||||
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||
[ -h "$app_path" ]
|
||||
do
|
||||
ls=$( ls -ld "$app_path" )
|
||||
link=${ls#*' -> '}
|
||||
case $link in #(
|
||||
/*) app_path=$link ;; #(
|
||||
*) app_path=$APP_HOME$link ;;
|
||||
esac
|
||||
done
|
||||
|
||||
# This is normally unused
|
||||
# shellcheck disable=SC2034
|
||||
APP_BASE_NAME=${0##*/}
|
||||
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
|
||||
' "$PWD" ) || exit
|
||||
|
||||
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||
MAX_FD=maximum
|
||||
|
||||
warn () {
|
||||
echo "$*"
|
||||
} >&2
|
||||
|
||||
die () {
|
||||
echo
|
||||
echo "$*"
|
||||
echo
|
||||
exit 1
|
||||
} >&2
|
||||
|
||||
# OS specific support (must be 'true' or 'false').
|
||||
cygwin=false
|
||||
msys=false
|
||||
darwin=false
|
||||
nonstop=false
|
||||
case "$( uname )" in #(
|
||||
CYGWIN* ) cygwin=true ;; #(
|
||||
Darwin* ) darwin=true ;; #(
|
||||
MSYS* | MINGW* ) msys=true ;; #(
|
||||
NONSTOP* ) nonstop=true ;;
|
||||
esac
|
||||
|
||||
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||
|
||||
|
||||
# Determine the Java command to use to start the JVM.
|
||||
if [ -n "$JAVA_HOME" ] ; then
|
||||
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||
# IBM's JDK on AIX uses strange locations for the executables
|
||||
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||
else
|
||||
JAVACMD=$JAVA_HOME/bin/java
|
||||
fi
|
||||
if [ ! -x "$JAVACMD" ] ; then
|
||||
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
else
|
||||
JAVACMD=java
|
||||
if ! command -v java >/dev/null 2>&1
|
||||
then
|
||||
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||
|
||||
Please set the JAVA_HOME variable in your environment to match the
|
||||
location of your Java installation."
|
||||
fi
|
||||
fi
|
||||
|
||||
# Increase the maximum file descriptors if we can.
|
||||
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||
case $MAX_FD in #(
|
||||
max*)
|
||||
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
MAX_FD=$( ulimit -H -n ) ||
|
||||
warn "Could not query maximum file descriptor limit"
|
||||
esac
|
||||
case $MAX_FD in #(
|
||||
'' | soft) :;; #(
|
||||
*)
|
||||
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||
# shellcheck disable=SC2039,SC3045
|
||||
ulimit -n "$MAX_FD" ||
|
||||
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||
esac
|
||||
fi
|
||||
|
||||
# Collect all arguments for the java command, stacking in reverse order:
|
||||
# * args from the command line
|
||||
# * the main class name
|
||||
# * -classpath
|
||||
# * -D...appname settings
|
||||
# * --module-path (only if needed)
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||
|
||||
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||
if "$cygwin" || "$msys" ; then
|
||||
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||
|
||||
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||
|
||||
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||
for arg do
|
||||
if
|
||||
case $arg in #(
|
||||
-*) false ;; # don't mess with options #(
|
||||
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||
[ -e "$t" ] ;; #(
|
||||
*) false ;;
|
||||
esac
|
||||
then
|
||||
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||
fi
|
||||
# Roll the args list around exactly as many times as the number of
|
||||
# args, so each arg winds up back in the position where it started, but
|
||||
# possibly modified.
|
||||
#
|
||||
# NB: a `for` loop captures its iteration list before it begins, so
|
||||
# changing the positional parameters here affects neither the number of
|
||||
# iterations, nor the values presented in `arg`.
|
||||
shift # remove old arg
|
||||
set -- "$@" "$arg" # push replacement arg
|
||||
done
|
||||
fi
|
||||
|
||||
|
||||
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||
|
||||
# Collect all arguments for the java command:
|
||||
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
|
||||
# and any embedded shellness will be escaped.
|
||||
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
|
||||
# treated as '${Hostname}' itself on the command line.
|
||||
|
||||
set -- \
|
||||
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||
-classpath "$CLASSPATH" \
|
||||
org.gradle.wrapper.GradleWrapperMain \
|
||||
"$@"
|
||||
|
||||
# Stop when "xargs" is not available.
|
||||
if ! command -v xargs >/dev/null 2>&1
|
||||
then
|
||||
die "xargs is not available"
|
||||
fi
|
||||
|
||||
# Use "xargs" to parse quoted args.
|
||||
#
|
||||
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||
#
|
||||
# In Bash we could simply go:
|
||||
#
|
||||
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||
# set -- "${ARGS[@]}" "$@"
|
||||
#
|
||||
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||
# character that might be a shell metacharacter, then use eval to reverse
|
||||
# that process (while maintaining the separation between arguments), and wrap
|
||||
# the whole thing up as a single "set" statement.
|
||||
#
|
||||
# This will of course break if any of these variables contains a newline or
|
||||
# an unmatched quote.
|
||||
#
|
||||
|
||||
eval "set -- $(
|
||||
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||
xargs -n1 |
|
||||
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||
tr '\n' ' '
|
||||
)" '"$@"'
|
||||
|
||||
exec "$JAVACMD" "$@"
|
94
parsing-service/gradlew.bat
vendored
Normal file
94
parsing-service/gradlew.bat
vendored
Normal file
@ -0,0 +1,94 @@
|
||||
@rem
|
||||
@rem Copyright 2015 the original author or authors.
|
||||
@rem
|
||||
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||
@rem you may not use this file except in compliance with the License.
|
||||
@rem You may obtain a copy of the License at
|
||||
@rem
|
||||
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||
@rem
|
||||
@rem Unless required by applicable law or agreed to in writing, software
|
||||
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
@rem See the License for the specific language governing permissions and
|
||||
@rem limitations under the License.
|
||||
@rem
|
||||
@rem SPDX-License-Identifier: Apache-2.0
|
||||
@rem
|
||||
|
||||
@if "%DEBUG%"=="" @echo off
|
||||
@rem ##########################################################################
|
||||
@rem
|
||||
@rem Gradle startup script for Windows
|
||||
@rem
|
||||
@rem ##########################################################################
|
||||
|
||||
@rem Set local scope for the variables with windows NT shell
|
||||
if "%OS%"=="Windows_NT" setlocal
|
||||
|
||||
set DIRNAME=%~dp0
|
||||
if "%DIRNAME%"=="" set DIRNAME=.
|
||||
@rem This is normally unused
|
||||
set APP_BASE_NAME=%~n0
|
||||
set APP_HOME=%DIRNAME%
|
||||
|
||||
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||
|
||||
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||
|
||||
@rem Find java.exe
|
||||
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||
|
||||
set JAVA_EXE=java.exe
|
||||
%JAVA_EXE% -version >NUL 2>&1
|
||||
if %ERRORLEVEL% equ 0 goto execute
|
||||
|
||||
echo. 1>&2
|
||||
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
|
||||
echo. 1>&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||
echo location of your Java installation. 1>&2
|
||||
|
||||
goto fail
|
||||
|
||||
:findJavaFromJavaHome
|
||||
set JAVA_HOME=%JAVA_HOME:"=%
|
||||
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||
|
||||
if exist "%JAVA_EXE%" goto execute
|
||||
|
||||
echo. 1>&2
|
||||
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
|
||||
echo. 1>&2
|
||||
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||
echo location of your Java installation. 1>&2
|
||||
|
||||
goto fail
|
||||
|
||||
:execute
|
||||
@rem Setup the command line
|
||||
|
||||
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||
|
||||
|
||||
@rem Execute Gradle
|
||||
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||
|
||||
:end
|
||||
@rem End local scope for the variables with windows NT shell
|
||||
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||
|
||||
:fail
|
||||
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||
rem the _cmd.exe /c_ return code!
|
||||
set EXIT_CODE=%ERRORLEVEL%
|
||||
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
||||
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
||||
exit /b %EXIT_CODE%
|
||||
|
||||
:mainEnd
|
||||
if "%OS%"=="Windows_NT" endlocal
|
||||
|
||||
:omega
|
1
parsing-service/settings.gradle
Normal file
1
parsing-service/settings.gradle
Normal file
@ -0,0 +1 @@
|
||||
rootProject.name = 'parsing-service'
|
@ -0,0 +1,15 @@
|
||||
package ru.pricepulse.parsingservice;
|
||||
|
||||
import org.springframework.boot.SpringApplication;
|
||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||
import org.springframework.retry.annotation.EnableRetry;
|
||||
|
||||
@SpringBootApplication
|
||||
@EnableRetry
|
||||
public class ParsingServiceApplication {
|
||||
|
||||
public static void main(String[] args) {
|
||||
SpringApplication.run(ParsingServiceApplication.class, args);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
public class DateTimeFormatterConfig {
|
||||
|
||||
@Bean
|
||||
public DateTimeFormatter partitionDateTimeFormatter() {
|
||||
return DateTimeFormatter.ofPattern("yyyy_MM");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,36 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.http.HttpRequest;
|
||||
import org.springframework.http.client.ClientHttpRequestExecution;
|
||||
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||
import org.springframework.http.client.ClientHttpResponse;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
|
||||
@Slf4j
|
||||
@AllArgsConstructor
|
||||
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
|
||||
|
||||
private final UserAgentProvider userAgentProvider;
|
||||
private final ProxyProvider proxyProvider;
|
||||
|
||||
@Override
|
||||
public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
|
||||
// Получаем случайный прокси
|
||||
//InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
|
||||
//log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||
|
||||
// Устанавливаем прокси
|
||||
//System.setProperty("http.proxyHost", proxyAddress.getHostName());
|
||||
//System.setProperty("http.proxyPort", String.valueOf(proxyAddress.getPort()));
|
||||
|
||||
//Устанавливаем динамический user-agent
|
||||
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||
request.getHeaders().set("User-Agent", randomUserAgent);
|
||||
|
||||
return execution.execute(request, body);
|
||||
}
|
||||
}
|
@ -0,0 +1,10 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
@EnableConfigurationProperties(KafkaProperties.class)
|
||||
public class KafkaConfig {
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
|
||||
|
||||
@Getter
|
||||
@Configuration
|
||||
@EnableConfigurationProperties({
|
||||
OzonConfigProperties.class,
|
||||
WildberriesConfigProperties.class
|
||||
})
|
||||
@AllArgsConstructor
|
||||
public class MarketplacesConfig {
|
||||
private final WildberriesConfigProperties wildberriesConfigProperties;
|
||||
private final OzonConfigProperties ozonConfigProperties;
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import org.springframework.core.io.Resource;
|
||||
import org.springframework.core.io.ResourceLoader;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.checkProxies;
|
||||
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.readProxiesFromFile;
|
||||
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.saveProxiesToFile;
|
||||
|
||||
@Component
|
||||
public class ProxyProvider {
|
||||
private List<String> workingProxies;
|
||||
|
||||
private final AtomicInteger currentProxyIndex = new AtomicInteger(0);
|
||||
|
||||
private final ResourceLoader resourceLoader;
|
||||
|
||||
public ProxyProvider(ResourceLoader resourceLoader) {
|
||||
this.resourceLoader = resourceLoader;
|
||||
}
|
||||
|
||||
//@PostConstruct
|
||||
public void init() throws IOException {
|
||||
Resource proxy = resourceLoader.getResource("classpath:proxy.txt");
|
||||
Resource okProxy = resourceLoader.getResource("classpath:ok-proxy.txt");
|
||||
List<String> proxies = Files.readAllLines(Path.of(proxy.getURI()));
|
||||
System.out.println("Начата проверка проксей");
|
||||
workingProxies = checkProxies(proxies);
|
||||
System.out.println("Закончена проверка проксей");
|
||||
|
||||
|
||||
saveProxiesToFile(workingProxies, Path.of(okProxy.getURI()));
|
||||
|
||||
if (workingProxies.isEmpty()) {
|
||||
throw new RuntimeException("Нет доступных рабочих прокси.");
|
||||
}
|
||||
|
||||
System.out.println("Найдено рабочих прокси: " + workingProxies.size());
|
||||
}
|
||||
|
||||
public synchronized InetSocketAddress getNextProxy() {
|
||||
// Получаем текущий индекс прокси
|
||||
int currentIndex = currentProxyIndex.getAndUpdate(index -> (index + 1) % workingProxies.size());
|
||||
|
||||
String[] proxy = workingProxies.get(currentIndex).split(":");
|
||||
return new InetSocketAddress(proxy[0], Integer.parseInt(proxy[1]));
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
import java.util.Collections;
|
||||
|
||||
@Configuration
|
||||
@AllArgsConstructor
|
||||
public class RestTemplateConfig {
|
||||
|
||||
private final UserAgentProvider userAgentProvider;
|
||||
private final ProxyProvider proxyProvider;
|
||||
|
||||
@Bean
|
||||
public RestTemplate restTemplate() {
|
||||
RestTemplate restTemplate = new RestTemplate();
|
||||
ClientHttpRequestInterceptor dynamicProxyInterceptor = new DynamicProxyInterceptor(userAgentProvider, proxyProvider);
|
||||
|
||||
// Добавляем интерсептор в RestTemplate
|
||||
restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
|
||||
|
||||
return restTemplate;
|
||||
}
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.retry.annotation.EnableRetry;
|
||||
|
||||
@Configuration
|
||||
@EnableRetry
|
||||
public class RetryConfig {}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
|
||||
|
||||
@Configuration
|
||||
@EnableScheduling
|
||||
public class SchedulerConfig {
|
||||
|
||||
@Bean
|
||||
public ThreadPoolTaskScheduler taskScheduler() {
|
||||
ThreadPoolTaskScheduler taskScheduler = new ThreadPoolTaskScheduler();
|
||||
taskScheduler.setPoolSize(10);
|
||||
taskScheduler.setThreadNamePrefix("ScheduledTask-");
|
||||
return taskScheduler;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,10 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import ru.pricepulse.parsingservice.config.properties.SeleniumConfigProperties;
|
||||
|
||||
@Configuration
|
||||
@EnableConfigurationProperties(SeleniumConfigProperties.class)
|
||||
public class SeleniumConfig {
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class UserAgentProvider {
|
||||
private static final List<String> userAgents = List.of(
|
||||
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)",
|
||||
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
|
||||
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A372 Safari/604.1"
|
||||
);
|
||||
|
||||
public String getRandomUserAgent() {
|
||||
return userAgents.get(new Random().nextInt(userAgents.size()));
|
||||
}
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.net.InetSocketAddress;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
import reactor.netty.http.client.HttpClient;
|
||||
import reactor.netty.transport.ProxyProvider;
|
||||
|
||||
@Slf4j
|
||||
@Configuration
|
||||
@AllArgsConstructor
|
||||
public class WebClientConfig {
|
||||
private final UserAgentProvider userAgentProvider;
|
||||
private final ru.pricepulse.parsingservice.config.ProxyProvider proxyProvider;
|
||||
|
||||
|
||||
@Bean
|
||||
public WebClient webClient() {
|
||||
return WebClient.builder()
|
||||
.filter((request, next) -> {
|
||||
// Получаем случайный прокси для каждого запроса
|
||||
InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
|
||||
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||
|
||||
HttpClient httpClient = HttpClient.create()
|
||||
.proxy(proxy -> proxy
|
||||
.type(ProxyProvider.Proxy.HTTP)
|
||||
.address(proxyAddress));
|
||||
|
||||
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||
log.info("Используемый User-Agent: {}", randomUserAgent);
|
||||
|
||||
// Создаем новый WebClient с прокси
|
||||
WebClient webClientWithProxy = WebClient.builder()
|
||||
.clientConnector(new ReactorClientHttpConnector(httpClient))
|
||||
.build();
|
||||
|
||||
// Выполняем запрос с обновленным User-Agent через WebClient с прокси
|
||||
return webClientWithProxy
|
||||
.method(request.method())
|
||||
.uri(request.url())
|
||||
.headers(headers -> headers.putAll(request.headers()))
|
||||
.header(HttpHeaders.USER_AGENT, randomUserAgent)
|
||||
.body(request.body()).exchange();
|
||||
})
|
||||
.codecs(configurer -> configurer
|
||||
.defaultCodecs()
|
||||
.maxInMemorySize(10 * 1024 * 1024))
|
||||
.build();
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,59 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.context.annotation.Scope;
|
||||
|
||||
@Configuration
|
||||
public class WebDriverConfig {
|
||||
|
||||
@Bean
|
||||
@Profile("visible")
|
||||
@Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
|
||||
public WebDriver webDriverVisible() {
|
||||
Map<String, Object> prefs = new HashMap<>();
|
||||
prefs.put("profile.managed_default_content_settings.images", 2);
|
||||
prefs.put("profile.managed_default_content_settings.geolocation", 2);
|
||||
|
||||
var options = new ChromeOptions();
|
||||
options.setExperimentalOption("prefs", prefs);
|
||||
WebDriverManager.chromedriver().setup();
|
||||
return new ChromeDriver(options);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@Profile("headless")
|
||||
@Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
|
||||
public WebDriver webDriverHeadless(ChromeOptions options) {
|
||||
WebDriverManager.chromedriver().setup();
|
||||
return new ChromeDriver(options);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@Profile("headless")
|
||||
public ChromeOptions chromeOptions() {
|
||||
Map<String, Object> prefs = new HashMap<>();
|
||||
prefs.put("profile.managed_default_content_settings.images", 2);
|
||||
prefs.put("profile.managed_default_content_settings.stylesheets", 2);
|
||||
|
||||
var options = new ChromeOptions();
|
||||
options.setExperimentalOption("prefs", prefs);
|
||||
options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36");
|
||||
//options.addArguments("--window-size=1920,2000");
|
||||
options.addArguments("--headless");
|
||||
options.addArguments("--disable-gpu");
|
||||
options.addArguments("--no-sandbox");
|
||||
options.addArguments("--disable-dev-shm-usage");
|
||||
return options;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,8 @@
|
||||
package ru.pricepulse.parsingservice.config.properties;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
@ConfigurationProperties(prefix = "application.kafka")
|
||||
public class KafkaConfigProperties {
|
||||
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package ru.pricepulse.parsingservice.config.properties;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@ConfigurationProperties(prefix = "marketplace.ozon")
|
||||
public class OzonConfigProperties {
|
||||
|
||||
private Integer maxThreads;
|
||||
|
||||
private Integer maxNumOfPagesOnScreen;
|
||||
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.config.properties;
|
||||
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
@ConfigurationProperties("selenium")
|
||||
public class SeleniumConfigProperties {
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.config.properties;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@ConfigurationProperties(prefix = "marketplace.wildberries")
|
||||
public class WildberriesConfigProperties {
|
||||
private String baseUrl;
|
||||
private String catalogUrl;
|
||||
private String userAgent;
|
||||
private String catalogWbUrl;
|
||||
private int retryAttempts;
|
||||
private long retryDelay;
|
||||
private String laptopUrl;
|
||||
private String shard;
|
||||
}
|
@ -0,0 +1,6 @@
|
||||
package ru.pricepulse.parsingservice.enumeration;
|
||||
|
||||
/**
 * Product categories known to the parsing service.
 */
public enum Category {
    /** Laptops. */
    LAPTOP,
    /** Smartphones. */
    SMARTPHONE
}
|
@ -0,0 +1,8 @@
|
||||
package ru.pricepulse.parsingservice.enumeration;
|
||||
|
||||
/**
 * Marketplaces known to the parsing service.
 */
public enum Marketplace {
    /** Wildberries. */
    WILDBERRIES,
    /** Ozon. */
    OZON,
    /** DNS. */
    DNS
}
|
||||
|
@ -0,0 +1,31 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
|
||||
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
|
||||
public enum OzonCategory {
|
||||
|
||||
LAPTOP ("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
|
||||
|
||||
SMARTPHONE ("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);
|
||||
|
||||
private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";
|
||||
|
||||
private final String categoryUrl;
|
||||
|
||||
private final Category mappedCategory;
|
||||
|
||||
OzonCategory(String categoryUrl,
|
||||
Category mappedCategory) {
|
||||
this.categoryUrl = categoryUrl;
|
||||
this.mappedCategory = mappedCategory;
|
||||
}
|
||||
|
||||
public String getCategoryUrl() {
|
||||
return BASE_CATEGORY_URL + categoryUrl;
|
||||
}
|
||||
|
||||
public Category getMappedCategory() {
|
||||
return mappedCategory;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,70 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.pool;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.springframework.beans.factory.ObjectFactory;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@Profile("ozon")
|
||||
public class WebDriverPool {
|
||||
|
||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();
|
||||
|
||||
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();
|
||||
|
||||
private final ObjectFactory<WebDriver> webDriverFactory;
|
||||
|
||||
private final OzonConfigProperties ozonConfigProperties;
|
||||
|
||||
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
|
||||
OzonConfigProperties ozonConfigProperties) {
|
||||
this.webDriverFactory = webDriverFactory;
|
||||
this.ozonConfigProperties = ozonConfigProperties;
|
||||
int poolSize = ozonConfigProperties.getMaxThreads();
|
||||
|
||||
for (int i = 0; i < poolSize; i++) {
|
||||
availableDrivers.add(createNewDriver());
|
||||
}
|
||||
}
|
||||
|
||||
private WebDriver createNewDriver() {
|
||||
return webDriverFactory.getObject();
|
||||
}
|
||||
|
||||
public WebDriver borrowDriver() {
|
||||
WebDriver driver = availableDrivers.poll();
|
||||
if (driver != null) {
|
||||
busyDrivers.add(driver);
|
||||
return driver;
|
||||
}
|
||||
throw new NoSuchElementException("No available driver found");
|
||||
}
|
||||
|
||||
public void returnDriver(WebDriver driver) {
|
||||
busyDrivers.remove(driver);
|
||||
availableDrivers.add(driver);
|
||||
}
|
||||
|
||||
@PreDestroy
|
||||
public void shutdownPool() {
|
||||
for (WebDriver driver : availableDrivers) {
|
||||
driver.quit();
|
||||
}
|
||||
|
||||
for (WebDriver driver : busyDrivers) {
|
||||
driver.quit();
|
||||
}
|
||||
availableDrivers.clear();
|
||||
busyDrivers.clear();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class OzonService {
|
||||
|
||||
public OzonCategory[] getCategories() {
|
||||
return OzonCategory.values();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Profile("postgres_stat")
|
||||
public class PartitionService {
|
||||
|
||||
private final JdbcTemplate postgresDataSource;
|
||||
|
||||
public boolean checkPartitionExists(String partitionName) {
|
||||
String query = "SELECT to_regclass('public." + partitionName + "')";
|
||||
String result = postgresDataSource.queryForObject(query, String.class);
|
||||
return result != null;
|
||||
}
|
||||
|
||||
public void createPartition(String partitionName, String startDate, String endDate) {
|
||||
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
|
||||
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
|
||||
postgresDataSource.execute(createPartitionSQL);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,30 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
public class ParsedData {
|
||||
|
||||
private Marketplace marketplace;
|
||||
|
||||
private Category category;
|
||||
|
||||
private String brand;
|
||||
|
||||
private String productName;
|
||||
|
||||
private String url;
|
||||
|
||||
private String imageUrl;
|
||||
|
||||
private BigDecimal price;
|
||||
|
||||
}
|
@ -0,0 +1,64 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
|
||||
@Slf4j
|
||||
public class AccessDeniedPage implements MarketplacePage {
|
||||
|
||||
private static final String RELOAD_BUTTON_ID = "reload-button";
|
||||
private static final String RELOAD_BUTTON_XPATH = "//button[contains(text(),'Обновить')]";
|
||||
private static final String WARNING_IMAGE_CSS = "img[alt='warning']";
|
||||
private static final String ACCESS_DENIED_TEXT_XPATH = "//h1[text()='Доступ ограничен']";
|
||||
|
||||
private final By reloadButtonById = By.id(RELOAD_BUTTON_ID);
|
||||
private final By reloadButtonByXpath = By.xpath(RELOAD_BUTTON_XPATH);
|
||||
private final By warningImage = By.cssSelector(WARNING_IMAGE_CSS);
|
||||
private final By accessDeniedText = By.xpath(ACCESS_DENIED_TEXT_XPATH);
|
||||
|
||||
private WebDriver driver;
|
||||
private WebDriverWait wait;
|
||||
|
||||
public AccessDeniedPage(WebDriver driver,
|
||||
WebDriverWait wait) {
|
||||
this.driver = driver;
|
||||
this.wait = wait;
|
||||
}
|
||||
|
||||
public void clickReloadButton() {
|
||||
try {
|
||||
log.debug("Пытаемся найти кнопку по id и нажать");
|
||||
driver.findElement(reloadButtonById).click();
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
log.debug("Кнопка обновления страницы не найдена по id");
|
||||
}
|
||||
try {
|
||||
log.debug("Пытаемся найти кнопку по xpath и нажать");
|
||||
driver.findElement(reloadButtonByXpath).click();
|
||||
log.debug("Успешно нашли кнопку по xpath");
|
||||
return;
|
||||
} catch (Exception e) {
|
||||
log.debug("Кнопка обновления страницы не найдена по xpath");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean isWarningImage() {
|
||||
return driver.findElement(warningImage) != null;
|
||||
}
|
||||
|
||||
private boolean isAccessDeniedText() {
|
||||
return driver.findElement(accessDeniedText) != null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return isWarningImage() && isAccessDeniedText();
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
@ -0,0 +1,90 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
|
||||
@Slf4j
|
||||
public class CategoryPage implements MarketplacePage {
|
||||
|
||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
|
||||
|
||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
private WebDriverWait wait;
|
||||
|
||||
public CategoryPage(WebDriver driver, WebDriverWait wait) {
|
||||
this.driver = driver;
|
||||
this.wait = wait;
|
||||
}
|
||||
|
||||
public ArrayList<ParsedData> getParsedProducts() {
|
||||
wait.until(visibilityOfElementLocated(searchResults));
|
||||
log.info("Нашли SearchResultsV2");
|
||||
var searchResultsElement = driver.findElement(searchResults);
|
||||
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
|
||||
log.info("Нашли внешний блок списка");
|
||||
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
|
||||
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
|
||||
log.info("Нашли элементы списка");
|
||||
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
|
||||
|
||||
var products = new ArrayList<ParsedData>();
|
||||
innerDivs.forEach(innerDiv -> {
|
||||
var productDataDivs = innerDiv.findElements(By.cssSelector(":scope > div"));
|
||||
var productImageUrl = productDataDivs.get(0)
|
||||
.findElement(By.cssSelector(":scope > a > div"))
|
||||
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||
.findElement(By.tagName("img")).getAttribute("src");
|
||||
|
||||
var productBrand = productDataDivs.get(1).findElement(By.cssSelector(":scope > div"))
|
||||
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||
.findElement(By.tagName("b")).getText();
|
||||
|
||||
var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
|
||||
|
||||
var productUrl = productNameLink.getAttribute("href");
|
||||
|
||||
var productName = productNameLink.findElement(By.tagName("span")).getText();
|
||||
|
||||
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
||||
.findElements(By.tagName("span")).getFirst().getText());
|
||||
/*var parsedData = new ParsedData();
|
||||
parsedData.setUrl(productUrl);
|
||||
parsedData.setBrand(productBrand);
|
||||
parsedData.setProductName(productName);
|
||||
parsedData.setImageUrl(productImageUrl);
|
||||
parsedData.setPrice(productPrice);
|
||||
products.add(parsedData);*/
|
||||
});
|
||||
|
||||
|
||||
return products;
|
||||
}
|
||||
|
||||
private BigDecimal parseCurrency(String currencyStr) {
|
||||
String cleanedString = currencyStr.replaceAll("[^\\d]", "");
|
||||
|
||||
return new BigDecimal(cleanedString);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return driver.findElement(searchResults) != null;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
/**
 * Common contract of the page objects in this package.
 */
public interface MarketplacePage {

    /**
     * @return {@code true} when the page represented by this object is the one currently shown
     */
    boolean isLoaded();
}
|
@ -0,0 +1,38 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
|
||||
@Slf4j
|
||||
public class NoContentPage {
|
||||
|
||||
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
||||
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
|
||||
|
||||
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
||||
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
private WebDriverWait wait;
|
||||
|
||||
public NoContentPage(WebDriver driver, WebDriverWait wait) {
|
||||
this.driver = driver;
|
||||
this.wait = wait;
|
||||
}
|
||||
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return driver.findElement(searchResultsError) != null
|
||||
|| driver.findElement(errorText) != null
|
||||
|| driver.findElement(notFoundText) != null;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,228 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
|
||||
@Slf4j
|
||||
public class OzonCategoryPage {
|
||||
|
||||
private static final String OZON_MAIN_LINK = "https://www.ozon.ru";
|
||||
|
||||
public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";
|
||||
|
||||
public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
|
||||
|
||||
public static final int INDEX_OF_PRODUCT_PRICE = 0;
|
||||
|
||||
public static final int INDEX_OF_PRODUCT_BRAND = 1;
|
||||
|
||||
public static final int INDEX_OF_PRODUCT_NAME = 2;
|
||||
|
||||
private final Document document;
|
||||
|
||||
public OzonCategoryPage(String pageHtml) {
|
||||
this.document = Jsoup.parse(pageHtml);
|
||||
}
|
||||
|
||||
public List<ParsedData> getProducts(Category category) {
|
||||
List<ParsedData> products = new ArrayList<>();
|
||||
|
||||
Elements searchResultsDivs = getSearchResultsDivs();
|
||||
if (searchResultsDivs.isEmpty()) {
|
||||
return List.of();
|
||||
}
|
||||
log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
|
||||
|
||||
for (Element searchResultsDiv : searchResultsDivs) {
|
||||
Elements productsDivs = getProductsDivs(searchResultsDiv);
|
||||
List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
|
||||
List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
|
||||
products.addAll(parsedProductsData);
|
||||
}
|
||||
|
||||
/*try {
|
||||
|
||||
for (Element searchResultsDiv : searchResultsDivs) {
|
||||
var productDivs = searchResultsDiv.select("> div > div");
|
||||
for (Element productDiv : productDivs) {
|
||||
Elements productDataDivs = productDivs.select("> div > *");
|
||||
if (productDataDivs.select("> *").isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
productDataDivs.removeLast();
|
||||
Element productUrlAndImageUrlA = productDataDivs.first();
|
||||
Element productDataDiv = productDataDivs.last();
|
||||
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||
try {
|
||||
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||
.select("span").text().toLowerCase()
|
||||
.contains("осталось")) {
|
||||
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND).select("> span");
|
||||
|
||||
String productUrl = OZON_MAIN_LINK + productUrlAndImageUrlA.attr("href").replaceAll("\\?.*$", "");
|
||||
String productImageUrl = productUrlAndImageUrlA.select("> div > div")
|
||||
.first().getElementsByTag("img")
|
||||
.first().attr("src");
|
||||
|
||||
BigDecimal productPrice;
|
||||
try {
|
||||
productPrice = parseOzonPriceToBigDecimal(
|
||||
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||
.first().text());
|
||||
} catch (Exception e) {
|
||||
log.error("не удалось распарсить цену");
|
||||
continue;
|
||||
}
|
||||
|
||||
String productBrand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||
String productName = productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME).select("> div > span").text();
|
||||
|
||||
ParsedData parsedData = new ParsedData();
|
||||
parsedData.setCategory(category);
|
||||
parsedData.setMarketplace(Marketplace.OZON);
|
||||
parsedData.setUrl(productUrl);
|
||||
parsedData.setImageUrl(productImageUrl);
|
||||
parsedData.setPrice(productPrice);
|
||||
parsedData.setBrand(productBrand);
|
||||
parsedData.setProductName(productName);
|
||||
products.add(parsedData);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}*/
|
||||
return products;
|
||||
}
|
||||
|
||||
private Elements getSearchResultsDivs() {
|
||||
try {
|
||||
return document.select(SEARCH_RESULTS_CSS_SELECTOR);
|
||||
} catch (Exception e) {
|
||||
log.warn("Не удалось достать блоки searchResultsV2");
|
||||
return new Elements();
|
||||
}
|
||||
}
|
||||
|
||||
private Elements getProductsDivs(Element searchResultsDiv) {
|
||||
return searchResultsDiv.select("> div > div");
|
||||
}
|
||||
|
||||
private List<Elements> getAllProductDataDivs(Elements productsDivs) {
|
||||
List<Elements> allProductDataDivs = new ArrayList<>();
|
||||
for (Element productDiv : productsDivs) {
|
||||
Elements productDataDivs = productDiv.select("> div > *");
|
||||
if (productDataDivs.select("> *").isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
removeAddInFavouriteDiv(productDataDivs);
|
||||
allProductDataDivs.add(productDataDivs);
|
||||
}
|
||||
return allProductDataDivs;
|
||||
}
|
||||
|
||||
private void removeAddInFavouriteDiv(Elements productDataDivs) {
|
||||
productDataDivs.removeLast();
|
||||
}
|
||||
|
||||
private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
|
||||
Category category) {
|
||||
List<ParsedData> parsedData = new ArrayList<>();
|
||||
for (Elements productDataDivs : allProductDataDivs) {
|
||||
try {
|
||||
ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
|
||||
parsedData.add(parsedDataItem);
|
||||
} catch (Exception e) {
|
||||
//log.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
return parsedData;
|
||||
}
|
||||
|
||||
private ParsedData getParsedDataItem(Elements productDataDivs,
|
||||
Category category) {
|
||||
removeExtraDivIfExists(productDataDivs);
|
||||
return ParsedData.builder()
|
||||
.category(category)
|
||||
.marketplace(Marketplace.OZON)
|
||||
.url(extractUrl(productDataDivs))
|
||||
.imageUrl(extractImageUrl(productDataDivs))
|
||||
.brand(extractBrand(productDataDivs))
|
||||
.productName(extractProductName(productDataDivs))
|
||||
.price(extractPrice(productDataDivs))
|
||||
.build();
|
||||
}
|
||||
|
||||
private void removeExtraDivIfExists(Elements productDataDivs) {
|
||||
Element productDataDiv = productDataDivs.last();
|
||||
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||
try {
|
||||
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||
.select("span").text().toLowerCase()
|
||||
.contains("осталось")) {
|
||||
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
|
||||
private String extractUrl(Elements productDataDivs) {
|
||||
Element productUrlA = productDataDivs.first();
|
||||
return OZON_MAIN_LINK + productUrlA
|
||||
.attr("href").replaceAll("\\?.*$", "");
|
||||
}
|
||||
|
||||
private String extractImageUrl(Elements productDataDivs) {
|
||||
Element productImageUrlA = productDataDivs.first();
|
||||
return productImageUrlA.select("> div > div")
|
||||
.first().getElementsByTag("img")
|
||||
.first().attr("src");
|
||||
}
|
||||
|
||||
private String extractBrand(Elements productDataDivs) {
|
||||
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||
//log.info(productDataInnerDivs.html());
|
||||
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
|
||||
.select("> span");
|
||||
String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||
if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
|
||||
return "БРЕНД_НЕ_УКАЗАН";
|
||||
}
|
||||
return brand;
|
||||
}
|
||||
|
||||
private String extractProductName(Elements productDataDivs) {
|
||||
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||
return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
|
||||
.select("> div > span").text();
|
||||
}
|
||||
|
||||
private BigDecimal extractPrice(Elements productDataDivs) {
|
||||
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||
return parseOzonPriceToBigDecimal(
|
||||
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||
.first().text());
|
||||
}
|
||||
|
||||
private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
|
||||
return productDataDivs.last().select("> *");
|
||||
}
|
||||
|
||||
private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
|
||||
String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
|
||||
return new BigDecimal(cleanedString);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,110 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.retry.annotation.Recover;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@Profile("ozon")
|
||||
public class OzonHtmlFetcher {
|
||||
|
||||
private final WebDriverPool webDriverPool;
|
||||
|
||||
private final PageScroller pageScroller;
|
||||
|
||||
public OzonHtmlFetcher(WebDriverPool webDriverPool,
|
||||
PageScroller pageScroller) {
|
||||
this.webDriverPool = webDriverPool;
|
||||
this.pageScroller = pageScroller;
|
||||
}
|
||||
|
||||
@Retryable(maxAttempts = 10, recover = "recover")
|
||||
public String fetchPageHtml(String pageUrl,
|
||||
AtomicBoolean lastPageInCategory) {
|
||||
var driver = webDriverPool.borrowDriver();
|
||||
try {
|
||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||
driver.get(pageUrl);
|
||||
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
|
||||
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
||||
var categoryPage = new CategoryPage(driver, wait);
|
||||
var noContentPage = new NoContentPage(driver, wait);
|
||||
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
|
||||
checkAceesDeniedAndResolve(accessDeniedPage);
|
||||
|
||||
pageScroller.scrollToEndOfPage(driver);
|
||||
return driver.getPageSource();
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
webDriverPool.returnDriver(driver);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
||||
CategoryPage categoryPage,
|
||||
NoContentPage noContentPage,
|
||||
AtomicBoolean stopFlag) {
|
||||
log.debug("Проверка что страница 'Доступ ограничен'");
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
return true;
|
||||
}
|
||||
log.debug("Проверка что страница 'Страница категории'");
|
||||
if (checkCategoryPage(categoryPage)) {
|
||||
return true;
|
||||
}
|
||||
if (checkNoContentPage(noContentPage)) {
|
||||
stopFlag.set(true);
|
||||
return true;
|
||||
}
|
||||
log.debug("Проверка загрузки страницы неудачна");
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean checkCategoryPage(CategoryPage categoryPage) {
|
||||
return categoryPage.isLoaded();
|
||||
}
|
||||
|
||||
private void checkAceesDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
log.info("Доступ ограничен, пробуем решить проблему");
|
||||
resolveAccessDeniedPage(accessDeniedPage);
|
||||
log.info("Проблема успешно решена");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean checkNoContentPage(NoContentPage noContentPage) {
|
||||
if (noContentPage.isLoaded()) {
|
||||
log.info("Страница не найдена");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||
return accessDeniedPage.isLoaded();
|
||||
}
|
||||
|
||||
private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||
accessDeniedPage.clickReloadButton();
|
||||
}
|
||||
|
||||
@Recover
|
||||
private void recover(Exception e) {
|
||||
log.error("Все ретраи провалились");
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
|
||||
|
||||
@Service
|
||||
public class OzonPageParser {
|
||||
|
||||
public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
|
||||
Category category) {
|
||||
OzonCategoryPage categoryPage = new OzonCategoryPage(pageSource);
|
||||
return categoryPage.getProducts(category);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,114 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.service.ProductService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@Profile("ozon")
|
||||
public class OzonParsingService {
|
||||
|
||||
private final Map<String, Set<String>> urlCache;
|
||||
|
||||
private final ExecutorService pageExecutorService;
|
||||
|
||||
private final Semaphore semaphore;
|
||||
|
||||
private final OzonHtmlFetcher categoryPageParsingService;
|
||||
|
||||
private final OzonConfigProperties ozonConfigProperties;
|
||||
|
||||
private final OzonPageParser ozonPageParser;
|
||||
|
||||
private final ProductService productService;
|
||||
|
||||
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
|
||||
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
|
||||
ProductService productService) {
|
||||
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
|
||||
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
|
||||
this.urlCache = new ConcurrentHashMap<>();
|
||||
for (OzonCategory category : OzonCategory.values()) {
|
||||
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
|
||||
}
|
||||
|
||||
this.categoryPageParsingService = categoryPageParsingService;
|
||||
this.ozonConfigProperties = ozonConfigProperties;
|
||||
this.ozonPageParser = ozonPageParser;
|
||||
this.productService = productService;
|
||||
}
|
||||
|
||||
public void startProcessing() {
|
||||
for (OzonCategory category : OzonCategory.values()) {
|
||||
log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", category);
|
||||
processCategory(category);
|
||||
}
|
||||
}
|
||||
|
||||
private void processCategory(OzonCategory category) {
|
||||
int pageIndex = 1;
|
||||
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
|
||||
while (!lastPageInCategory.get()) {
|
||||
try {
|
||||
semaphore.acquire();
|
||||
|
||||
int finalPageIndex = pageIndex;
|
||||
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
|
||||
|
||||
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
|
||||
|
||||
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lastPageInCategory.get()) {
|
||||
log.info("Достигли последней страницы категории");
|
||||
}
|
||||
}
|
||||
|
||||
private void processCategoryPage(String pageUrl,
|
||||
OzonCategory category,
|
||||
AtomicBoolean lastPageInCategory) {
|
||||
try {
|
||||
MDC.put("pageUrl", pageUrl);
|
||||
String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
|
||||
List<ParsedData> parsedProducts =
|
||||
ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
|
||||
log.info("""
|
||||
|
||||
КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
|
||||
КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
|
||||
|
||||
""", parsedProducts.size());
|
||||
if (urlCache.size() > 1000000) {
|
||||
urlCache.clear();
|
||||
}
|
||||
Set<String> categoryCachecUrl = urlCache.get(category.getCategoryUrl());
|
||||
List<ParsedData> uniqueData = parsedProducts.stream()
|
||||
.filter(data -> categoryCachecUrl.add(data.getUrl()))
|
||||
.toList();
|
||||
productService.saveBatch(uniqueData);
|
||||
} finally {
|
||||
MDC.clear();
|
||||
semaphore.release();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,56 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.JavascriptExecutor;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class PageScroller {
|
||||
|
||||
private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
|
||||
|
||||
private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";
|
||||
|
||||
public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
|
||||
JavascriptExecutor js = (JavascriptExecutor) driver;
|
||||
AtomicLong lastHeight = new AtomicLong((long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT));
|
||||
int attemptsLimit = 100;
|
||||
log.info("Начинаем пролистывать страницу до конца");
|
||||
while (true) {
|
||||
js.executeScript(SCROLL_TO_PAGE_HEIGHT);
|
||||
|
||||
long newHeight = (long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT);
|
||||
|
||||
try {
|
||||
var nextPageButtons = driver.findElements(By.cssSelector("div[data-widget='megaPaginator'] > div")).get(1)
|
||||
.findElement(By.cssSelector(":scope > div > div > div"))
|
||||
.findElements(By.tagName("a"));
|
||||
|
||||
if (nextPageButtons != null && newHeight > lastHeight.get()) {
|
||||
log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
|
||||
break;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
|
||||
if (newHeight > lastHeight.get()) {
|
||||
attemptsLimit = 100;
|
||||
lastHeight.set(newHeight);
|
||||
} else {
|
||||
attemptsLimit--;
|
||||
Thread.sleep(1000);
|
||||
if (attemptsLimit == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Profile("ozon")
|
||||
public class OzonProductUpdater {
|
||||
|
||||
private final OzonParsingService ozonParsingService;
|
||||
|
||||
@Scheduled(fixedRate = 7200000)
|
||||
public void updateOzonProducts() {
|
||||
ozonParsingService.startProcessing();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.task;
|
||||
|
||||
/**
 * Placeholder type: it currently declares no state and no behaviour.
 */
public class OzonParsingTask {

}
|
@ -0,0 +1,64 @@
|
||||
package ru.pricepulse.parsingservice.persistence.entity;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.EmbeddedId;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.Table;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Entity
|
||||
@Table(name = "price_history")
|
||||
@Builder
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
public class PriceHistoryEntity {
|
||||
|
||||
@EmbeddedId
|
||||
private PriceHistoryId id;
|
||||
|
||||
@Column(name = "price", nullable = false, precision = 10, scale = 2)
|
||||
private BigDecimal price;
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null) {
|
||||
return false;
|
||||
}
|
||||
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||
Class<?> thisEffectiveClass =
|
||||
this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||
if (thisEffectiveClass != oEffectiveClass) {
|
||||
return false;
|
||||
}
|
||||
PriceHistoryEntity that = (PriceHistoryEntity) o;
|
||||
return getId() != null && Objects.equals(getId(), that.getId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return Objects.hash(id);
|
||||
}
|
||||
|
||||
@PrePersist
|
||||
protected void onCreate() {
|
||||
if (id.getDate() == null) {
|
||||
id.setDate(ZonedDateTime.now());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,51 @@
|
||||
package ru.pricepulse.parsingservice.persistence.entity;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Embeddable;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Embeddable
|
||||
public class PriceHistoryId implements Serializable {
|
||||
|
||||
@Column(name = "product_url", nullable = false, unique = true)
|
||||
private String productUrl;
|
||||
|
||||
@Column(name = "date", nullable = false)
|
||||
private ZonedDateTime date;
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
if (this == o) {
|
||||
return true;
|
||||
}
|
||||
if (o == null) {
|
||||
return false;
|
||||
}
|
||||
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||
Class<?> thisEffectiveClass =
|
||||
this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||
if (thisEffectiveClass != oEffectiveClass) {
|
||||
return false;
|
||||
}
|
||||
PriceHistoryId that = (PriceHistoryId) o;
|
||||
return getDate() != null && Objects.equals(getDate(), that.getDate());
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return Objects.hash(date);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,81 @@
|
||||
package ru.pricepulse.parsingservice.persistence.entity;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.EnumType;
|
||||
import jakarta.persistence.Enumerated;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.GenerationType;
|
||||
import jakarta.persistence.Id;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.Table;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Entity
|
||||
@Table(name = "product")
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Builder
|
||||
public class ProductEntity {
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
@Column(name = "id", nullable = false)
|
||||
private Long id;
|
||||
|
||||
@Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
|
||||
@Enumerated(EnumType.STRING)
|
||||
private Marketplace marketplace;
|
||||
|
||||
@Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
|
||||
@Enumerated(EnumType.STRING)
|
||||
private Category category;
|
||||
|
||||
@Column(name = "brand", nullable = false, length = Integer.MAX_VALUE)
|
||||
private String brand;
|
||||
|
||||
@Column(name = "product_name", nullable = false, length = Integer.MAX_VALUE)
|
||||
private String productName;
|
||||
|
||||
@Column(name = "created_at", nullable = false)
|
||||
private LocalDateTime createdAt;
|
||||
|
||||
@Column(name = "url", nullable = false, unique = true)
|
||||
private String url;
|
||||
|
||||
@Column(name = "image-url", nullable = false)
|
||||
private String imageUrl;
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null) return false;
|
||||
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||
Class<?> thisEffectiveClass = this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||
if (thisEffectiveClass != oEffectiveClass) return false;
|
||||
ProductEntity that = (ProductEntity) o;
|
||||
return getId() != null && Objects.equals(getId(), that.getId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass().hashCode() : getClass().hashCode();
|
||||
}
|
||||
|
||||
@PrePersist
|
||||
protected void onCreate() {
|
||||
createdAt = LocalDateTime.now();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,16 @@
|
||||
package ru.pricepulse.parsingservice.persistence.repository;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
|
||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
||||
|
||||
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
|
||||
ZonedDateTime from,
|
||||
ZonedDateTime to);
|
||||
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
package ru.pricepulse.parsingservice.persistence.repository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
|
||||
@Repository
|
||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||
|
||||
List<ProductEntity> findAllByUrlIn(List<String> urls);
|
||||
|
||||
@Query("""
|
||||
select p.url from ProductEntity p where p.url in :urls
|
||||
""")
|
||||
List<String> findSavedUrl(List<String> urls);
|
||||
|
||||
Optional<ProductEntity> findByUrl(String url);
|
||||
|
||||
Page<ProductEntity> findAllByMarketplaceAndCategory(Marketplace marketplace, Category category, Pageable pageable);
|
||||
}
|
@ -0,0 +1,105 @@
|
||||
package ru.pricepulse.parsingservice.service;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.List;
|
||||
|
||||
import jakarta.persistence.EntityNotFoundException;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
|
||||
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ProductService {
|
||||
|
||||
private final ProductRepository productRepository;
|
||||
|
||||
private final ProductPriceRepository productPriceRepository;
|
||||
|
||||
private final ProductMapper productMapper;
|
||||
|
||||
private final PriceHistoryMapper priceHistoryMapper;
|
||||
|
||||
@Transactional
|
||||
@Retryable
|
||||
public void saveBatch(List<ParsedData> parsedData) {
|
||||
List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
|
||||
List<String> alreadySavedUrls = productRepository.findSavedUrl(productsUrls);
|
||||
List<ProductEntity> products = parsedData.stream()
|
||||
.filter(data -> !alreadySavedUrls.contains(data.getUrl()))
|
||||
.map(this::getProduct)
|
||||
.toList();
|
||||
List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();
|
||||
productRepository.saveAll(products);
|
||||
log.info("Сохранили пачку товаров {}", products.size());
|
||||
productPriceRepository.saveAll(prices);
|
||||
log.info("Сохранили историю цен {}", prices.size());
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public ProductDto findByUrl(String productUrl) {
|
||||
var product = productRepository.findByUrl(productUrl).orElseThrow(EntityNotFoundException::new);
|
||||
return productMapper.toProductDto(product);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public PriceHistoryDto findPriceHistoryByRange(String productUrl,
|
||||
ZonedDateTime from,
|
||||
ZonedDateTime to) {
|
||||
var priceHistory = productPriceRepository
|
||||
.findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
|
||||
return priceHistoryMapper.toPriceHistoryDto(priceHistory);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public ProductsPageDto findAllProductsByPage(Marketplace marketplace,
|
||||
Category category,
|
||||
Pageable pageable) {
|
||||
var page = productRepository.findAllByMarketplaceAndCategory(marketplace, category, pageable);
|
||||
return new ProductsPageDto(
|
||||
page.getNumberOfElements(),
|
||||
page.getTotalPages(),
|
||||
page.getNumber(),
|
||||
page.getContent().stream().map(productMapper::toProductDto).toList()
|
||||
);
|
||||
}
|
||||
|
||||
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
||||
var priceHistoryId = new PriceHistoryId();
|
||||
priceHistoryId.setProductUrl(product.getUrl());
|
||||
priceHistoryId.setDate(ZonedDateTime.now());
|
||||
var priceHistory = new PriceHistoryEntity();
|
||||
priceHistory.setId(priceHistoryId);
|
||||
priceHistory.setPrice(product.getPrice());
|
||||
return priceHistory;
|
||||
}
|
||||
|
||||
private ProductEntity getProduct(ParsedData product) {
|
||||
var productEntity = new ProductEntity();
|
||||
productEntity.setCategory(product.getCategory());
|
||||
productEntity.setBrand(product.getBrand());
|
||||
productEntity.setProductName(product.getProductName());
|
||||
productEntity.setUrl(product.getUrl());
|
||||
productEntity.setMarketplace(product.getMarketplace());
|
||||
productEntity.setImageUrl(product.getImageUrl());
|
||||
return productEntity;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.service.dto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class PriceHistoryDto {
|
||||
|
||||
private final Map<ZonedDateTime, BigDecimal> priceHistory;
|
||||
|
||||
public PriceHistoryDto() {
|
||||
this.priceHistory = new HashMap<>();
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
package ru.pricepulse.parsingservice.service.dto;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@RequiredArgsConstructor
|
||||
public class ProductDto {
|
||||
|
||||
private final Long id;
|
||||
|
||||
private final Marketplace marketplace;
|
||||
|
||||
private final Category category;
|
||||
|
||||
private final String brand;
|
||||
|
||||
private final String productName;
|
||||
|
||||
private final String url;
|
||||
|
||||
private final String imageUrl;
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package ru.pricepulse.parsingservice.service.dto;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
public class ProductsPageDto {
|
||||
|
||||
private final int totalItems;
|
||||
|
||||
private final int totalPages;
|
||||
|
||||
private final int currentPage;
|
||||
|
||||
private final List<ProductDto> products;
|
||||
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.service.mapper;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||
|
||||
@Component
|
||||
public class PriceHistoryMapper {
|
||||
|
||||
public PriceHistoryDto toPriceHistoryDto (List<PriceHistoryEntity> priceHistory) {
|
||||
var priceHistoryDto = new PriceHistoryDto();
|
||||
priceHistory.forEach(item ->
|
||||
priceHistoryDto.getPriceHistory().put(item.getId().getDate().withNano(0), item.getPrice()));
|
||||
return priceHistoryDto;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package ru.pricepulse.parsingservice.service.mapper;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||
|
||||
@Component
|
||||
public class ProductMapper {
|
||||
|
||||
public ProductDto toProductDto(ProductEntity product) {
|
||||
return new ProductDto(
|
||||
product.getId(),
|
||||
product.getMarketplace(),
|
||||
product.getCategory(),
|
||||
product.getBrand(),
|
||||
product.getProductName(),
|
||||
product.getUrl(),
|
||||
product.getImageUrl()
|
||||
);
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,58 @@
|
||||
package ru.pricepulse.parsingservice.service.scheduler;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Profile("postgres_stat")
|
||||
public class PartitionScheduler {
|
||||
|
||||
private final PartitionService partitionService;
|
||||
private final DateTimeFormatter partitionDateTimeFormatter;
|
||||
|
||||
@PostConstruct
|
||||
public void init() {
|
||||
checkAndCreateMonthlyPartitions();
|
||||
}
|
||||
|
||||
@Scheduled(cron = "@monthly")
|
||||
public void checkAndCreatePartitionsMonthly() {
|
||||
checkAndCreateMonthlyPartitions();
|
||||
}
|
||||
|
||||
public void checkAndCreateMonthlyPartitions() {
|
||||
LocalDate currentMonth = LocalDate.now().withDayOfMonth(1);
|
||||
LocalDate nextMonth = currentMonth.plusMonths(1);
|
||||
|
||||
String currentMonthPartition = getPartitionName(currentMonth);
|
||||
String nextMonthPartition = getPartitionName(nextMonth);
|
||||
|
||||
checkAndCreatePartition(currentMonthPartition, currentMonth);
|
||||
checkAndCreatePartition(nextMonthPartition, nextMonth);
|
||||
}
|
||||
|
||||
private String getPartitionName(LocalDate date) {
|
||||
return "price_history_" + partitionDateTimeFormatter.format(date);
|
||||
}
|
||||
|
||||
private void checkAndCreatePartition(String partitionName, LocalDate startDate) {
|
||||
if (!partitionService.checkPartitionExists(partitionName)) {
|
||||
LocalDate endDate = startDate.plusMonths(1);
|
||||
partitionService.createPartition(partitionName, startDate.toString(), endDate.toString());
|
||||
log.info("Партиция {} создана для диапазона: {} - {} ", partitionName, startDate, endDate);
|
||||
} else {
|
||||
log.info("Партиция {} уже существует.", partitionName);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,60 @@
|
||||
package ru.pricepulse.parsingservice.web.handler;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
import jakarta.persistence.EntityNotFoundException;
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
import org.springframework.http.HttpStatus;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.ControllerAdvice;
|
||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||
|
||||
@ControllerAdvice
|
||||
public class CommonExceptionHandler {
|
||||
|
||||
@ExceptionHandler(IllegalArgumentException.class)
|
||||
@ResponseStatus(HttpStatus.BAD_REQUEST)
|
||||
public ResponseEntity<ErrorResponse> exceptionHandler(Exception ex,
|
||||
HttpServletRequest request) {
|
||||
return handleException(HttpStatus.BAD_REQUEST, request, ex);
|
||||
}
|
||||
|
||||
@ExceptionHandler(EntityNotFoundException.class)
|
||||
@ResponseStatus(HttpStatus.NOT_FOUND)
|
||||
public ResponseEntity<ErrorResponse> handleNotFoundException(Exception ex,
|
||||
HttpServletRequest request) {
|
||||
return handleException(HttpStatus.NOT_FOUND, request, ex);
|
||||
}
|
||||
|
||||
@ExceptionHandler(Exception.class)
|
||||
@ResponseStatus(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||
public ResponseEntity<ErrorResponse> handleInternalServerErrorException(Exception ex,
|
||||
HttpServletRequest request) {
|
||||
return handleException(HttpStatus.INTERNAL_SERVER_ERROR, request, ex);
|
||||
}
|
||||
|
||||
/*@ExceptionHandler(AccessDeniedException.class)
|
||||
@ResponseStatus(HttpStatus.FORBIDDEN)
|
||||
public ResponseEntity<ErrorResponse> handleForbiddenException(Exception ex,
|
||||
HttpServletRequest request) {
|
||||
return handleException(HttpStatus.FORBIDDEN, request, ex);
|
||||
}
|
||||
|
||||
@ExceptionHandler(AuthenticationException.class)
|
||||
@ResponseStatus(HttpStatus.UNAUTHORIZED)
|
||||
public ResponseEntity<ErrorResponse> handleUnauthorizedException(Exception ex,
|
||||
HttpServletRequest request) {
|
||||
return handleException(HttpStatus.UNAUTHORIZED, request, ex);
|
||||
}*/
|
||||
|
||||
private ResponseEntity<ErrorResponse> handleException(HttpStatus status, HttpServletRequest request, Exception ex) {
|
||||
var errorResponse = new ErrorResponse(
|
||||
status.value(),
|
||||
status,
|
||||
URI.create(request.getRequestURI()),
|
||||
ex.getMessage()
|
||||
);
|
||||
return ResponseEntity.status(status).body(errorResponse);
|
||||
}
|
||||
}
|
@ -0,0 +1,13 @@
|
||||
package ru.pricepulse.parsingservice.web.handler;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
import org.springframework.http.HttpStatus;
|
||||
|
||||
public record ErrorResponse (
|
||||
Integer statusCode,
|
||||
HttpStatus status,
|
||||
URI requestURI,
|
||||
String message
|
||||
){
|
||||
}
|
@ -0,0 +1,27 @@
|
||||
package ru.pricepulse.parsingservice.web.rest;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/categories")
|
||||
@RequiredArgsConstructor
|
||||
public class CategoryApi {
|
||||
|
||||
private final OzonService ozonService;
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<?> getCategories(Marketplace marketplace) {
|
||||
if (Marketplace.OZON.equals(marketplace)) {
|
||||
return ResponseEntity.ok(ozonService.getCategories());
|
||||
}
|
||||
return ResponseEntity.ok(Category.values());
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.web.rest;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/marketplaces")
|
||||
@RequiredArgsConstructor
|
||||
public class MarketplaceApi {
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<Marketplace[]> getMarketplace() {
|
||||
return ResponseEntity.ok(Marketplace.values());
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
package ru.pricepulse.parsingservice.web.rest;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.ZonedDateTime;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.service.ProductService;
|
||||
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/products")
|
||||
@RequiredArgsConstructor
|
||||
public class ProductApi {
|
||||
|
||||
private final ProductService productService;
|
||||
|
||||
@GetMapping("/info")
|
||||
public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
|
||||
return ResponseEntity.ok(productService.findByUrl(productUrl));
|
||||
}
|
||||
|
||||
@GetMapping("/price-history")
|
||||
public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(@RequestParam String productUrl,
|
||||
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
|
||||
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
|
||||
String zoneOffset) {
|
||||
ZoneOffset zone = ZoneOffset.of(zoneOffset);
|
||||
ZonedDateTime fromDateTime = from.atStartOfDay(zone);
|
||||
ZonedDateTime toDateTime = to.atStartOfDay(zone);
|
||||
return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Marketplace marketplace,
|
||||
Category category,
|
||||
Pageable pageable) {
|
||||
return ResponseEntity.ok(productService.findAllProductsByPage(marketplace, category, pageable));
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,26 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.converter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
import org.springframework.core.convert.converter.Converter;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
|
||||
@Component
|
||||
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {
|
||||
@Override
|
||||
public ProductEntity convert(ProductInfoDto source) {
|
||||
return ProductEntity.builder()
|
||||
.marketplace(Marketplace.WILDBERRIES)
|
||||
.category(Category.LAPTOP)
|
||||
.brand(source.getBrand())
|
||||
.productName(source.getName())
|
||||
.createdAt(LocalDateTime.now())
|
||||
.imageUrl("")
|
||||
.build();
|
||||
}
|
||||
}
|
@ -0,0 +1,115 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.proxy;
|
||||
|
||||
import java.io.BufferedWriter;
|
||||
import java.io.FileWriter;
|
||||
import java.io.IOException;
|
||||
import java.net.HttpURLConnection;
|
||||
import java.net.InetSocketAddress;
|
||||
import java.net.Proxy;
|
||||
import java.net.URL;
|
||||
import java.nio.file.Files;
|
||||
import java.nio.file.Path;
|
||||
import java.nio.file.Paths;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Future;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
/**
 * Static helpers for reading a list of {@code host:port} HTTP proxies, probing each of them
 * with a test request and writing the working ones back to a file.
 */
public class ProxyChecker {
    /** Connect and read timeout for the probe request, in milliseconds. */
    private static final int TIMEOUT = 2000;
    /** Number of worker threads used to probe proxies in parallel. */
    private static final int THREAD_COUNT = 30;
    /** Highest valid TCP port number. */
    private static final int MAX_PORT = 65535;

    /**
     * Reads all lines of a file.
     *
     * @param filePath path of the file to read
     * @return the lines of the file, or an empty mutable list if the file cannot be read
     *         (the error is reported on standard error)
     */
    public static List<String> readProxiesFromFile(String filePath) {
        try {
            return Files.readAllLines(Paths.get(filePath));
        } catch (IOException e) {
            System.err.println("Ошибка при чтении файла: " + e.getMessage());
            return new ArrayList<>();
        }
    }

    /**
     * Probes every proxy concurrently and returns those that answered successfully.
     *
     * @param proxies proxy addresses in {@code host:port} form
     * @return the working proxies, in the same relative order as in {@code proxies}
     */
    public static List<String> checkProxies(List<String> proxies) {
        ExecutorService executor = Executors.newFixedThreadPool(THREAD_COUNT);
        try {
            // Submit one probe task per proxy.
            List<Future<String>> futures = new ArrayList<>();
            for (String proxyAddress : proxies) {
                futures.add(executor.submit(() -> isProxyWorking(proxyAddress) ? proxyAddress : null));
            }

            // Collect results in submission order.
            List<String> workingProxies = new ArrayList<>();
            for (Future<String> future : futures) {
                try {
                    String proxy = future.get();
                    if (proxy != null) {
                        workingProxies.add(proxy);
                    }
                } catch (InterruptedException e) {
                    // Restore the flag for the caller and stop waiting; pending probes are cancelled.
                    Thread.currentThread().interrupt();
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                    executor.shutdownNow();
                    break;
                } catch (Exception e) {
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                }
            }
            return workingProxies;
        } finally {
            // Always release the worker threads, even if something above throws.
            executor.shutdown();
        }
    }

    /**
     * Checks one proxy by issuing a GET request to {@code http://www.google.com} through it.
     *
     * @param proxyAddress proxy in {@code host:port} form
     * @return {@code true} only if the request completes with HTTP 200
     */
    private static boolean isProxyWorking(String proxyAddress) {
        String[] parts = proxyAddress.split(":");
        if (parts.length != 2) {
            System.err.println("Некорректный формат прокси: " + proxyAddress);
            return false;
        }

        String ip = parts[0];
        int port;

        try {
            port = Integer.parseInt(parts[1]);
        } catch (NumberFormatException e) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }
        // InetSocketAddress rejects out-of-range ports with IllegalArgumentException;
        // report them like any other malformed port instead.
        if (port < 0 || port > MAX_PORT) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }

        HttpURLConnection connection = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            URL url = new URL("http://www.google.com");
            connection = (HttpURLConnection) url.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT);
            connection.setReadTimeout(TIMEOUT);
            connection.setRequestMethod("GET");
            connection.connect();

            int responseCode = connection.getResponseCode();
            if (responseCode == 200) {
                System.out.println("Прокси работает (код ответа " + responseCode + "): " + proxyAddress);
                return true;
            } else {
                System.out.println("Прокси не отвечает (код ответа " + responseCode + "): " + proxyAddress);
                return false;
            }
        } catch (IOException e) {
            System.out.println("Прокси не отвечает: " + proxyAddress);
            return false;
        } finally {
            // Release the underlying socket; the original code never closed it.
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Writes the proxies to a file, one per line, replacing any existing content.
     *
     * @param proxies  lines to write
     * @param filePath destination file; an I/O failure is reported on standard error
     */
    public static void saveProxiesToFile(List<String> proxies, Path filePath) {
        try (BufferedWriter writer = Files.newBufferedWriter(filePath)) {
            for (String proxy : proxies) {
                writer.write(proxy);
                writer.newLine();
            }
        } catch (IOException e) {
            System.err.println("Ошибка при записи в файл: " + e.getMessage());
        }
    }
}
|
@ -0,0 +1,24 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.scheduler;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@ConditionalOnProperty(prefix = "marketplace.wildberries", name = "status", havingValue = "true")
|
||||
public class WildberriesProductUpdater {
|
||||
|
||||
private final ParsingService parsingService;
|
||||
|
||||
@Scheduled(fixedRate = 3600000)
|
||||
public void updateWildberriesProducts() {
|
||||
log.info("Начинаем отладку...");
|
||||
parsingService.parse();
|
||||
log.info("Заканчиваем отладку...");
|
||||
}
|
||||
}
|
@ -0,0 +1,79 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.core.convert.ConversionService;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
@Service("wildberriesParsingService")
|
||||
@AllArgsConstructor
|
||||
public class ParsingService {
|
||||
private final Client client;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ConversionService conversionService;
|
||||
private final MarketplacesConfig marketplacesConfig;
|
||||
private final ProductService productService;
|
||||
|
||||
public void parse() {
|
||||
|
||||
final int elementsInPage = 100;
|
||||
int page = 1;
|
||||
Integer totalPages = null;
|
||||
|
||||
do {
|
||||
var pageData = client.scrapPage(page, marketplacesConfig.getWildberriesConfigProperties().getShard(), marketplacesConfig.getWildberriesConfigProperties().getLaptopUrl());
|
||||
System.out.println("Получена страница: " + page);
|
||||
if (totalPages == null) {
|
||||
Map<String, Object> dataMap = (Map<String, Object>) pageData.get("data");
|
||||
int totalElements = (int) dataMap.get("total");
|
||||
totalPages = (int) Math.ceil((double) totalElements / elementsInPage);
|
||||
}
|
||||
|
||||
List<ProductEntity> productEntities = new ArrayList<>();
|
||||
List<PriceHistoryEntity> priceHistories = new ArrayList<>();
|
||||
List<ProductInfoDto> productInfoDtoList = convertMapObjectToListProductInfoDto(pageData);
|
||||
|
||||
productInfoDtoList.forEach(dto -> {
|
||||
|
||||
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
||||
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
|
||||
|
||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||
.id(new PriceHistoryId(productEntity.getUrl(), ZonedDateTime.now()))
|
||||
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||
.build();
|
||||
|
||||
productEntities.add(productEntity);
|
||||
priceHistories.add(priceHistory);
|
||||
});
|
||||
productService.saveData(productEntities, priceHistories);
|
||||
page++;
|
||||
} while (page <= totalPages);
|
||||
}
|
||||
|
||||
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
|
||||
Map<String, ArrayList<Object>> dataMap = (Map<String, ArrayList<Object>>) map.get("data");
|
||||
return getProductInfoDtos(dataMap);
|
||||
}
|
||||
|
||||
private List<ProductInfoDto> getProductInfoDtos(Map<String, ArrayList<Object>> dataMap) {
|
||||
return objectMapper.convertValue(
|
||||
dataMap.get("products"),
|
||||
new TypeReference<>() {
|
||||
}
|
||||
);
|
||||
}
|
||||
}
|
@ -0,0 +1,57 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||
|
||||
@Service("wildberriesProductService")
|
||||
@AllArgsConstructor
|
||||
public class ProductService {
|
||||
private final ProductRepository productRepository;
|
||||
private final ProductPriceRepository productPriceRepository;
|
||||
|
||||
@Transactional
|
||||
public void saveData(List<ProductEntity> productEntities, List<PriceHistoryEntity> priceHistoryEntities) {
|
||||
// Получаем URL продуктов
|
||||
List<String> urls = productEntities.stream()
|
||||
.map(ProductEntity::getUrl)
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// Находим уже существующие URL в базе данных
|
||||
List<String> existingUrls = productRepository.findAllByUrlIn(urls).stream()
|
||||
.map(ProductEntity::getUrl)
|
||||
.toList();
|
||||
|
||||
// Фильтруем уникальные продукты, которых еще нет в базе
|
||||
List<ProductEntity> uniqueProducts = productEntities.stream()
|
||||
.filter(product -> !existingUrls.contains(product.getUrl()))
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// Сохраняем только новые продукты
|
||||
productRepository.saveAll(uniqueProducts);
|
||||
|
||||
// Создаем мапу для быстрого доступа к продуктам по URL
|
||||
Map<String, ProductEntity> productMap = productRepository.findAllByUrlIn(urls).stream()
|
||||
.collect(Collectors.toMap(ProductEntity::getUrl, product -> product));
|
||||
|
||||
// Фильтруем и обновляем идентификаторы для истории цен
|
||||
List<PriceHistoryEntity> updatedPriceHistories = priceHistoryEntities.stream()
|
||||
.peek(priceHistory -> {
|
||||
ProductEntity product = productMap.get(priceHistory.getId().getProductUrl());
|
||||
priceHistory.getId().setProductUrl(product.getUrl());
|
||||
})
|
||||
.collect(Collectors.toList());
|
||||
|
||||
// Сохраняем историю цен
|
||||
productPriceRepository.saveAll(updatedPriceHistories);
|
||||
}
|
||||
}
|
||||
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Abstraction over the source that delivers one catalogue page as a generic key/value map.
 */
public interface Client {

    /**
     * Fetches a single catalogue page.
     *
     * @param page  page number to fetch
     * @param shard catalogue shard identifier
     * @param query additional path or query fragment identifying the listing
     * @return the page content as a map
     */
    Map<String, Object> scrapPage(int page, String shard, String query);
}
|
@ -0,0 +1,42 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.http.HttpEntity;
|
||||
import org.springframework.http.HttpMethod;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@AllArgsConstructor
|
||||
@Service
|
||||
@Slf4j
|
||||
public class ClientImpl implements Client {
|
||||
|
||||
private final RestTemplate restTemplate;
|
||||
private final MarketplacesConfig marketplacesConfig;
|
||||
|
||||
@Override
|
||||
@Retryable(maxAttempts = 50, value = RuntimeException.class)
|
||||
public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||
String url = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl() +
|
||||
shard +
|
||||
query +
|
||||
"?dest=-1257786&page=" + page + "&subject=2290";
|
||||
|
||||
ResponseEntity<Map<String, Object>> response = restTemplate.exchange(
|
||||
url,
|
||||
HttpMethod.GET,
|
||||
HttpEntity.EMPTY,
|
||||
new ParameterizedTypeReference<>() {
|
||||
}
|
||||
);
|
||||
|
||||
return response.getBody();
|
||||
}
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Builder
|
||||
public class ProductInfoDto {
|
||||
private Long id;
|
||||
private String brand;
|
||||
private String name;
|
||||
private String supplier;
|
||||
private Double supplierRating;
|
||||
private Integer salePriceU;
|
||||
private Integer reviewRating;
|
||||
}
|
47
parsing-service/src/main/resources/application.yml
Normal file
47
parsing-service/src/main/resources/application.yml
Normal file
@ -0,0 +1,47 @@
|
||||
server:
|
||||
port: ${SERVER_PORT}
|
||||
spring:
|
||||
application:
|
||||
name: parsing-service
|
||||
jpa:
|
||||
hibernate:
|
||||
ddl-auto: validate
|
||||
database: postgresql
|
||||
datasource:
|
||||
driver-class-name: org.postgresql.Driver
|
||||
url: jdbc:postgresql://${POSTGRES_JDBC_URL}
|
||||
username: ${POSTGRES_JDBC_USERNAME}
|
||||
password: ${POSTGRES_JDBC_PASSWORD}
|
||||
clickhouse:
|
||||
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
|
||||
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
|
||||
username: ${CLICKHOUSE_JDBC_USERNAME}
|
||||
password: ${CLICKHOUSE_JDBC_PASSWORD}
|
||||
liquibase:
|
||||
change-log: classpath:/db/changelog/master.yml
|
||||
|
||||
marketplace:
|
||||
ozon:
|
||||
max-threads: ${OZON_MAX_PROCESSING_THREADS:5}
|
||||
max-num-of-pages-on-screen: ${OZON_MAX_NUM_OF_PAGES_ON_SCREEN:100}
|
||||
wildberries:
|
||||
status: true
|
||||
base-url: "https://static-basket-01.wbbasket.ru"
|
||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
||||
catalog-wb-url: "https://catalog.wb.ru/catalog/"
|
||||
retry-attempts: 5
|
||||
retry-delay: 1000
|
||||
shard: "electronic15"
|
||||
laptop-url: "/catalog"
|
||||
|
||||
logging:
|
||||
pattern:
|
||||
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
||||
# level:
|
||||
# sql: debug
|
||||
# level:
|
||||
# org:
|
||||
# springframework:
|
||||
# boot:
|
||||
# autoconfigure: DEBUG
|
@ -0,0 +1,29 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<databaseChangeLog
|
||||
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||
<createTable tableName="product">
|
||||
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор товара">
|
||||
<constraints primaryKey="true" />
|
||||
</column>
|
||||
<column name="marketplace" type="varchar" remarks="Название маркетплейса (enum)">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
<column name="category" type="varchar" remarks="Категория товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
<column name="brand" type="varchar" remarks="Бренд товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
<column name="product_name" type="varchar" remarks="Название товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
<column name="created_at" type="timestamptz" remarks="Время добавления товара в базу">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
</createTable>
|
||||
</changeSet>
|
||||
</databaseChangeLog>
|
@ -0,0 +1,30 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<databaseChangeLog
|
||||
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||
<changeSet id="20240926_create_price_history_table.xml" author="Emelyanov535">
|
||||
<createTable tableName="price_history">
|
||||
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор">
|
||||
<constraints primaryKey="true" />
|
||||
</column>
|
||||
<column name="product_id" type="bigint" remarks="ID товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
<column name="price" type="numeric(10,2)" remarks="Цена товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
<column name="date" type="timestamptz" remarks="Дата сохранения">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
</createTable>
|
||||
|
||||
<addForeignKeyConstraint baseTableName="price_history"
|
||||
baseColumnNames="product_id"
|
||||
constraintName="fk_product_price_history"
|
||||
referencedTableName="product"
|
||||
referencedColumnNames="id"
|
||||
onDelete="CASCADE"/>
|
||||
</changeSet>
|
||||
</databaseChangeLog>
|
@ -0,0 +1,7 @@
|
||||
databaseChangeLog:
|
||||
- include:
|
||||
file: 20240926_001_create_product_table.xml
|
||||
relativeToChangelogFile: true
|
||||
- include:
|
||||
file: 20240926_002_create_price_history_table.xml
|
||||
relativeToChangelogFile: true
|
@ -0,0 +1,28 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<databaseChangeLog
|
||||
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||
<addColumn tableName="product">
|
||||
<column name="url" type="varchar" remarks="Ссылка на товар">
|
||||
<constraints nullable="false" unique="true" />
|
||||
</column>
|
||||
</addColumn>
|
||||
<addColumn tableName="product">
|
||||
<column name="image-url" type="varchar" remarks="Ссылка на изображение товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
</addColumn>
|
||||
<dropTable tableName="price_history" cascadeConstraints="true" />
|
||||
<sql>
|
||||
CREATE TABLE if not exists price_history(
|
||||
product_url varchar NOT NULL,
|
||||
price numeric(10, 2) NOT NULL,
|
||||
date timestamptz NOT NULL,
|
||||
PRIMARY KEY (product_url, date)
|
||||
) PARTITION BY RANGE (date);
|
||||
</sql>
|
||||
</changeSet>
|
||||
</databaseChangeLog>
|
@ -0,0 +1,4 @@
|
||||
databaseChangeLog:
|
||||
- include:
|
||||
file: 20241006_001_add_columns_in_tables.xml
|
||||
relativeToChangelogFile: true
|
@ -0,0 +1,10 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<databaseChangeLog
|
||||
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||
<changeSet id="20241014_add_constraint_on_product_url.xml" author="Emelyanov535">
|
||||
<addUniqueConstraint tableName="product" columnNames="url"/>
|
||||
</changeSet>
|
||||
</databaseChangeLog>
|
@ -0,0 +1,4 @@
|
||||
databaseChangeLog:
|
||||
- include:
|
||||
file: 20241014_add_constraint_on_product_url.xml
|
||||
relativeToChangelogFile: true
|
10
parsing-service/src/main/resources/db/changelog/master.yml
Normal file
10
parsing-service/src/main/resources/db/changelog/master.yml
Normal file
@ -0,0 +1,10 @@
|
||||
databaseChangeLog:
|
||||
- include:
|
||||
file: 20240926/master.yml
|
||||
relativeToChangelogFile: true
|
||||
- include:
|
||||
file: 20241006/master.yml
|
||||
relativeToChangelogFile: true
|
||||
- include:
|
||||
file: 20241014/master.yml
|
||||
relativeToChangelogFile: true
|
135
parsing-service/src/main/resources/ok-proxy.txt
Normal file
135
parsing-service/src/main/resources/ok-proxy.txt
Normal file
@ -0,0 +1,135 @@
|
||||
97.74.87.226:80
|
||||
54.248.238.110:80
|
||||
3.141.217.225:80
|
||||
127.0.0.7:80
|
||||
49.12.235.70:8081
|
||||
13.38.176.104:3128
|
||||
46.51.249.135:3128
|
||||
162.223.90.130:80
|
||||
133.186.144.112:8080
|
||||
51.210.54.186:80
|
||||
101.108.123.39:8080
|
||||
3.130.65.162:3128
|
||||
80.249.112.162:80
|
||||
3.126.147.182:3128
|
||||
110.164.191.211:80
|
||||
13.208.56.180:80
|
||||
31.207.38.66:80
|
||||
116.203.27.109:80
|
||||
13.36.104.85:80
|
||||
18.228.198.164:3128
|
||||
3.123.150.192:3128
|
||||
8.219.97.248:80
|
||||
149.102.233.167:8081
|
||||
202.162.105.202:80
|
||||
165.22.77.86:80
|
||||
154.205.128.153:8888
|
||||
154.65.39.8:80
|
||||
3.124.133.93:3128
|
||||
31.40.248.2:8080
|
||||
188.253.112.218:80
|
||||
3.71.239.218:80
|
||||
159.223.92.147:8888
|
||||
3.78.92.159:3128
|
||||
54.92.168.145:8080
|
||||
50.62.183.223:80
|
||||
123.30.154.171:7777
|
||||
43.200.77.128:3128
|
||||
35.76.62.196:80
|
||||
204.57.112.5:80
|
||||
15.235.153.57:8089
|
||||
54.152.3.36:80
|
||||
47.74.152.29:8888
|
||||
0.0.0.0:80
|
||||
13.59.156.167:80
|
||||
3.127.62.252:80
|
||||
35.79.120.242:3128
|
||||
3.212.148.199:80
|
||||
3.122.84.99:3128
|
||||
45.92.177.60:8080
|
||||
23.95.216.78:34561
|
||||
82.180.146.116:3128
|
||||
52.67.10.183:80
|
||||
172.191.74.198:8080
|
||||
13.37.59.99:3128
|
||||
148.66.6.213:80
|
||||
18.134.236.231:80
|
||||
3.130.65.162:80
|
||||
103.153.154.6:80
|
||||
109.236.83.153:8888
|
||||
78.32.2.82:8080
|
||||
3.9.71.167:1080
|
||||
35.72.118.126:80
|
||||
46.47.197.210:3128
|
||||
13.37.73.214:80
|
||||
13.37.89.201:80
|
||||
110.12.211.140:80
|
||||
154.90.55.37:80
|
||||
152.89.246.197:8080
|
||||
3.37.125.76:3128
|
||||
44.218.183.55:80
|
||||
18.135.133.116:3128
|
||||
52.196.1.182:80
|
||||
94.72.152.254:80
|
||||
3.123.150.192:80
|
||||
196.11.183.160:8080
|
||||
18.133.16.21:80
|
||||
3.12.144.146:80
|
||||
49.13.173.87:80
|
||||
13.56.192.187:80
|
||||
161.35.49.68:80
|
||||
13.37.59.99:80
|
||||
3.122.84.99:80
|
||||
158.140.139.11:58100
|
||||
148.66.6.210:80
|
||||
153.19.91.77:80
|
||||
189.22.234.41:80
|
||||
52.67.10.183:3128
|
||||
41.59.90.171:80
|
||||
43.132.219.102:80
|
||||
13.40.46.249:1088
|
||||
16.163.149.249:80
|
||||
3.71.239.218:3128
|
||||
13.36.113.81:3128
|
||||
60.242.169.3:80
|
||||
49.13.173.87:8081
|
||||
35.176.148.8:1080
|
||||
18.135.133.116:80
|
||||
13.37.89.201:3128
|
||||
3.127.121.101:80
|
||||
35.178.104.4:80
|
||||
182.72.203.246:80
|
||||
13.40.239.130:1080
|
||||
65.108.207.6:80
|
||||
18.223.25.15:80
|
||||
54.233.119.172:3128
|
||||
66.97.37.164:80
|
||||
3.78.92.159:80
|
||||
110.168.213.172:8080
|
||||
49.12.235.70:80
|
||||
94.156.250.169:20128
|
||||
15.236.106.236:3128
|
||||
13.38.153.36:80
|
||||
178.128.199.145:80
|
||||
156.67.217.159:80
|
||||
148.66.6.211:80
|
||||
13.36.87.105:3128
|
||||
3.126.147.182:80
|
||||
51.222.155.142:80
|
||||
141.145.214.176:80
|
||||
184.169.154.119:80
|
||||
5.255.113.61:80
|
||||
3.124.133.93:80
|
||||
3.127.121.101:3128
|
||||
148.66.6.212:80
|
||||
176.9.239.181:80
|
||||
63.35.64.177:3128
|
||||
18.169.83.87:1080
|
||||
148.66.6.214:80
|
||||
18.228.149.161:80
|
||||
18.228.198.164:80
|
||||
106.105.118.250:80
|
||||
103.174.102.127:80
|
||||
162.0.238.147:80
|
||||
103.127.1.130:80
|
||||
185.233.187.103:80
|
135
parsing-service/src/main/resources/proxy.txt
Normal file
135
parsing-service/src/main/resources/proxy.txt
Normal file
@ -0,0 +1,135 @@
|
||||
97.74.87.226:80
|
||||
54.248.238.110:80
|
||||
3.141.217.225:80
|
||||
127.0.0.7:80
|
||||
49.12.235.70:8081
|
||||
13.38.176.104:3128
|
||||
46.51.249.135:3128
|
||||
162.223.90.130:80
|
||||
133.186.144.112:8080
|
||||
51.210.54.186:80
|
||||
101.108.123.39:8080
|
||||
3.130.65.162:3128
|
||||
80.249.112.162:80
|
||||
3.126.147.182:3128
|
||||
110.164.191.211:80
|
||||
13.208.56.180:80
|
||||
31.207.38.66:80
|
||||
116.203.27.109:80
|
||||
13.36.104.85:80
|
||||
18.228.198.164:3128
|
||||
3.123.150.192:3128
|
||||
8.219.97.248:80
|
||||
149.102.233.167:8081
|
||||
202.162.105.202:80
|
||||
165.22.77.86:80
|
||||
154.205.128.153:8888
|
||||
154.65.39.8:80
|
||||
3.124.133.93:3128
|
||||
31.40.248.2:8080
|
||||
188.253.112.218:80
|
||||
3.71.239.218:80
|
||||
159.223.92.147:8888
|
||||
3.78.92.159:3128
|
||||
54.92.168.145:8080
|
||||
50.62.183.223:80
|
||||
123.30.154.171:7777
|
||||
43.200.77.128:3128
|
||||
35.76.62.196:80
|
||||
204.57.112.5:80
|
||||
15.235.153.57:8089
|
||||
54.152.3.36:80
|
||||
47.74.152.29:8888
|
||||
0.0.0.0:80
|
||||
13.59.156.167:80
|
||||
3.127.62.252:80
|
||||
35.79.120.242:3128
|
||||
3.212.148.199:80
|
||||
3.122.84.99:3128
|
||||
45.92.177.60:8080
|
||||
23.95.216.78:34561
|
||||
82.180.146.116:3128
|
||||
52.67.10.183:80
|
||||
172.191.74.198:8080
|
||||
13.37.59.99:3128
|
||||
148.66.6.213:80
|
||||
18.134.236.231:80
|
||||
3.130.65.162:80
|
||||
103.153.154.6:80
|
||||
109.236.83.153:8888
|
||||
78.32.2.82:8080
|
||||
3.9.71.167:1080
|
||||
35.72.118.126:80
|
||||
46.47.197.210:3128
|
||||
13.37.73.214:80
|
||||
13.37.89.201:80
|
||||
110.12.211.140:80
|
||||
154.90.55.37:80
|
||||
152.89.246.197:8080
|
||||
3.37.125.76:3128
|
||||
44.218.183.55:80
|
||||
18.135.133.116:3128
|
||||
52.196.1.182:80
|
||||
94.72.152.254:80
|
||||
3.123.150.192:80
|
||||
196.11.183.160:8080
|
||||
18.133.16.21:80
|
||||
3.12.144.146:80
|
||||
49.13.173.87:80
|
||||
13.56.192.187:80
|
||||
161.35.49.68:80
|
||||
13.37.59.99:80
|
||||
3.122.84.99:80
|
||||
158.140.139.11:58100
|
||||
148.66.6.210:80
|
||||
153.19.91.77:80
|
||||
189.22.234.41:80
|
||||
52.67.10.183:3128
|
||||
41.59.90.171:80
|
||||
43.132.219.102:80
|
||||
13.40.46.249:1088
|
||||
16.163.149.249:80
|
||||
3.71.239.218:3128
|
||||
13.36.113.81:3128
|
||||
60.242.169.3:80
|
||||
49.13.173.87:8081
|
||||
35.176.148.8:1080
|
||||
18.135.133.116:80
|
||||
13.37.89.201:3128
|
||||
3.127.121.101:80
|
||||
35.178.104.4:80
|
||||
182.72.203.246:80
|
||||
13.40.239.130:1080
|
||||
65.108.207.6:80
|
||||
18.223.25.15:80
|
||||
54.233.119.172:3128
|
||||
66.97.37.164:80
|
||||
3.78.92.159:80
|
||||
110.168.213.172:8080
|
||||
49.12.235.70:80
|
||||
94.156.250.169:20128
|
||||
15.236.106.236:3128
|
||||
13.38.153.36:80
|
||||
178.128.199.145:80
|
||||
156.67.217.159:80
|
||||
148.66.6.211:80
|
||||
13.36.87.105:3128
|
||||
3.126.147.182:80
|
||||
51.222.155.142:80
|
||||
141.145.214.176:80
|
||||
184.169.154.119:80
|
||||
5.255.113.61:80
|
||||
3.124.133.93:80
|
||||
3.127.121.101:3128
|
||||
148.66.6.212:80
|
||||
176.9.239.181:80
|
||||
63.35.64.177:3128
|
||||
18.169.83.87:1080
|
||||
148.66.6.214:80
|
||||
18.228.149.161:80
|
||||
18.228.198.164:80
|
||||
106.105.118.250:80
|
||||
103.174.102.127:80
|
||||
162.0.238.147:80
|
||||
103.127.1.130:80
|
||||
185.233.187.103:80
|
@ -0,0 +1,11 @@
|
||||
package ru.pricepulse.parsingservice;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
class ParsingServiceApplicationTests {
|
||||
|
||||
@Test
|
||||
void contextLoads() {
|
||||
}
|
||||
|
||||
}
|
Loading…
Reference in New Issue
Block a user