Compare commits
21 Commits
master
...
feature/pa
Author | SHA1 | Date | |
---|---|---|---|
|
8afc758987 | ||
|
4f5dda4dbf | ||
|
59c41a4912 | ||
c4bb7a5ffa | |||
|
fd71513bbf | ||
|
171cc650f1 | ||
1df7dc94b8 | |||
|
42d947440c | ||
|
83b1c5d72c | ||
|
82f648e16c | ||
84e0af60c9 | |||
|
9895aaff33 | ||
|
ae8ac061bc | ||
|
a0271125a1 | ||
84f344084c | |||
5ae300389c | |||
|
ef2240e8ab | ||
f58b0a4a02 | |||
ffe6920b29 | |||
30ca5acc34 | |||
a24bf08f52 |
18
.run/ParsingService [local].run.xml
Normal file
18
.run/ParsingService [local].run.xml
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
<component name="ProjectRunConfigurationManager">
|
||||||
|
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||||
|
<option name="ACTIVE_PROFILES" value="dev" />
|
||||||
|
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||||
|
<envs>
|
||||||
|
<env name="JDBC_PASSWORD" value="postgres" />
|
||||||
|
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
||||||
|
<env name="JDBC_USERNAME" value="postgres" />
|
||||||
|
<env name="SERVER_PORT" value="8080" />
|
||||||
|
<env name="WEBDRIVER_CHROME_PATH" value="$PROJECT_DIR$/parsing-service/web-driver/chromedriver" />
|
||||||
|
</envs>
|
||||||
|
<module name="parsing-service.main" />
|
||||||
|
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
||||||
|
<method v="2">
|
||||||
|
<option name="Make" enabled="true" />
|
||||||
|
</method>
|
||||||
|
</configuration>
|
||||||
|
</component>
|
80
docker/docker-compose.yml
Normal file
80
docker/docker-compose.yml
Normal file
@ -0,0 +1,80 @@
|
|||||||
|
version: "3.8"
|
||||||
|
name: price-pulse
|
||||||
|
services:
|
||||||
|
|
||||||
|
postgres:
|
||||||
|
image: postgres:16
|
||||||
|
ports:
|
||||||
|
- "5432:5432"
|
||||||
|
environment:
|
||||||
|
POSTGRES_DB: parsed_data
|
||||||
|
POSTGRES_USER: postgres
|
||||||
|
POSTGRES_PASSWORD: postgres
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1024M
|
||||||
|
reservations:
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
clickhouse:
|
||||||
|
image: clickhouse/clickhouse-server:latest
|
||||||
|
ports:
|
||||||
|
- "8123:8123" # HTTP interface; /play provides a browser query console, but a dedicated client is preferable for running queries
|
||||||
|
- "9000:9000" # Native protocol (TCP) interface used by clickhouse-client and other native clients
|
||||||
|
- "9009:9009" # Inter-server communication port (data exchange between ClickHouse servers, e.g. replication)
|
||||||
|
volumes:
|
||||||
|
- clickhouse_data:/var/lib/clickhouse
|
||||||
|
- clickhouse_logs:/var/log/clickhouse
|
||||||
|
environment:
|
||||||
|
CLICKHOUSE_DB: parsed_data
|
||||||
|
CLICKHOUSE_USER: user
|
||||||
|
CLICKHOUSE_PASSWORD: password
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1024M
|
||||||
|
reservations:
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
zookeeper:
|
||||||
|
image: confluentinc/cp-zookeeper:latest
|
||||||
|
environment:
|
||||||
|
ZOOKEEPER_CLIENT_PORT: 2181
|
||||||
|
ZOOKEEPER_TICK_TIME: 2000
|
||||||
|
ports:
|
||||||
|
- "2181:2181"
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1024M
|
||||||
|
reservations:
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
kafka:
|
||||||
|
image: confluentinc/cp-kafka:latest
|
||||||
|
depends_on:
|
||||||
|
- zookeeper
|
||||||
|
ports:
|
||||||
|
- "9092:9092"
|
||||||
|
environment:
|
||||||
|
KAFKA_BROKER_ID: 1
|
||||||
|
KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
|
||||||
|
KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://localhost:9092
|
||||||
|
KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT
|
||||||
|
KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
|
||||||
|
KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
|
||||||
|
volumes:
|
||||||
|
- kafka_data:/var/lib/kafka
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 1024M
|
||||||
|
reservations:
|
||||||
|
memory: 256M
|
||||||
|
|
||||||
|
volumes:
|
||||||
|
clickhouse_data:
|
||||||
|
clickhouse_logs:
|
||||||
|
kafka_data:
|
||||||
|
|
37
parsing-service/.gitignore
vendored
Normal file
37
parsing-service/.gitignore
vendored
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
HELP.md
|
||||||
|
.gradle
|
||||||
|
build/
|
||||||
|
!gradle/wrapper/gradle-wrapper.jar
|
||||||
|
!**/src/main/**/build/
|
||||||
|
!**/src/test/**/build/
|
||||||
|
|
||||||
|
### STS ###
|
||||||
|
.apt_generated
|
||||||
|
.classpath
|
||||||
|
.factorypath
|
||||||
|
.project
|
||||||
|
.settings
|
||||||
|
.springBeans
|
||||||
|
.sts4-cache
|
||||||
|
bin/
|
||||||
|
!**/src/main/**/bin/
|
||||||
|
!**/src/test/**/bin/
|
||||||
|
|
||||||
|
### IntelliJ IDEA ###
|
||||||
|
.idea
|
||||||
|
*.iws
|
||||||
|
*.iml
|
||||||
|
*.ipr
|
||||||
|
out/
|
||||||
|
!**/src/main/**/out/
|
||||||
|
!**/src/test/**/out/
|
||||||
|
|
||||||
|
### NetBeans ###
|
||||||
|
/nbproject/private/
|
||||||
|
/nbbuild/
|
||||||
|
/dist/
|
||||||
|
/nbdist/
|
||||||
|
/.nb-gradle/
|
||||||
|
|
||||||
|
### VS Code ###
|
||||||
|
.vscode/
|
17
parsing-service/.run/ParsingService [local].run.xml
Normal file
17
parsing-service/.run/ParsingService [local].run.xml
Normal file
@ -0,0 +1,17 @@
|
|||||||
|
<component name="ProjectRunConfigurationManager">
|
||||||
|
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||||
|
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
|
||||||
|
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||||
|
<envs>
|
||||||
|
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
||||||
|
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
|
||||||
|
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
|
||||||
|
<env name="SERVER_PORT" value="8080" />
|
||||||
|
</envs>
|
||||||
|
<module name="parsing-service.main" />
|
||||||
|
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
||||||
|
<method v="2">
|
||||||
|
<option name="Make" enabled="true" />
|
||||||
|
</method>
|
||||||
|
</configuration>
|
||||||
|
</component>
|
61
parsing-service/build.gradle
Normal file
61
parsing-service/build.gradle
Normal file
@ -0,0 +1,61 @@
|
|||||||
|
plugins {
|
||||||
|
id 'java'
|
||||||
|
id 'org.springframework.boot' version '3.3.4'
|
||||||
|
id 'io.spring.dependency-management' version '1.1.6'
|
||||||
|
}
|
||||||
|
|
||||||
|
group = 'ru.pricepulse'
|
||||||
|
version = '0.0.1-SNAPSHOT'
|
||||||
|
|
||||||
|
java {
|
||||||
|
toolchain {
|
||||||
|
languageVersion = JavaLanguageVersion.of(21)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
configurations {
|
||||||
|
compileOnly {
|
||||||
|
extendsFrom annotationProcessor
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
repositories {
|
||||||
|
mavenCentral()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Explicitly pinned versions of libraries referenced in the dependencies block below.
ext {
    jsoupVersion = '1.18.1'
    seleniumVersion = '4.25.0'
}

dependencies {
    // Application dependencies.
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.boot:spring-boot-starter-web'
    // implementation 'org.liquibase:liquibase-core'
    implementation 'org.springframework.kafka:spring-kafka'
    implementation "org.jsoup:jsoup:${jsoupVersion}"
    implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
    implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
    implementation 'org.apache.commons:commons-pool2:2.12.0'
    implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
    implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'
    implementation 'org.springframework.boot:spring-boot-starter-webflux'
    implementation 'org.springframework.retry:spring-retry:2.0.9'

    // Lombok is needed at compile time only; the annotation processor generates the code.
    compileOnly 'org.projectlombok:lombok'
    annotationProcessor 'org.projectlombok:lombok'

    // JDBC driver, required only at runtime.
    runtimeOnly 'org.postgresql:postgresql'

    // Test dependencies.
    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation 'org.springframework.kafka:spring-kafka-test'
    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
}
|
||||||
|
|
||||||
|
tasks.named('test') {
|
||||||
|
useJUnitPlatform()
|
||||||
|
}
|
BIN
parsing-service/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
BIN
parsing-service/gradle/wrapper/gradle-wrapper.jar
vendored
Normal file
Binary file not shown.
7
parsing-service/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
7
parsing-service/gradle/wrapper/gradle-wrapper.properties
vendored
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
distributionBase=GRADLE_USER_HOME
|
||||||
|
distributionPath=wrapper/dists
|
||||||
|
distributionUrl=https\://services.gradle.org/distributions/gradle-8.10.2-bin.zip
|
||||||
|
networkTimeout=10000
|
||||||
|
validateDistributionUrl=true
|
||||||
|
zipStoreBase=GRADLE_USER_HOME
|
||||||
|
zipStorePath=wrapper/dists
|
252
parsing-service/gradlew
vendored
Normal file
252
parsing-service/gradlew
vendored
Normal file
@ -0,0 +1,252 @@
|
|||||||
|
#!/bin/sh
|
||||||
|
|
||||||
|
#
|
||||||
|
# Copyright © 2015-2021 the original authors.
|
||||||
|
#
|
||||||
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
# you may not use this file except in compliance with the License.
|
||||||
|
# You may obtain a copy of the License at
|
||||||
|
#
|
||||||
|
# https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
#
|
||||||
|
# Unless required by applicable law or agreed to in writing, software
|
||||||
|
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
# See the License for the specific language governing permissions and
|
||||||
|
# limitations under the License.
|
||||||
|
#
|
||||||
|
# SPDX-License-Identifier: Apache-2.0
|
||||||
|
#
|
||||||
|
|
||||||
|
##############################################################################
|
||||||
|
#
|
||||||
|
# Gradle start up script for POSIX generated by Gradle.
|
||||||
|
#
|
||||||
|
# Important for running:
|
||||||
|
#
|
||||||
|
# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
|
||||||
|
# noncompliant, but you have some other compliant shell such as ksh or
|
||||||
|
# bash, then to run this script, type that shell name before the whole
|
||||||
|
# command line, like:
|
||||||
|
#
|
||||||
|
# ksh Gradle
|
||||||
|
#
|
||||||
|
# Busybox and similar reduced shells will NOT work, because this script
|
||||||
|
# requires all of these POSIX shell features:
|
||||||
|
# * functions;
|
||||||
|
# * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
|
||||||
|
# «${var#prefix}», «${var%suffix}», and «$( cmd )»;
|
||||||
|
# * compound commands having a testable exit status, especially «case»;
|
||||||
|
# * various built-in commands including «command», «set», and «ulimit».
|
||||||
|
#
|
||||||
|
# Important for patching:
|
||||||
|
#
|
||||||
|
# (2) This script targets any POSIX shell, so it avoids extensions provided
|
||||||
|
# by Bash, Ksh, etc; in particular arrays are avoided.
|
||||||
|
#
|
||||||
|
# The "traditional" practice of packing multiple parameters into a
|
||||||
|
# space-separated string is a well documented source of bugs and security
|
||||||
|
# problems, so this is (mostly) avoided, by progressively accumulating
|
||||||
|
# options in "$@", and eventually passing that to Java.
|
||||||
|
#
|
||||||
|
# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
|
||||||
|
# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
|
||||||
|
# see the in-line comments for details.
|
||||||
|
#
|
||||||
|
# There are tweaks for specific operating systems such as AIX, CygWin,
|
||||||
|
# Darwin, MinGW, and NonStop.
|
||||||
|
#
|
||||||
|
# (3) This script is generated from the Groovy template
|
||||||
|
# https://github.com/gradle/gradle/blob/HEAD/platforms/jvm/plugins-application/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
|
||||||
|
# within the Gradle project.
|
||||||
|
#
|
||||||
|
# You can find Gradle at https://github.com/gradle/gradle/.
|
||||||
|
#
|
||||||
|
##############################################################################
|
||||||
|
|
||||||
|
# Attempt to set APP_HOME
|
||||||
|
|
||||||
|
# Resolve links: $0 may be a link
|
||||||
|
app_path=$0
|
||||||
|
|
||||||
|
# Need this for daisy-chained symlinks.
|
||||||
|
while
|
||||||
|
APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
|
||||||
|
[ -h "$app_path" ]
|
||||||
|
do
|
||||||
|
ls=$( ls -ld "$app_path" )
|
||||||
|
link=${ls#*' -> '}
|
||||||
|
case $link in #(
|
||||||
|
/*) app_path=$link ;; #(
|
||||||
|
*) app_path=$APP_HOME$link ;;
|
||||||
|
esac
|
||||||
|
done
|
||||||
|
|
||||||
|
# This is normally unused
|
||||||
|
# shellcheck disable=SC2034
|
||||||
|
APP_BASE_NAME=${0##*/}
|
||||||
|
# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036)
|
||||||
|
APP_HOME=$( cd -P "${APP_HOME:-./}" > /dev/null && printf '%s
|
||||||
|
' "$PWD" ) || exit
|
||||||
|
|
||||||
|
# Use the maximum available, or set MAX_FD != -1 to use that value.
|
||||||
|
MAX_FD=maximum
|
||||||
|
|
||||||
|
warn () {
|
||||||
|
echo "$*"
|
||||||
|
} >&2
|
||||||
|
|
||||||
|
die () {
|
||||||
|
echo
|
||||||
|
echo "$*"
|
||||||
|
echo
|
||||||
|
exit 1
|
||||||
|
} >&2
|
||||||
|
|
||||||
|
# OS specific support (must be 'true' or 'false').
|
||||||
|
cygwin=false
|
||||||
|
msys=false
|
||||||
|
darwin=false
|
||||||
|
nonstop=false
|
||||||
|
case "$( uname )" in #(
|
||||||
|
CYGWIN* ) cygwin=true ;; #(
|
||||||
|
Darwin* ) darwin=true ;; #(
|
||||||
|
MSYS* | MINGW* ) msys=true ;; #(
|
||||||
|
NONSTOP* ) nonstop=true ;;
|
||||||
|
esac
|
||||||
|
|
||||||
|
CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
|
||||||
|
|
||||||
|
|
||||||
|
# Determine the Java command to use to start the JVM.
|
||||||
|
if [ -n "$JAVA_HOME" ] ; then
|
||||||
|
if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
|
||||||
|
# IBM's JDK on AIX uses strange locations for the executables
|
||||||
|
JAVACMD=$JAVA_HOME/jre/sh/java
|
||||||
|
else
|
||||||
|
JAVACMD=$JAVA_HOME/bin/java
|
||||||
|
fi
|
||||||
|
if [ ! -x "$JAVACMD" ] ; then
|
||||||
|
die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
|
||||||
|
|
||||||
|
Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
location of your Java installation."
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
JAVACMD=java
|
||||||
|
if ! command -v java >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
|
||||||
|
|
||||||
|
Please set the JAVA_HOME variable in your environment to match the
|
||||||
|
location of your Java installation."
|
||||||
|
fi
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Increase the maximum file descriptors if we can.
|
||||||
|
if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
|
||||||
|
case $MAX_FD in #(
|
||||||
|
max*)
|
||||||
|
# In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
|
||||||
|
# shellcheck disable=SC2039,SC3045
|
||||||
|
MAX_FD=$( ulimit -H -n ) ||
|
||||||
|
warn "Could not query maximum file descriptor limit"
|
||||||
|
esac
|
||||||
|
case $MAX_FD in #(
|
||||||
|
'' | soft) :;; #(
|
||||||
|
*)
|
||||||
|
# In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
|
||||||
|
# shellcheck disable=SC2039,SC3045
|
||||||
|
ulimit -n "$MAX_FD" ||
|
||||||
|
warn "Could not set maximum file descriptor limit to $MAX_FD"
|
||||||
|
esac
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Collect all arguments for the java command, stacking in reverse order:
|
||||||
|
# * args from the command line
|
||||||
|
# * the main class name
|
||||||
|
# * -classpath
|
||||||
|
# * -D...appname settings
|
||||||
|
# * --module-path (only if needed)
|
||||||
|
# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
|
||||||
|
|
||||||
|
# For Cygwin or MSYS, switch paths to Windows format before running java
|
||||||
|
if "$cygwin" || "$msys" ; then
|
||||||
|
APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
|
||||||
|
CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
|
||||||
|
|
||||||
|
JAVACMD=$( cygpath --unix "$JAVACMD" )
|
||||||
|
|
||||||
|
# Now convert the arguments - kludge to limit ourselves to /bin/sh
|
||||||
|
for arg do
|
||||||
|
if
|
||||||
|
case $arg in #(
|
||||||
|
-*) false ;; # don't mess with options #(
|
||||||
|
/?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
|
||||||
|
[ -e "$t" ] ;; #(
|
||||||
|
*) false ;;
|
||||||
|
esac
|
||||||
|
then
|
||||||
|
arg=$( cygpath --path --ignore --mixed "$arg" )
|
||||||
|
fi
|
||||||
|
# Roll the args list around exactly as many times as the number of
|
||||||
|
# args, so each arg winds up back in the position where it started, but
|
||||||
|
# possibly modified.
|
||||||
|
#
|
||||||
|
# NB: a `for` loop captures its iteration list before it begins, so
|
||||||
|
# changing the positional parameters here affects neither the number of
|
||||||
|
# iterations, nor the values presented in `arg`.
|
||||||
|
shift # remove old arg
|
||||||
|
set -- "$@" "$arg" # push replacement arg
|
||||||
|
done
|
||||||
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
|
DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
|
||||||
|
|
||||||
|
# Collect all arguments for the java command:
|
||||||
|
# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments,
|
||||||
|
# and any embedded shellness will be escaped.
|
||||||
|
# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be
|
||||||
|
# treated as '${Hostname}' itself on the command line.
|
||||||
|
|
||||||
|
set -- \
|
||||||
|
"-Dorg.gradle.appname=$APP_BASE_NAME" \
|
||||||
|
-classpath "$CLASSPATH" \
|
||||||
|
org.gradle.wrapper.GradleWrapperMain \
|
||||||
|
"$@"
|
||||||
|
|
||||||
|
# Stop when "xargs" is not available.
|
||||||
|
if ! command -v xargs >/dev/null 2>&1
|
||||||
|
then
|
||||||
|
die "xargs is not available"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# Use "xargs" to parse quoted args.
|
||||||
|
#
|
||||||
|
# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
|
||||||
|
#
|
||||||
|
# In Bash we could simply go:
|
||||||
|
#
|
||||||
|
# readarray ARGS < <( xargs -n1 <<<"$var" ) &&
|
||||||
|
# set -- "${ARGS[@]}" "$@"
|
||||||
|
#
|
||||||
|
# but POSIX shell has neither arrays nor command substitution, so instead we
|
||||||
|
# post-process each arg (as a line of input to sed) to backslash-escape any
|
||||||
|
# character that might be a shell metacharacter, then use eval to reverse
|
||||||
|
# that process (while maintaining the separation between arguments), and wrap
|
||||||
|
# the whole thing up as a single "set" statement.
|
||||||
|
#
|
||||||
|
# This will of course break if any of these variables contains a newline or
|
||||||
|
# an unmatched quote.
|
||||||
|
#
|
||||||
|
|
||||||
|
eval "set -- $(
|
||||||
|
printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
|
||||||
|
xargs -n1 |
|
||||||
|
sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
|
||||||
|
tr '\n' ' '
|
||||||
|
)" '"$@"'
|
||||||
|
|
||||||
|
exec "$JAVACMD" "$@"
|
94
parsing-service/gradlew.bat
vendored
Normal file
94
parsing-service/gradlew.bat
vendored
Normal file
@ -0,0 +1,94 @@
|
|||||||
|
@rem
|
||||||
|
@rem Copyright 2015 the original author or authors.
|
||||||
|
@rem
|
||||||
|
@rem Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
@rem you may not use this file except in compliance with the License.
|
||||||
|
@rem You may obtain a copy of the License at
|
||||||
|
@rem
|
||||||
|
@rem https://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
@rem
|
||||||
|
@rem Unless required by applicable law or agreed to in writing, software
|
||||||
|
@rem distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
@rem See the License for the specific language governing permissions and
|
||||||
|
@rem limitations under the License.
|
||||||
|
@rem
|
||||||
|
@rem SPDX-License-Identifier: Apache-2.0
|
||||||
|
@rem
|
||||||
|
|
||||||
|
@if "%DEBUG%"=="" @echo off
|
||||||
|
@rem ##########################################################################
|
||||||
|
@rem
|
||||||
|
@rem Gradle startup script for Windows
|
||||||
|
@rem
|
||||||
|
@rem ##########################################################################
|
||||||
|
|
||||||
|
@rem Set local scope for the variables with windows NT shell
|
||||||
|
if "%OS%"=="Windows_NT" setlocal
|
||||||
|
|
||||||
|
set DIRNAME=%~dp0
|
||||||
|
if "%DIRNAME%"=="" set DIRNAME=.
|
||||||
|
@rem This is normally unused
|
||||||
|
set APP_BASE_NAME=%~n0
|
||||||
|
set APP_HOME=%DIRNAME%
|
||||||
|
|
||||||
|
@rem Resolve any "." and ".." in APP_HOME to make it shorter.
|
||||||
|
for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
|
||||||
|
|
||||||
|
@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
|
||||||
|
set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
|
||||||
|
|
||||||
|
@rem Find java.exe
|
||||||
|
if defined JAVA_HOME goto findJavaFromJavaHome
|
||||||
|
|
||||||
|
set JAVA_EXE=java.exe
|
||||||
|
%JAVA_EXE% -version >NUL 2>&1
|
||||||
|
if %ERRORLEVEL% equ 0 goto execute
|
||||||
|
|
||||||
|
echo. 1>&2
|
||||||
|
echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2
|
||||||
|
echo. 1>&2
|
||||||
|
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||||
|
echo location of your Java installation. 1>&2
|
||||||
|
|
||||||
|
goto fail
|
||||||
|
|
||||||
|
:findJavaFromJavaHome
|
||||||
|
set JAVA_HOME=%JAVA_HOME:"=%
|
||||||
|
set JAVA_EXE=%JAVA_HOME%/bin/java.exe
|
||||||
|
|
||||||
|
if exist "%JAVA_EXE%" goto execute
|
||||||
|
|
||||||
|
echo. 1>&2
|
||||||
|
echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2
|
||||||
|
echo. 1>&2
|
||||||
|
echo Please set the JAVA_HOME variable in your environment to match the 1>&2
|
||||||
|
echo location of your Java installation. 1>&2
|
||||||
|
|
||||||
|
goto fail
|
||||||
|
|
||||||
|
:execute
|
||||||
|
@rem Setup the command line
|
||||||
|
|
||||||
|
set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
|
||||||
|
|
||||||
|
|
||||||
|
@rem Execute Gradle
|
||||||
|
"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
|
||||||
|
|
||||||
|
:end
|
||||||
|
@rem End local scope for the variables with windows NT shell
|
||||||
|
if %ERRORLEVEL% equ 0 goto mainEnd
|
||||||
|
|
||||||
|
:fail
|
||||||
|
rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
|
||||||
|
rem the _cmd.exe /c_ return code!
|
||||||
|
set EXIT_CODE=%ERRORLEVEL%
|
||||||
|
if %EXIT_CODE% equ 0 set EXIT_CODE=1
|
||||||
|
if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
|
||||||
|
exit /b %EXIT_CODE%
|
||||||
|
|
||||||
|
:mainEnd
|
||||||
|
if "%OS%"=="Windows_NT" endlocal
|
||||||
|
|
||||||
|
:omega
|
1
parsing-service/settings.gradle
Normal file
1
parsing-service/settings.gradle
Normal file
@ -0,0 +1 @@
|
|||||||
|
rootProject.name = 'parsing-service'
|
@ -0,0 +1,15 @@
|
|||||||
|
package ru.pricepulse.parsingservice;
|
||||||
|
|
||||||
|
import org.springframework.boot.SpringApplication;
|
||||||
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
|
import org.springframework.retry.annotation.EnableRetry;
|
||||||
|
|
||||||
|
@SpringBootApplication
|
||||||
|
@EnableRetry
|
||||||
|
public class ParsingServiceApplication {
|
||||||
|
|
||||||
|
public static void main(String[] args) {
|
||||||
|
SpringApplication.run(ParsingServiceApplication.class, args);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,16 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
public class DateTimeFormatterConfig {
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public DateTimeFormatter partitionDateTimeFormatter() {
|
||||||
|
return DateTimeFormatter.ofPattern("yyyy_MM");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,36 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.http.HttpRequest;
|
||||||
|
import org.springframework.http.client.ClientHttpRequestExecution;
|
||||||
|
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||||
|
import org.springframework.http.client.ClientHttpResponse;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
|
||||||
|
|
||||||
|
private final UserAgentProvider userAgentProvider;
|
||||||
|
private final ProxyProvider proxyProvider;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
|
||||||
|
// Получаем случайный прокси
|
||||||
|
//InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
|
||||||
|
//log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||||
|
|
||||||
|
// Устанавливаем прокси
|
||||||
|
//System.setProperty("http.proxyHost", proxyAddress.getHostName());
|
||||||
|
//System.setProperty("http.proxyPort", String.valueOf(proxyAddress.getPort()));
|
||||||
|
|
||||||
|
//Устанавливаем динамический user-agent
|
||||||
|
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||||
|
request.getHeaders().set("User-Agent", randomUserAgent);
|
||||||
|
|
||||||
|
return execution.execute(request, body);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
|
||||||
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableConfigurationProperties(KafkaProperties.class)
|
||||||
|
public class KafkaConfig {
|
||||||
|
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Configuration
|
||||||
|
@EnableConfigurationProperties({
|
||||||
|
OzonConfigProperties.class,
|
||||||
|
WildberriesConfigProperties.class
|
||||||
|
})
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class MarketplacesConfig {
|
||||||
|
private final WildberriesConfigProperties wildberriesConfigProperties;
|
||||||
|
private final OzonConfigProperties ozonConfigProperties;
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import jakarta.annotation.PostConstruct;
|
||||||
|
import org.springframework.core.io.Resource;
|
||||||
|
import org.springframework.core.io.ResourceLoader;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.atomic.AtomicInteger;
|
||||||
|
|
||||||
|
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.checkProxies;
|
||||||
|
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.readProxiesFromFile;
|
||||||
|
import static ru.pricepulse.parsingservice.wildberries_parser.proxy.ProxyChecker.saveProxiesToFile;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ProxyProvider {
|
||||||
|
private List<String> workingProxies;
|
||||||
|
|
||||||
|
private final AtomicInteger currentProxyIndex = new AtomicInteger(0);
|
||||||
|
|
||||||
|
private final ResourceLoader resourceLoader;
|
||||||
|
|
||||||
|
public ProxyProvider(ResourceLoader resourceLoader) {
|
||||||
|
this.resourceLoader = resourceLoader;
|
||||||
|
}
|
||||||
|
|
||||||
|
//@PostConstruct
|
||||||
|
public void init() throws IOException {
|
||||||
|
Resource proxy = resourceLoader.getResource("classpath:proxy.txt");
|
||||||
|
Resource okProxy = resourceLoader.getResource("classpath:ok-proxy.txt");
|
||||||
|
List<String> proxies = Files.readAllLines(Path.of(proxy.getURI()));
|
||||||
|
System.out.println("Начата проверка проксей");
|
||||||
|
workingProxies = checkProxies(proxies);
|
||||||
|
System.out.println("Закончена проверка проксей");
|
||||||
|
|
||||||
|
|
||||||
|
saveProxiesToFile(workingProxies, Path.of(okProxy.getURI()));
|
||||||
|
|
||||||
|
if (workingProxies.isEmpty()) {
|
||||||
|
throw new RuntimeException("Нет доступных рабочих прокси.");
|
||||||
|
}
|
||||||
|
|
||||||
|
System.out.println("Найдено рабочих прокси: " + workingProxies.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
public synchronized InetSocketAddress getNextProxy() {
|
||||||
|
// Получаем текущий индекс прокси
|
||||||
|
int currentIndex = currentProxyIndex.getAndUpdate(index -> (index + 1) % workingProxies.size());
|
||||||
|
|
||||||
|
String[] proxy = workingProxies.get(currentIndex).split(":");
|
||||||
|
return new InetSocketAddress(proxy[0], Integer.parseInt(proxy[1]));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||||
|
import org.springframework.web.client.RestTemplate;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class RestTemplateConfig {
|
||||||
|
|
||||||
|
private final UserAgentProvider userAgentProvider;
|
||||||
|
private final ProxyProvider proxyProvider;
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public RestTemplate restTemplate() {
|
||||||
|
RestTemplate restTemplate = new RestTemplate();
|
||||||
|
ClientHttpRequestInterceptor dynamicProxyInterceptor = new DynamicProxyInterceptor(userAgentProvider, proxyProvider);
|
||||||
|
|
||||||
|
// Добавляем интерсептор в RestTemplate
|
||||||
|
restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
|
||||||
|
|
||||||
|
return restTemplate;
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.retry.annotation.EnableRetry;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableRetry
|
||||||
|
public class RetryConfig {}
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.scheduling.annotation.EnableScheduling;
|
||||||
|
import org.springframework.scheduling.concurrent.ThreadPoolTaskScheduler;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableScheduling
|
||||||
|
public class SchedulerConfig {
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public ThreadPoolTaskScheduler taskScheduler() {
|
||||||
|
ThreadPoolTaskScheduler taskScheduler = new ThreadPoolTaskScheduler();
|
||||||
|
taskScheduler.setPoolSize(10);
|
||||||
|
taskScheduler.setThreadNamePrefix("ScheduledTask-");
|
||||||
|
return taskScheduler;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,10 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.SeleniumConfigProperties;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@EnableConfigurationProperties(SeleniumConfigProperties.class)
|
||||||
|
public class SeleniumConfig {
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class UserAgentProvider {
|
||||||
|
private static final List<String> userAgents = List.of(
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)",
|
||||||
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
|
||||||
|
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A372 Safari/604.1"
|
||||||
|
);
|
||||||
|
|
||||||
|
public String getRandomUserAgent() {
|
||||||
|
return userAgents.get(new Random().nextInt(userAgents.size()));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,58 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.http.HttpHeaders;
|
||||||
|
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
|
||||||
|
import org.springframework.web.reactive.function.client.WebClient;
|
||||||
|
import reactor.netty.http.client.HttpClient;
|
||||||
|
import reactor.netty.transport.ProxyProvider;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Configuration
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class WebClientConfig {
|
||||||
|
private final UserAgentProvider userAgentProvider;
|
||||||
|
private final ru.pricepulse.parsingservice.config.ProxyProvider proxyProvider;
|
||||||
|
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public WebClient webClient() {
|
||||||
|
return WebClient.builder()
|
||||||
|
.filter((request, next) -> {
|
||||||
|
// Получаем случайный прокси для каждого запроса
|
||||||
|
InetSocketAddress proxyAddress = proxyProvider.getNextProxy();
|
||||||
|
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||||
|
|
||||||
|
HttpClient httpClient = HttpClient.create()
|
||||||
|
.proxy(proxy -> proxy
|
||||||
|
.type(ProxyProvider.Proxy.HTTP)
|
||||||
|
.address(proxyAddress));
|
||||||
|
|
||||||
|
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||||
|
log.info("Используемый User-Agent: {}", randomUserAgent);
|
||||||
|
|
||||||
|
// Создаем новый WebClient с прокси
|
||||||
|
WebClient webClientWithProxy = WebClient.builder()
|
||||||
|
.clientConnector(new ReactorClientHttpConnector(httpClient))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// Выполняем запрос с обновленным User-Agent через WebClient с прокси
|
||||||
|
return webClientWithProxy
|
||||||
|
.method(request.method())
|
||||||
|
.uri(request.url())
|
||||||
|
.headers(headers -> headers.putAll(request.headers()))
|
||||||
|
.header(HttpHeaders.USER_AGENT, randomUserAgent)
|
||||||
|
.body(request.body()).exchange();
|
||||||
|
})
|
||||||
|
.codecs(configurer -> configurer
|
||||||
|
.defaultCodecs()
|
||||||
|
.maxInMemorySize(10 * 1024 * 1024))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,59 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
|
import org.openqa.selenium.chrome.ChromeOptions;
|
||||||
|
import org.springframework.beans.factory.config.ConfigurableBeanFactory;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.context.annotation.Scope;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
public class WebDriverConfig {
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Profile("visible")
|
||||||
|
@Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
|
||||||
|
public WebDriver webDriverVisible() {
|
||||||
|
Map<String, Object> prefs = new HashMap<>();
|
||||||
|
prefs.put("profile.managed_default_content_settings.images", 2);
|
||||||
|
prefs.put("profile.managed_default_content_settings.geolocation", 2);
|
||||||
|
|
||||||
|
var options = new ChromeOptions();
|
||||||
|
options.setExperimentalOption("prefs", prefs);
|
||||||
|
WebDriverManager.chromedriver().setup();
|
||||||
|
return new ChromeDriver(options);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Profile("headless")
|
||||||
|
@Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
|
||||||
|
public WebDriver webDriverHeadless(ChromeOptions options) {
|
||||||
|
WebDriverManager.chromedriver().setup();
|
||||||
|
return new ChromeDriver(options);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
@Profile("headless")
|
||||||
|
public ChromeOptions chromeOptions() {
|
||||||
|
Map<String, Object> prefs = new HashMap<>();
|
||||||
|
prefs.put("profile.managed_default_content_settings.images", 2);
|
||||||
|
prefs.put("profile.managed_default_content_settings.stylesheets", 2);
|
||||||
|
|
||||||
|
var options = new ChromeOptions();
|
||||||
|
options.setExperimentalOption("prefs", prefs);
|
||||||
|
options.addArguments("user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36");
|
||||||
|
//options.addArguments("--window-size=1920,2000");
|
||||||
|
options.addArguments("--headless");
|
||||||
|
options.addArguments("--disable-gpu");
|
||||||
|
options.addArguments("--no-sandbox");
|
||||||
|
options.addArguments("--disable-dev-shm-usage");
|
||||||
|
return options;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,8 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config.properties;
|
||||||
|
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
|
@ConfigurationProperties(prefix = "application.kafka")
|
||||||
|
public class KafkaConfigProperties {
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,16 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config.properties;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@ConfigurationProperties(prefix = "marketplace.ozon")
|
||||||
|
public class OzonConfigProperties {
|
||||||
|
|
||||||
|
private Integer maxThreads;
|
||||||
|
|
||||||
|
private Integer maxNumOfPagesOnScreen;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,7 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config.properties;
|
||||||
|
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
|
@ConfigurationProperties("selenium")
|
||||||
|
public class SeleniumConfigProperties {
|
||||||
|
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config.properties;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@ConfigurationProperties(prefix = "marketplace.wildberries")
|
||||||
|
public class WildberriesConfigProperties {
|
||||||
|
private String baseUrl;
|
||||||
|
private String catalogUrl;
|
||||||
|
private String userAgent;
|
||||||
|
private String catalogWbUrl;
|
||||||
|
private int retryAttempts;
|
||||||
|
private long retryDelay;
|
||||||
|
private String laptopUrl;
|
||||||
|
private String shard;
|
||||||
|
}
|
@ -0,0 +1,6 @@
|
|||||||
|
package ru.pricepulse.parsingservice.enumeration;
|
||||||
|
|
||||||
|
/**
 * Product categories known to the parsing service.
 */
public enum Category {

    /** Laptops. */
    LAPTOP,

    /** Smartphones. */
    SMARTPHONE
}
|
@ -0,0 +1,8 @@
|
|||||||
|
package ru.pricepulse.parsingservice.enumeration;
|
||||||
|
|
||||||
|
/**
 * Marketplaces known to the parsing service.
 */
public enum Marketplace {

    WILDBERRIES,

    OZON,

    DNS
}
|
||||||
|
|
@ -0,0 +1,31 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
|
||||||
|
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
|
||||||
|
public enum OzonCategory {
|
||||||
|
|
||||||
|
LAPTOP ("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
|
||||||
|
|
||||||
|
SMARTPHONE ("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);
|
||||||
|
|
||||||
|
private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";
|
||||||
|
|
||||||
|
private final String categoryUrl;
|
||||||
|
|
||||||
|
private final Category mappedCategory;
|
||||||
|
|
||||||
|
OzonCategory(String categoryUrl,
|
||||||
|
Category mappedCategory) {
|
||||||
|
this.categoryUrl = categoryUrl;
|
||||||
|
this.mappedCategory = mappedCategory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCategoryUrl() {
|
||||||
|
return BASE_CATEGORY_URL + categoryUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Category getMappedCategory() {
|
||||||
|
return mappedCategory;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,70 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.pool;
|
||||||
|
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
|
import java.util.Queue;
|
||||||
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
|
|
||||||
|
import jakarta.annotation.PreDestroy;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.springframework.beans.factory.ObjectFactory;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Component
|
||||||
|
@Profile("ozon")
|
||||||
|
public class WebDriverPool {
|
||||||
|
|
||||||
|
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();
|
||||||
|
|
||||||
|
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();
|
||||||
|
|
||||||
|
private final ObjectFactory<WebDriver> webDriverFactory;
|
||||||
|
|
||||||
|
private final OzonConfigProperties ozonConfigProperties;
|
||||||
|
|
||||||
|
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
|
||||||
|
OzonConfigProperties ozonConfigProperties) {
|
||||||
|
this.webDriverFactory = webDriverFactory;
|
||||||
|
this.ozonConfigProperties = ozonConfigProperties;
|
||||||
|
int poolSize = ozonConfigProperties.getMaxThreads();
|
||||||
|
|
||||||
|
for (int i = 0; i < poolSize; i++) {
|
||||||
|
availableDrivers.add(createNewDriver());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private WebDriver createNewDriver() {
|
||||||
|
return webDriverFactory.getObject();
|
||||||
|
}
|
||||||
|
|
||||||
|
public WebDriver borrowDriver() {
|
||||||
|
WebDriver driver = availableDrivers.poll();
|
||||||
|
if (driver != null) {
|
||||||
|
busyDrivers.add(driver);
|
||||||
|
return driver;
|
||||||
|
}
|
||||||
|
throw new NoSuchElementException("No available driver found");
|
||||||
|
}
|
||||||
|
|
||||||
|
public void returnDriver(WebDriver driver) {
|
||||||
|
busyDrivers.remove(driver);
|
||||||
|
availableDrivers.add(driver);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PreDestroy
|
||||||
|
public void shutdownPool() {
|
||||||
|
for (WebDriver driver : availableDrivers) {
|
||||||
|
driver.quit();
|
||||||
|
}
|
||||||
|
|
||||||
|
for (WebDriver driver : busyDrivers) {
|
||||||
|
driver.quit();
|
||||||
|
}
|
||||||
|
availableDrivers.clear();
|
||||||
|
busyDrivers.clear();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,17 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class OzonService {
|
||||||
|
|
||||||
|
public OzonCategory[] getCategories() {
|
||||||
|
return OzonCategory.values();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Profile("postgres_stat")
|
||||||
|
public class PartitionService {
|
||||||
|
|
||||||
|
private final JdbcTemplate postgresDataSource;
|
||||||
|
|
||||||
|
public boolean checkPartitionExists(String partitionName) {
|
||||||
|
String query = "SELECT to_regclass('public." + partitionName + "')";
|
||||||
|
String result = postgresDataSource.queryForObject(query, String.class);
|
||||||
|
return result != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void createPartition(String partitionName, String startDate, String endDate) {
|
||||||
|
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
|
||||||
|
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
|
||||||
|
postgresDataSource.execute(createPartitionSQL);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,30 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@Builder
|
||||||
|
public class ParsedData {
|
||||||
|
|
||||||
|
private Marketplace marketplace;
|
||||||
|
|
||||||
|
private Category category;
|
||||||
|
|
||||||
|
private String brand;
|
||||||
|
|
||||||
|
private String productName;
|
||||||
|
|
||||||
|
private String url;
|
||||||
|
|
||||||
|
private String imageUrl;
|
||||||
|
|
||||||
|
private BigDecimal price;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,64 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class AccessDeniedPage implements MarketplacePage {
|
||||||
|
|
||||||
|
private static final String RELOAD_BUTTON_ID = "reload-button";
|
||||||
|
private static final String RELOAD_BUTTON_XPATH = "//button[contains(text(),'Обновить')]";
|
||||||
|
private static final String WARNING_IMAGE_CSS = "img[alt='warning']";
|
||||||
|
private static final String ACCESS_DENIED_TEXT_XPATH = "//h1[text()='Доступ ограничен']";
|
||||||
|
|
||||||
|
private final By reloadButtonById = By.id(RELOAD_BUTTON_ID);
|
||||||
|
private final By reloadButtonByXpath = By.xpath(RELOAD_BUTTON_XPATH);
|
||||||
|
private final By warningImage = By.cssSelector(WARNING_IMAGE_CSS);
|
||||||
|
private final By accessDeniedText = By.xpath(ACCESS_DENIED_TEXT_XPATH);
|
||||||
|
|
||||||
|
private WebDriver driver;
|
||||||
|
private WebDriverWait wait;
|
||||||
|
|
||||||
|
public AccessDeniedPage(WebDriver driver,
|
||||||
|
WebDriverWait wait) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = wait;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void clickReloadButton() {
|
||||||
|
try {
|
||||||
|
log.debug("Пытаемся найти кнопку по id и нажать");
|
||||||
|
driver.findElement(reloadButtonById).click();
|
||||||
|
return;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Кнопка обновления страницы не найдена по id");
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
log.debug("Пытаемся найти кнопку по xpath и нажать");
|
||||||
|
driver.findElement(reloadButtonByXpath).click();
|
||||||
|
log.debug("Успешно нашли кнопку по xpath");
|
||||||
|
return;
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.debug("Кнопка обновления страницы не найдена по xpath");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isWarningImage() {
|
||||||
|
return driver.findElement(warningImage) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean isAccessDeniedText() {
|
||||||
|
return driver.findElement(accessDeniedText) != null;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isLoaded() {
|
||||||
|
try {
|
||||||
|
return isWarningImage() && isAccessDeniedText();
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,90 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||||
|
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class CategoryPage implements MarketplacePage {
|
||||||
|
|
||||||
|
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
|
||||||
|
|
||||||
|
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||||
|
|
||||||
|
private WebDriver driver;
|
||||||
|
|
||||||
|
private WebDriverWait wait;
|
||||||
|
|
||||||
|
public CategoryPage(WebDriver driver, WebDriverWait wait) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = wait;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<ParsedData> getParsedProducts() {
|
||||||
|
wait.until(visibilityOfElementLocated(searchResults));
|
||||||
|
log.info("Нашли SearchResultsV2");
|
||||||
|
var searchResultsElement = driver.findElement(searchResults);
|
||||||
|
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
|
||||||
|
log.info("Нашли внешний блок списка");
|
||||||
|
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
|
||||||
|
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
|
||||||
|
log.info("Нашли элементы списка");
|
||||||
|
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
|
||||||
|
|
||||||
|
var products = new ArrayList<ParsedData>();
|
||||||
|
innerDivs.forEach(innerDiv -> {
|
||||||
|
var productDataDivs = innerDiv.findElements(By.cssSelector(":scope > div"));
|
||||||
|
var productImageUrl = productDataDivs.get(0)
|
||||||
|
.findElement(By.cssSelector(":scope > a > div"))
|
||||||
|
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||||
|
.findElement(By.tagName("img")).getAttribute("src");
|
||||||
|
|
||||||
|
var productBrand = productDataDivs.get(1).findElement(By.cssSelector(":scope > div"))
|
||||||
|
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||||
|
.findElement(By.tagName("b")).getText();
|
||||||
|
|
||||||
|
var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
|
||||||
|
|
||||||
|
var productUrl = productNameLink.getAttribute("href");
|
||||||
|
|
||||||
|
var productName = productNameLink.findElement(By.tagName("span")).getText();
|
||||||
|
|
||||||
|
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
||||||
|
.findElements(By.tagName("span")).getFirst().getText());
|
||||||
|
/*var parsedData = new ParsedData();
|
||||||
|
parsedData.setUrl(productUrl);
|
||||||
|
parsedData.setBrand(productBrand);
|
||||||
|
parsedData.setProductName(productName);
|
||||||
|
parsedData.setImageUrl(productImageUrl);
|
||||||
|
parsedData.setPrice(productPrice);
|
||||||
|
products.add(parsedData);*/
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
return products;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigDecimal parseCurrency(String currencyStr) {
|
||||||
|
String cleanedString = currencyStr.replaceAll("[^\\d]", "");
|
||||||
|
|
||||||
|
return new BigDecimal(cleanedString);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isLoaded() {
|
||||||
|
try {
|
||||||
|
return driver.findElement(searchResults) != null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,7 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
/**
 * Common contract of the marketplace page objects.
 */
public interface MarketplacePage {

    /**
     * @return {@code true} if the browser is currently showing this page
     */
    boolean isLoaded();
}
|
@ -0,0 +1,38 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class NoContentPage {
|
||||||
|
|
||||||
|
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||||
|
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
||||||
|
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
|
||||||
|
|
||||||
|
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||||
|
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
||||||
|
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
|
||||||
|
|
||||||
|
private WebDriver driver;
|
||||||
|
|
||||||
|
private WebDriverWait wait;
|
||||||
|
|
||||||
|
public NoContentPage(WebDriver driver, WebDriverWait wait) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = wait;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLoaded() {
|
||||||
|
try {
|
||||||
|
return driver.findElement(searchResultsError) != null
|
||||||
|
|| driver.findElement(errorText) != null
|
||||||
|
|| driver.findElement(notFoundText) != null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,228 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class OzonCategoryPage {
|
||||||
|
|
||||||
|
private static final String OZON_MAIN_LINK = "https://www.ozon.ru";
|
||||||
|
|
||||||
|
public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";
|
||||||
|
|
||||||
|
public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
|
||||||
|
|
||||||
|
public static final int INDEX_OF_PRODUCT_PRICE = 0;
|
||||||
|
|
||||||
|
public static final int INDEX_OF_PRODUCT_BRAND = 1;
|
||||||
|
|
||||||
|
public static final int INDEX_OF_PRODUCT_NAME = 2;
|
||||||
|
|
||||||
|
private final Document document;
|
||||||
|
|
||||||
|
/**
 * Parses the given HTML with jsoup and keeps the resulting document for later queries.
 *
 * @param pageHtml HTML source of a category page
 */
public OzonCategoryPage(String pageHtml) {
    document = Jsoup.parse(pageHtml);
}
|
||||||
|
|
||||||
|
public List<ParsedData> getProducts(Category category) {
|
||||||
|
List<ParsedData> products = new ArrayList<>();
|
||||||
|
|
||||||
|
Elements searchResultsDivs = getSearchResultsDivs();
|
||||||
|
if (searchResultsDivs.isEmpty()) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
|
||||||
|
|
||||||
|
for (Element searchResultsDiv : searchResultsDivs) {
|
||||||
|
Elements productsDivs = getProductsDivs(searchResultsDiv);
|
||||||
|
List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
|
||||||
|
List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
|
||||||
|
products.addAll(parsedProductsData);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*try {
|
||||||
|
|
||||||
|
for (Element searchResultsDiv : searchResultsDivs) {
|
||||||
|
var productDivs = searchResultsDiv.select("> div > div");
|
||||||
|
for (Element productDiv : productDivs) {
|
||||||
|
Elements productDataDivs = productDivs.select("> div > *");
|
||||||
|
if (productDataDivs.select("> *").isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
productDataDivs.removeLast();
|
||||||
|
Element productUrlAndImageUrlA = productDataDivs.first();
|
||||||
|
Element productDataDiv = productDataDivs.last();
|
||||||
|
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||||
|
try {
|
||||||
|
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||||
|
.select("span").text().toLowerCase()
|
||||||
|
.contains("осталось")) {
|
||||||
|
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND).select("> span");
|
||||||
|
|
||||||
|
String productUrl = OZON_MAIN_LINK + productUrlAndImageUrlA.attr("href").replaceAll("\\?.*$", "");
|
||||||
|
String productImageUrl = productUrlAndImageUrlA.select("> div > div")
|
||||||
|
.first().getElementsByTag("img")
|
||||||
|
.first().attr("src");
|
||||||
|
|
||||||
|
BigDecimal productPrice;
|
||||||
|
try {
|
||||||
|
productPrice = parseOzonPriceToBigDecimal(
|
||||||
|
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||||
|
.first().text());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("не удалось распарсить цену");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String productBrand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||||
|
String productName = productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME).select("> div > span").text();
|
||||||
|
|
||||||
|
ParsedData parsedData = new ParsedData();
|
||||||
|
parsedData.setCategory(category);
|
||||||
|
parsedData.setMarketplace(Marketplace.OZON);
|
||||||
|
parsedData.setUrl(productUrl);
|
||||||
|
parsedData.setImageUrl(productImageUrl);
|
||||||
|
parsedData.setPrice(productPrice);
|
||||||
|
parsedData.setBrand(productBrand);
|
||||||
|
parsedData.setProductName(productName);
|
||||||
|
products.add(parsedData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
}*/
|
||||||
|
return products;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Elements getSearchResultsDivs() {
|
||||||
|
try {
|
||||||
|
return document.select(SEARCH_RESULTS_CSS_SELECTOR);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Не удалось достать блоки searchResultsV2");
|
||||||
|
return new Elements();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Elements getProductsDivs(Element searchResultsDiv) {
|
||||||
|
return searchResultsDiv.select("> div > div");
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Elements> getAllProductDataDivs(Elements productsDivs) {
|
||||||
|
List<Elements> allProductDataDivs = new ArrayList<>();
|
||||||
|
for (Element productDiv : productsDivs) {
|
||||||
|
Elements productDataDivs = productDiv.select("> div > *");
|
||||||
|
if (productDataDivs.select("> *").isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
removeAddInFavouriteDiv(productDataDivs);
|
||||||
|
allProductDataDivs.add(productDataDivs);
|
||||||
|
}
|
||||||
|
return allProductDataDivs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeAddInFavouriteDiv(Elements productDataDivs) {
|
||||||
|
productDataDivs.removeLast();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
|
||||||
|
Category category) {
|
||||||
|
List<ParsedData> parsedData = new ArrayList<>();
|
||||||
|
for (Elements productDataDivs : allProductDataDivs) {
|
||||||
|
try {
|
||||||
|
ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
|
||||||
|
parsedData.add(parsedDataItem);
|
||||||
|
} catch (Exception e) {
|
||||||
|
//log.error(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parsedData;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ParsedData getParsedDataItem(Elements productDataDivs,
|
||||||
|
Category category) {
|
||||||
|
removeExtraDivIfExists(productDataDivs);
|
||||||
|
return ParsedData.builder()
|
||||||
|
.category(category)
|
||||||
|
.marketplace(Marketplace.OZON)
|
||||||
|
.url(extractUrl(productDataDivs))
|
||||||
|
.imageUrl(extractImageUrl(productDataDivs))
|
||||||
|
.brand(extractBrand(productDataDivs))
|
||||||
|
.productName(extractProductName(productDataDivs))
|
||||||
|
.price(extractPrice(productDataDivs))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeExtraDivIfExists(Elements productDataDivs) {
|
||||||
|
Element productDataDiv = productDataDivs.last();
|
||||||
|
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||||
|
try {
|
||||||
|
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||||
|
.select("span").text().toLowerCase()
|
||||||
|
.contains("осталось")) {
|
||||||
|
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractUrl(Elements productDataDivs) {
|
||||||
|
Element productUrlA = productDataDivs.first();
|
||||||
|
return OZON_MAIN_LINK + productUrlA
|
||||||
|
.attr("href").replaceAll("\\?.*$", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractImageUrl(Elements productDataDivs) {
|
||||||
|
Element productImageUrlA = productDataDivs.first();
|
||||||
|
return productImageUrlA.select("> div > div")
|
||||||
|
.first().getElementsByTag("img")
|
||||||
|
.first().attr("src");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractBrand(Elements productDataDivs) {
|
||||||
|
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||||
|
//log.info(productDataInnerDivs.html());
|
||||||
|
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
|
||||||
|
.select("> span");
|
||||||
|
String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||||
|
if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
|
||||||
|
return "БРЕНД_НЕ_УКАЗАН";
|
||||||
|
}
|
||||||
|
return brand;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractProductName(Elements productDataDivs) {
|
||||||
|
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||||
|
return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
|
||||||
|
.select("> div > span").text();
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigDecimal extractPrice(Elements productDataDivs) {
|
||||||
|
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||||
|
return parseOzonPriceToBigDecimal(
|
||||||
|
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||||
|
.first().text());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
|
||||||
|
return productDataDivs.last().select("> *");
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
|
||||||
|
String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
|
||||||
|
return new BigDecimal(cleanedString);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,110 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.time.Duration;
|
||||||
|
import java.time.temporal.ChronoUnit;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.retry.annotation.Recover;
|
||||||
|
import org.springframework.retry.annotation.Retryable;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@Profile("ozon")
|
||||||
|
public class OzonHtmlFetcher {
|
||||||
|
|
||||||
|
private final WebDriverPool webDriverPool;
|
||||||
|
|
||||||
|
private final PageScroller pageScroller;
|
||||||
|
|
||||||
|
public OzonHtmlFetcher(WebDriverPool webDriverPool,
|
||||||
|
PageScroller pageScroller) {
|
||||||
|
this.webDriverPool = webDriverPool;
|
||||||
|
this.pageScroller = pageScroller;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Retryable(maxAttempts = 10, recover = "recover")
|
||||||
|
public String fetchPageHtml(String pageUrl,
|
||||||
|
AtomicBoolean lastPageInCategory) {
|
||||||
|
var driver = webDriverPool.borrowDriver();
|
||||||
|
try {
|
||||||
|
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||||
|
driver.get(pageUrl);
|
||||||
|
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
|
||||||
|
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
||||||
|
var categoryPage = new CategoryPage(driver, wait);
|
||||||
|
var noContentPage = new NoContentPage(driver, wait);
|
||||||
|
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
|
||||||
|
checkAceesDeniedAndResolve(accessDeniedPage);
|
||||||
|
|
||||||
|
pageScroller.scrollToEndOfPage(driver);
|
||||||
|
return driver.getPageSource();
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
throw new RuntimeException(e);
|
||||||
|
} finally {
|
||||||
|
webDriverPool.returnDriver(driver);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
||||||
|
CategoryPage categoryPage,
|
||||||
|
NoContentPage noContentPage,
|
||||||
|
AtomicBoolean stopFlag) {
|
||||||
|
log.debug("Проверка что страница 'Доступ ограничен'");
|
||||||
|
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
log.debug("Проверка что страница 'Страница категории'");
|
||||||
|
if (checkCategoryPage(categoryPage)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (checkNoContentPage(noContentPage)) {
|
||||||
|
stopFlag.set(true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
log.debug("Проверка загрузки страницы неудачна");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkCategoryPage(CategoryPage categoryPage) {
|
||||||
|
return categoryPage.isLoaded();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkAceesDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
|
||||||
|
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||||
|
log.info("Доступ ограничен, пробуем решить проблему");
|
||||||
|
resolveAccessDeniedPage(accessDeniedPage);
|
||||||
|
log.info("Проблема успешно решена");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkNoContentPage(NoContentPage noContentPage) {
|
||||||
|
if (noContentPage.isLoaded()) {
|
||||||
|
log.info("Страница не найдена");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||||
|
return accessDeniedPage.isLoaded();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||||
|
accessDeniedPage.clickReloadButton();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Recover
|
||||||
|
private void recover(Exception e) {
|
||||||
|
log.error("Все ретраи провалились");
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class OzonPageParser {
|
||||||
|
|
||||||
|
public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
|
||||||
|
Category category) {
|
||||||
|
OzonCategoryPage categoryPage = new OzonCategoryPage(pageSource);
|
||||||
|
return categoryPage.getProducts(category);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,114 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.Semaphore;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.slf4j.MDC;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
import ru.pricepulse.parsingservice.service.ProductService;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@Profile("ozon")
|
||||||
|
public class OzonParsingService {
|
||||||
|
|
||||||
|
private final Map<String, Set<String>> urlCache;
|
||||||
|
|
||||||
|
private final ExecutorService pageExecutorService;
|
||||||
|
|
||||||
|
private final Semaphore semaphore;
|
||||||
|
|
||||||
|
private final OzonHtmlFetcher categoryPageParsingService;
|
||||||
|
|
||||||
|
private final OzonConfigProperties ozonConfigProperties;
|
||||||
|
|
||||||
|
private final OzonPageParser ozonPageParser;
|
||||||
|
|
||||||
|
private final ProductService productService;
|
||||||
|
|
||||||
|
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
|
||||||
|
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
|
||||||
|
ProductService productService) {
|
||||||
|
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
|
||||||
|
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
|
||||||
|
this.urlCache = new ConcurrentHashMap<>();
|
||||||
|
for (OzonCategory category : OzonCategory.values()) {
|
||||||
|
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
|
||||||
|
}
|
||||||
|
|
||||||
|
this.categoryPageParsingService = categoryPageParsingService;
|
||||||
|
this.ozonConfigProperties = ozonConfigProperties;
|
||||||
|
this.ozonPageParser = ozonPageParser;
|
||||||
|
this.productService = productService;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void startProcessing() {
|
||||||
|
for (OzonCategory category : OzonCategory.values()) {
|
||||||
|
log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", category);
|
||||||
|
processCategory(category);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processCategory(OzonCategory category) {
|
||||||
|
int pageIndex = 1;
|
||||||
|
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
|
||||||
|
while (!lastPageInCategory.get()) {
|
||||||
|
try {
|
||||||
|
semaphore.acquire();
|
||||||
|
|
||||||
|
int finalPageIndex = pageIndex;
|
||||||
|
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
|
||||||
|
|
||||||
|
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
|
||||||
|
|
||||||
|
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (lastPageInCategory.get()) {
|
||||||
|
log.info("Достигли последней страницы категории");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processCategoryPage(String pageUrl,
|
||||||
|
OzonCategory category,
|
||||||
|
AtomicBoolean lastPageInCategory) {
|
||||||
|
try {
|
||||||
|
MDC.put("pageUrl", pageUrl);
|
||||||
|
String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
|
||||||
|
List<ParsedData> parsedProducts =
|
||||||
|
ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
|
||||||
|
log.info("""
|
||||||
|
|
||||||
|
КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
|
||||||
|
КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
|
||||||
|
|
||||||
|
""", parsedProducts.size());
|
||||||
|
if (urlCache.size() > 1000000) {
|
||||||
|
urlCache.clear();
|
||||||
|
}
|
||||||
|
Set<String> categoryCachecUrl = urlCache.get(category.getCategoryUrl());
|
||||||
|
List<ParsedData> uniqueData = parsedProducts.stream()
|
||||||
|
.filter(data -> categoryCachecUrl.add(data.getUrl()))
|
||||||
|
.toList();
|
||||||
|
productService.saveBatch(uniqueData);
|
||||||
|
} finally {
|
||||||
|
MDC.clear();
|
||||||
|
semaphore.release();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,56 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.JavascriptExecutor;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class PageScroller {
|
||||||
|
|
||||||
|
private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
|
||||||
|
|
||||||
|
private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";
|
||||||
|
|
||||||
|
public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
|
||||||
|
JavascriptExecutor js = (JavascriptExecutor) driver;
|
||||||
|
AtomicLong lastHeight = new AtomicLong((long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT));
|
||||||
|
int attemptsLimit = 100;
|
||||||
|
log.info("Начинаем пролистывать страницу до конца");
|
||||||
|
while (true) {
|
||||||
|
js.executeScript(SCROLL_TO_PAGE_HEIGHT);
|
||||||
|
|
||||||
|
long newHeight = (long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT);
|
||||||
|
|
||||||
|
try {
|
||||||
|
var nextPageButtons = driver.findElements(By.cssSelector("div[data-widget='megaPaginator'] > div")).get(1)
|
||||||
|
.findElement(By.cssSelector(":scope > div > div > div"))
|
||||||
|
.findElements(By.tagName("a"));
|
||||||
|
|
||||||
|
if (nextPageButtons != null && newHeight > lastHeight.get()) {
|
||||||
|
log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
|
||||||
|
if (newHeight > lastHeight.get()) {
|
||||||
|
attemptsLimit = 100;
|
||||||
|
lastHeight.set(newHeight);
|
||||||
|
} else {
|
||||||
|
attemptsLimit--;
|
||||||
|
Thread.sleep(1000);
|
||||||
|
if (attemptsLimit == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Profile("ozon")
|
||||||
|
public class OzonProductUpdater {
|
||||||
|
|
||||||
|
private final OzonParsingService ozonParsingService;
|
||||||
|
|
||||||
|
@Scheduled(fixedRate = 7200000)
|
||||||
|
public void updateOzonProducts() {
|
||||||
|
ozonParsingService.startProcessing();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,4 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.task;
|
||||||
|
|
||||||
|
/**
 * Empty class with no members.
 * NOTE(review): presumably a placeholder for a future parsing task abstraction - confirm or remove.
 */
public class OzonParsingTask {
|
||||||
|
}
|
@ -0,0 +1,64 @@
|
|||||||
|
package ru.pricepulse.parsingservice.persistence.entity;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.EmbeddedId;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@Entity
|
||||||
|
@Table(name = "price_history")
|
||||||
|
@Builder
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
public class PriceHistoryEntity {
|
||||||
|
|
||||||
|
@EmbeddedId
|
||||||
|
private PriceHistoryId id;
|
||||||
|
|
||||||
|
@Column(name = "price", nullable = false, precision = 10, scale = 2)
|
||||||
|
private BigDecimal price;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final boolean equals(Object o) {
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||||
|
Class<?> thisEffectiveClass =
|
||||||
|
this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||||
|
if (thisEffectiveClass != oEffectiveClass) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
PriceHistoryEntity that = (PriceHistoryEntity) o;
|
||||||
|
return getId() != null && Objects.equals(getId(), that.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int hashCode() {
|
||||||
|
return Objects.hash(id);
|
||||||
|
}
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
if (id.getDate() == null) {
|
||||||
|
id.setDate(ZonedDateTime.now());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,51 @@
|
|||||||
|
package ru.pricepulse.parsingservice.persistence.entity;
|
||||||
|
|
||||||
|
import java.io.Serializable;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Embeddable;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
@Embeddable
|
||||||
|
public class PriceHistoryId implements Serializable {
|
||||||
|
|
||||||
|
@Column(name = "product_url", nullable = false, unique = true)
|
||||||
|
private String productUrl;
|
||||||
|
|
||||||
|
@Column(name = "date", nullable = false)
|
||||||
|
private ZonedDateTime date;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final boolean equals(Object o) {
|
||||||
|
if (this == o) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
if (o == null) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||||
|
Class<?> thisEffectiveClass =
|
||||||
|
this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||||
|
if (thisEffectiveClass != oEffectiveClass) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
PriceHistoryId that = (PriceHistoryId) o;
|
||||||
|
return getDate() != null && Objects.equals(getDate(), that.getDate());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int hashCode() {
|
||||||
|
return Objects.hash(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,81 @@
|
|||||||
|
package ru.pricepulse.parsingservice.persistence.entity;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@Entity
|
||||||
|
@Table(name = "product")
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class ProductEntity {
|
||||||
|
@Id
|
||||||
|
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||||
|
@Column(name = "id", nullable = false)
|
||||||
|
private Long id;
|
||||||
|
|
||||||
|
@Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
private Marketplace marketplace;
|
||||||
|
|
||||||
|
@Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
|
||||||
|
@Enumerated(EnumType.STRING)
|
||||||
|
private Category category;
|
||||||
|
|
||||||
|
@Column(name = "brand", nullable = false, length = Integer.MAX_VALUE)
|
||||||
|
private String brand;
|
||||||
|
|
||||||
|
@Column(name = "product_name", nullable = false, length = Integer.MAX_VALUE)
|
||||||
|
private String productName;
|
||||||
|
|
||||||
|
@Column(name = "created_at", nullable = false)
|
||||||
|
private LocalDateTime createdAt;
|
||||||
|
|
||||||
|
@Column(name = "url", nullable = false, unique = true)
|
||||||
|
private String url;
|
||||||
|
|
||||||
|
@Column(name = "image-url", nullable = false)
|
||||||
|
private String imageUrl;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final boolean equals(Object o) {
|
||||||
|
if (this == o) return true;
|
||||||
|
if (o == null) return false;
|
||||||
|
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||||
|
Class<?> thisEffectiveClass = this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||||
|
if (thisEffectiveClass != oEffectiveClass) return false;
|
||||||
|
ProductEntity that = (ProductEntity) o;
|
||||||
|
return getId() != null && Objects.equals(getId(), that.getId());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public final int hashCode() {
|
||||||
|
return this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass().hashCode() : getClass().hashCode();
|
||||||
|
}
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
createdAt = LocalDateTime.now();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,16 @@
|
|||||||
|
package ru.pricepulse.parsingservice.persistence.repository;
|
||||||
|
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
|
|
||||||
|
/**
 * Spring Data repository for {@code PriceHistoryEntity} rows keyed by {@code PriceHistoryId}.
 *
 * <p>The single query method is derived from its name: it filters by the id's product
 * URL, keeps rows whose id date lies after {@code from} and before {@code to}, and
 * orders them by id date ascending.
 */
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
||||||
|
|
||||||
|
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
|
||||||
|
ZonedDateTime from,
|
||||||
|
ZonedDateTime to);
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
package ru.pricepulse.parsingservice.persistence.repository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.springframework.data.domain.Page;
|
||||||
|
import org.springframework.data.domain.Pageable;
|
||||||
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.data.jpa.repository.Query;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
|
||||||
|
/**
 * Spring Data repository for {@code ProductEntity}.
 *
 * <p>Offers lookups by a list of URLs (whole entities via {@code findAllByUrlIn}, only
 * the stored URLs via {@code findSavedUrl}), by a single URL, and a paged lookup by
 * marketplace and category.
 */
@Repository
|
||||||
|
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||||
|
|
||||||
|
List<ProductEntity> findAllByUrlIn(List<String> urls);
|
||||||
|
|
||||||
|
@Query("""
|
||||||
|
select p.url from ProductEntity p where p.url in :urls
|
||||||
|
""")
|
||||||
|
List<String> findSavedUrl(List<String> urls);
|
||||||
|
|
||||||
|
Optional<ProductEntity> findByUrl(String url);
|
||||||
|
|
||||||
|
Page<ProductEntity> findAllByMarketplaceAndCategory(Marketplace marketplace, Category category, Pageable pageable);
|
||||||
|
}
|
@ -0,0 +1,105 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service;
|
||||||
|
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import jakarta.persistence.EntityNotFoundException;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.data.domain.Pageable;
|
||||||
|
import org.springframework.retry.annotation.Retryable;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
|
||||||
|
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ProductService {
|
||||||
|
|
||||||
|
private final ProductRepository productRepository;
|
||||||
|
|
||||||
|
private final ProductPriceRepository productPriceRepository;
|
||||||
|
|
||||||
|
private final ProductMapper productMapper;
|
||||||
|
|
||||||
|
private final PriceHistoryMapper priceHistoryMapper;
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
@Retryable
|
||||||
|
public void saveBatch(List<ParsedData> parsedData) {
|
||||||
|
List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
|
||||||
|
List<String> alreadySavedUrls = productRepository.findSavedUrl(productsUrls);
|
||||||
|
List<ProductEntity> products = parsedData.stream()
|
||||||
|
.filter(data -> !alreadySavedUrls.contains(data.getUrl()))
|
||||||
|
.map(this::getProduct)
|
||||||
|
.toList();
|
||||||
|
List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();
|
||||||
|
productRepository.saveAll(products);
|
||||||
|
log.info("Сохранили пачку товаров {}", products.size());
|
||||||
|
productPriceRepository.saveAll(prices);
|
||||||
|
log.info("Сохранили историю цен {}", prices.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(readOnly = true)
|
||||||
|
public ProductDto findByUrl(String productUrl) {
|
||||||
|
var product = productRepository.findByUrl(productUrl).orElseThrow(EntityNotFoundException::new);
|
||||||
|
return productMapper.toProductDto(product);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(readOnly = true)
|
||||||
|
public PriceHistoryDto findPriceHistoryByRange(String productUrl,
|
||||||
|
ZonedDateTime from,
|
||||||
|
ZonedDateTime to) {
|
||||||
|
var priceHistory = productPriceRepository
|
||||||
|
.findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
|
||||||
|
return priceHistoryMapper.toPriceHistoryDto(priceHistory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(readOnly = true)
|
||||||
|
public ProductsPageDto findAllProductsByPage(Marketplace marketplace,
|
||||||
|
Category category,
|
||||||
|
Pageable pageable) {
|
||||||
|
var page = productRepository.findAllByMarketplaceAndCategory(marketplace, category, pageable);
|
||||||
|
return new ProductsPageDto(
|
||||||
|
page.getNumberOfElements(),
|
||||||
|
page.getTotalPages(),
|
||||||
|
page.getNumber(),
|
||||||
|
page.getContent().stream().map(productMapper::toProductDto).toList()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
||||||
|
var priceHistoryId = new PriceHistoryId();
|
||||||
|
priceHistoryId.setProductUrl(product.getUrl());
|
||||||
|
priceHistoryId.setDate(ZonedDateTime.now());
|
||||||
|
var priceHistory = new PriceHistoryEntity();
|
||||||
|
priceHistory.setId(priceHistoryId);
|
||||||
|
priceHistory.setPrice(product.getPrice());
|
||||||
|
return priceHistory;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ProductEntity getProduct(ParsedData product) {
|
||||||
|
var productEntity = new ProductEntity();
|
||||||
|
productEntity.setCategory(product.getCategory());
|
||||||
|
productEntity.setBrand(product.getBrand());
|
||||||
|
productEntity.setProductName(product.getProductName());
|
||||||
|
productEntity.setUrl(product.getUrl());
|
||||||
|
productEntity.setMarketplace(product.getMarketplace());
|
||||||
|
productEntity.setImageUrl(product.getImageUrl());
|
||||||
|
return productEntity;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.dto;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
public class PriceHistoryDto {
|
||||||
|
|
||||||
|
private final Map<ZonedDateTime, BigDecimal> priceHistory;
|
||||||
|
|
||||||
|
public PriceHistoryDto() {
|
||||||
|
this.priceHistory = new HashMap<>();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.dto;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
|
/**
 * Read-only view of a product: id, marketplace, category, brand, product name,
 * product URL and image URL.
 *
 * <p>All fields are final and are set through the generated required-args constructor.
 */
@Getter
|
||||||
|
@Setter
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ProductDto {
|
||||||
|
|
||||||
|
private final Long id;
|
||||||
|
|
||||||
|
private final Marketplace marketplace;
|
||||||
|
|
||||||
|
private final Category category;
|
||||||
|
|
||||||
|
private final String brand;
|
||||||
|
|
||||||
|
private final String productName;
|
||||||
|
|
||||||
|
private final String url;
|
||||||
|
|
||||||
|
private final String imageUrl;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.dto;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class ProductsPageDto {
|
||||||
|
|
||||||
|
private final int totalItems;
|
||||||
|
|
||||||
|
private final int totalPages;
|
||||||
|
|
||||||
|
private final int currentPage;
|
||||||
|
|
||||||
|
private final List<ProductDto> products;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.mapper;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class PriceHistoryMapper {
|
||||||
|
|
||||||
|
public PriceHistoryDto toPriceHistoryDto (List<PriceHistoryEntity> priceHistory) {
|
||||||
|
var priceHistoryDto = new PriceHistoryDto();
|
||||||
|
priceHistory.forEach(item ->
|
||||||
|
priceHistoryDto.getPriceHistory().put(item.getId().getDate().withNano(0), item.getPrice()));
|
||||||
|
return priceHistoryDto;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.mapper;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ProductMapper {
|
||||||
|
|
||||||
|
public ProductDto toProductDto(ProductEntity product) {
|
||||||
|
return new ProductDto(
|
||||||
|
product.getId(),
|
||||||
|
product.getMarketplace(),
|
||||||
|
product.getCategory(),
|
||||||
|
product.getBrand(),
|
||||||
|
product.getProductName(),
|
||||||
|
product.getUrl(),
|
||||||
|
product.getImageUrl()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,58 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.scheduler;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
|
import jakarta.annotation.PostConstruct;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@Profile("postgres_stat")
|
||||||
|
public class PartitionScheduler {
|
||||||
|
|
||||||
|
private final PartitionService partitionService;
|
||||||
|
private final DateTimeFormatter partitionDateTimeFormatter;
|
||||||
|
|
||||||
|
@PostConstruct
|
||||||
|
public void init() {
|
||||||
|
checkAndCreateMonthlyPartitions();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Scheduled(cron = "@monthly")
|
||||||
|
public void checkAndCreatePartitionsMonthly() {
|
||||||
|
checkAndCreateMonthlyPartitions();
|
||||||
|
}
|
||||||
|
|
||||||
|
public void checkAndCreateMonthlyPartitions() {
|
||||||
|
LocalDate currentMonth = LocalDate.now().withDayOfMonth(1);
|
||||||
|
LocalDate nextMonth = currentMonth.plusMonths(1);
|
||||||
|
|
||||||
|
String currentMonthPartition = getPartitionName(currentMonth);
|
||||||
|
String nextMonthPartition = getPartitionName(nextMonth);
|
||||||
|
|
||||||
|
checkAndCreatePartition(currentMonthPartition, currentMonth);
|
||||||
|
checkAndCreatePartition(nextMonthPartition, nextMonth);
|
||||||
|
}
|
||||||
|
|
||||||
|
private String getPartitionName(LocalDate date) {
|
||||||
|
return "price_history_" + partitionDateTimeFormatter.format(date);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void checkAndCreatePartition(String partitionName, LocalDate startDate) {
|
||||||
|
if (!partitionService.checkPartitionExists(partitionName)) {
|
||||||
|
LocalDate endDate = startDate.plusMonths(1);
|
||||||
|
partitionService.createPartition(partitionName, startDate.toString(), endDate.toString());
|
||||||
|
log.info("Партиция {} создана для диапазона: {} - {} ", partitionName, startDate, endDate);
|
||||||
|
} else {
|
||||||
|
log.info("Партиция {} уже существует.", partitionName);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,60 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.handler;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
|
||||||
|
import jakarta.persistence.EntityNotFoundException;
|
||||||
|
import jakarta.servlet.http.HttpServletRequest;
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.ControllerAdvice;
|
||||||
|
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||||
|
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||||
|
|
||||||
|
@ControllerAdvice
|
||||||
|
public class CommonExceptionHandler {
|
||||||
|
|
||||||
|
@ExceptionHandler(IllegalArgumentException.class)
|
||||||
|
@ResponseStatus(HttpStatus.BAD_REQUEST)
|
||||||
|
public ResponseEntity<ErrorResponse> exceptionHandler(Exception ex,
|
||||||
|
HttpServletRequest request) {
|
||||||
|
return handleException(HttpStatus.BAD_REQUEST, request, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ExceptionHandler(EntityNotFoundException.class)
|
||||||
|
@ResponseStatus(HttpStatus.NOT_FOUND)
|
||||||
|
public ResponseEntity<ErrorResponse> handleNotFoundException(Exception ex,
|
||||||
|
HttpServletRequest request) {
|
||||||
|
return handleException(HttpStatus.NOT_FOUND, request, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ExceptionHandler(Exception.class)
|
||||||
|
@ResponseStatus(HttpStatus.INTERNAL_SERVER_ERROR)
|
||||||
|
public ResponseEntity<ErrorResponse> handleInternalServerErrorException(Exception ex,
|
||||||
|
HttpServletRequest request) {
|
||||||
|
return handleException(HttpStatus.INTERNAL_SERVER_ERROR, request, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*@ExceptionHandler(AccessDeniedException.class)
|
||||||
|
@ResponseStatus(HttpStatus.FORBIDDEN)
|
||||||
|
public ResponseEntity<ErrorResponse> handleForbiddenException(Exception ex,
|
||||||
|
HttpServletRequest request) {
|
||||||
|
return handleException(HttpStatus.FORBIDDEN, request, ex);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ExceptionHandler(AuthenticationException.class)
|
||||||
|
@ResponseStatus(HttpStatus.UNAUTHORIZED)
|
||||||
|
public ResponseEntity<ErrorResponse> handleUnauthorizedException(Exception ex,
|
||||||
|
HttpServletRequest request) {
|
||||||
|
return handleException(HttpStatus.UNAUTHORIZED, request, ex);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
private ResponseEntity<ErrorResponse> handleException(HttpStatus status, HttpServletRequest request, Exception ex) {
|
||||||
|
var errorResponse = new ErrorResponse(
|
||||||
|
status.value(),
|
||||||
|
status,
|
||||||
|
URI.create(request.getRequestURI()),
|
||||||
|
ex.getMessage()
|
||||||
|
);
|
||||||
|
return ResponseEntity.status(status).body(errorResponse);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,13 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.handler;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
|
||||||
|
public record ErrorResponse (
|
||||||
|
Integer statusCode,
|
||||||
|
HttpStatus status,
|
||||||
|
URI requestURI,
|
||||||
|
String message
|
||||||
|
){
|
||||||
|
}
|
@ -0,0 +1,27 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.rest;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/categories")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class CategoryApi {
|
||||||
|
|
||||||
|
private final OzonService ozonService;
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public ResponseEntity<?> getCategories(Marketplace marketplace) {
|
||||||
|
if (Marketplace.OZON.equals(marketplace)) {
|
||||||
|
return ResponseEntity.ok(ozonService.getCategories());
|
||||||
|
}
|
||||||
|
return ResponseEntity.ok(Category.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.rest;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/marketplaces")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class MarketplaceApi {
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public ResponseEntity<Marketplace[]> getMarketplace() {
|
||||||
|
return ResponseEntity.ok(Marketplace.values());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,52 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.rest;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.ZoneOffset;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.data.domain.Pageable;
|
||||||
|
import org.springframework.format.annotation.DateTimeFormat;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestParam;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.service.ProductService;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/products")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ProductApi {
|
||||||
|
|
||||||
|
private final ProductService productService;
|
||||||
|
|
||||||
|
@GetMapping("/info")
|
||||||
|
public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
|
||||||
|
return ResponseEntity.ok(productService.findByUrl(productUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/price-history")
|
||||||
|
public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(@RequestParam String productUrl,
|
||||||
|
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
|
||||||
|
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
|
||||||
|
String zoneOffset) {
|
||||||
|
ZoneOffset zone = ZoneOffset.of(zoneOffset);
|
||||||
|
ZonedDateTime fromDateTime = from.atStartOfDay(zone);
|
||||||
|
ZonedDateTime toDateTime = to.atStartOfDay(zone);
|
||||||
|
return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Marketplace marketplace,
|
||||||
|
Category category,
|
||||||
|
Pageable pageable) {
|
||||||
|
return ResponseEntity.ok(productService.findAllProductsByPage(marketplace, category, pageable));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,26 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.converter;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
|
import org.springframework.core.convert.converter.Converter;
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||||
|
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {
|
||||||
|
@Override
|
||||||
|
public ProductEntity convert(ProductInfoDto source) {
|
||||||
|
return ProductEntity.builder()
|
||||||
|
.marketplace(Marketplace.WILDBERRIES)
|
||||||
|
.category(Category.LAPTOP)
|
||||||
|
.brand(source.getBrand())
|
||||||
|
.productName(source.getName())
|
||||||
|
.createdAt(LocalDateTime.now())
|
||||||
|
.imageUrl("")
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,115 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.proxy;
|
||||||
|
|
||||||
|
import java.io.BufferedWriter;
|
||||||
|
import java.io.FileWriter;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.HttpURLConnection;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.net.Proxy;
|
||||||
|
import java.net.URL;
|
||||||
|
import java.nio.file.Files;
|
||||||
|
import java.nio.file.Path;
|
||||||
|
import java.nio.file.Paths;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.Future;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
/**
 * Utility for reading, checking and saving lists of HTTP proxies written as
 * {@code host:port}, one per line.
 *
 * <p>A proxy counts as working when a GET request to {@code http://www.google.com}
 * routed through it returns HTTP 200 within the timeout.
 */
public class ProxyChecker {
    /** Connect and read timeout for a single proxy check, in milliseconds. */
    private static final int TIMEOUT = 2000;
    /** Upper bound on the number of proxies checked concurrently. */
    private static final int THREAD_COUNT = 30;

    /**
     * Reads all lines of the given file.
     *
     * @param filePath path of the file to read
     * @return the file's lines, or an empty mutable list if the file cannot be read
     */
    public static List<String> readProxiesFromFile(String filePath) {
        try {
            return Files.readAllLines(Paths.get(filePath));
        } catch (IOException e) {
            System.err.println("Ошибка при чтении файла: " + e.getMessage());
            return new ArrayList<>();
        }
    }

    /**
     * Checks the given proxies concurrently and returns those that work.
     *
     * @param proxies proxy addresses in {@code host:port} form
     * @return working proxies, in the same relative order as the input
     */
    public static List<String> checkProxies(List<String> proxies) {
        List<String> workingProxies = new ArrayList<>();
        if (proxies.isEmpty()) {
            return workingProxies;
        }

        ExecutorService executor = Executors.newFixedThreadPool(Math.min(THREAD_COUNT, proxies.size()));
        try {
            List<Future<String>> futures = new ArrayList<>(proxies.size());
            for (String proxyAddress : proxies) {
                futures.add(executor.submit(() -> isProxyWorking(proxyAddress) ? proxyAddress : null));
            }

            for (Future<String> future : futures) {
                try {
                    String proxy = future.get();
                    if (proxy != null) {
                        workingProxies.add(proxy);
                    }
                } catch (InterruptedException e) {
                    // Restore the interrupt flag and stop waiting for the remaining checks.
                    Thread.currentThread().interrupt();
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                    break;
                } catch (Exception e) {
                    System.err.println("Ошибка при получении результата проверки прокси: " + e.getMessage());
                }
            }
            return workingProxies;
        } finally {
            // Always release the pool threads, including when waiting was interrupted.
            executor.shutdownNow();
        }
    }

    /**
     * Performs one check of a single proxy.
     *
     * @param proxyAddress proxy in {@code host:port} form; surrounding whitespace is ignored
     * @return {@code true} only if the test request through the proxy returned HTTP 200
     */
    private static boolean isProxyWorking(String proxyAddress) {
        String[] parts = proxyAddress.strip().split(":");
        if (parts.length != 2) {
            System.err.println("Некорректный формат прокси: " + proxyAddress);
            return false;
        }

        String ip = parts[0];
        int port;

        try {
            port = Integer.parseInt(parts[1]);
        } catch (NumberFormatException e) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }
        // InetSocketAddress rejects ports outside 0..65535 with an unchecked exception.
        if (port < 0 || port > 65535) {
            System.err.println("Некорректный порт у прокси: " + proxyAddress);
            return false;
        }

        HttpURLConnection connection = null;
        try {
            Proxy proxy = new Proxy(Proxy.Type.HTTP, new InetSocketAddress(ip, port));
            URL url = new URL("http://www.google.com");
            connection = (HttpURLConnection) url.openConnection(proxy);
            connection.setConnectTimeout(TIMEOUT);
            connection.setReadTimeout(TIMEOUT);
            connection.setRequestMethod("GET");
            connection.connect();

            int responseCode = connection.getResponseCode();
            // Only a plain 200 counts as success.
            if (responseCode == HttpURLConnection.HTTP_OK) {
                System.out.println("Прокси работает (код ответа " + responseCode + "): " + proxyAddress);
                return true;
            } else {
                System.out.println("Прокси не отвечает (код ответа " + responseCode + "): " + proxyAddress);
                return false;
            }
        } catch (IOException e) {
            System.out.println("Прокси не отвечает: " + proxyAddress);
            return false;
        } finally {
            if (connection != null) {
                connection.disconnect();
            }
        }
    }

    /**
     * Writes the given proxies to a file, one per line. I/O errors are reported on
     * standard error and otherwise ignored.
     *
     * @param proxies  proxy addresses to write
     * @param filePath destination file
     */
    public static void saveProxiesToFile(List<String> proxies, Path filePath) {
        try (BufferedWriter writer = Files.newBufferedWriter(filePath)) {
            for (String proxy : proxies) {
                writer.write(proxy);
                writer.newLine();
            }
        } catch (IOException e) {
            System.err.println("Ошибка при записи в файл: " + e.getMessage());
        }
    }
}
|
@ -0,0 +1,24 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.scheduler;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@ConditionalOnProperty(prefix = "marketplace.wildberries", name = "status", havingValue = "true")
|
||||||
|
public class WildberriesProductUpdater {
|
||||||
|
|
||||||
|
private final ParsingService parsingService;
|
||||||
|
|
||||||
|
@Scheduled(fixedRate = 3600000)
|
||||||
|
public void updateWildberriesProducts() {
|
||||||
|
log.info("Начинаем отладку...");
|
||||||
|
parsingService.parse();
|
||||||
|
log.info("Заканчиваем отладку...");
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,79 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import org.springframework.core.convert.ConversionService;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||||
|
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||||
|
|
||||||
|
@Service("wildberriesParsingService")
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class ParsingService {
|
||||||
|
private final Client client;
|
||||||
|
private final ObjectMapper objectMapper;
|
||||||
|
private final ConversionService conversionService;
|
||||||
|
private final MarketplacesConfig marketplacesConfig;
|
||||||
|
private final ProductService productService;
|
||||||
|
|
||||||
|
public void parse() {
|
||||||
|
|
||||||
|
final int elementsInPage = 100;
|
||||||
|
int page = 1;
|
||||||
|
Integer totalPages = null;
|
||||||
|
|
||||||
|
do {
|
||||||
|
var pageData = client.scrapPage(page, marketplacesConfig.getWildberriesConfigProperties().getShard(), marketplacesConfig.getWildberriesConfigProperties().getLaptopUrl());
|
||||||
|
System.out.println("Получена страница: " + page);
|
||||||
|
if (totalPages == null) {
|
||||||
|
Map<String, Object> dataMap = (Map<String, Object>) pageData.get("data");
|
||||||
|
int totalElements = (int) dataMap.get("total");
|
||||||
|
totalPages = (int) Math.ceil((double) totalElements / elementsInPage);
|
||||||
|
}
|
||||||
|
|
||||||
|
List<ProductEntity> productEntities = new ArrayList<>();
|
||||||
|
List<PriceHistoryEntity> priceHistories = new ArrayList<>();
|
||||||
|
List<ProductInfoDto> productInfoDtoList = convertMapObjectToListProductInfoDto(pageData);
|
||||||
|
|
||||||
|
productInfoDtoList.forEach(dto -> {
|
||||||
|
|
||||||
|
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
||||||
|
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
|
||||||
|
|
||||||
|
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||||
|
.id(new PriceHistoryId(productEntity.getUrl(), ZonedDateTime.now()))
|
||||||
|
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
productEntities.add(productEntity);
|
||||||
|
priceHistories.add(priceHistory);
|
||||||
|
});
|
||||||
|
productService.saveData(productEntities, priceHistories);
|
||||||
|
page++;
|
||||||
|
} while (page <= totalPages);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
|
||||||
|
Map<String, ArrayList<Object>> dataMap = (Map<String, ArrayList<Object>>) map.get("data");
|
||||||
|
return getProductInfoDtos(dataMap);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ProductInfoDto> getProductInfoDtos(Map<String, ArrayList<Object>> dataMap) {
|
||||||
|
return objectMapper.convertValue(
|
||||||
|
dataMap.get("products"),
|
||||||
|
new TypeReference<>() {
|
||||||
|
}
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,57 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||||
|
|
||||||
|
@Service("wildberriesProductService")
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class ProductService {
|
||||||
|
private final ProductRepository productRepository;
|
||||||
|
private final ProductPriceRepository productPriceRepository;
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public void saveData(List<ProductEntity> productEntities, List<PriceHistoryEntity> priceHistoryEntities) {
|
||||||
|
// Получаем URL продуктов
|
||||||
|
List<String> urls = productEntities.stream()
|
||||||
|
.map(ProductEntity::getUrl)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// Находим уже существующие URL в базе данных
|
||||||
|
List<String> existingUrls = productRepository.findAllByUrlIn(urls).stream()
|
||||||
|
.map(ProductEntity::getUrl)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
// Фильтруем уникальные продукты, которых еще нет в базе
|
||||||
|
List<ProductEntity> uniqueProducts = productEntities.stream()
|
||||||
|
.filter(product -> !existingUrls.contains(product.getUrl()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// Сохраняем только новые продукты
|
||||||
|
productRepository.saveAll(uniqueProducts);
|
||||||
|
|
||||||
|
// Создаем мапу для быстрого доступа к продуктам по URL
|
||||||
|
Map<String, ProductEntity> productMap = productRepository.findAllByUrlIn(urls).stream()
|
||||||
|
.collect(Collectors.toMap(ProductEntity::getUrl, product -> product));
|
||||||
|
|
||||||
|
// Фильтруем и обновляем идентификаторы для истории цен
|
||||||
|
List<PriceHistoryEntity> updatedPriceHistories = priceHistoryEntities.stream()
|
||||||
|
.peek(priceHistory -> {
|
||||||
|
ProductEntity product = productMap.get(priceHistory.getId().getProductUrl());
|
||||||
|
priceHistory.getId().setProductUrl(product.getUrl());
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// Сохраняем историю цен
|
||||||
|
productPriceRepository.saveAll(updatedPriceHistories);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,7 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
/**
 * Client for fetching catalog pages from Wildberries.
 */
public interface Client {

    /**
     * Fetches one catalog page.
     *
     * @param page  number of the page to fetch
     * @param shard catalog shard segment of the request URL
     * @param query path segment appended after the shard
     * @return the response body as an untyped map
     */
    Map<String, Object> scrapPage(int page, String shard, String query);
}
|
@ -0,0 +1,42 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.core.ParameterizedTypeReference;
|
||||||
|
import org.springframework.http.HttpEntity;
|
||||||
|
import org.springframework.http.HttpMethod;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.retry.annotation.Retryable;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.web.client.RestTemplate;
|
||||||
|
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||||
|
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
|
@AllArgsConstructor
|
||||||
|
@Service
|
||||||
|
@Slf4j
|
||||||
|
public class ClientImpl implements Client {
|
||||||
|
|
||||||
|
private final RestTemplate restTemplate;
|
||||||
|
private final MarketplacesConfig marketplacesConfig;
|
||||||
|
|
||||||
|
@Override
|
||||||
|
@Retryable(maxAttempts = 50, value = RuntimeException.class)
|
||||||
|
public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||||
|
String url = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl() +
|
||||||
|
shard +
|
||||||
|
query +
|
||||||
|
"?dest=-1257786&page=" + page + "&subject=2290";
|
||||||
|
|
||||||
|
ResponseEntity<Map<String, Object>> response = restTemplate.exchange(
|
||||||
|
url,
|
||||||
|
HttpMethod.GET,
|
||||||
|
HttpEntity.EMPTY,
|
||||||
|
new ParameterizedTypeReference<>() {
|
||||||
|
}
|
||||||
|
);
|
||||||
|
|
||||||
|
return response.getBody();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.service.dto;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Data;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
|
||||||
|
@Data
|
||||||
|
@AllArgsConstructor
|
||||||
|
@NoArgsConstructor
|
||||||
|
@Builder
|
||||||
|
public class ProductInfoDto {
|
||||||
|
private Long id;
|
||||||
|
private String brand;
|
||||||
|
private String name;
|
||||||
|
private String supplier;
|
||||||
|
private Double supplierRating;
|
||||||
|
private Integer salePriceU;
|
||||||
|
private Integer reviewRating;
|
||||||
|
}
|
47
parsing-service/src/main/resources/application.yml
Normal file
47
parsing-service/src/main/resources/application.yml
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
server:
|
||||||
|
port: ${SERVER_PORT}
|
||||||
|
spring:
|
||||||
|
application:
|
||||||
|
name: parsing-service
|
||||||
|
jpa:
|
||||||
|
hibernate:
|
||||||
|
ddl-auto: validate
|
||||||
|
database: postgresql
|
||||||
|
datasource:
|
||||||
|
driver-class-name: org.postgresql.Driver
|
||||||
|
url: jdbc:postgresql://${POSTGRES_JDBC_URL}
|
||||||
|
username: ${POSTGRES_JDBC_USERNAME}
|
||||||
|
password: ${POSTGRES_JDBC_PASSWORD}
|
||||||
|
clickhouse:
|
||||||
|
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
|
||||||
|
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
|
||||||
|
username: ${CLICKHOUSE_JDBC_USERNAME}
|
||||||
|
password: ${CLICKHOUSE_JDBC_PASSWORD}
|
||||||
|
liquibase:
|
||||||
|
change-log: classpath:/db/changelog/master.yml
|
||||||
|
|
||||||
|
marketplace:
|
||||||
|
ozon:
|
||||||
|
max-threads: ${OZON_MAX_PROCESSING_THREADS:5}
|
||||||
|
max-num-of-pages-on-screen: ${OZON_MAX_NUM_OF_PAGES_ON_SCREEN:100}
|
||||||
|
wildberries:
|
||||||
|
status: true
|
||||||
|
base-url: "https://static-basket-01.wbbasket.ru"
|
||||||
|
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||||
|
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
||||||
|
catalog-wb-url: "https://catalog.wb.ru/catalog/"
|
||||||
|
retry-attempts: 5
|
||||||
|
retry-delay: 1000
|
||||||
|
shard: "electronic15"
|
||||||
|
laptop-url: "/catalog"
|
||||||
|
|
||||||
|
logging:
|
||||||
|
pattern:
|
||||||
|
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
||||||
|
# level:
|
||||||
|
# sql: debug
|
||||||
|
# level:
|
||||||
|
# org:
|
||||||
|
# springframework:
|
||||||
|
# boot:
|
||||||
|
# autoconfigure: DEBUG
|
@ -0,0 +1,29 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<databaseChangeLog
|
||||||
|
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
|
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||||
|
<createTable tableName="product">
|
||||||
|
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор товара">
|
||||||
|
<constraints primaryKey="true" />
|
||||||
|
</column>
|
||||||
|
<column name="marketplace" type="varchar" remarks="Название маркетплейса (enum)">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
<column name="category" type="varchar" remarks="Категория товара">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
<column name="brand" type="varchar" remarks="Бренд товара">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
<column name="product_name" type="varchar" remarks="Название товара">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
<column name="created_at" type="timestamptz" remarks="Время добавления товара в базу">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
</createTable>
|
||||||
|
</changeSet>
|
||||||
|
</databaseChangeLog>
|
@ -0,0 +1,30 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<databaseChangeLog
|
||||||
|
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
|
<changeSet id="20240926_create_price_history_table.xml" author="Emelyanov535">
|
||||||
|
<createTable tableName="price_history">
|
||||||
|
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор">
|
||||||
|
<constraints primaryKey="true" />
|
||||||
|
</column>
|
||||||
|
<column name="product_id" type="bigint" remarks="ID товара">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
<column name="price" type="numeric(10,2)" remarks="Цена товара">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
<column name="date" type="timestamptz" remarks="Дата сохранения">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
</createTable>
|
||||||
|
|
||||||
|
<addForeignKeyConstraint baseTableName="price_history"
|
||||||
|
baseColumnNames="product_id"
|
||||||
|
constraintName="fk_product_price_history"
|
||||||
|
referencedTableName="product"
|
||||||
|
referencedColumnNames="id"
|
||||||
|
onDelete="CASCADE"/>
|
||||||
|
</changeSet>
|
||||||
|
</databaseChangeLog>
|
@ -0,0 +1,7 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- include:
|
||||||
|
file: 20240926_001_create_product_table.xml
|
||||||
|
relativeToChangelogFile: true
|
||||||
|
- include:
|
||||||
|
file: 20240926_002_create_price_history_table.xml
|
||||||
|
relativeToChangelogFile: true
|
@ -0,0 +1,28 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<databaseChangeLog
|
||||||
|
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
|
<!-- Adds the url and image-url columns to product, then drops price_history and recreates it via raw SQL as a table partitioned by range on date with primary key (product_url, date). -->
<!-- NOTE(review): this id repeats "20240926_create_product_table.xml" from the changeset that creates the product table and looks copy-pasted. Confirm before renaming: the id is part of how Liquibase identifies an already-applied changeset. -->
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||||
|
<!-- NOTE(review): url is NOT NULL with no default value; presumably the product table is empty when this runs, otherwise the ALTER would be rejected. Confirm. -->
<addColumn tableName="product">
|
||||||
|
<column name="url" type="varchar" remarks="Ссылка на товар">
|
||||||
|
<constraints nullable="false" unique="true" />
|
||||||
|
</column>
|
||||||
|
</addColumn>
|
||||||
|
<addColumn tableName="product">
|
||||||
|
<!-- NOTE(review): "image-url" contains a hyphen, unlike product_name and created_at, so it has to be quoted in hand-written SQL. Confirm the name is intended. It is also NOT NULL with no default. -->
<column name="image-url" type="varchar" remarks="Ссылка на изображение товара">
|
||||||
|
<constraints nullable="false" />
|
||||||
|
</column>
|
||||||
|
</addColumn>
|
||||||
|
<!-- Drops the existing price_history table together with its rows and dependent constraints. The replacement created below has no id or product_id column and no foreign key to product; rows are keyed by product_url and date instead. -->
<dropTable tableName="price_history" cascadeConstraints="true" />
|
||||||
|
<!-- Raw SQL is used for the PARTITION BY RANGE clause. NOTE(review): no partitions are created in this changeset; presumably they are created elsewhere. Confirm. -->
<sql>
|
||||||
|
CREATE TABLE if not exists price_history(
|
||||||
|
product_url varchar NOT NULL,
|
||||||
|
price numeric(10, 2) NOT NULL,
|
||||||
|
date timestamptz NOT NULL,
|
||||||
|
PRIMARY KEY (product_url, date)
|
||||||
|
) PARTITION BY RANGE (date);
|
||||||
|
</sql>
|
||||||
|
</changeSet>
|
||||||
|
</databaseChangeLog>
|
@ -0,0 +1,4 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- include:
|
||||||
|
file: 20241006_001_add_columns_in_tables.xml
|
||||||
|
relativeToChangelogFile: true
|
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<databaseChangeLog
|
||||||
|
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
|
<!-- Adds a unique constraint on product.url; no constraintName is given. -->
<!-- NOTE(review): the url column is already declared with unique="true" in the changeset that adds it, so this may create a second, redundant unique constraint. Confirm intent. -->
<changeSet id="20241014_add_constraint_on_product_url.xml" author="Emelyanov535">
|
||||||
|
<addUniqueConstraint tableName="product" columnNames="url"/>
|
||||||
|
</changeSet>
|
||||||
|
</databaseChangeLog>
|
@ -0,0 +1,4 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- include:
|
||||||
|
file: 20241014_add_constraint_on_product_url.xml
|
||||||
|
relativeToChangelogFile: true
|
10
parsing-service/src/main/resources/db/changelog/master.yml
Normal file
10
parsing-service/src/main/resources/db/changelog/master.yml
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- include:
|
||||||
|
file: 20240926/master.yml
|
||||||
|
relativeToChangelogFile: true
|
||||||
|
- include:
|
||||||
|
file: 20241006/master.yml
|
||||||
|
relativeToChangelogFile: true
|
||||||
|
- include:
|
||||||
|
file: 20241014/master.yml
|
||||||
|
relativeToChangelogFile: true
|
135
parsing-service/src/main/resources/ok-proxy.txt
Normal file
135
parsing-service/src/main/resources/ok-proxy.txt
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
97.74.87.226:80
|
||||||
|
54.248.238.110:80
|
||||||
|
3.141.217.225:80
|
||||||
|
127.0.0.7:80
|
||||||
|
49.12.235.70:8081
|
||||||
|
13.38.176.104:3128
|
||||||
|
46.51.249.135:3128
|
||||||
|
162.223.90.130:80
|
||||||
|
133.186.144.112:8080
|
||||||
|
51.210.54.186:80
|
||||||
|
101.108.123.39:8080
|
||||||
|
3.130.65.162:3128
|
||||||
|
80.249.112.162:80
|
||||||
|
3.126.147.182:3128
|
||||||
|
110.164.191.211:80
|
||||||
|
13.208.56.180:80
|
||||||
|
31.207.38.66:80
|
||||||
|
116.203.27.109:80
|
||||||
|
13.36.104.85:80
|
||||||
|
18.228.198.164:3128
|
||||||
|
3.123.150.192:3128
|
||||||
|
8.219.97.248:80
|
||||||
|
149.102.233.167:8081
|
||||||
|
202.162.105.202:80
|
||||||
|
165.22.77.86:80
|
||||||
|
154.205.128.153:8888
|
||||||
|
154.65.39.8:80
|
||||||
|
3.124.133.93:3128
|
||||||
|
31.40.248.2:8080
|
||||||
|
188.253.112.218:80
|
||||||
|
3.71.239.218:80
|
||||||
|
159.223.92.147:8888
|
||||||
|
3.78.92.159:3128
|
||||||
|
54.92.168.145:8080
|
||||||
|
50.62.183.223:80
|
||||||
|
123.30.154.171:7777
|
||||||
|
43.200.77.128:3128
|
||||||
|
35.76.62.196:80
|
||||||
|
204.57.112.5:80
|
||||||
|
15.235.153.57:8089
|
||||||
|
54.152.3.36:80
|
||||||
|
47.74.152.29:8888
|
||||||
|
0.0.0.0:80
|
||||||
|
13.59.156.167:80
|
||||||
|
3.127.62.252:80
|
||||||
|
35.79.120.242:3128
|
||||||
|
3.212.148.199:80
|
||||||
|
3.122.84.99:3128
|
||||||
|
45.92.177.60:8080
|
||||||
|
23.95.216.78:34561
|
||||||
|
82.180.146.116:3128
|
||||||
|
52.67.10.183:80
|
||||||
|
172.191.74.198:8080
|
||||||
|
13.37.59.99:3128
|
||||||
|
148.66.6.213:80
|
||||||
|
18.134.236.231:80
|
||||||
|
3.130.65.162:80
|
||||||
|
103.153.154.6:80
|
||||||
|
109.236.83.153:8888
|
||||||
|
78.32.2.82:8080
|
||||||
|
3.9.71.167:1080
|
||||||
|
35.72.118.126:80
|
||||||
|
46.47.197.210:3128
|
||||||
|
13.37.73.214:80
|
||||||
|
13.37.89.201:80
|
||||||
|
110.12.211.140:80
|
||||||
|
154.90.55.37:80
|
||||||
|
152.89.246.197:8080
|
||||||
|
3.37.125.76:3128
|
||||||
|
44.218.183.55:80
|
||||||
|
18.135.133.116:3128
|
||||||
|
52.196.1.182:80
|
||||||
|
94.72.152.254:80
|
||||||
|
3.123.150.192:80
|
||||||
|
196.11.183.160:8080
|
||||||
|
18.133.16.21:80
|
||||||
|
3.12.144.146:80
|
||||||
|
49.13.173.87:80
|
||||||
|
13.56.192.187:80
|
||||||
|
161.35.49.68:80
|
||||||
|
13.37.59.99:80
|
||||||
|
3.122.84.99:80
|
||||||
|
158.140.139.11:58100
|
||||||
|
148.66.6.210:80
|
||||||
|
153.19.91.77:80
|
||||||
|
189.22.234.41:80
|
||||||
|
52.67.10.183:3128
|
||||||
|
41.59.90.171:80
|
||||||
|
43.132.219.102:80
|
||||||
|
13.40.46.249:1088
|
||||||
|
16.163.149.249:80
|
||||||
|
3.71.239.218:3128
|
||||||
|
13.36.113.81:3128
|
||||||
|
60.242.169.3:80
|
||||||
|
49.13.173.87:8081
|
||||||
|
35.176.148.8:1080
|
||||||
|
18.135.133.116:80
|
||||||
|
13.37.89.201:3128
|
||||||
|
3.127.121.101:80
|
||||||
|
35.178.104.4:80
|
||||||
|
182.72.203.246:80
|
||||||
|
13.40.239.130:1080
|
||||||
|
65.108.207.6:80
|
||||||
|
18.223.25.15:80
|
||||||
|
54.233.119.172:3128
|
||||||
|
66.97.37.164:80
|
||||||
|
3.78.92.159:80
|
||||||
|
110.168.213.172:8080
|
||||||
|
49.12.235.70:80
|
||||||
|
94.156.250.169:20128
|
||||||
|
15.236.106.236:3128
|
||||||
|
13.38.153.36:80
|
||||||
|
178.128.199.145:80
|
||||||
|
156.67.217.159:80
|
||||||
|
148.66.6.211:80
|
||||||
|
13.36.87.105:3128
|
||||||
|
3.126.147.182:80
|
||||||
|
51.222.155.142:80
|
||||||
|
141.145.214.176:80
|
||||||
|
184.169.154.119:80
|
||||||
|
5.255.113.61:80
|
||||||
|
3.124.133.93:80
|
||||||
|
3.127.121.101:3128
|
||||||
|
148.66.6.212:80
|
||||||
|
176.9.239.181:80
|
||||||
|
63.35.64.177:3128
|
||||||
|
18.169.83.87:1080
|
||||||
|
148.66.6.214:80
|
||||||
|
18.228.149.161:80
|
||||||
|
18.228.198.164:80
|
||||||
|
106.105.118.250:80
|
||||||
|
103.174.102.127:80
|
||||||
|
162.0.238.147:80
|
||||||
|
103.127.1.130:80
|
||||||
|
185.233.187.103:80
|
135
parsing-service/src/main/resources/proxy.txt
Normal file
135
parsing-service/src/main/resources/proxy.txt
Normal file
@ -0,0 +1,135 @@
|
|||||||
|
97.74.87.226:80
|
||||||
|
54.248.238.110:80
|
||||||
|
3.141.217.225:80
|
||||||
|
127.0.0.7:80
|
||||||
|
49.12.235.70:8081
|
||||||
|
13.38.176.104:3128
|
||||||
|
46.51.249.135:3128
|
||||||
|
162.223.90.130:80
|
||||||
|
133.186.144.112:8080
|
||||||
|
51.210.54.186:80
|
||||||
|
101.108.123.39:8080
|
||||||
|
3.130.65.162:3128
|
||||||
|
80.249.112.162:80
|
||||||
|
3.126.147.182:3128
|
||||||
|
110.164.191.211:80
|
||||||
|
13.208.56.180:80
|
||||||
|
31.207.38.66:80
|
||||||
|
116.203.27.109:80
|
||||||
|
13.36.104.85:80
|
||||||
|
18.228.198.164:3128
|
||||||
|
3.123.150.192:3128
|
||||||
|
8.219.97.248:80
|
||||||
|
149.102.233.167:8081
|
||||||
|
202.162.105.202:80
|
||||||
|
165.22.77.86:80
|
||||||
|
154.205.128.153:8888
|
||||||
|
154.65.39.8:80
|
||||||
|
3.124.133.93:3128
|
||||||
|
31.40.248.2:8080
|
||||||
|
188.253.112.218:80
|
||||||
|
3.71.239.218:80
|
||||||
|
159.223.92.147:8888
|
||||||
|
3.78.92.159:3128
|
||||||
|
54.92.168.145:8080
|
||||||
|
50.62.183.223:80
|
||||||
|
123.30.154.171:7777
|
||||||
|
43.200.77.128:3128
|
||||||
|
35.76.62.196:80
|
||||||
|
204.57.112.5:80
|
||||||
|
15.235.153.57:8089
|
||||||
|
54.152.3.36:80
|
||||||
|
47.74.152.29:8888
|
||||||
|
0.0.0.0:80
|
||||||
|
13.59.156.167:80
|
||||||
|
3.127.62.252:80
|
||||||
|
35.79.120.242:3128
|
||||||
|
3.212.148.199:80
|
||||||
|
3.122.84.99:3128
|
||||||
|
45.92.177.60:8080
|
||||||
|
23.95.216.78:34561
|
||||||
|
82.180.146.116:3128
|
||||||
|
52.67.10.183:80
|
||||||
|
172.191.74.198:8080
|
||||||
|
13.37.59.99:3128
|
||||||
|
148.66.6.213:80
|
||||||
|
18.134.236.231:80
|
||||||
|
3.130.65.162:80
|
||||||
|
103.153.154.6:80
|
||||||
|
109.236.83.153:8888
|
||||||
|
78.32.2.82:8080
|
||||||
|
3.9.71.167:1080
|
||||||
|
35.72.118.126:80
|
||||||
|
46.47.197.210:3128
|
||||||
|
13.37.73.214:80
|
||||||
|
13.37.89.201:80
|
||||||
|
110.12.211.140:80
|
||||||
|
154.90.55.37:80
|
||||||
|
152.89.246.197:8080
|
||||||
|
3.37.125.76:3128
|
||||||
|
44.218.183.55:80
|
||||||
|
18.135.133.116:3128
|
||||||
|
52.196.1.182:80
|
||||||
|
94.72.152.254:80
|
||||||
|
3.123.150.192:80
|
||||||
|
196.11.183.160:8080
|
||||||
|
18.133.16.21:80
|
||||||
|
3.12.144.146:80
|
||||||
|
49.13.173.87:80
|
||||||
|
13.56.192.187:80
|
||||||
|
161.35.49.68:80
|
||||||
|
13.37.59.99:80
|
||||||
|
3.122.84.99:80
|
||||||
|
158.140.139.11:58100
|
||||||
|
148.66.6.210:80
|
||||||
|
153.19.91.77:80
|
||||||
|
189.22.234.41:80
|
||||||
|
52.67.10.183:3128
|
||||||
|
41.59.90.171:80
|
||||||
|
43.132.219.102:80
|
||||||
|
13.40.46.249:1088
|
||||||
|
16.163.149.249:80
|
||||||
|
3.71.239.218:3128
|
||||||
|
13.36.113.81:3128
|
||||||
|
60.242.169.3:80
|
||||||
|
49.13.173.87:8081
|
||||||
|
35.176.148.8:1080
|
||||||
|
18.135.133.116:80
|
||||||
|
13.37.89.201:3128
|
||||||
|
3.127.121.101:80
|
||||||
|
35.178.104.4:80
|
||||||
|
182.72.203.246:80
|
||||||
|
13.40.239.130:1080
|
||||||
|
65.108.207.6:80
|
||||||
|
18.223.25.15:80
|
||||||
|
54.233.119.172:3128
|
||||||
|
66.97.37.164:80
|
||||||
|
3.78.92.159:80
|
||||||
|
110.168.213.172:8080
|
||||||
|
49.12.235.70:80
|
||||||
|
94.156.250.169:20128
|
||||||
|
15.236.106.236:3128
|
||||||
|
13.38.153.36:80
|
||||||
|
178.128.199.145:80
|
||||||
|
156.67.217.159:80
|
||||||
|
148.66.6.211:80
|
||||||
|
13.36.87.105:3128
|
||||||
|
3.126.147.182:80
|
||||||
|
51.222.155.142:80
|
||||||
|
141.145.214.176:80
|
||||||
|
184.169.154.119:80
|
||||||
|
5.255.113.61:80
|
||||||
|
3.124.133.93:80
|
||||||
|
3.127.121.101:3128
|
||||||
|
148.66.6.212:80
|
||||||
|
176.9.239.181:80
|
||||||
|
63.35.64.177:3128
|
||||||
|
18.169.83.87:1080
|
||||||
|
148.66.6.214:80
|
||||||
|
18.228.149.161:80
|
||||||
|
18.228.198.164:80
|
||||||
|
106.105.118.250:80
|
||||||
|
103.174.102.127:80
|
||||||
|
162.0.238.147:80
|
||||||
|
103.127.1.130:80
|
||||||
|
185.233.187.103:80
|
@ -0,0 +1,11 @@
|
|||||||
|
package ru.pricepulse.parsingservice;
|
||||||
|
|
||||||
|
import org.junit.jupiter.api.Test;
|
||||||
|
|
||||||
|
/**
 * Placeholder test class for the parsing service.
 *
 * <p>NOTE(review): no Spring test annotation is visible on this class, so
 * {@code contextLoads} presumably does not start an application context
 * despite its name. Confirm against the intended test setup.
 */
class ParsingServiceApplicationTests {
|
||||||
|
|
||||||
|
/** Smoke test with an empty body; it makes no assertions. */
@Test
|
||||||
|
void contextLoads() {
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user