features: add proxy, change webClient to restTemplate, add checking
This commit is contained in:
parent
84f344084c
commit
84e0af60c9
@ -49,6 +49,9 @@ dependencies {
|
|||||||
|
|
||||||
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
|
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
|
||||||
implementation 'org.springframework.boot:spring-boot-starter-webflux'
|
implementation 'org.springframework.boot:spring-boot-starter-webflux'
|
||||||
|
|
||||||
|
implementation 'org.springframework.retry:spring-retry:2.0.9'
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
tasks.named('test') {
|
tasks.named('test') {
|
||||||
|
@ -2,8 +2,10 @@ package ru.pricepulse.parsingservice;
|
|||||||
|
|
||||||
import org.springframework.boot.SpringApplication;
|
import org.springframework.boot.SpringApplication;
|
||||||
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
import org.springframework.boot.autoconfigure.SpringBootApplication;
|
||||||
|
import org.springframework.retry.annotation.EnableRetry;
|
||||||
|
|
||||||
@SpringBootApplication
|
@SpringBootApplication
|
||||||
|
@EnableRetry
|
||||||
public class ParsingServiceApplication {
|
public class ParsingServiceApplication {
|
||||||
|
|
||||||
public static void main(String[] args) {
|
public static void main(String[] args) {
|
||||||
|
@ -1,11 +1,20 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
|
||||||
|
|
||||||
|
@Getter
|
||||||
@Configuration
|
@Configuration
|
||||||
@EnableConfigurationProperties({
|
@EnableConfigurationProperties({
|
||||||
OzonConfigProperties.class
|
OzonConfigProperties.class,
|
||||||
|
WildberriesConfigProperties.class
|
||||||
})
|
})
|
||||||
public class MarketplacesConfig {}
|
@AllArgsConstructor
|
||||||
|
public class MarketplacesConfig {
|
||||||
|
private final WildberriesConfigProperties wildberriesConfigProperties;
|
||||||
|
private final OzonConfigProperties ozonConfigProperties;
|
||||||
|
}
|
||||||
|
@ -1,15 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
|
||||||
|
|
||||||
import org.springframework.context.annotation.Bean;
|
|
||||||
import org.springframework.context.annotation.Configuration;
|
|
||||||
import org.springframework.web.client.RestTemplate;
|
|
||||||
|
|
||||||
@Configuration
|
|
||||||
public class RestTemplateConfig {
|
|
||||||
|
|
||||||
@Bean
|
|
||||||
public RestTemplate restTemplate() {
|
|
||||||
return new RestTemplate();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,15 +1,14 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
package ru.pricepulse.parsingservice.config.properties;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
import org.springframework.context.annotation.Configuration;
|
|
||||||
|
|
||||||
@Configuration
|
|
||||||
@ConfigurationProperties(prefix = "marketplace.wildberries")
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
public class WbProperties {
|
@ConfigurationProperties(prefix = "marketplace.wildberries")
|
||||||
|
public class WildberriesConfigProperties {
|
||||||
private String baseUrl;
|
private String baseUrl;
|
||||||
private String catalogUrl;
|
private String catalogUrl;
|
||||||
private String userAgent;
|
private String userAgent;
|
@ -3,6 +3,7 @@ package ru.pricepulse.parsingservice.ozon_parser.pool;
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.springframework.beans.factory.ObjectFactory;
|
import org.springframework.beans.factory.ObjectFactory;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
@ -10,6 +11,7 @@ import java.util.concurrent.ConcurrentLinkedQueue;
|
|||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Component
|
||||||
|
@ConditionalOnProperty(prefix = "marketplace.ozon", name = "status", havingValue = "true")
|
||||||
public class WebDriverPool {
|
public class WebDriverPool {
|
||||||
|
|
||||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
||||||
|
@ -3,6 +3,7 @@ package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsin
|
|||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
import org.slf4j.MDC;
|
import org.slf4j.MDC;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
import org.springframework.retry.annotation.Recover;
|
import org.springframework.retry.annotation.Recover;
|
||||||
import org.springframework.retry.annotation.Retryable;
|
import org.springframework.retry.annotation.Retryable;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
@ -20,6 +21,7 @@ import java.util.concurrent.TimeUnit;
|
|||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
|
@ConditionalOnProperty(prefix = "marketplace.ozon", name = "status", havingValue = "true")
|
||||||
public class CategoryPageParsingService {
|
public class CategoryPageParsingService {
|
||||||
|
|
||||||
private final ExecutorService productPageExecutor = Executors.newFixedThreadPool(3);
|
private final ExecutorService productPageExecutor = Executors.newFixedThreadPool(3);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
|
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
||||||
|
|
||||||
@ -10,7 +11,8 @@ import java.util.concurrent.Executors;
|
|||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service("ozonParsingService")
|
||||||
|
@ConditionalOnProperty(prefix = "marketplace.ozon", name = "status", havingValue = "true")
|
||||||
public class ParsingService implements MarketplaceParsingService {
|
public class ParsingService implements MarketplaceParsingService {
|
||||||
|
|
||||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||||
|
@ -1,6 +1,7 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
@ -8,6 +9,7 @@ import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing
|
|||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@ConditionalOnProperty(prefix = "marketplace.ozon", name = "status", havingValue = "true")
|
||||||
public class OzonProductUpdater {
|
public class OzonProductUpdater {
|
||||||
|
|
||||||
private final OzonConfigProperties properties;
|
private final OzonConfigProperties properties;
|
||||||
|
@ -39,12 +39,15 @@ public class ProductEntity {
|
|||||||
@Column(name = "created_at", nullable = false)
|
@Column(name = "created_at", nullable = false)
|
||||||
private LocalDateTime createdAt;
|
private LocalDateTime createdAt;
|
||||||
|
|
||||||
@Column(name = "url", nullable = false)
|
@Column(name = "url", nullable = false, unique = true)
|
||||||
private String url;
|
private String url;
|
||||||
|
|
||||||
@Column(name = "image-url", nullable = false)
|
@Column(name = "image-url", nullable = false)
|
||||||
private String imageUrl;
|
private String imageUrl;
|
||||||
|
|
||||||
|
@Column(name = "article", nullable = false)
|
||||||
|
private String article;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final boolean equals(Object o) {
|
public final boolean equals(Object o) {
|
||||||
if (this == o) return true;
|
if (this == o) return true;
|
||||||
|
@ -3,5 +3,10 @@ package ru.pricepulse.parsingservice.persistence.repository;
|
|||||||
import org.springframework.data.jpa.repository.JpaRepository;
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||||
|
ProductEntity findByUrl(String url);
|
||||||
|
List<ProductEntity> findAllByUrlIn(List<String> urls);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,19 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser;
|
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
|
||||||
import org.springframework.boot.CommandLineRunner;
|
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
|
||||||
|
|
||||||
@Component
|
|
||||||
@AllArgsConstructor
|
|
||||||
public class DebugRunner implements CommandLineRunner {
|
|
||||||
private final ParsingService parsingService;
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void run(String... args){
|
|
||||||
System.out.println("Начинаем отладку...");
|
|
||||||
parsingService.parse();
|
|
||||||
System.out.println("Заканчиваем отладку...");
|
|
||||||
}
|
|
||||||
}
|
|
@ -0,0 +1,41 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.http.HttpRequest;
|
||||||
|
import org.springframework.http.client.ClientHttpRequestExecution;
|
||||||
|
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||||
|
import org.springframework.http.client.ClientHttpResponse;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
|
||||||
|
|
||||||
|
private final UserAgentProvider userAgentProvider;
|
||||||
|
private final ProxyProvider proxyProvider;
|
||||||
|
|
||||||
|
public DynamicProxyInterceptor(UserAgentProvider userAgentProvider, ProxyProvider proxyProvider) {
|
||||||
|
this.userAgentProvider = userAgentProvider;
|
||||||
|
this.proxyProvider = proxyProvider;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public ClientHttpResponse intercept(HttpRequest request, byte[] body, ClientHttpRequestExecution execution) throws IOException {
|
||||||
|
// Получаем случайный прокси
|
||||||
|
InetSocketAddress proxyAddress = proxyProvider.getRandomProxy();
|
||||||
|
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||||
|
|
||||||
|
// Устанавливаем прокси
|
||||||
|
System.setProperty("http.proxyHost", proxyAddress.getHostName());
|
||||||
|
System.setProperty("http.proxyPort", String.valueOf(proxyAddress.getPort()));
|
||||||
|
|
||||||
|
// Устанавливаем динамический user-agent
|
||||||
|
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||||
|
log.info("Используемый User-Agent: {}", randomUserAgent);
|
||||||
|
request.getHeaders().set("User-Agent", randomUserAgent);
|
||||||
|
|
||||||
|
// Выполняем запрос
|
||||||
|
return execution.execute(request, body);
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,21 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ProxyProvider {
|
||||||
|
private static final List<String> proxies = List.of(
|
||||||
|
"85.215.64.49:80",
|
||||||
|
"82.115.19.142:80",
|
||||||
|
"148.113.172.51:8080"
|
||||||
|
);
|
||||||
|
|
||||||
|
public InetSocketAddress getRandomProxy() {
|
||||||
|
String[] proxy = proxies.get(new Random().nextInt(proxies.size())).split(":");
|
||||||
|
return new InetSocketAddress(proxy[0], Integer.parseInt(proxy[1]));
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,29 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import org.springframework.context.annotation.Bean;
|
||||||
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||||
|
import org.springframework.web.client.RestTemplate;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
|
||||||
|
@Configuration
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class RestTemplateConfig {
|
||||||
|
|
||||||
|
private final UserAgentProvider userAgentProvider;
|
||||||
|
private final ProxyProvider proxyProvider;
|
||||||
|
|
||||||
|
@Bean
|
||||||
|
public RestTemplate restTemplate() {
|
||||||
|
RestTemplate restTemplate = new RestTemplate();
|
||||||
|
ClientHttpRequestInterceptor dynamicProxyInterceptor = new DynamicProxyInterceptor(userAgentProvider, proxyProvider);
|
||||||
|
|
||||||
|
// Добавляем интерсептор в RestTemplate
|
||||||
|
restTemplate.setInterceptors(Collections.singletonList(dynamicProxyInterceptor));
|
||||||
|
|
||||||
|
return restTemplate;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Random;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class UserAgentProvider {
|
||||||
|
private static final List<String> userAgents = List.of(
|
||||||
|
"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)",
|
||||||
|
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/93.0.4577.63 Safari/537.36",
|
||||||
|
"Mozilla/5.0 (iPhone; CPU iPhone OS 15_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.0 Mobile/15A372 Safari/604.1"
|
||||||
|
);
|
||||||
|
|
||||||
|
public String getRandomUserAgent() {
|
||||||
|
return userAgents.get(new Random().nextInt(userAgents.size()));
|
||||||
|
}
|
||||||
|
}
|
@ -1,18 +1,58 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
import org.springframework.http.HttpHeaders;
|
||||||
|
import org.springframework.http.client.reactive.ReactorClientHttpConnector;
|
||||||
import org.springframework.web.reactive.function.client.WebClient;
|
import org.springframework.web.reactive.function.client.WebClient;
|
||||||
|
import reactor.netty.http.client.HttpClient;
|
||||||
|
import reactor.netty.transport.ProxyProvider;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
@Configuration
|
@Configuration
|
||||||
|
@AllArgsConstructor
|
||||||
public class WebClientConfig {
|
public class WebClientConfig {
|
||||||
|
private final UserAgentProvider userAgentProvider;
|
||||||
|
private final ru.pricepulse.parsingservice.wildberries_parser.configuration.ProxyProvider proxyProvider;
|
||||||
|
|
||||||
|
|
||||||
@Bean
|
@Bean
|
||||||
public WebClient webClient() {
|
public WebClient webClient() {
|
||||||
return WebClient.builder()
|
return WebClient.builder()
|
||||||
.defaultHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)")
|
.filter((request, next) -> {
|
||||||
|
// Получаем случайный прокси для каждого запроса
|
||||||
|
InetSocketAddress proxyAddress = proxyProvider.getRandomProxy();
|
||||||
|
log.info("Используемый прокси: {}:{}", proxyAddress.getHostName(), proxyAddress.getPort());
|
||||||
|
|
||||||
|
HttpClient httpClient = HttpClient.create()
|
||||||
|
.proxy(proxy -> proxy
|
||||||
|
.type(ProxyProvider.Proxy.HTTP)
|
||||||
|
.address(proxyAddress));
|
||||||
|
|
||||||
|
String randomUserAgent = userAgentProvider.getRandomUserAgent();
|
||||||
|
log.info("Используемый User-Agent: {}", randomUserAgent);
|
||||||
|
|
||||||
|
// Создаем новый WebClient с прокси
|
||||||
|
WebClient webClientWithProxy = WebClient.builder()
|
||||||
|
.clientConnector(new ReactorClientHttpConnector(httpClient))
|
||||||
|
.build();
|
||||||
|
|
||||||
|
// Выполняем запрос с обновленным User-Agent через WebClient с прокси
|
||||||
|
return webClientWithProxy
|
||||||
|
.method(request.method())
|
||||||
|
.uri(request.url())
|
||||||
|
.headers(headers -> headers.putAll(request.headers()))
|
||||||
|
.header(HttpHeaders.USER_AGENT, randomUserAgent)
|
||||||
|
.body(request.body()).exchange();
|
||||||
|
})
|
||||||
.codecs(configurer -> configurer
|
.codecs(configurer -> configurer
|
||||||
.defaultCodecs()
|
.defaultCodecs()
|
||||||
.maxInMemorySize(10 * 1024 * 1024))
|
.maxInMemorySize(10 * 1024 * 1024))
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -20,6 +20,8 @@ public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, P
|
|||||||
.brand(source.getBrand())
|
.brand(source.getBrand())
|
||||||
.productName(source.getName())
|
.productName(source.getName())
|
||||||
.createdAt(LocalDateTime.now())
|
.createdAt(LocalDateTime.now())
|
||||||
|
.imageUrl("")
|
||||||
|
.article(source.getId().toString())
|
||||||
.build();
|
.build();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,23 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.scheduler;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||||
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
@ConditionalOnProperty(prefix = "marketplace.wildberries", name = "status", havingValue = "true")
|
||||||
|
public class WildberriesProductUpdater {
|
||||||
|
private final ParsingService parsingService;
|
||||||
|
|
||||||
|
@Scheduled(fixedRate = 3600000)
|
||||||
|
public void updateWildberriesProducts() {
|
||||||
|
log.info("Начинаем отладку...");
|
||||||
|
parsingService.parse();
|
||||||
|
log.info("Заканчиваем отладку...");
|
||||||
|
}
|
||||||
|
}
|
@ -2,16 +2,13 @@ package ru.pricepulse.parsingservice.wildberries_parser.service;
|
|||||||
|
|
||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import jakarta.transaction.Transactional;
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import org.springframework.core.convert.ConversionService;
|
import org.springframework.core.convert.ConversionService;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
|
||||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.configuration.WbProperties;
|
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||||
|
|
||||||
@ -21,17 +18,15 @@ import java.util.ArrayList;
|
|||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@Service
|
@Service("wildberriesParsingService")
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class ParsingService {
|
public class ParsingService {
|
||||||
private final Client client;
|
private final Client client;
|
||||||
private final ObjectMapper objectMapper;
|
private final ObjectMapper objectMapper;
|
||||||
private final ConversionService conversionService;
|
private final ConversionService conversionService;
|
||||||
private final ProductRepository productRepository;
|
private final MarketplacesConfig marketplacesConfig;
|
||||||
private final WbProperties wbProperties;
|
private final ProductService productService;
|
||||||
private final ProductPriceRepository productPriceRepository;
|
|
||||||
|
|
||||||
@Transactional
|
|
||||||
public void parse() {
|
public void parse() {
|
||||||
List<ProductEntity> productEntities = new ArrayList<>();
|
List<ProductEntity> productEntities = new ArrayList<>();
|
||||||
List<PriceHistoryEntity> priceHistories = new ArrayList<>();
|
List<PriceHistoryEntity> priceHistories = new ArrayList<>();
|
||||||
@ -41,8 +36,8 @@ public class ParsingService {
|
|||||||
Integer totalPages = null;
|
Integer totalPages = null;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
var pageData = client.scrapPage(page, wbProperties.getShard(), wbProperties.getLaptopUrl());
|
var pageData = client.scrapPage(page, marketplacesConfig.getWildberriesConfigProperties().getShard(), marketplacesConfig.getWildberriesConfigProperties().getLaptopUrl());
|
||||||
|
System.out.println("Получена страница: " + page);
|
||||||
if (totalPages == null) {
|
if (totalPages == null) {
|
||||||
Map<String, Object> dataMap = (Map<String, Object>) pageData.get("data");
|
Map<String, Object> dataMap = (Map<String, Object>) pageData.get("data");
|
||||||
int totalElements = (int) dataMap.get("total");
|
int totalElements = (int) dataMap.get("total");
|
||||||
@ -54,6 +49,7 @@ public class ParsingService {
|
|||||||
productInfoDtoList.forEach(dto -> {
|
productInfoDtoList.forEach(dto -> {
|
||||||
|
|
||||||
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
||||||
|
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
|
||||||
|
|
||||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||||
.id(new PriceHistoryId(productEntity, OffsetDateTime.now()))
|
.id(new PriceHistoryId(productEntity, OffsetDateTime.now()))
|
||||||
@ -66,9 +62,9 @@ public class ParsingService {
|
|||||||
|
|
||||||
page++;
|
page++;
|
||||||
} while (page <= totalPages);
|
} while (page <= totalPages);
|
||||||
|
// } while (page <= 5);
|
||||||
|
|
||||||
productRepository.saveAll(productEntities);
|
productService.saveData(productEntities, priceHistories);
|
||||||
productPriceRepository.saveAll(priceHistories);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
|
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
|
||||||
@ -79,7 +75,8 @@ public class ParsingService {
|
|||||||
private List<ProductInfoDto> getProductInfoDtos(Map<String, ArrayList<Object>> dataMap) {
|
private List<ProductInfoDto> getProductInfoDtos(Map<String, ArrayList<Object>> dataMap) {
|
||||||
return objectMapper.convertValue(
|
return objectMapper.convertValue(
|
||||||
dataMap.get("products"),
|
dataMap.get("products"),
|
||||||
new TypeReference<>() {}
|
new TypeReference<>() {
|
||||||
|
}
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,57 @@
|
|||||||
|
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class ProductService {
|
||||||
|
private final ProductRepository productRepository;
|
||||||
|
private final ProductPriceRepository productPriceRepository;
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public void saveData(List<ProductEntity> productEntities, List<PriceHistoryEntity> priceHistoryEntities) {
|
||||||
|
// Получаем URL продуктов
|
||||||
|
List<String> urls = productEntities.stream()
|
||||||
|
.map(ProductEntity::getUrl)
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// Находим уже существующие URL в базе данных
|
||||||
|
List<String> existingUrls = productRepository.findAllByUrlIn(urls).stream()
|
||||||
|
.map(ProductEntity::getUrl)
|
||||||
|
.toList();
|
||||||
|
|
||||||
|
// Фильтруем уникальные продукты, которых еще нет в базе
|
||||||
|
List<ProductEntity> uniqueProducts = productEntities.stream()
|
||||||
|
.filter(product -> !existingUrls.contains(product.getUrl()))
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// Сохраняем только новые продукты
|
||||||
|
productRepository.saveAll(uniqueProducts);
|
||||||
|
|
||||||
|
// Создаем мапу для быстрого доступа к продуктам по URL
|
||||||
|
Map<String, ProductEntity> productMap = productRepository.findAllByUrlIn(urls).stream()
|
||||||
|
.collect(Collectors.toMap(ProductEntity::getUrl, product -> product));
|
||||||
|
|
||||||
|
// Фильтруем и обновляем идентификаторы для истории цен
|
||||||
|
List<PriceHistoryEntity> updatedPriceHistories = priceHistoryEntities.stream()
|
||||||
|
.peek(priceHistory -> {
|
||||||
|
ProductEntity product = productMap.get(priceHistory.getId().getProduct().getUrl());
|
||||||
|
priceHistory.getId().setProduct(product);
|
||||||
|
})
|
||||||
|
.collect(Collectors.toList());
|
||||||
|
|
||||||
|
// Сохраняем историю цен
|
||||||
|
productPriceRepository.saveAll(updatedPriceHistories);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -1,33 +1,72 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.core.ParameterizedTypeReference;
|
import org.springframework.core.ParameterizedTypeReference;
|
||||||
|
import org.springframework.http.HttpEntity;
|
||||||
|
import org.springframework.http.HttpMethod;
|
||||||
|
import org.springframework.retry.annotation.Recover;
|
||||||
|
import org.springframework.retry.annotation.Retryable;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.web.reactive.function.client.WebClient;
|
import org.springframework.web.client.RestTemplate;
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.configuration.WbProperties;
|
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@Service
|
@Service
|
||||||
|
@Slf4j
|
||||||
public class ClientImpl implements Client {
|
public class ClientImpl implements Client {
|
||||||
|
|
||||||
private final WebClient webClient;
|
private final RestTemplate restTemplate;
|
||||||
private final WbProperties wbProperties;
|
private final MarketplacesConfig marketplacesConfig;
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@Retryable(maxAttempts = 50, value = RuntimeException.class)
|
||||||
public Map<String, Object> scrapPage(int page, String shard, String query) {
|
public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||||
String url = wbProperties.getCatalogWbUrl() +
|
String url = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl() +
|
||||||
shard +
|
shard +
|
||||||
query +
|
query +
|
||||||
"?dest=-1257786&page=" + page + "&subject=2290";
|
"?dest=-1257786&page=" + page + "&subject=2290";
|
||||||
|
|
||||||
return webClient.get()
|
return restTemplate.exchange(
|
||||||
.uri(url)
|
url,
|
||||||
.retrieve()
|
HttpMethod.GET,
|
||||||
.bodyToMono(new ParameterizedTypeReference<Map<String, Object>>() {
|
HttpEntity.EMPTY,
|
||||||
})
|
new ParameterizedTypeReference<Map<String, Object>>() {}
|
||||||
.retry(50)
|
).getBody();
|
||||||
.block();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Recover
|
||||||
|
public Map<String, Object> recover(RuntimeException e, int page, String shard, String query) {
|
||||||
|
// Логика обработки неудачи после всех попыток
|
||||||
|
log.error("Все попытки завершились неудачей: {}", e.getMessage());
|
||||||
|
// Можно вернуть пустую карту или другое значение по умолчанию
|
||||||
|
return Collections.emptyMap();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// @Override
|
||||||
|
// public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||||
|
// String url = marketplacesConfig.getWildberriesConfigProperties().getCatalogWbUrl() +
|
||||||
|
// shard +
|
||||||
|
// query +
|
||||||
|
// "?dest=-1257786&page=" + page + "&subject=2290";
|
||||||
|
//
|
||||||
|
// try {
|
||||||
|
// TimeUnit.MILLISECONDS.sleep(new Random().nextInt(1000) + 500);
|
||||||
|
// } catch (InterruptedException e) {
|
||||||
|
// Thread.currentThread().interrupt();
|
||||||
|
// }
|
||||||
|
//
|
||||||
|
// return webClient.get()
|
||||||
|
// .uri(url)
|
||||||
|
// .retrieve()
|
||||||
|
// .bodyToMono(new ParameterizedTypeReference<Map<String, Object>>() {
|
||||||
|
// })
|
||||||
|
// .retry(50)
|
||||||
|
// .block();
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
@ -19,9 +19,11 @@ selenium:
|
|||||||
|
|
||||||
marketplace:
|
marketplace:
|
||||||
ozon:
|
ozon:
|
||||||
|
status: false
|
||||||
categories-urls:
|
categories-urls:
|
||||||
- https://www.ozon.ru/category/noutbuki-15692
|
- https://www.ozon.ru/category/noutbuki-15692
|
||||||
wildberries:
|
wildberries:
|
||||||
|
status: true
|
||||||
base-url: "https://static-basket-01.wbbasket.ru"
|
base-url: "https://static-basket-01.wbbasket.ru"
|
||||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
||||||
@ -31,8 +33,12 @@ marketplace:
|
|||||||
shard: "electronic15"
|
shard: "electronic15"
|
||||||
laptop-url: "/catalog"
|
laptop-url: "/catalog"
|
||||||
|
|
||||||
|
|
||||||
logging:
|
logging:
|
||||||
pattern:
|
pattern:
|
||||||
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
||||||
|
# level:
|
||||||
|
# org:
|
||||||
|
# springframework:
|
||||||
|
# boot:
|
||||||
|
# autoconfigure: DEBUG
|
@ -5,11 +5,6 @@
|
|||||||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
<changeSet id="20240926_create_product_table.xml" author="danil">
|
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||||
<preConditions>
|
|
||||||
<not>
|
|
||||||
<tableExists tableName="product" />
|
|
||||||
</not>
|
|
||||||
</preConditions>
|
|
||||||
<createTable tableName="product">
|
<createTable tableName="product">
|
||||||
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор товара">
|
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор товара">
|
||||||
<constraints primaryKey="true" />
|
<constraints primaryKey="true" />
|
||||||
|
@ -5,11 +5,6 @@
|
|||||||
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
<changeSet id="20240926_create_price_history_table.xml" author="Emelyanov535">
|
<changeSet id="20240926_create_price_history_table.xml" author="Emelyanov535">
|
||||||
<preConditions>
|
|
||||||
<not>
|
|
||||||
<tableExists tableName="price_history" />
|
|
||||||
</not>
|
|
||||||
</preConditions>
|
|
||||||
<createTable tableName="price_history">
|
<createTable tableName="price_history">
|
||||||
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор">
|
<column name="id" type="bigint" autoIncrement="true" remarks="Идентификатор">
|
||||||
<constraints primaryKey="true" />
|
<constraints primaryKey="true" />
|
||||||
|
@ -0,0 +1,10 @@
|
|||||||
|
<?xml version="1.0" encoding="UTF-8"?>
|
||||||
|
<databaseChangeLog
|
||||||
|
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
|
||||||
|
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||||
|
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
|
||||||
|
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
|
||||||
|
<changeSet id="20241014_add_constraint_on_product_url.xml" author="Emelyanov535">
|
||||||
|
<addUniqueConstraint tableName="product" columnNames="url"/>
|
||||||
|
</changeSet>
|
||||||
|
</databaseChangeLog>
|
@ -0,0 +1,4 @@
|
|||||||
|
databaseChangeLog:
|
||||||
|
- include:
|
||||||
|
file: 20241014_add_constraint_on_product_url.xml
|
||||||
|
relativeToChangelogFile: true
|
@ -5,3 +5,6 @@ databaseChangeLog:
|
|||||||
- include:
|
- include:
|
||||||
file: 20241006/master.yml
|
file: 20241006/master.yml
|
||||||
relativeToChangelogFile: true
|
relativeToChangelogFile: true
|
||||||
|
- include:
|
||||||
|
file: 20241014/master.yml
|
||||||
|
relativeToChangelogFile: true
|
||||||
|
Loading…
Reference in New Issue
Block a user