Feature/parsing-service: save commit

This commit is contained in:
danil.markov 2024-10-15 10:10:11 +04:00
parent 9895aaff33
commit 82f648e16c
10 changed files with 71 additions and 46 deletions

View File

@ -1,25 +1,11 @@
<component name="ProjectRunConfigurationManager"> <component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot"> <configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev,headless" /> <option name="ACTIVE_PROFILES" value="dev,headless,ozon,postgres_stat" />
<option name="SCHEDULED_DEBUGGER" value="true" /> <option name="SCHEDULED_DEBUGGER" value="true" />
<envs> <envs>
<env name="JDBC_PASSWORD" value="postgres" /> <env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
<env name="JDBC_USERNAME" value="postgres" /> <env name="POSTGRES_JDBC_USERNAME" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" /> <env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev" />
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_USERNAME" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" /> <env name="SERVER_PORT" value="8080" />
</envs> </envs>
<module name="parsing-service.main" /> <module name="parsing-service.main" />

View File

@ -3,9 +3,11 @@ package ru.pricepulse.parsingservice.config;
import org.springframework.boot.context.properties.EnableConfigurationProperties; import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration; import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties; import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
@Configuration @Configuration
@EnableConfigurationProperties({ @EnableConfigurationProperties({
OzonConfigProperties.class OzonConfigProperties.class,
WildberriesConfigProperties.class
}) })
public class MarketplacesConfig {} public class MarketplacesConfig {}

View File

@ -1,15 +0,0 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.client.RestTemplate;
/**
 * Spring configuration class that contributes a {@link RestTemplate} bean
 * to the application context.
 */
@Configuration
public class RestTemplateConfig {
/**
 * Creates the {@link RestTemplate} bean.
 *
 * @return a new {@code RestTemplate} built with its no-argument constructor;
 *         no timeouts, interceptors or message converters are customised here
 */
@Bean
public RestTemplate restTemplate() {
return new RestTemplate();
}
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration properties holder bound to the {@code marketplace.wildberries}
 * prefix.
 *
 * <p>The class declares no methods of its own: accessors for every field are
 * generated by Lombok's {@code @Getter} / {@code @Setter}, and values are
 * populated through Spring Boot's {@code @ConfigurationProperties} binding.
 * No defaults are assigned here, so an unset property leaves a {@code String}
 * field {@code null} and a numeric field {@code 0}.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.wildberries")
public class WildberriesConfigProperties {
// Bound from `marketplace.wildberries.base-url` (relaxed binding).
private String baseUrl;
// Bound from `marketplace.wildberries.catalog-url`.
// NOTE(review): presumably a path resolved against baseUrl — confirm at the call site.
private String catalogUrl;
// NOTE(review): presumably the User-Agent header value sent with requests — confirm.
private String userAgent;
// NOTE(review): relationship to catalogUrl is not visible from this class — confirm.
private String catalogWbUrl;
// Number of retry attempts; semantics (total tries vs. extra retries) are
// defined by the consumer — TODO confirm.
private int retryAttempts;
// Delay between retries. NOTE(review): unit is not stated here
// (milliseconds assumed) — confirm against the consumer.
private long retryDelay;
// NOTE(review): presumably the URL of the laptop category — confirm.
private String laptopUrl;
// NOTE(review): presumably a catalog shard identifier — confirm.
private String shard;
}

View File

@ -41,7 +41,6 @@ public class WebDriverPool {
if (driver != null) { if (driver != null) {
busyDrivers.add(driver); // Добавляем драйвер в занятые busyDrivers.add(driver); // Добавляем драйвер в занятые
} }
log.info("Занимаем драйвер {}", driver);
return driver; // Возвращаем драйвер return driver; // Возвращаем драйвер
} }
@ -49,7 +48,6 @@ public class WebDriverPool {
public void returnDriver(WebDriver driver) { public void returnDriver(WebDriver driver) {
busyDrivers.remove(driver); // Убираем драйвер из занятых busyDrivers.remove(driver); // Убираем драйвер из занятых
availableDrivers.add(driver); // Возвращаем драйвер в доступные availableDrivers.add(driver); // Возвращаем драйвер в доступные
log.info("Вернули драйвер {}", driver);
} }
// Метод для закрытия всех драйверов в пуле // Метод для закрытия всех драйверов в пуле

View File

@ -9,8 +9,12 @@ import org.openqa.selenium.support.ui.WebDriverWait;
public class NoContentPage { public class NoContentPage {
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\""; private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
private final By errorText = By.xpath(ERROR_TEXT_XPATH); private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private WebDriver driver; private WebDriver driver;
@ -23,7 +27,9 @@ public class NoContentPage {
public boolean isLoaded() { public boolean isLoaded() {
try { try {
return driver.findElement(errorText) != null; return driver.findElement(searchResults) != null
|| driver.findElement(errorText) != null
|| driver.findElement(notFoundText) != null;
} catch (Exception e) { } catch (Exception e) {
return false; return false;
} }

View File

@ -38,7 +38,9 @@ public class OzonCategoryPageParsingService {
@Retryable(maxAttempts = 10, recover = "recover") @Retryable(maxAttempts = 10, recover = "recover")
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) { public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
var driver = webDriverPool.borrowDriver(); var driver = webDriverPool.borrowDriver();
if (driver == null) {
throw new RuntimeException();
}
try { try {
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS)); driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
driver.get(pageUrl); driver.get(pageUrl);
@ -57,6 +59,7 @@ public class OzonCategoryPageParsingService {
stopFlag.set(true); stopFlag.set(true);
return; return;
} }
log.info("Получаем список товаров на текущей странице: {}", pageUrl); log.info("Получаем список товаров на текущей странице: {}", pageUrl);
List<ParsedData> parsedData; List<ParsedData> parsedData;
try { try {

View File

@ -1,8 +1,13 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing; package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService; import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors; import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
@ -17,8 +22,8 @@ import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingServic
public class OzonParsingService implements MarketplaceParsingService { public class OzonParsingService implements MarketplaceParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false); private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12); private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
private final OzonCategoryPageParsingService categoryPageParsingService; private final OzonCategoryPageParsingService categoryPageParsingService;
@ -30,12 +35,30 @@ public class OzonParsingService implements MarketplaceParsingService {
int pageIndex = 1; int pageIndex = 1;
while (!stopFlag.get()) { while (!stopFlag.get()) {
try {
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
int finalPageIndex = pageIndex; int finalPageIndex = pageIndex;
String pageUrl = url + "&page=" + finalPageIndex; String pageUrl = url + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
++pageIndex; pageExecutorService.submit(() -> {
try {
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
} finally {
semaphore.release(); // Освобождаем разрешение после завершения задачи
} }
});
++pageIndex;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
if (stopFlag.get()) { if (stopFlag.get()) {
log.info("Конец парсинга категории");
pageExecutorService.shutdownNow(); pageExecutorService.shutdownNow();
} }
} }

View File

@ -5,6 +5,7 @@ import java.util.List;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional; import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData; import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@ -24,6 +25,7 @@ public class ProductService {
private final ProductPriceRepository productPriceRepository; private final ProductPriceRepository productPriceRepository;
@Transactional @Transactional
@Retryable
public void saveBatch(List<ParsedData> parsedData) { public void saveBatch(List<ParsedData> parsedData) {
var products = new ArrayList<ProductEntity>(); var products = new ArrayList<ProductEntity>();
var prices = new ArrayList<PriceHistoryEntity>(); var prices = new ArrayList<PriceHistoryEntity>();
@ -40,7 +42,7 @@ public class ProductService {
var priceHistoryEntity = getPriceHistory(product); var priceHistoryEntity = getPriceHistory(product);
prices.add(priceHistoryEntity); prices.add(priceHistoryEntity);
if (productRepository.existsByUrl(product.getUrl())) { if (productRepository.existsByUrl(product.getUrl())) {
log.debug("Запись {} уже есть", product.getUrl()); log.info("Запись {} уже есть", product.getUrl());
return; return;
} }
var productEntity = getProduct(product); var productEntity = getProduct(product);

View File

@ -23,7 +23,7 @@ spring:
marketplace: marketplace:
ozon: ozon:
categories-urls: categories-urls:
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t - https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
wildberries: wildberries:
base-url: "https://static-basket-01.wbbasket.ru" base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json" catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"