Feature/parsing-service: save commit

This commit is contained in:
danil.markov 2024-10-15 10:10:11 +04:00
parent 9895aaff33
commit 82f648e16c
10 changed files with 71 additions and 46 deletions

View File

@ -1,25 +1,11 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev,headless" />
<option name="ACTIVE_PROFILES" value="dev,headless,ozon,postgres_stat" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_USERNAME" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev" />
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_USERNAME" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" />
</envs>
<module name="parsing-service.main" />

View File

@ -3,9 +3,11 @@ package ru.pricepulse.parsingservice.config;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
@Configuration
@EnableConfigurationProperties({
OzonConfigProperties.class
OzonConfigProperties.class,
WildberriesConfigProperties.class
})
public class MarketplacesConfig {}

View File

@ -1,15 +0,0 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.client.RestTemplate;
/**
 * Spring configuration class that registers a {@link RestTemplate} bean.
 */
@Configuration
public class RestTemplateConfig {

    /**
     * Builds the {@link RestTemplate} bean using its no-argument constructor.
     *
     * @return a newly constructed {@code RestTemplate}
     */
    @Bean
    public RestTemplate restTemplate() {
        final RestTemplate template = new RestTemplate();
        return template;
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.config.properties;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Configuration properties bound from the {@code marketplace.wildberries} prefix.
 *
 * <p>Accessors are not written by hand: the Lombok {@code @Getter} and {@code @Setter}
 * annotations on the class generate them for every field below.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.wildberries")
public class WildberriesConfigProperties {
// NOTE(review): presumably the host that the relative URLs below are resolved against — confirm.
private String baseUrl;
// NOTE(review): presumably the path of the catalog/menu resource — confirm against the consumer.
private String catalogUrl;
// NOTE(review): presumably the User-Agent header value sent with requests — confirm.
private String userAgent;
// NOTE(review): how this differs from catalogUrl is not visible here — confirm.
private String catalogWbUrl;
// NOTE(review): presumably the number of attempts made before giving up — confirm.
private int retryAttempts;
// NOTE(review): time unit is not visible here (milliseconds?) — confirm against the consumer.
private long retryDelay;
// NOTE(review): presumably the URL of the laptop category — confirm.
private String laptopUrl;
// NOTE(review): meaning of "shard" is not visible here — confirm.
private String shard;
}

View File

@ -41,7 +41,6 @@ public class WebDriverPool {
if (driver != null) {
busyDrivers.add(driver); // Добавляем драйвер в занятые
}
log.info("Занимаем драйвер {}", driver);
return driver; // Возвращаем драйвер
}
@ -49,7 +48,6 @@ public class WebDriverPool {
/**
 * Hands a driver back to the pool: removes it from the busy drivers, adds it to the
 * available drivers and logs the return.
 *
 * @param driver the driver being returned
 */
public void returnDriver(WebDriver driver) {
busyDrivers.remove(driver); // Remove the driver from the busy ones
availableDrivers.add(driver); // Put the driver back among the available ones
log.info("Вернули драйвер {}", driver);
}
// Метод для закрытия всех драйверов в пуле

View File

@ -9,8 +9,12 @@ import org.openqa.selenium.support.ui.WebDriverWait;
public class NoContentPage {
// The XPath expressions are passed to By.xpath as-is, so they must NOT be wrapped in literal
// double quotes: a quoted expression is an XPath string literal, which evaluates to a string
// instead of selecting nodes, so the element lookup can never succeed.
private static final String ERROR_TEXT_XPATH = "//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]";
private static final String NOT_FOUND_TEXT_XPATH = "//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]";
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private WebDriver driver;
@ -23,7 +27,9 @@ public class NoContentPage {
public boolean isLoaded() {
try {
return driver.findElement(errorText) != null;
return driver.findElement(searchResults) != null
|| driver.findElement(errorText) != null
|| driver.findElement(notFoundText) != null;
} catch (Exception e) {
return false;
}

View File

@ -38,7 +38,9 @@ public class OzonCategoryPageParsingService {
@Retryable(maxAttempts = 10, recover = "recover")
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
var driver = webDriverPool.borrowDriver();
if (driver == null) {
throw new RuntimeException();
}
try {
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
driver.get(pageUrl);
@ -57,6 +59,7 @@ public class OzonCategoryPageParsingService {
stopFlag.set(true);
return;
}
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
List<ParsedData> parsedData;
try {

View File

@ -1,8 +1,13 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.ArrayList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
@ -17,8 +22,8 @@ import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingServic
public class OzonParsingService implements MarketplaceParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
private final OzonCategoryPageParsingService categoryPageParsingService;
@ -30,12 +35,30 @@ public class OzonParsingService implements MarketplaceParsingService {
int pageIndex = 1;
while (!stopFlag.get()) {
int finalPageIndex = pageIndex;
String pageUrl = url + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
++pageIndex;
try {
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
int finalPageIndex = pageIndex;
String pageUrl = url + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> {
try {
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
} finally {
semaphore.release(); // Освобождаем разрешение после завершения задачи
}
});
++pageIndex;
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
if (stopFlag.get()) {
log.info("Конец парсинга категории");
pageExecutorService.shutdownNow();
}
}

View File

@ -5,6 +5,7 @@ import java.util.List;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@ -24,6 +25,7 @@ public class ProductService {
private final ProductPriceRepository productPriceRepository;
@Transactional
@Retryable
public void saveBatch(List<ParsedData> parsedData) {
var products = new ArrayList<ProductEntity>();
var prices = new ArrayList<PriceHistoryEntity>();
@ -40,7 +42,7 @@ public class ProductService {
var priceHistoryEntity = getPriceHistory(product);
prices.add(priceHistoryEntity);
if (productRepository.existsByUrl(product.getUrl())) {
log.debug("Запись {} уже есть", product.getUrl());
log.info("Запись {} уже есть", product.getUrl());
return;
}
var productEntity = getProduct(product);

View File

@ -23,7 +23,7 @@ spring:
marketplace:
ozon:
categories-urls:
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
wildberries:
base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"