Feature/parsing-service: save commit
This commit is contained in:
parent
9895aaff33
commit
82f648e16c
@ -1,25 +1,11 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<option name="ACTIVE_PROFILES" value="dev,headless" />
|
||||
<option name="ACTIVE_PROFILES" value="dev,headless,ozon,postgres_stat" />
|
||||
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||
<envs>
|
||||
<env name="JDBC_PASSWORD" value="postgres" />
|
||||
<env name="JDBC_USERNAME" value="postgres" />
|
||||
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
||||
<env name="SERVER_PORT" value="8080" />
|
||||
</envs>
|
||||
<module name="parsing-service.main" />
|
||||
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
||||
<method v="2">
|
||||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<option name="ACTIVE_PROFILES" value="dev" />
|
||||
<envs>
|
||||
<env name="JDBC_PASSWORD" value="postgres" />
|
||||
<env name="JDBC_USERNAME" value="postgres" />
|
||||
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
||||
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
||||
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
|
||||
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
|
||||
<env name="SERVER_PORT" value="8080" />
|
||||
</envs>
|
||||
<module name="parsing-service.main" />
|
||||
|
@ -3,9 +3,11 @@ package ru.pricepulse.parsingservice.config;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
|
||||
|
||||
@Configuration
|
||||
@EnableConfigurationProperties({
|
||||
OzonConfigProperties.class
|
||||
OzonConfigProperties.class,
|
||||
WildberriesConfigProperties.class
|
||||
})
|
||||
public class MarketplacesConfig {}
|
||||
|
@ -1,15 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
@Configuration
|
||||
public class RestTemplateConfig {
|
||||
|
||||
@Bean
|
||||
public RestTemplate restTemplate() {
|
||||
return new RestTemplate();
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.config.properties;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@ConfigurationProperties(prefix = "marketplace.wildberries")
|
||||
public class WildberriesConfigProperties {
|
||||
private String baseUrl;
|
||||
private String catalogUrl;
|
||||
private String userAgent;
|
||||
private String catalogWbUrl;
|
||||
private int retryAttempts;
|
||||
private long retryDelay;
|
||||
private String laptopUrl;
|
||||
private String shard;
|
||||
}
|
@ -41,7 +41,6 @@ public class WebDriverPool {
|
||||
if (driver != null) {
|
||||
busyDrivers.add(driver); // Добавляем драйвер в занятые
|
||||
}
|
||||
log.info("Занимаем драйвер {}", driver);
|
||||
return driver; // Возвращаем драйвер
|
||||
}
|
||||
|
||||
@ -49,7 +48,6 @@ public class WebDriverPool {
|
||||
public void returnDriver(WebDriver driver) {
|
||||
busyDrivers.remove(driver); // Убираем драйвер из занятых
|
||||
availableDrivers.add(driver); // Возвращаем драйвер в доступные
|
||||
log.info("Вернули драйвер {}", driver);
|
||||
}
|
||||
|
||||
// Метод для закрытия всех драйверов в пуле
|
||||
|
@ -9,8 +9,12 @@ import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
public class NoContentPage {
|
||||
|
||||
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
|
||||
|
||||
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
@ -23,7 +27,9 @@ public class NoContentPage {
|
||||
|
||||
public boolean isLoaded() {
|
||||
try {
    // findElements returns an empty list when nothing matches, whereas findElement
    // throws NoSuchElementException; with findElement the first missing marker would
    // abort the whole check and the remaining markers would never be consulted.
    return !driver.findElements(searchResults).isEmpty()
            || !driver.findElements(errorText).isEmpty()
            || !driver.findElements(notFoundText).isEmpty();
} catch (Exception e) {
    // Any driver failure is reported as "page not loaded", as before.
    return false;
}
|
||||
|
@ -38,7 +38,9 @@ public class OzonCategoryPageParsingService {
|
||||
@Retryable(maxAttempts = 10, recover = "recover")
|
||||
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
||||
var driver = webDriverPool.borrowDriver();
|
||||
|
||||
if (driver == null) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
try {
|
||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||
driver.get(pageUrl);
|
||||
@ -57,6 +59,7 @@ public class OzonCategoryPageParsingService {
|
||||
stopFlag.set(true);
|
||||
return;
|
||||
}
|
||||
|
||||
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
||||
List<ParsedData> parsedData;
|
||||
try {
|
||||
|
@ -1,8 +1,13 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -17,8 +22,8 @@ import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingServic
|
||||
public class OzonParsingService implements MarketplaceParsingService {
|
||||
|
||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||
|
||||
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
|
||||
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
|
||||
private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
|
||||
|
||||
private final OzonCategoryPageParsingService categoryPageParsingService;
|
||||
|
||||
@ -30,12 +35,30 @@ public class OzonParsingService implements MarketplaceParsingService {
|
||||
int pageIndex = 1;
|
||||
|
||||
while (!stopFlag.get()) {
|
||||
int finalPageIndex = pageIndex;
|
||||
String pageUrl = url + "&page=" + finalPageIndex;
|
||||
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
|
||||
++pageIndex;
|
||||
try {
|
||||
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
|
||||
|
||||
int finalPageIndex = pageIndex;
|
||||
String pageUrl = url + "&page=" + finalPageIndex;
|
||||
|
||||
pageExecutorService.submit(() -> {
|
||||
try {
|
||||
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
|
||||
} finally {
|
||||
semaphore.release(); // Освобождаем разрешение после завершения задачи
|
||||
}
|
||||
});
|
||||
|
||||
++pageIndex;
|
||||
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (stopFlag.get()) {
|
||||
log.info("Конец парсинга категории");
|
||||
pageExecutorService.shutdownNow();
|
||||
}
|
||||
}
|
||||
|
@ -5,6 +5,7 @@ import java.util.List;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
@ -24,6 +25,7 @@ public class ProductService {
|
||||
private final ProductPriceRepository productPriceRepository;
|
||||
|
||||
@Transactional
|
||||
@Retryable
|
||||
public void saveBatch(List<ParsedData> parsedData) {
|
||||
var products = new ArrayList<ProductEntity>();
|
||||
var prices = new ArrayList<PriceHistoryEntity>();
|
||||
@ -40,7 +42,7 @@ public class ProductService {
|
||||
var priceHistoryEntity = getPriceHistory(product);
|
||||
prices.add(priceHistoryEntity);
|
||||
if (productRepository.existsByUrl(product.getUrl())) {
|
||||
log.debug("Запись {} уже есть", product.getUrl());
|
||||
log.info("Запись {} уже есть", product.getUrl());
|
||||
return;
|
||||
}
|
||||
var productEntity = getProduct(product);
|
||||
|
@ -23,7 +23,7 @@ spring:
|
||||
marketplace:
|
||||
ozon:
|
||||
categories-urls:
|
||||
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
|
||||
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
|
||||
wildberries:
|
||||
base-url: "https://static-basket-01.wbbasket.ru"
|
||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||
|
Loading…
Reference in New Issue
Block a user