Feature/parsing-service: save commit
This commit is contained in:
parent
9895aaff33
commit
82f648e16c
@ -1,25 +1,11 @@
|
|||||||
<component name="ProjectRunConfigurationManager">
|
<component name="ProjectRunConfigurationManager">
|
||||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||||
<option name="ACTIVE_PROFILES" value="dev,headless" />
|
<option name="ACTIVE_PROFILES" value="dev,headless,ozon,postgres_stat" />
|
||||||
<option name="SCHEDULED_DEBUGGER" value="true" />
|
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||||
<envs>
|
<envs>
|
||||||
<env name="JDBC_PASSWORD" value="postgres" />
|
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
||||||
<env name="JDBC_USERNAME" value="postgres" />
|
<env name="POSTGRES_JDBC_USERNAME" value="postgres" />
|
||||||
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
<env name="POSTGRES_JDBC_URL" value="localhost:5432/parsed_data" />
|
||||||
<env name="SERVER_PORT" value="8080" />
|
|
||||||
</envs>
|
|
||||||
<module name="parsing-service.main" />
|
|
||||||
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
|
||||||
<method v="2">
|
|
||||||
<option name="Make" enabled="true" />
|
|
||||||
</method>
|
|
||||||
</configuration>
|
|
||||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
|
||||||
<option name="ACTIVE_PROFILES" value="dev" />
|
|
||||||
<envs>
|
|
||||||
<env name="JDBC_PASSWORD" value="postgres" />
|
|
||||||
<env name="JDBC_USERNAME" value="postgres" />
|
|
||||||
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
|
||||||
<env name="SERVER_PORT" value="8080" />
|
<env name="SERVER_PORT" value="8080" />
|
||||||
</envs>
|
</envs>
|
||||||
<module name="parsing-service.main" />
|
<module name="parsing-service.main" />
|
||||||
|
@ -3,9 +3,11 @@ package ru.pricepulse.parsingservice.config;
|
|||||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.WildberriesConfigProperties;
|
||||||
|
|
||||||
@Configuration
|
@Configuration
|
||||||
@EnableConfigurationProperties({
|
@EnableConfigurationProperties({
|
||||||
OzonConfigProperties.class
|
OzonConfigProperties.class,
|
||||||
|
WildberriesConfigProperties.class
|
||||||
})
|
})
|
||||||
public class MarketplacesConfig {}
|
public class MarketplacesConfig {}
|
||||||
|
@ -1,15 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
|
||||||
|
|
||||||
import org.springframework.context.annotation.Bean;
|
|
||||||
import org.springframework.context.annotation.Configuration;
|
|
||||||
import org.springframework.web.client.RestTemplate;
|
|
||||||
|
|
||||||
@Configuration
|
|
||||||
public class RestTemplateConfig {
|
|
||||||
|
|
||||||
@Bean
|
|
||||||
public RestTemplate restTemplate() {
|
|
||||||
return new RestTemplate();
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.config.properties;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@ConfigurationProperties(prefix = "marketplace.wildberries")
|
||||||
|
public class WildberriesConfigProperties {
|
||||||
|
private String baseUrl;
|
||||||
|
private String catalogUrl;
|
||||||
|
private String userAgent;
|
||||||
|
private String catalogWbUrl;
|
||||||
|
private int retryAttempts;
|
||||||
|
private long retryDelay;
|
||||||
|
private String laptopUrl;
|
||||||
|
private String shard;
|
||||||
|
}
|
@ -41,7 +41,6 @@ public class WebDriverPool {
|
|||||||
if (driver != null) {
|
if (driver != null) {
|
||||||
busyDrivers.add(driver); // Добавляем драйвер в занятые
|
busyDrivers.add(driver); // Добавляем драйвер в занятые
|
||||||
}
|
}
|
||||||
log.info("Занимаем драйвер {}", driver);
|
|
||||||
return driver; // Возвращаем драйвер
|
return driver; // Возвращаем драйвер
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -49,7 +48,6 @@ public class WebDriverPool {
|
|||||||
public void returnDriver(WebDriver driver) {
|
public void returnDriver(WebDriver driver) {
|
||||||
busyDrivers.remove(driver); // Убираем драйвер из занятых
|
busyDrivers.remove(driver); // Убираем драйвер из занятых
|
||||||
availableDrivers.add(driver); // Возвращаем драйвер в доступные
|
availableDrivers.add(driver); // Возвращаем драйвер в доступные
|
||||||
log.info("Вернули драйвер {}", driver);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Метод для закрытия всех драйверов в пуле
|
// Метод для закрытия всех драйверов в пуле
|
||||||
|
@ -9,8 +9,12 @@ import org.openqa.selenium.support.ui.WebDriverWait;
|
|||||||
public class NoContentPage {
|
public class NoContentPage {
|
||||||
|
|
||||||
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||||
|
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
||||||
|
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
|
||||||
|
|
||||||
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||||
|
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
||||||
|
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||||
|
|
||||||
private WebDriver driver;
|
private WebDriver driver;
|
||||||
|
|
||||||
@ -23,7 +27,9 @@ public class NoContentPage {
|
|||||||
|
|
||||||
public boolean isLoaded() {
|
public boolean isLoaded() {
|
||||||
try {
|
try {
|
||||||
return driver.findElement(errorText) != null;
|
return driver.findElement(searchResults) != null
|
||||||
|
|| driver.findElement(errorText) != null
|
||||||
|
|| driver.findElement(notFoundText) != null;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
@ -38,7 +38,9 @@ public class OzonCategoryPageParsingService {
|
|||||||
@Retryable(maxAttempts = 10, recover = "recover")
|
@Retryable(maxAttempts = 10, recover = "recover")
|
||||||
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
||||||
var driver = webDriverPool.borrowDriver();
|
var driver = webDriverPool.borrowDriver();
|
||||||
|
if (driver == null) {
|
||||||
|
throw new RuntimeException();
|
||||||
|
}
|
||||||
try {
|
try {
|
||||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||||
driver.get(pageUrl);
|
driver.get(pageUrl);
|
||||||
@ -57,6 +59,7 @@ public class OzonCategoryPageParsingService {
|
|||||||
stopFlag.set(true);
|
stopFlag.set(true);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
||||||
List<ParsedData> parsedData;
|
List<ParsedData> parsedData;
|
||||||
try {
|
try {
|
||||||
|
@ -1,8 +1,13 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.concurrent.ArrayBlockingQueue;
|
||||||
|
import java.util.concurrent.BlockingQueue;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.Semaphore;
|
||||||
|
import java.util.concurrent.ThreadPoolExecutor;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
@ -17,8 +22,8 @@ import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingServic
|
|||||||
public class OzonParsingService implements MarketplaceParsingService {
|
public class OzonParsingService implements MarketplaceParsingService {
|
||||||
|
|
||||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||||
|
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
|
||||||
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
|
private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
|
||||||
|
|
||||||
private final OzonCategoryPageParsingService categoryPageParsingService;
|
private final OzonCategoryPageParsingService categoryPageParsingService;
|
||||||
|
|
||||||
@ -30,12 +35,30 @@ public class OzonParsingService implements MarketplaceParsingService {
|
|||||||
int pageIndex = 1;
|
int pageIndex = 1;
|
||||||
|
|
||||||
while (!stopFlag.get()) {
|
while (!stopFlag.get()) {
|
||||||
|
try {
|
||||||
|
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
|
||||||
|
|
||||||
int finalPageIndex = pageIndex;
|
int finalPageIndex = pageIndex;
|
||||||
String pageUrl = url + "&page=" + finalPageIndex;
|
String pageUrl = url + "&page=" + finalPageIndex;
|
||||||
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
|
|
||||||
++pageIndex;
|
pageExecutorService.submit(() -> {
|
||||||
|
try {
|
||||||
|
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
|
||||||
|
} finally {
|
||||||
|
semaphore.release(); // Освобождаем разрешение после завершения задачи
|
||||||
}
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
++pageIndex;
|
||||||
|
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
Thread.currentThread().interrupt();
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (stopFlag.get()) {
|
if (stopFlag.get()) {
|
||||||
|
log.info("Конец парсинга категории");
|
||||||
pageExecutorService.shutdownNow();
|
pageExecutorService.shutdownNow();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -5,6 +5,7 @@ import java.util.List;
|
|||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.retry.annotation.Retryable;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
@ -24,6 +25,7 @@ public class ProductService {
|
|||||||
private final ProductPriceRepository productPriceRepository;
|
private final ProductPriceRepository productPriceRepository;
|
||||||
|
|
||||||
@Transactional
|
@Transactional
|
||||||
|
@Retryable
|
||||||
public void saveBatch(List<ParsedData> parsedData) {
|
public void saveBatch(List<ParsedData> parsedData) {
|
||||||
var products = new ArrayList<ProductEntity>();
|
var products = new ArrayList<ProductEntity>();
|
||||||
var prices = new ArrayList<PriceHistoryEntity>();
|
var prices = new ArrayList<PriceHistoryEntity>();
|
||||||
@ -40,7 +42,7 @@ public class ProductService {
|
|||||||
var priceHistoryEntity = getPriceHistory(product);
|
var priceHistoryEntity = getPriceHistory(product);
|
||||||
prices.add(priceHistoryEntity);
|
prices.add(priceHistoryEntity);
|
||||||
if (productRepository.existsByUrl(product.getUrl())) {
|
if (productRepository.existsByUrl(product.getUrl())) {
|
||||||
log.debug("Запись {} уже есть", product.getUrl());
|
log.info("Запись {} уже есть", product.getUrl());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
var productEntity = getProduct(product);
|
var productEntity = getProduct(product);
|
||||||
|
@ -23,7 +23,7 @@ spring:
|
|||||||
marketplace:
|
marketplace:
|
||||||
ozon:
|
ozon:
|
||||||
categories-urls:
|
categories-urls:
|
||||||
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
|
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
|
||||||
wildberries:
|
wildberries:
|
||||||
base-url: "https://static-basket-01.wbbasket.ru"
|
base-url: "https://static-basket-01.wbbasket.ru"
|
||||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||||
|
Loading…
Reference in New Issue
Block a user