diff --git a/.run/ParsingService [local].run.xml b/.run/ParsingService [local].run.xml
new file mode 100644
index 0000000..ff04219
--- /dev/null
+++ b/.run/ParsingService [local].run.xml
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
\ No newline at end of file
diff --git a/parsing-service/.run/ParsingService [local].run.xml b/parsing-service/.run/ParsingService [local].run.xml
index 223eb3e..8d50103 100644
--- a/parsing-service/.run/ParsingService [local].run.xml
+++ b/parsing-service/.run/ParsingService [local].run.xml
@@ -1,6 +1,6 @@
-
+
diff --git a/parsing-service/build.gradle b/parsing-service/build.gradle
index 2372525..0fc9ca5 100644
--- a/parsing-service/build.gradle
+++ b/parsing-service/build.gradle
@@ -25,6 +25,7 @@ repositories {
ext {
jsoupVesion = '1.18.1'
+ seleniumVersion = '4.25.0'
}
dependencies {
@@ -33,6 +34,9 @@ dependencies {
implementation 'org.liquibase:liquibase-core'
implementation 'org.springframework.kafka:spring-kafka'
implementation "org.jsoup:jsoup:${jsoupVesion}"
+ implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
+ implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
+ implementation 'org.apache.commons:commons-pool2:2.12.0'
compileOnly 'org.projectlombok:lombok'
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/RetryConfig.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/RetryConfig.java
new file mode 100644
index 0000000..ddd9f79
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/RetryConfig.java
@@ -0,0 +1,14 @@
+package ru.pricepulse.parsingservice.config;
+
+import org.springframework.context.annotation.Configuration;
+import org.springframework.retry.annotation.EnableRetry;
+
+/**
+ * Switches on Spring Retry annotation processing via {@code @EnableRetry}.
+ *
+ * The class declares no beans of its own.
+ */
+@EnableRetry
+@Configuration
+public class RetryConfig {
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/SeleniumConfig.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/SeleniumConfig.java
new file mode 100644
index 0000000..758d209
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/SeleniumConfig.java
@@ -0,0 +1,15 @@
+package ru.pricepulse.parsingservice.config;
+
+import org.springframework.boot.context.properties.EnableConfigurationProperties;
+import org.springframework.context.annotation.Configuration;
+import ru.pricepulse.parsingservice.config.properties.SeleniumConfigProperties;
+
+/**
+ * Registers {@link SeleniumConfigProperties} as a configuration-properties bean.
+ *
+ * The class declares no beans of its own.
+ */
+@EnableConfigurationProperties(SeleniumConfigProperties.class)
+@Configuration
+public class SeleniumConfig {
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/WebDriverConfig.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/WebDriverConfig.java
new file mode 100644
index 0000000..10a3ebc
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/WebDriverConfig.java
@@ -0,0 +1,95 @@
+package ru.pricepulse.parsingservice.config;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import io.github.bonigarcia.wdm.WebDriverManager;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.springframework.beans.factory.config.ConfigurableBeanFactory;
+import org.springframework.context.annotation.Bean;
+import org.springframework.context.annotation.Configuration;
+import org.springframework.context.annotation.Profile;
+import org.springframework.context.annotation.Scope;
+
+/**
+ * Declares the Chrome {@link WebDriver} beans, selected by Spring profile.
+ *
+ * The {@code visible} profile builds a driver without headless arguments; the
+ * {@code headless} profile builds one from the {@link #chromeOptions()} bean.
+ * Both driver beans are prototype-scoped, so each lookup creates a new
+ * {@link ChromeDriver}.
+ */
+@Configuration
+public class WebDriverConfig {
+
+    /** Common prefix of the Chrome content-setting preference keys used below. */
+    private static final String CONTENT_SETTINGS_PREFIX = "profile.managed_default_content_settings.";
+
+    /** Preference value applied to every listed content type (presumably Chrome's "block" - confirm). */
+    private static final int CONTENT_SETTING_VALUE = 2;
+
+    private static final String USER_AGENT_ARGUMENT =
+            "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/85.0.4183.121 Safari/537.36";
+
+    /**
+     * Creates a Chrome driver for the {@code visible} profile with the image
+     * and geolocation content settings applied.
+     *
+     * @return a new {@link ChromeDriver}
+     */
+    @Bean
+    @Profile("visible")
+    @Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
+    public WebDriver webDriverVisible() {
+        var options = new ChromeOptions();
+        options.setExperimentalOption("prefs", contentSettingPrefs("images", "geolocation"));
+        WebDriverManager.chromedriver().setup();
+        return new ChromeDriver(options);
+    }
+
+    /**
+     * Creates a Chrome driver for the {@code headless} profile.
+     *
+     * @param options options supplied by the {@link #chromeOptions()} bean
+     * @return a new {@link ChromeDriver}
+     */
+    @Bean
+    @Profile("headless")
+    @Scope(ConfigurableBeanFactory.SCOPE_PROTOTYPE)
+    public WebDriver webDriverHeadless(ChromeOptions options) {
+        WebDriverManager.chromedriver().setup();
+        return new ChromeDriver(options);
+    }
+
+    /**
+     * Builds the Chrome options used by the headless driver: image and
+     * stylesheet content settings, a fixed user agent and the headless flags.
+     *
+     * @return the configured options
+     */
+    @Bean
+    @Profile("headless")
+    public ChromeOptions chromeOptions() {
+        var options = new ChromeOptions();
+        options.setExperimentalOption("prefs", contentSettingPrefs("images", "stylesheets"));
+        options.addArguments(
+                USER_AGENT_ARGUMENT,
+                "--headless",
+                "--disable-gpu",
+                "--no-sandbox",
+                "--disable-dev-shm-usage");
+        return options;
+    }
+
+    /** Maps each given content type to {@link #CONTENT_SETTING_VALUE} under the shared key prefix. */
+    private static Map<String, Object> contentSettingPrefs(String... contentTypes) {
+        Map<String, Object> prefs = new HashMap<>();
+        for (String contentType : contentTypes) {
+            prefs.put(CONTENT_SETTINGS_PREFIX + contentType, CONTENT_SETTING_VALUE);
+        }
+        return prefs;
+    }
+
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/properties/SeleniumConfigProperties.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/properties/SeleniumConfigProperties.java
new file mode 100644
index 0000000..a60abcb
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/config/properties/SeleniumConfigProperties.java
@@ -0,0 +1,12 @@
+package ru.pricepulse.parsingservice.config.properties;
+
+import org.springframework.boot.context.properties.ConfigurationProperties;
+
+/**
+ * Binding target for configuration keys under the {@code selenium} prefix.
+ *
+ * No properties are declared yet, so nothing is bound at the moment.
+ */
+@ConfigurationProperties(prefix = "selenium")
+public class SeleniumConfigProperties {
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/persistence/entity/ProductEntity.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/persistence/entity/ProductEntity.java
index 37ef526..2faa174 100644
--- a/parsing-service/src/main/java/ru/pricepulse/parsingservice/persistence/entity/ProductEntity.java
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/persistence/entity/ProductEntity.java
@@ -22,6 +22,7 @@ import ru.pricepulse.parsingservice.persistence.enums.MarketplaceEnum;
@Entity
@Table(name = "product")
public class ProductEntity {
+
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "id", nullable = false)
@@ -69,4 +70,5 @@ public class ProductEntity {
protected void onCreate() {
createdAt = LocalDateTime.now();
}
+
}
\ No newline at end of file
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/pool/WebDriverPool.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/pool/WebDriverPool.java
new file mode 100644
index 0000000..9f1cf3f
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/pool/WebDriverPool.java
@@ -0,0 +1,111 @@
+package ru.pricepulse.parsingservice.pool;
+
+import java.util.Queue;
+import java.util.concurrent.ConcurrentLinkedQueue;
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.chrome.ChromeDriver;
+import org.openqa.selenium.chrome.ChromeOptions;
+import org.springframework.beans.factory.DisposableBean;
+import org.springframework.beans.factory.ObjectFactory;
+import org.springframework.stereotype.Component;
+
+/**
+ * Pool of {@link WebDriver} instances created eagerly in the constructor.
+ *
+ * Drivers move between an "available" and a "busy" queue as they are borrowed
+ * and returned. The pool implements {@link DisposableBean} so that the
+ * browsers are quit when the Spring context closes.
+ */
+@Slf4j
+@Component
+public class WebDriverPool implements DisposableBean {
+
+    /** Number of drivers created when the pool is constructed. */
+    private static final int POOL_SIZE = 12;
+
+    private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // drivers ready to be borrowed
+    private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>(); // drivers currently lent out
+    private final ObjectFactory<WebDriver> webDriverFactory;
+
+    /**
+     * Fills the pool with {@link #POOL_SIZE} drivers obtained from the factory.
+     *
+     * @param webDriverFactory source of new driver instances
+     */
+    public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory) {
+        this.webDriverFactory = webDriverFactory;
+        try {
+            for (int i = 0; i < POOL_SIZE; i++) {
+                availableDrivers.add(createNewDriver());
+            }
+        } catch (RuntimeException e) {
+            // Do not leave already started browsers behind when start-up fails midway.
+            quitAll(availableDrivers);
+            throw e;
+        }
+    }
+
+    // Obtains one more WebDriver instance from the factory.
+    private WebDriver createNewDriver() {
+        return webDriverFactory.getObject();
+    }
+
+    /**
+     * Takes a driver out of the pool without blocking.
+     *
+     * @return a driver, or {@code null} when no driver is currently available
+     */
+    public WebDriver borrowDriver() {
+        WebDriver driver = availableDrivers.poll();
+        if (driver != null) {
+            busyDrivers.add(driver);
+        }
+        log.info("Занимаем драйвер {}", driver);
+        return driver;
+    }
+
+    /**
+     * Gives a borrowed driver back to the pool.
+     *
+     * {@code null} and drivers that are not currently marked as busy (for
+     * example a second return of the same driver) are ignored, so a repeated
+     * return does not put the same driver into the available queue twice.
+     *
+     * @param driver the driver obtained from {@link #borrowDriver()}
+     */
+    public void returnDriver(WebDriver driver) {
+        if (driver == null || !busyDrivers.remove(driver)) {
+            log.debug("Драйвер {} не числится занятым, возврат пропущен", driver);
+            return;
+        }
+        availableDrivers.add(driver);
+        log.info("Вернули драйвер {}", driver);
+    }
+
+    /** Quits every driver, available or busy, and empties both queues. */
+    public void shutdownPool() {
+        quitAll(availableDrivers);
+        quitAll(busyDrivers);
+    }
+
+    /** Spring shutdown callback: releases all browsers held by the pool. */
+    @Override
+    public void destroy() {
+        shutdownPool();
+    }
+
+    // Drains the queue, quitting each driver; one failing quit() must not stop the rest.
+    private void quitAll(Queue<WebDriver> drivers) {
+        WebDriver driver;
+        while ((driver = drivers.poll()) != null) {
+            try {
+                driver.quit();
+            } catch (RuntimeException e) {
+                log.warn("Не удалось закрыть драйвер {}", driver, e);
+            }
+        }
+    }
+
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/impl/parsing/OzonParsingService.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/impl/parsing/OzonParsingService.java
deleted file mode 100644
index 3a45b22..0000000
--- a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/impl/parsing/OzonParsingService.java
+++ /dev/null
@@ -1,67 +0,0 @@
-package ru.pricepulse.parsingservice.service.impl.parsing;
-
-import java.util.concurrent.ExecutorService;
-import java.util.concurrent.Executors;
-import java.util.concurrent.atomic.AtomicBoolean;
-
-import lombok.extern.slf4j.Slf4j;
-import org.springframework.retry.annotation.Recover;
-import org.springframework.retry.annotation.Retryable;
-import org.springframework.stereotype.Service;
-import ru.pricepulse.parsingservice.service.DataParser;
-import ru.pricepulse.parsingservice.service.MarketplaceParsingService;
-import ru.pricepulse.parsingservice.service.request.PageFetcher;
-
-@Slf4j
-@Service
-public class OzonParsingService implements MarketplaceParsingService {
-
- private final PageFetcher pageFetcher;
- private final DataParser dataParser;
- private final ExecutorService executorService;
-
- public OzonParsingService(PageFetcher pageFetcher, DataParser dataParser) {
- this.pageFetcher = pageFetcher;
- this.dataParser = dataParser;
- this.executorService = Executors.newFixedThreadPool(1);
- }
-
- @Override
- public void processCategory(String categoryUrl) {
- int pageNumber = 1;
- AtomicBoolean hasMoreData = new AtomicBoolean(true);
-
- while (hasMoreData.get()) {
- int finalPageNumber = pageNumber;
- executorService.submit(() -> processTask(categoryUrl, finalPageNumber, hasMoreData));
- pageNumber++;
- }
- }
-
- @Retryable
- private void processTask(String categoryUrl, int pageNumber, AtomicBoolean hasMoreData) {
- String pageUrl = categoryUrl + "?page=" + pageNumber;
- String pageContent;
- try {
- log.info("Получение страницы {}", pageUrl);
- pageContent = pageFetcher.fetchPage(pageUrl);
- } catch (Exception e) {
- log.error("Ошибка получения страницы - {} \n {}", pageUrl, e.getMessage(), e);
- throw new RuntimeException(e);
- }
-
- if (!dataParser.pageHasData(pageContent)) {
- log.warn("Данные не найдены - {}", pageUrl);
- hasMoreData.set(false);
- return;
- }
-
- dataParser.parseAndQueueData(pageContent);
- }
-
- @Recover
- private void recover(Exception e, String categoryUrl, int pageNumber, AtomicBoolean hasMoreData) {
- log.error(e.getMessage(), e);
- }
-
-}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/MarketplacePage.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/MarketplacePage.java
new file mode 100644
index 0000000..02c81c6
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/MarketplacePage.java
@@ -0,0 +1,16 @@
+package ru.pricepulse.parsingservice.service.marketplace.ozon;
+
+/**
+ * Page object contract for a marketplace page that can report whether it is
+ * loaded.
+ */
+public interface MarketplacePage {
+
+    /**
+     * Reports whether this page is loaded.
+     *
+     * @return {@code true} when the page is loaded, {@code false} otherwise
+     */
+    boolean isLoaded();
+
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/page/AccessDeniedPage.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/page/AccessDeniedPage.java
new file mode 100644
index 0000000..d2cd51c
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/page/AccessDeniedPage.java
@@ -0,0 +1,87 @@
+package ru.pricepulse.parsingservice.service.marketplace.ozon.page;
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.By;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.WebElement;
+import org.openqa.selenium.support.ui.ExpectedConditions;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.MarketplacePage;
+
+/**
+ * Page object for the "Доступ ограничен" (access restricted) page.
+ *
+ * The page is recognised by a warning image plus the access-restricted
+ * heading, and it exposes a reload button that can be clicked.
+ */
+@Slf4j
+public class AccessDeniedPage implements MarketplacePage {
+
+    private static final String RELOAD_BUTTON_ID = "reload-button";
+    private static final String RELOAD_BUTTON_XPATH = "//button[contains(text(),'Обновить')]";
+    private static final String WARNING_IMAGE_CSS = "img[alt='warning']";
+    private static final String ACCESS_DENIED_TEXT_XPATH = "//h1[text()='Доступ ограничен']";
+
+    private final By reloadButtonById = By.id(RELOAD_BUTTON_ID);
+    private final By reloadButtonByXpath = By.xpath(RELOAD_BUTTON_XPATH);
+    private final By warningImage = By.cssSelector(WARNING_IMAGE_CSS);
+    private final By accessDeniedText = By.xpath(ACCESS_DENIED_TEXT_XPATH);
+
+    private final WebDriver driver;
+    private final WebDriverWait wait;
+
+    /**
+     * @param driver driver whose current page is inspected
+     * @param wait   wait helper; stored but not used by this class yet
+     */
+    public AccessDeniedPage(WebDriver driver,
+                            WebDriverWait wait) {
+        this.driver = driver;
+        this.wait = wait;
+    }
+
+    /**
+     * Clicks the reload button, looking it up by id first and by xpath second.
+     *
+     * Failing to find or click the button is not fatal: a warning is logged
+     * and the method returns normally.
+     */
+    public void clickReloadButton() {
+        if (tryClick(reloadButtonById, "id") || tryClick(reloadButtonByXpath, "xpath")) {
+            return;
+        }
+        log.warn("Не удалось нажать кнопку обновления страницы ни по id, ни по xpath");
+    }
+
+    // Attempts one lookup strategy; returns true only when the click went through.
+    private boolean tryClick(By locator, String strategy) {
+        log.debug("Пытаемся найти кнопку по {} и нажать", strategy);
+        try {
+            driver.findElement(locator).click();
+            log.debug("Успешно нашли кнопку по {}", strategy);
+            return true;
+        } catch (RuntimeException e) {
+            // A miss is expected for the first strategy, so only a short debug line is written.
+            log.debug("Кнопка обновления страницы не найдена по {}", strategy);
+            return false;
+        }
+    }
+
+    // findElements returns an empty list instead of throwing when nothing matches.
+    private boolean isPresent(By locator) {
+        return !driver.findElements(locator).isEmpty();
+    }
+
+    /**
+     * @return {@code true} when both the warning image and the access-restricted
+     *         heading are present; {@code false} otherwise or on any driver error
+     */
+    @Override
+    public boolean isLoaded() {
+        try {
+            return isPresent(warningImage) && isPresent(accessDeniedText);
+        } catch (RuntimeException e) {
+            return false;
+        }
+    }
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/page/CategoryPage.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/page/CategoryPage.java
new file mode 100644
index 0000000..29b97f8
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/page/CategoryPage.java
@@ -0,0 +1,110 @@
+package ru.pricepulse.parsingservice.service.marketplace.ozon.page;
+
+import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
+import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+import java.util.stream.Collectors;
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.By;
+import org.openqa.selenium.StaleElementReferenceException;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.WebElement;
+import org.openqa.selenium.support.ui.ExpectedConditions;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import org.springframework.retry.annotation.Recover;
+import org.springframework.retry.annotation.Retryable;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.MarketplacePage;
+
+/**
+ * Page object for a category listing page; extracts the product links from
+ * the search-results widget.
+ */
+@Slf4j
+public class CategoryPage implements MarketplacePage {
+
+    private static final int PAGE_SIZE = 12;
+    private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
+    private static final String DIRECT_CHILD_DIVS = ":scope > div";
+
+    private final By searchResults = By.cssSelector(SEARCH_RESULTS);
+    private final By directChildDivs = By.cssSelector(DIRECT_CHILD_DIVS);
+    private final By links = By.tagName("a");
+
+    private final WebDriver driver;
+
+    private final WebDriverWait wait;
+
+    /**
+     * @param driver driver whose current page is inspected
+     * @param wait   wait helper used for the explicit waits of this page
+     */
+    public CategoryPage(WebDriver driver, WebDriverWait wait) {
+        this.driver = driver;
+        this.wait = wait;
+    }
+
+    /**
+     * Waits for the product cards to show up and collects one link per card.
+     *
+     * @return the distinct, non-empty {@code href} values of the first link in each card
+     */
+    public Set<String> getProductsLinks() {
+        WebElement searchResultsElement = wait.until(visibilityOfElementLocated(searchResults));
+        // Outer block holding the product list. The lookup runs inside the lambda;
+        // returning null keeps the wait polling until the block exists.
+        WebElement outerDiv = wait.until(d -> {
+            List<WebElement> children = searchResultsElement.findElements(directChildDivs);
+            return children.isEmpty() ? null : children.get(0);
+        });
+        // Product card blocks. apply(d) actually evaluates the visibility condition;
+        // returning the ExpectedCondition object itself would end the wait at once.
+        List<WebElement> innerDivs = wait.until(
+                d -> visibilityOfAllElements(outerDiv.findElements(directChildDivs)).apply(d));
+        return searchProductsLinks(innerDivs);
+    }
+
+    // Maps every card to the href of its first link, dropping cards without one.
+    private Set<String> searchProductsLinks(List<WebElement> innerDivs) {
+        return innerDivs.stream()
+                .map(this::firstLinkHref)
+                .filter(href -> href != null && !href.isEmpty())
+                .collect(Collectors.toSet());
+    }
+
+    // Returns the href of the card's first link, or null when the links cannot be read.
+    private String firstLinkHref(WebElement card) {
+        waitVisibility(card);
+        List<WebElement> linkTags;
+        try {
+            linkTags = card.findElements(links);
+        } catch (RuntimeException e) {
+            log.debug("Не удалось получить ссылки из карточки товара", e);
+            return null;
+        }
+        return linkTags.isEmpty() ? null : linkTags.getFirst().getAttribute("href");
+    }
+
+    // Blocks until the given element contains at least one link.
+    private void waitVisibility(WebElement outerElement) {
+        wait.until(d -> !outerElement.findElements(links).isEmpty());
+    }
+
+    /**
+     * @return {@code true} when the search-results widget is present;
+     *         {@code false} otherwise or on any driver error
+     */
+    @Override
+    public boolean isLoaded() {
+        try {
+            return !driver.findElements(searchResults).isEmpty();
+        } catch (RuntimeException e) {
+            return false;
+        }
+    }
+
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/parsing/CategoryPageParsingService.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/parsing/CategoryPageParsingService.java
new file mode 100644
index 0000000..1229367
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/parsing/CategoryPageParsingService.java
@@ -0,0 +1,155 @@
+package ru.pricepulse.parsingservice.service.marketplace.ozon.parsing;
+
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.TimeUnit;
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.JavascriptExecutor;
+import org.openqa.selenium.StaleElementReferenceException;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import org.slf4j.MDC;
+import org.springframework.beans.factory.ObjectFactory;
+import org.springframework.retry.annotation.Recover;
+import org.springframework.retry.annotation.Retryable;
+import org.springframework.stereotype.Service;
+import ru.pricepulse.parsingservice.pool.WebDriverPool;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.page.AccessDeniedPage;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.page.CategoryPage;
+
+/**
+ * Parses one page of a category: collects the product links from the listing
+ * and then opens every product page with a pooled {@link WebDriver}.
+ */
+@Slf4j
+@Service
+public class CategoryPageParsingService {
+
+    private static final String MDC_PAGE_INDEX = "pageIndex";
+    private static final Duration PAGE_TIMEOUT = Duration.of(10, ChronoUnit.SECONDS);
+
+    private final WebDriverPool webDriverPool;
+
+    public CategoryPageParsingService(WebDriverPool webDriverPool) {
+        this.webDriverPool = webDriverPool;
+    }
+
+    /**
+     * Processes a single listing page. The method is declared retryable with
+     * at most 10 attempts and {@code recover} as its recovery method.
+     *
+     * @param finalPageIndex number of the listing page, appended as {@code /?page=}
+     * @param url            category URL without the page query
+     * @param errors         receives the links of product pages that failed to open
+     * @throws InterruptedException kept for existing callers; not thrown by the current code
+     */
+    @Retryable(maxAttempts = 10, recover = "recover")
+    public void parseCategoryPage(int finalPageIndex, String url, ArrayList<String> errors) throws InterruptedException {
+        MDC.put(MDC_PAGE_INDEX, String.valueOf(finalPageIndex));
+        try {
+            String pageUrl = url + "/?page=" + finalPageIndex;
+            Set<String> hrefs = collectProductLinks(pageUrl);
+            log.info("Страница {} Получены ссылки на товары: {}", finalPageIndex, hrefs.size());
+            for (String href : hrefs) {
+                if (!processPage(href)) {
+                    // Only failed product pages belong in the error list.
+                    errors.add(href);
+                }
+            }
+        } finally {
+            // The key must not leak into unrelated log lines written later by this thread.
+            MDC.remove(MDC_PAGE_INDEX);
+        }
+    }
+
+    // Loads the listing page with a pooled driver and returns its product links.
+    // The driver goes back to the pool exactly once, whatever the outcome.
+    private Set<String> collectProductLinks(String pageUrl) {
+        WebDriver driver = borrowDriver();
+        try {
+            driver.manage().timeouts().pageLoadTimeout(PAGE_TIMEOUT);
+            driver.get(pageUrl);
+            var wait = new WebDriverWait(driver, PAGE_TIMEOUT);
+            var accessDeniedPage = new AccessDeniedPage(driver, wait); // TODO: find a way to avoid creating page objects on every call
+            var categoryPage = new CategoryPage(driver, wait);
+            wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
+            if (checkAccessDeniedPage(accessDeniedPage)) {
+                log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
+                resolveAccessDeniedPage(accessDeniedPage);
+                log.info("Проблема успешно решена: {}", pageUrl);
+            }
+            log.info("Получаем список ссылок на товары на текущей странице: {}", pageUrl);
+            return categoryPage.getProductsLinks();
+        } finally {
+            webDriverPool.returnDriver(driver);
+        }
+    }
+
+    // Opens one product page; returns false (and logs the cause) when loading fails.
+    private boolean processPage(String href) {
+        WebDriver driver = borrowDriver();
+        try {
+            driver.get(href);
+            log.info("Страница обработана");
+            return true;
+        } catch (RuntimeException e) {
+            log.warn("Ошибка обработки страницы товара {}", href, e);
+            return false;
+        } finally {
+            webDriverPool.returnDriver(driver);
+        }
+    }
+
+    // Borrows a driver, failing fast instead of passing a null driver around.
+    private WebDriver borrowDriver() {
+        WebDriver driver = webDriverPool.borrowDriver();
+        if (driver == null) {
+            throw new IllegalStateException("Нет свободных драйверов в пуле");
+        }
+        return driver;
+    }
+
+    // Wait predicate: true once either the access-restricted page or the category page is shown.
+    private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
+                                               CategoryPage categoryPage) {
+        log.debug("Проверка что страница 'Доступ ограничен'");
+        try {
+            if (checkAccessDeniedPage(accessDeniedPage)) {
+                return true;
+            }
+        } catch (Exception e) {
+            log.debug("Ошибка проверки", e);
+        }
+        log.debug("Проверка что страница 'Страница категории'");
+        if (checkCategoryPage(categoryPage)) {
+            return true;
+        }
+        log.debug("Проверка загрузки страницы неудачна");
+        return false;
+    }
+
+    private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
+        return accessDeniedPage.isLoaded();
+    }
+
+    private boolean checkCategoryPage(CategoryPage categoryPage) {
+        return categoryPage.isLoaded();
+    }
+
+    private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
+        accessDeniedPage.clickReloadButton();
+    }
+
+    // Recovery method named by @Retryable above; logs the final failure with its cause.
+    @Recover
+    private void recover(Exception e) {
+        log.error("Все ретраи провалились", e);
+    }
+
+}
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/parsing/ParsingService.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/parsing/ParsingService.java
new file mode 100644
index 0000000..f5999d2
--- /dev/null
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/marketplace/ozon/parsing/ParsingService.java
@@ -0,0 +1,68 @@
+package ru.pricepulse.parsingservice.service.marketplace.ozon.parsing;
+
+import java.time.Duration;
+import java.time.temporal.ChronoUnit;
+import java.util.ArrayList;
+import java.util.Set;
+import java.util.concurrent.ExecutorService;
+import java.util.concurrent.Executors;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+import lombok.extern.slf4j.Slf4j;
+import org.openqa.selenium.JavascriptExecutor;
+import org.openqa.selenium.StaleElementReferenceException;
+import org.openqa.selenium.WebDriver;
+import org.openqa.selenium.support.ui.WebDriverWait;
+import org.slf4j.MDC;
+import org.springframework.beans.factory.ObjectFactory;
+import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.retry.annotation.Retryable;
+import org.springframework.stereotype.Service;
+import ru.pricepulse.parsingservice.service.MarketplaceParsingService;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.page.AccessDeniedPage;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.page.CategoryPage;
+
+/**
+ * Walks the first pages of a category and hands each page to
+ * {@link CategoryPageParsingService}.
+ */
+@Slf4j
+@Service
+public class ParsingService implements MarketplaceParsingService {
+
+    /** Number of listing pages processed per category. */
+    private static final int MAX_PAGES = 5;
+
+    private final CategoryPageParsingService categoryPageParsingService;
+
+    public ParsingService(CategoryPageParsingService categoryPageParsingService) {
+        this.categoryPageParsingService = categoryPageParsingService;
+    }
+
+    /**
+     * Processes pages 1..{@link #MAX_PAGES} of the given category and logs the
+     * elapsed time in seconds.
+     *
+     * The page limit is tracked per call, so every invocation processes its
+     * pages; no state is carried over from a previous call.
+     *
+     * @param url category URL without the page query
+     */
+    @Override
+    public void processCategory(String url) {
+        long startTime = System.currentTimeMillis();
+        log.info("Начало обработки категории: {}", url);
+        var errors = new ArrayList<String>();
+
+        try {
+            for (int pageIndex = 1; pageIndex <= MAX_PAGES; pageIndex++) {
+                categoryPageParsingService.parseCategoryPage(pageIndex, url, errors);
+            }
+        } catch (InterruptedException e) {
+            Thread.currentThread().interrupt(); // keep the interrupt visible to the caller
+            throw new RuntimeException(e);
+        }
+        log.info("Время выполнения {} ", (System.currentTimeMillis() - startTime) / 1000);
+    }
+
+}
\ No newline at end of file
diff --git a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/scheduler/OzonProductUpdater.java b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/scheduler/OzonProductUpdater.java
index e6b39f5..218ab96 100644
--- a/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/scheduler/OzonProductUpdater.java
+++ b/parsing-service/src/main/java/ru/pricepulse/parsingservice/service/scheduler/OzonProductUpdater.java
@@ -4,20 +4,19 @@ import lombok.RequiredArgsConstructor;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
-import ru.pricepulse.parsingservice.service.impl.parsing.OzonParsingService;
+import ru.pricepulse.parsingservice.service.marketplace.ozon.parsing.ParsingService;
@Service
@RequiredArgsConstructor
public class OzonProductUpdater {
private final OzonConfigProperties properties;
-
- private final OzonParsingService parsingService;
+ private final ParsingService ozonParsingService;
@Scheduled(fixedRate = 3600000)
public void updateOzonProducts() {
- properties.getCategoriesUrls().forEach(
- parsingService::processCategory);
+ properties.getCategoriesUrls()
+ .forEach(ozonParsingService::processCategory);
}
}
diff --git a/parsing-service/src/main/resources/application.yml b/parsing-service/src/main/resources/application.yml
index e9570ec..6faaa78 100644
--- a/parsing-service/src/main/resources/application.yml
+++ b/parsing-service/src/main/resources/application.yml
@@ -15,10 +15,12 @@ spring:
liquibase:
change-log: classpath:/db/changelog/master.yml
kafka:
-
+selenium:
marketplace:
ozon:
categories-urls:
- https://www.ozon.ru/category/noutbuki-15692
-
+logging:
+ pattern:
+ console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
\ No newline at end of file