Feature/ozon-parser-v0.1: Версия рабочая, парсит быстро + api
This commit is contained in:
parent
1df7dc94b8
commit
171cc650f1
@ -1,6 +1,6 @@
|
|||||||
<component name="ProjectRunConfigurationManager">
|
<component name="ProjectRunConfigurationManager">
|
||||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||||
<option name="ACTIVE_PROFILES" value="dev,headless,postgres_stat" />
|
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
|
||||||
<option name="SCHEDULED_DEBUGGER" value="true" />
|
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||||
<envs>
|
<envs>
|
||||||
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
||||||
|
@ -38,6 +38,7 @@ dependencies {
|
|||||||
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
|
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
|
||||||
implementation 'org.apache.commons:commons-pool2:2.12.0'
|
implementation 'org.apache.commons:commons-pool2:2.12.0'
|
||||||
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
|
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
|
||||||
|
implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'
|
||||||
|
|
||||||
compileOnly 'org.projectlombok:lombok'
|
compileOnly 'org.projectlombok:lombok'
|
||||||
|
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.time.format.DateTimeFormatter;
|
||||||
|
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
import org.springframework.context.annotation.Configuration;
|
import org.springframework.context.annotation.Configuration;
|
||||||
|
|
||||||
import java.time.format.DateTimeFormatter;
|
|
||||||
|
|
||||||
@Configuration
|
@Configuration
|
||||||
public class DateTimeFormatterConfig {
|
public class DateTimeFormatterConfig {
|
||||||
|
|
||||||
|
@ -1,14 +1,14 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.http.HttpRequest;
|
import org.springframework.http.HttpRequest;
|
||||||
import org.springframework.http.client.ClientHttpRequestExecution;
|
import org.springframework.http.client.ClientHttpRequestExecution;
|
||||||
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
import org.springframework.http.client.ClientHttpRequestInterceptor;
|
||||||
import org.springframework.http.client.ClientHttpResponse;
|
import org.springframework.http.client.ClientHttpResponse;
|
||||||
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
|
public class DynamicProxyInterceptor implements ClientHttpRequestInterceptor {
|
||||||
|
|
||||||
|
@ -1,11 +1,11 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
|
|
||||||
import java.net.InetSocketAddress;
|
import java.net.InetSocketAddress;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
public class ProxyProvider {
|
public class ProxyProvider {
|
||||||
private static final List<String> proxies = List.of(
|
private static final List<String> proxies = List.of(
|
||||||
|
@ -1,10 +1,10 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
import org.springframework.stereotype.Component;
|
|
||||||
|
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Random;
|
import java.util.Random;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
public class UserAgentProvider {
|
public class UserAgentProvider {
|
||||||
private static final List<String> userAgents = List.of(
|
private static final List<String> userAgents = List.of(
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.net.InetSocketAddress;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.context.annotation.Bean;
|
import org.springframework.context.annotation.Bean;
|
||||||
@ -10,8 +12,6 @@ import org.springframework.web.reactive.function.client.WebClient;
|
|||||||
import reactor.netty.http.client.HttpClient;
|
import reactor.netty.http.client.HttpClient;
|
||||||
import reactor.netty.transport.ProxyProvider;
|
import reactor.netty.transport.ProxyProvider;
|
||||||
|
|
||||||
import java.net.InetSocketAddress;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Configuration
|
@Configuration
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
package ru.pricepulse.parsingservice.config;
|
package ru.pricepulse.parsingservice.config;
|
||||||
|
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import io.github.bonigarcia.wdm.WebDriverManager;
|
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.chrome.ChromeDriver;
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
@ -10,9 +13,6 @@ import org.springframework.context.annotation.Configuration;
|
|||||||
import org.springframework.context.annotation.Profile;
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.context.annotation.Scope;
|
import org.springframework.context.annotation.Scope;
|
||||||
|
|
||||||
import java.util.HashMap;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@Configuration
|
@Configuration
|
||||||
public class WebDriverConfig {
|
public class WebDriverConfig {
|
||||||
|
|
||||||
|
@ -4,11 +4,13 @@ import lombok.Getter;
|
|||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
@ConfigurationProperties(prefix = "marketplace.ozon")
|
@ConfigurationProperties(prefix = "marketplace.ozon")
|
||||||
public class OzonConfigProperties {
|
public class OzonConfigProperties {
|
||||||
private List<String> categoriesUrls;
|
|
||||||
|
private Integer maxThreads;
|
||||||
|
|
||||||
|
private Integer maxNumOfPagesOnScreen;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
package ru.pricepulse.parsingservice.enumeration;
|
package ru.pricepulse.parsingservice.enumeration;
|
||||||
|
|
||||||
public enum Category {
|
public enum Category {
|
||||||
LAPTOP
|
LAPTOP,
|
||||||
|
SMARTPHONE
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,31 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
|
||||||
|
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
|
||||||
|
public enum OzonCategory {
|
||||||
|
|
||||||
|
LAPTOP ("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
|
||||||
|
|
||||||
|
SMARTPHONE ("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);
|
||||||
|
|
||||||
|
private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";
|
||||||
|
|
||||||
|
private final String categoryUrl;
|
||||||
|
|
||||||
|
private final Category mappedCategory;
|
||||||
|
|
||||||
|
OzonCategory(String categoryUrl,
|
||||||
|
Category mappedCategory) {
|
||||||
|
this.categoryUrl = categoryUrl;
|
||||||
|
this.mappedCategory = mappedCategory;
|
||||||
|
}
|
||||||
|
|
||||||
|
public String getCategoryUrl() {
|
||||||
|
return BASE_CATEGORY_URL + categoryUrl;
|
||||||
|
}
|
||||||
|
|
||||||
|
public Category getMappedCategory() {
|
||||||
|
return mappedCategory;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,62 +1,65 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.pool;
|
package ru.pricepulse.parsingservice.ozon_parser.pool;
|
||||||
|
|
||||||
|
import java.util.NoSuchElementException;
|
||||||
import java.util.Queue;
|
import java.util.Queue;
|
||||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||||
|
|
||||||
|
import jakarta.annotation.PreDestroy;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.chrome.ChromeDriver;
|
|
||||||
import org.openqa.selenium.chrome.ChromeOptions;
|
|
||||||
import org.springframework.beans.factory.ObjectFactory;
|
import org.springframework.beans.factory.ObjectFactory;
|
||||||
import org.springframework.context.annotation.Profile;
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Component
|
||||||
@Profile("ozon")
|
@Profile("ozon")
|
||||||
public class WebDriverPool {
|
public class WebDriverPool {
|
||||||
|
|
||||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();
|
||||||
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>(); // Список занятых драйверов
|
|
||||||
|
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();
|
||||||
|
|
||||||
private final ObjectFactory<WebDriver> webDriverFactory;
|
private final ObjectFactory<WebDriver> webDriverFactory;
|
||||||
|
|
||||||
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory) {
|
private final OzonConfigProperties ozonConfigProperties;
|
||||||
this.webDriverFactory = webDriverFactory;
|
|
||||||
int poolSize = 12;
|
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
|
||||||
|
OzonConfigProperties ozonConfigProperties) {
|
||||||
|
this.webDriverFactory = webDriverFactory;
|
||||||
|
this.ozonConfigProperties = ozonConfigProperties;
|
||||||
|
int poolSize = ozonConfigProperties.getMaxThreads();
|
||||||
|
|
||||||
// Инициализация пула с указанным количеством драйверов
|
|
||||||
for (int i = 0; i < poolSize; i++) {
|
for (int i = 0; i < poolSize; i++) {
|
||||||
availableDrivers.add(createNewDriver());
|
availableDrivers.add(createNewDriver());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Метод для создания нового экземпляра WebDriver
|
|
||||||
private WebDriver createNewDriver() {
|
private WebDriver createNewDriver() {
|
||||||
return webDriverFactory.getObject();
|
return webDriverFactory.getObject();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Метод для заимствования драйвера
|
|
||||||
public WebDriver borrowDriver() {
|
public WebDriver borrowDriver() {
|
||||||
WebDriver driver = availableDrivers.poll(); // Получаем драйвер из доступных
|
WebDriver driver = availableDrivers.poll();
|
||||||
if (driver != null) {
|
if (driver != null) {
|
||||||
busyDrivers.add(driver); // Добавляем драйвер в занятые
|
busyDrivers.add(driver);
|
||||||
|
return driver;
|
||||||
}
|
}
|
||||||
return driver; // Возвращаем драйвер
|
throw new NoSuchElementException("No available driver found");
|
||||||
}
|
}
|
||||||
|
|
||||||
// Метод для возврата драйвера в пул
|
|
||||||
public void returnDriver(WebDriver driver) {
|
public void returnDriver(WebDriver driver) {
|
||||||
busyDrivers.remove(driver); // Убираем драйвер из занятых
|
busyDrivers.remove(driver);
|
||||||
availableDrivers.add(driver); // Возвращаем драйвер в доступные
|
availableDrivers.add(driver);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Метод для закрытия всех драйверов в пуле
|
@PreDestroy
|
||||||
public void shutdownPool() {
|
public void shutdownPool() {
|
||||||
// Закрываем доступные драйверы
|
|
||||||
for (WebDriver driver : availableDrivers) {
|
for (WebDriver driver : availableDrivers) {
|
||||||
driver.quit();
|
driver.quit();
|
||||||
}
|
}
|
||||||
// Закрываем занятые драйверы
|
|
||||||
for (WebDriver driver : busyDrivers) {
|
for (WebDriver driver : busyDrivers) {
|
||||||
driver.quit();
|
driver.quit();
|
||||||
}
|
}
|
||||||
|
@ -1,7 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
|
||||||
|
|
||||||
public interface MarketplaceParsingService {
|
|
||||||
|
|
||||||
void processCategory(String categoryUrl);
|
|
||||||
|
|
||||||
}
|
|
@ -0,0 +1,17 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class OzonService {
|
||||||
|
|
||||||
|
public OzonCategory[] getCategories() {
|
||||||
|
return OzonCategory.values();
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
|||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
|
|
||||||
|
import lombok.Builder;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
@ -9,6 +10,7 @@ import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
|||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
|
@Builder
|
||||||
public class ParsedData {
|
public class ParsedData {
|
||||||
|
|
||||||
private Marketplace marketplace;
|
private Marketplace marketplace;
|
||||||
|
@ -1,18 +1,14 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.By;
|
import org.openqa.selenium.By;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.WebElement;
|
|
||||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
|
||||||
@ -63,13 +59,13 @@ public class CategoryPage implements MarketplacePage {
|
|||||||
|
|
||||||
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
||||||
.findElements(By.tagName("span")).getFirst().getText());
|
.findElements(By.tagName("span")).getFirst().getText());
|
||||||
var parsedData = new ParsedData();
|
/*var parsedData = new ParsedData();
|
||||||
parsedData.setUrl(productUrl);
|
parsedData.setUrl(productUrl);
|
||||||
parsedData.setBrand(productBrand);
|
parsedData.setBrand(productBrand);
|
||||||
parsedData.setProductName(productName);
|
parsedData.setProductName(productName);
|
||||||
parsedData.setImageUrl(productImageUrl);
|
parsedData.setImageUrl(productImageUrl);
|
||||||
parsedData.setPrice(productPrice);
|
parsedData.setPrice(productPrice);
|
||||||
products.add(parsedData);
|
products.add(parsedData);*/
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
|
@ -10,11 +10,11 @@ public class NoContentPage {
|
|||||||
|
|
||||||
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||||
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
||||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
|
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
|
||||||
|
|
||||||
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||||
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
||||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
|
||||||
|
|
||||||
private WebDriver driver;
|
private WebDriver driver;
|
||||||
|
|
||||||
@ -27,7 +27,7 @@ public class NoContentPage {
|
|||||||
|
|
||||||
public boolean isLoaded() {
|
public boolean isLoaded() {
|
||||||
try {
|
try {
|
||||||
return driver.findElement(searchResults) != null
|
return driver.findElement(searchResultsError) != null
|
||||||
|| driver.findElement(errorText) != null
|
|| driver.findElement(errorText) != null
|
||||||
|| driver.findElement(notFoundText) != null;
|
|| driver.findElement(notFoundText) != null;
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
|
@ -0,0 +1,228 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.jsoup.Jsoup;
|
||||||
|
import org.jsoup.nodes.Document;
|
||||||
|
import org.jsoup.nodes.Element;
|
||||||
|
import org.jsoup.select.Elements;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class OzonCategoryPage {
|
||||||
|
|
||||||
|
private static final String OZON_MAIN_LINK = "https://www.ozon.ru";
|
||||||
|
|
||||||
|
public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";
|
||||||
|
|
||||||
|
public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
|
||||||
|
|
||||||
|
public static final int INDEX_OF_PRODUCT_PRICE = 0;
|
||||||
|
|
||||||
|
public static final int INDEX_OF_PRODUCT_BRAND = 1;
|
||||||
|
|
||||||
|
public static final int INDEX_OF_PRODUCT_NAME = 2;
|
||||||
|
|
||||||
|
private final Document document;
|
||||||
|
|
||||||
|
public OzonCategoryPage(String pageHtml) {
|
||||||
|
this.document = Jsoup.parse(pageHtml);
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<ParsedData> getProducts(Category category) {
|
||||||
|
List<ParsedData> products = new ArrayList<>();
|
||||||
|
|
||||||
|
Elements searchResultsDivs = getSearchResultsDivs();
|
||||||
|
if (searchResultsDivs.isEmpty()) {
|
||||||
|
return List.of();
|
||||||
|
}
|
||||||
|
log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
|
||||||
|
|
||||||
|
for (Element searchResultsDiv : searchResultsDivs) {
|
||||||
|
Elements productsDivs = getProductsDivs(searchResultsDiv);
|
||||||
|
List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
|
||||||
|
List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
|
||||||
|
products.addAll(parsedProductsData);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*try {
|
||||||
|
|
||||||
|
for (Element searchResultsDiv : searchResultsDivs) {
|
||||||
|
var productDivs = searchResultsDiv.select("> div > div");
|
||||||
|
for (Element productDiv : productDivs) {
|
||||||
|
Elements productDataDivs = productDivs.select("> div > *");
|
||||||
|
if (productDataDivs.select("> *").isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
productDataDivs.removeLast();
|
||||||
|
Element productUrlAndImageUrlA = productDataDivs.first();
|
||||||
|
Element productDataDiv = productDataDivs.last();
|
||||||
|
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||||
|
try {
|
||||||
|
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||||
|
.select("span").text().toLowerCase()
|
||||||
|
.contains("осталось")) {
|
||||||
|
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND).select("> span");
|
||||||
|
|
||||||
|
String productUrl = OZON_MAIN_LINK + productUrlAndImageUrlA.attr("href").replaceAll("\\?.*$", "");
|
||||||
|
String productImageUrl = productUrlAndImageUrlA.select("> div > div")
|
||||||
|
.first().getElementsByTag("img")
|
||||||
|
.first().attr("src");
|
||||||
|
|
||||||
|
BigDecimal productPrice;
|
||||||
|
try {
|
||||||
|
productPrice = parseOzonPriceToBigDecimal(
|
||||||
|
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||||
|
.first().text());
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error("не удалось распарсить цену");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
String productBrand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||||
|
String productName = productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME).select("> div > span").text();
|
||||||
|
|
||||||
|
ParsedData parsedData = new ParsedData();
|
||||||
|
parsedData.setCategory(category);
|
||||||
|
parsedData.setMarketplace(Marketplace.OZON);
|
||||||
|
parsedData.setUrl(productUrl);
|
||||||
|
parsedData.setImageUrl(productImageUrl);
|
||||||
|
parsedData.setPrice(productPrice);
|
||||||
|
parsedData.setBrand(productBrand);
|
||||||
|
parsedData.setProductName(productName);
|
||||||
|
products.add(parsedData);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.error(e.getMessage(), e);
|
||||||
|
}*/
|
||||||
|
return products;
|
||||||
|
}
|
||||||
|
|
||||||
|
private Elements getSearchResultsDivs() {
|
||||||
|
try {
|
||||||
|
return document.select(SEARCH_RESULTS_CSS_SELECTOR);
|
||||||
|
} catch (Exception e) {
|
||||||
|
log.warn("Не удалось достать блоки searchResultsV2");
|
||||||
|
return new Elements();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private Elements getProductsDivs(Element searchResultsDiv) {
|
||||||
|
return searchResultsDiv.select("> div > div");
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Elements> getAllProductDataDivs(Elements productsDivs) {
|
||||||
|
List<Elements> allProductDataDivs = new ArrayList<>();
|
||||||
|
for (Element productDiv : productsDivs) {
|
||||||
|
Elements productDataDivs = productDiv.select("> div > *");
|
||||||
|
if (productDataDivs.select("> *").isEmpty()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
removeAddInFavouriteDiv(productDataDivs);
|
||||||
|
allProductDataDivs.add(productDataDivs);
|
||||||
|
}
|
||||||
|
return allProductDataDivs;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeAddInFavouriteDiv(Elements productDataDivs) {
|
||||||
|
productDataDivs.removeLast();
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
|
||||||
|
Category category) {
|
||||||
|
List<ParsedData> parsedData = new ArrayList<>();
|
||||||
|
for (Elements productDataDivs : allProductDataDivs) {
|
||||||
|
try {
|
||||||
|
ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
|
||||||
|
parsedData.add(parsedDataItem);
|
||||||
|
} catch (Exception e) {
|
||||||
|
//log.error(e.getMessage(), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return parsedData;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ParsedData getParsedDataItem(Elements productDataDivs,
|
||||||
|
Category category) {
|
||||||
|
removeExtraDivIfExists(productDataDivs);
|
||||||
|
return ParsedData.builder()
|
||||||
|
.category(category)
|
||||||
|
.marketplace(Marketplace.OZON)
|
||||||
|
.url(extractUrl(productDataDivs))
|
||||||
|
.imageUrl(extractImageUrl(productDataDivs))
|
||||||
|
.brand(extractBrand(productDataDivs))
|
||||||
|
.productName(extractProductName(productDataDivs))
|
||||||
|
.price(extractPrice(productDataDivs))
|
||||||
|
.build();
|
||||||
|
}
|
||||||
|
|
||||||
|
private void removeExtraDivIfExists(Elements productDataDivs) {
|
||||||
|
Element productDataDiv = productDataDivs.last();
|
||||||
|
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||||
|
try {
|
||||||
|
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||||
|
.select("span").text().toLowerCase()
|
||||||
|
.contains("осталось")) {
|
||||||
|
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractUrl(Elements productDataDivs) {
|
||||||
|
Element productUrlA = productDataDivs.first();
|
||||||
|
return OZON_MAIN_LINK + productUrlA
|
||||||
|
.attr("href").replaceAll("\\?.*$", "");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractImageUrl(Elements productDataDivs) {
|
||||||
|
Element productImageUrlA = productDataDivs.first();
|
||||||
|
return productImageUrlA.select("> div > div")
|
||||||
|
.first().getElementsByTag("img")
|
||||||
|
.first().attr("src");
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractBrand(Elements productDataDivs) {
|
||||||
|
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||||
|
//log.info(productDataInnerDivs.html());
|
||||||
|
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
|
||||||
|
.select("> span");
|
||||||
|
String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||||
|
if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
|
||||||
|
return "БРЕНД_НЕ_УКАЗАН";
|
||||||
|
}
|
||||||
|
return brand;
|
||||||
|
}
|
||||||
|
|
||||||
|
private String extractProductName(Elements productDataDivs) {
|
||||||
|
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||||
|
return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
|
||||||
|
.select("> div > span").text();
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigDecimal extractPrice(Elements productDataDivs) {
|
||||||
|
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||||
|
return parseOzonPriceToBigDecimal(
|
||||||
|
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||||
|
.first().text());
|
||||||
|
}
|
||||||
|
|
||||||
|
private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
|
||||||
|
return productDataDivs.last().select("> *");
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
|
||||||
|
String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
|
||||||
|
return new BigDecimal(cleanedString);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -2,7 +2,6 @@ package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
|||||||
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.temporal.ChronoUnit;
|
import java.time.temporal.ChronoUnit;
|
||||||
import java.util.List;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
@ -11,36 +10,30 @@ import org.springframework.context.annotation.Profile;
|
|||||||
import org.springframework.retry.annotation.Recover;
|
import org.springframework.retry.annotation.Recover;
|
||||||
import org.springframework.retry.annotation.Retryable;
|
import org.springframework.retry.annotation.Retryable;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
|
||||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
||||||
import ru.pricepulse.parsingservice.service.ProductService;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@Profile("ozon")
|
@Profile("ozon")
|
||||||
public class OzonCategoryPageParsingService {
|
public class OzonHtmlFetcher {
|
||||||
|
|
||||||
private final WebDriverPool webDriverPool;
|
private final WebDriverPool webDriverPool;
|
||||||
|
|
||||||
private final ProductService productService;
|
private final PageScroller pageScroller;
|
||||||
|
|
||||||
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
|
public OzonHtmlFetcher(WebDriverPool webDriverPool,
|
||||||
ProductService productService) {
|
PageScroller pageScroller) {
|
||||||
this.webDriverPool = webDriverPool;
|
this.webDriverPool = webDriverPool;
|
||||||
this.productService = productService;
|
this.pageScroller = pageScroller;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Retryable(maxAttempts = 10, recover = "recover")
|
@Retryable(maxAttempts = 10, recover = "recover")
|
||||||
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
public String fetchPageHtml(String pageUrl,
|
||||||
|
AtomicBoolean lastPageInCategory) {
|
||||||
var driver = webDriverPool.borrowDriver();
|
var driver = webDriverPool.borrowDriver();
|
||||||
if (driver == null) {
|
|
||||||
throw new RuntimeException();
|
|
||||||
}
|
|
||||||
try {
|
try {
|
||||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||||
driver.get(pageUrl);
|
driver.get(pageUrl);
|
||||||
@ -48,64 +41,63 @@ public class OzonCategoryPageParsingService {
|
|||||||
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
||||||
var categoryPage = new CategoryPage(driver, wait);
|
var categoryPage = new CategoryPage(driver, wait);
|
||||||
var noContentPage = new NoContentPage(driver, wait);
|
var noContentPage = new NoContentPage(driver, wait);
|
||||||
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
|
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
|
||||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
checkAceesDeniedAndResolve(accessDeniedPage);
|
||||||
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
|
|
||||||
resolveAccessDeniedPage(accessDeniedPage);
|
|
||||||
log.info("Проблема успешно решена: {}", pageUrl);
|
|
||||||
}
|
|
||||||
if (noContentPage.isLoaded()) {
|
|
||||||
log.info("Страница не найдена");
|
|
||||||
stopFlag.set(true);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
pageScroller.scrollToEndOfPage(driver);
|
||||||
List<ParsedData> parsedData;
|
return driver.getPageSource();
|
||||||
try {
|
} catch (Exception e) {
|
||||||
parsedData = categoryPage.getParsedProducts();
|
log.error(e.getMessage(), e);
|
||||||
for (ParsedData data : parsedData) {
|
throw new RuntimeException(e);
|
||||||
data.setMarketplace(Marketplace.OZON);
|
|
||||||
data.setCategory(category);
|
|
||||||
}
|
|
||||||
productService.saveBatch(parsedData);
|
|
||||||
} catch (Exception e) {
|
|
||||||
throw new Exception(e);
|
|
||||||
}
|
|
||||||
webDriverPool.returnDriver(driver);
|
|
||||||
} catch (Exception ignored) {
|
|
||||||
throw new RuntimeException(ignored);
|
|
||||||
} finally {
|
} finally {
|
||||||
webDriverPool.returnDriver(driver);
|
webDriverPool.returnDriver(driver);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
||||||
CategoryPage categoryPage) {
|
CategoryPage categoryPage,
|
||||||
|
NoContentPage noContentPage,
|
||||||
|
AtomicBoolean stopFlag) {
|
||||||
log.debug("Проверка что страница 'Доступ ограничен'");
|
log.debug("Проверка что страница 'Доступ ограничен'");
|
||||||
try {
|
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
return true;
|
||||||
return true;
|
|
||||||
}
|
|
||||||
} catch (Exception e) {
|
|
||||||
log.debug("Ошибка проверки", e);
|
|
||||||
}
|
}
|
||||||
log.debug("Проверка что страница 'Страница категории'");
|
log.debug("Проверка что страница 'Страница категории'");
|
||||||
if (checkCategoryPage(categoryPage)) {
|
if (checkCategoryPage(categoryPage)) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (checkNoContentPage(noContentPage)) {
|
||||||
|
stopFlag.set(true);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
log.debug("Проверка загрузки страницы неудачна");
|
log.debug("Проверка загрузки страницы неудачна");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
|
||||||
return accessDeniedPage.isLoaded();
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean checkCategoryPage(CategoryPage categoryPage) {
|
private boolean checkCategoryPage(CategoryPage categoryPage) {
|
||||||
return categoryPage.isLoaded();
|
return categoryPage.isLoaded();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkAceesDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
|
||||||
|
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||||
|
log.info("Доступ ограничен, пробуем решить проблему");
|
||||||
|
resolveAccessDeniedPage(accessDeniedPage);
|
||||||
|
log.info("Проблема успешно решена");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkNoContentPage(NoContentPage noContentPage) {
|
||||||
|
if (noContentPage.isLoaded()) {
|
||||||
|
log.info("Страница не найдена");
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||||
|
return accessDeniedPage.isLoaded();
|
||||||
|
}
|
||||||
|
|
||||||
private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||||
accessDeniedPage.clickReloadButton();
|
accessDeniedPage.clickReloadButton();
|
||||||
}
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
|
||||||
|
|
||||||
|
@Service
|
||||||
|
public class OzonPageParser {
|
||||||
|
|
||||||
|
public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
|
||||||
|
Category category) {
|
||||||
|
OzonCategoryPage categoryPage = new OzonCategoryPage(pageSource);
|
||||||
|
return categoryPage.getProducts(category);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,65 +1,113 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.List;
|
||||||
import java.util.concurrent.ArrayBlockingQueue;
|
import java.util.Map;
|
||||||
import java.util.concurrent.BlockingQueue;
|
import java.util.Set;
|
||||||
|
import java.util.concurrent.ConcurrentHashMap;
|
||||||
import java.util.concurrent.ExecutorService;
|
import java.util.concurrent.ExecutorService;
|
||||||
import java.util.concurrent.Executors;
|
import java.util.concurrent.Executors;
|
||||||
import java.util.concurrent.Semaphore;
|
import java.util.concurrent.Semaphore;
|
||||||
import java.util.concurrent.ThreadPoolExecutor;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.slf4j.MDC;
|
||||||
import org.springframework.context.annotation.Profile;
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
import ru.pricepulse.parsingservice.service.ProductService;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@Profile("ozon")
|
@Profile("ozon")
|
||||||
public class OzonParsingService implements MarketplaceParsingService {
|
public class OzonParsingService {
|
||||||
|
|
||||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
private final Map<String, Set<String>> urlCache;
|
||||||
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
|
|
||||||
private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
|
|
||||||
|
|
||||||
private final OzonCategoryPageParsingService categoryPageParsingService;
|
private final ExecutorService pageExecutorService;
|
||||||
|
|
||||||
|
private final Semaphore semaphore;
|
||||||
|
|
||||||
|
private final OzonHtmlFetcher categoryPageParsingService;
|
||||||
|
|
||||||
|
private final OzonConfigProperties ozonConfigProperties;
|
||||||
|
|
||||||
|
private final OzonPageParser ozonPageParser;
|
||||||
|
|
||||||
|
private final ProductService productService;
|
||||||
|
|
||||||
|
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
|
||||||
|
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
|
||||||
|
ProductService productService) {
|
||||||
|
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
|
||||||
|
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
|
||||||
|
this.urlCache = new ConcurrentHashMap<>();
|
||||||
|
for (OzonCategory category : OzonCategory.values()) {
|
||||||
|
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
|
||||||
|
}
|
||||||
|
|
||||||
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
|
|
||||||
this.categoryPageParsingService = categoryPageParsingService;
|
this.categoryPageParsingService = categoryPageParsingService;
|
||||||
|
this.ozonConfigProperties = ozonConfigProperties;
|
||||||
|
this.ozonPageParser = ozonPageParser;
|
||||||
|
this.productService = productService;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void processCategory(String url) {
|
public void startProcessing() {
|
||||||
int pageIndex = 1;
|
for (OzonCategory category : OzonCategory.values()) {
|
||||||
|
log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", category);
|
||||||
|
processCategory(category);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while (!stopFlag.get()) {
|
private void processCategory(OzonCategory category) {
|
||||||
|
int pageIndex = 1;
|
||||||
|
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
|
||||||
|
while (!lastPageInCategory.get()) {
|
||||||
try {
|
try {
|
||||||
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
|
semaphore.acquire();
|
||||||
|
|
||||||
int finalPageIndex = pageIndex;
|
int finalPageIndex = pageIndex;
|
||||||
String pageUrl = url + "&page=" + finalPageIndex;
|
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
|
||||||
|
|
||||||
pageExecutorService.submit(() -> {
|
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
|
||||||
try {
|
|
||||||
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
|
|
||||||
} finally {
|
|
||||||
semaphore.release(); // Освобождаем разрешение после завершения задачи
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
++pageIndex;
|
|
||||||
|
|
||||||
|
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
|
||||||
} catch (InterruptedException e) {
|
} catch (InterruptedException e) {
|
||||||
Thread.currentThread().interrupt();
|
Thread.currentThread().interrupt();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
if (lastPageInCategory.get()) {
|
||||||
|
log.info("Достигли последней страницы категории");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
if (stopFlag.get()) {
|
private void processCategoryPage(String pageUrl,
|
||||||
log.info("Конец парсинга категории");
|
OzonCategory category,
|
||||||
pageExecutorService.shutdownNow();
|
AtomicBoolean lastPageInCategory) {
|
||||||
|
try {
|
||||||
|
MDC.put("pageUrl", pageUrl);
|
||||||
|
String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
|
||||||
|
List<ParsedData> parsedProducts =
|
||||||
|
ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
|
||||||
|
log.info("""
|
||||||
|
|
||||||
|
КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
|
||||||
|
КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
|
||||||
|
|
||||||
|
""", parsedProducts.size());
|
||||||
|
if (urlCache.size() > 1000000) {
|
||||||
|
urlCache.clear();
|
||||||
|
}
|
||||||
|
Set<String> categoryCachecUrl = urlCache.get(category.getCategoryUrl());
|
||||||
|
List<ParsedData> uniqueData = parsedProducts.stream()
|
||||||
|
.filter(data -> categoryCachecUrl.add(data.getUrl()))
|
||||||
|
.toList();
|
||||||
|
productService.saveBatch(uniqueData);
|
||||||
|
} finally {
|
||||||
|
MDC.clear();
|
||||||
|
semaphore.release();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -0,0 +1,56 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.util.concurrent.atomic.AtomicLong;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.JavascriptExecutor;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class PageScroller {
|
||||||
|
|
||||||
|
private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
|
||||||
|
|
||||||
|
private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";
|
||||||
|
|
||||||
|
public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
|
||||||
|
JavascriptExecutor js = (JavascriptExecutor) driver;
|
||||||
|
AtomicLong lastHeight = new AtomicLong((long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT));
|
||||||
|
int attemptsLimit = 100;
|
||||||
|
log.info("Начинаем пролистывать страницу до конца");
|
||||||
|
while (true) {
|
||||||
|
js.executeScript(SCROLL_TO_PAGE_HEIGHT);
|
||||||
|
|
||||||
|
long newHeight = (long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT);
|
||||||
|
|
||||||
|
try {
|
||||||
|
var nextPageButtons = driver.findElements(By.cssSelector("div[data-widget='megaPaginator'] > div")).get(1)
|
||||||
|
.findElement(By.cssSelector(":scope > div > div > div"))
|
||||||
|
.findElements(By.tagName("a"));
|
||||||
|
|
||||||
|
if (nextPageButtons != null && newHeight > lastHeight.get()) {
|
||||||
|
log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (Exception ignored) {}
|
||||||
|
|
||||||
|
|
||||||
|
if (newHeight > lastHeight.get()) {
|
||||||
|
attemptsLimit = 100;
|
||||||
|
lastHeight.set(newHeight);
|
||||||
|
} else {
|
||||||
|
attemptsLimit--;
|
||||||
|
Thread.sleep(1000);
|
||||||
|
if (attemptsLimit == 0) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -4,7 +4,6 @@ import lombok.RequiredArgsConstructor;
|
|||||||
import org.springframework.context.annotation.Profile;
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@ -12,13 +11,11 @@ import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingServi
|
|||||||
@Profile("ozon")
|
@Profile("ozon")
|
||||||
public class OzonProductUpdater {
|
public class OzonProductUpdater {
|
||||||
|
|
||||||
private final OzonConfigProperties properties;
|
|
||||||
private final OzonParsingService ozonParsingService;
|
private final OzonParsingService ozonParsingService;
|
||||||
|
|
||||||
@Scheduled(fixedRate = 3600000)
|
@Scheduled(cron = "0 0 0,6,12,18 * * *")
|
||||||
public void updateOzonProducts() {
|
public void updateOzonProducts() {
|
||||||
properties.getCategoriesUrls()
|
ozonParsingService.startProcessing();
|
||||||
.forEach(ozonParsingService::processCategory);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,4 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.task;
|
||||||
|
|
||||||
|
/**
 * Empty class: it declares no fields or methods.
 * NOTE(review): presumably a placeholder for a future parsing-task abstraction — confirm or remove.
 */
public class OzonParsingTask {
|
||||||
|
}
|
@ -1,17 +1,21 @@
|
|||||||
package ru.pricepulse.parsingservice.persistence.entity;
|
package ru.pricepulse.parsingservice.persistence.entity;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
import jakarta.persistence.Column;
|
import jakarta.persistence.Column;
|
||||||
import jakarta.persistence.EmbeddedId;
|
import jakarta.persistence.EmbeddedId;
|
||||||
import jakarta.persistence.Entity;
|
import jakarta.persistence.Entity;
|
||||||
import jakarta.persistence.PrePersist;
|
import jakarta.persistence.PrePersist;
|
||||||
import jakarta.persistence.Table;
|
import jakarta.persistence.Table;
|
||||||
import lombok.*;
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
import org.hibernate.proxy.HibernateProxy;
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
@Entity
|
@Entity
|
||||||
@ -52,7 +56,9 @@ public class PriceHistoryEntity {
|
|||||||
|
|
||||||
@PrePersist
|
@PrePersist
|
||||||
protected void onCreate() {
|
protected void onCreate() {
|
||||||
id.setDate(LocalDateTime.now());
|
if (id.getDate() == null) {
|
||||||
|
id.setDate(ZonedDateTime.now());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -1,17 +1,17 @@
|
|||||||
package ru.pricepulse.parsingservice.persistence.entity;
|
package ru.pricepulse.parsingservice.persistence.entity;
|
||||||
|
|
||||||
import jakarta.persistence.*;
|
import java.io.Serializable;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Embeddable;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import org.hibernate.proxy.HibernateProxy;
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
|
||||||
import java.io.Serializable;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
import java.time.OffsetDateTime;
|
|
||||||
import java.util.Objects;
|
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@ -23,7 +23,7 @@ public class PriceHistoryId implements Serializable {
|
|||||||
private String productUrl;
|
private String productUrl;
|
||||||
|
|
||||||
@Column(name = "date", nullable = false)
|
@Column(name = "date", nullable = false)
|
||||||
private LocalDateTime date;
|
private ZonedDateTime date;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final boolean equals(Object o) {
|
public final boolean equals(Object o) {
|
||||||
|
@ -1,14 +1,26 @@
|
|||||||
package ru.pricepulse.parsingservice.persistence.entity;
|
package ru.pricepulse.parsingservice.persistence.entity;
|
||||||
|
|
||||||
import jakarta.persistence.*;
|
|
||||||
import lombok.*;
|
|
||||||
import org.hibernate.proxy.HibernateProxy;
|
|
||||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
|
||||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
|
||||||
|
|
||||||
import java.time.LocalDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
|
import jakarta.persistence.Column;
|
||||||
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.EnumType;
|
||||||
|
import jakarta.persistence.Enumerated;
|
||||||
|
import jakarta.persistence.GeneratedValue;
|
||||||
|
import jakarta.persistence.GenerationType;
|
||||||
|
import jakarta.persistence.Id;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
|
import jakarta.persistence.Table;
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Builder;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.NoArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@Setter
|
@Setter
|
||||||
@Entity
|
@Entity
|
||||||
|
@ -1,8 +1,16 @@
|
|||||||
package ru.pricepulse.parsingservice.persistence.repository;
|
package ru.pricepulse.parsingservice.persistence.repository;
|
||||||
|
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
import org.springframework.data.jpa.repository.JpaRepository;
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
|
|
||||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
||||||
|
|
||||||
|
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
|
||||||
|
ZonedDateTime from,
|
||||||
|
ZonedDateTime to);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,15 +1,27 @@
|
|||||||
package ru.pricepulse.parsingservice.persistence.repository;
|
package ru.pricepulse.parsingservice.persistence.repository;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Optional;
|
||||||
|
|
||||||
|
import org.springframework.data.domain.Page;
|
||||||
|
import org.springframework.data.domain.Pageable;
|
||||||
import org.springframework.data.jpa.repository.JpaRepository;
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
|
import org.springframework.data.jpa.repository.Query;
|
||||||
|
import org.springframework.stereotype.Repository;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
|
||||||
import java.util.List;
|
@Repository
|
||||||
|
|
||||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||||
|
|
||||||
boolean existsByUrl(String url);
|
|
||||||
|
|
||||||
ProductEntity findByUrl(String url);
|
|
||||||
List<ProductEntity> findAllByUrlIn(List<String> urls);
|
List<ProductEntity> findAllByUrlIn(List<String> urls);
|
||||||
|
|
||||||
|
@Query("""
|
||||||
|
select p.url from ProductEntity p where p.url in :urls
|
||||||
|
""")
|
||||||
|
List<String> findSavedUrl(List<String> urls);
|
||||||
|
|
||||||
|
Optional<ProductEntity> findByUrl(String url);
|
||||||
|
|
||||||
|
Page<ProductEntity> findAllByCategory(Category category, Pageable pageable);
|
||||||
}
|
}
|
||||||
|
@ -1,19 +1,27 @@
|
|||||||
package ru.pricepulse.parsingservice.service;
|
package ru.pricepulse.parsingservice.service;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.time.ZonedDateTime;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
|
|
||||||
|
import jakarta.persistence.EntityNotFoundException;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.data.domain.Pageable;
|
||||||
import org.springframework.retry.annotation.Retryable;
|
import org.springframework.retry.annotation.Retryable;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
|
||||||
|
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@ -24,34 +32,55 @@ public class ProductService {
|
|||||||
|
|
||||||
private final ProductPriceRepository productPriceRepository;
|
private final ProductPriceRepository productPriceRepository;
|
||||||
|
|
||||||
|
private final ProductMapper productMapper;
|
||||||
|
|
||||||
|
private final PriceHistoryMapper priceHistoryMapper;
|
||||||
|
|
||||||
@Transactional
|
@Transactional
|
||||||
@Retryable
|
@Retryable
|
||||||
public void saveBatch(List<ParsedData> parsedData) {
|
public void saveBatch(List<ParsedData> parsedData) {
|
||||||
var products = new ArrayList<ProductEntity>();
|
List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
|
||||||
var prices = new ArrayList<PriceHistoryEntity>();
|
List<String> alreadySavedUrls = productRepository.findSavedUrl(productsUrls);
|
||||||
parsedData.forEach(product -> processParsedProduct(product, prices, products));
|
List<ProductEntity> products = parsedData.stream()
|
||||||
|
.filter(data -> !alreadySavedUrls.contains(data.getUrl()))
|
||||||
|
.map(this::getProduct)
|
||||||
|
.toList();
|
||||||
|
List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();
|
||||||
productRepository.saveAll(products);
|
productRepository.saveAll(products);
|
||||||
log.info("Сохранили пачку товаров {}", products.size());
|
log.info("Сохранили пачку товаров {}", products.size());
|
||||||
productPriceRepository.saveAll(prices);
|
productPriceRepository.saveAll(prices);
|
||||||
log.info("Сохранили историю цен {}", prices.size());
|
log.info("Сохранили историю цен {}", prices.size());
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processParsedProduct(ParsedData product,
|
@Transactional(readOnly = true)
|
||||||
ArrayList<PriceHistoryEntity> prices,
|
public ProductDto findByUrl(String productUrl) {
|
||||||
ArrayList<ProductEntity> products) {
|
var product = productRepository.findByUrl(productUrl).orElseThrow(EntityNotFoundException::new);
|
||||||
var priceHistoryEntity = getPriceHistory(product);
|
return productMapper.toProductDto(product);
|
||||||
prices.add(priceHistoryEntity);
|
}
|
||||||
if (productRepository.existsByUrl(product.getUrl())) {
|
|
||||||
log.info("Запись {} уже есть", product.getUrl());
|
@Transactional(readOnly = true)
|
||||||
return;
|
public PriceHistoryDto findPriceHistoryByRange(String productUrl, ZonedDateTime from, ZonedDateTime to) {
|
||||||
}
|
var priceHistory = productPriceRepository
|
||||||
var productEntity = getProduct(product);
|
.findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
|
||||||
products.add(productEntity);
|
return priceHistoryMapper.toPriceHistoryDto(priceHistory);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Transactional(readOnly = true)
|
||||||
|
public ProductsPageDto findAllProductsByPage(Category category,
|
||||||
|
Pageable pageable) {
|
||||||
|
var page = productRepository.findAllByCategory(category, pageable);
|
||||||
|
return new ProductsPageDto(
|
||||||
|
page.getNumberOfElements(),
|
||||||
|
page.getTotalPages(),
|
||||||
|
page.getNumber(),
|
||||||
|
page.getContent().stream().map(productMapper::toProductDto).toList()
|
||||||
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
||||||
var priceHistoryId = new PriceHistoryId();
|
var priceHistoryId = new PriceHistoryId();
|
||||||
priceHistoryId.setProductUrl(product.getUrl());
|
priceHistoryId.setProductUrl(product.getUrl());
|
||||||
|
priceHistoryId.setDate(ZonedDateTime.now());
|
||||||
var priceHistory = new PriceHistoryEntity();
|
var priceHistory = new PriceHistoryEntity();
|
||||||
priceHistory.setId(priceHistoryId);
|
priceHistory.setId(priceHistoryId);
|
||||||
priceHistory.setPrice(product.getPrice());
|
priceHistory.setPrice(product.getPrice());
|
||||||
@ -68,4 +97,5 @@ public class ProductService {
|
|||||||
productEntity.setImageUrl(product.getImageUrl());
|
productEntity.setImageUrl(product.getImageUrl());
|
||||||
return productEntity;
|
return productEntity;
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,20 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.dto;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;

import lombok.Getter;
import lombok.Setter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
public class PriceHistoryDto {
|
||||||
|
|
||||||
|
private final Map<ZonedDateTime, BigDecimal> priceHistory;
|
||||||
|
|
||||||
|
public PriceHistoryDto() {
|
||||||
|
this.priceHistory = new HashMap<>();
|
||||||
|
}
|
||||||
|
}
|
@ -0,0 +1,28 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.dto;
|
||||||
|
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.Setter;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ProductDto {
|
||||||
|
|
||||||
|
private final Long id;
|
||||||
|
|
||||||
|
private final Marketplace marketplace;
|
||||||
|
|
||||||
|
private final Category category;
|
||||||
|
|
||||||
|
private final String brand;
|
||||||
|
|
||||||
|
private final String productName;
|
||||||
|
|
||||||
|
private final String url;
|
||||||
|
|
||||||
|
private final String imageUrl;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.dto;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.AllArgsConstructor;
|
||||||
|
import lombok.Getter;
|
||||||
|
import lombok.Setter;
|
||||||
|
|
||||||
|
@Getter
|
||||||
|
@Setter
|
||||||
|
@AllArgsConstructor
|
||||||
|
public class ProductsPageDto {
|
||||||
|
|
||||||
|
private final int totalItems;
|
||||||
|
|
||||||
|
private final int totalPages;
|
||||||
|
|
||||||
|
private final int currentPage;
|
||||||
|
|
||||||
|
private final List<ProductDto> products;
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,19 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.mapper;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class PriceHistoryMapper {
|
||||||
|
|
||||||
|
public PriceHistoryDto toPriceHistoryDto (List<PriceHistoryEntity> priceHistory) {
|
||||||
|
var priceHistoryDto = new PriceHistoryDto();
|
||||||
|
priceHistory.forEach(item ->
|
||||||
|
priceHistoryDto.getPriceHistory().put(item.getId().getDate().withNano(0), item.getPrice()));
|
||||||
|
return priceHistoryDto;
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,22 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service.mapper;
|
||||||
|
|
||||||
|
import org.springframework.stereotype.Component;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||||
|
|
||||||
|
@Component
|
||||||
|
public class ProductMapper {
|
||||||
|
|
||||||
|
public ProductDto toProductDto(ProductEntity product) {
|
||||||
|
return new ProductDto(
|
||||||
|
product.getId(),
|
||||||
|
product.getMarketplace(),
|
||||||
|
product.getCategory(),
|
||||||
|
product.getBrand(),
|
||||||
|
product.getProductName(),
|
||||||
|
product.getUrl(),
|
||||||
|
product.getImageUrl()
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
package ru.pricepulse.parsingservice.service.scheduler;
|
||||||
|
|
||||||
import java.time.LocalDate;
|
import java.time.LocalDate;
|
||||||
import java.time.format.DateTimeFormatter;
|
import java.time.format.DateTimeFormatter;
|
@ -1,5 +1,7 @@
|
|||||||
package ru.pricepulse.parsingservice.web.handler;
|
package ru.pricepulse.parsingservice.web.handler;
|
||||||
|
|
||||||
|
import java.net.URI;
|
||||||
|
|
||||||
import jakarta.persistence.EntityNotFoundException;
|
import jakarta.persistence.EntityNotFoundException;
|
||||||
import jakarta.servlet.http.HttpServletRequest;
|
import jakarta.servlet.http.HttpServletRequest;
|
||||||
import org.springframework.http.HttpStatus;
|
import org.springframework.http.HttpStatus;
|
||||||
@ -8,8 +10,6 @@ import org.springframework.web.bind.annotation.ControllerAdvice;
|
|||||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||||
import org.springframework.web.bind.annotation.ResponseStatus;
|
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||||
|
|
||||||
import java.net.URI;
|
|
||||||
|
|
||||||
@ControllerAdvice
|
@ControllerAdvice
|
||||||
public class CommonExceptionHandler {
|
public class CommonExceptionHandler {
|
||||||
|
|
||||||
|
@ -1,9 +1,9 @@
|
|||||||
package ru.pricepulse.parsingservice.web.handler;
|
package ru.pricepulse.parsingservice.web.handler;
|
||||||
|
|
||||||
import org.springframework.http.HttpStatus;
|
|
||||||
|
|
||||||
import java.net.URI;
|
import java.net.URI;
|
||||||
|
|
||||||
|
import org.springframework.http.HttpStatus;
|
||||||
|
|
||||||
public record ErrorResponse (
|
public record ErrorResponse (
|
||||||
Integer statusCode,
|
Integer statusCode,
|
||||||
HttpStatus status,
|
HttpStatus status,
|
||||||
|
@ -0,0 +1,50 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.rest;
|
||||||
|
|
||||||
|
import java.time.LocalDate;
|
||||||
|
import java.time.ZoneOffset;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.data.domain.Pageable;
|
||||||
|
import org.springframework.format.annotation.DateTimeFormat;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestParam;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.service.ProductService;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||||
|
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/products")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class MainRestApi {
|
||||||
|
|
||||||
|
private final ProductService productService;
|
||||||
|
|
||||||
|
@GetMapping("/info")
|
||||||
|
public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
|
||||||
|
return ResponseEntity.ok(productService.findByUrl(productUrl));
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping("/price-history")
|
||||||
|
public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(@RequestParam String productUrl,
|
||||||
|
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
|
||||||
|
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
|
||||||
|
String zoneOffset) {
|
||||||
|
ZoneOffset zone = ZoneOffset.of(zoneOffset);
|
||||||
|
ZonedDateTime fromDateTime = from.atStartOfDay(zone);
|
||||||
|
ZonedDateTime toDateTime = to.atStartOfDay(zone);
|
||||||
|
return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
|
||||||
|
}
|
||||||
|
|
||||||
|
@GetMapping
|
||||||
|
public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Category category,
|
||||||
|
Pageable pageable) {
|
||||||
|
return ResponseEntity.ok(productService.findAllProductsByPage(category, pageable));
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,23 @@
|
|||||||
|
package ru.pricepulse.parsingservice.web.rest;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.http.ResponseEntity;
|
||||||
|
import org.springframework.web.bind.annotation.GetMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RequestMapping;
|
||||||
|
import org.springframework.web.bind.annotation.RestController;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
|
||||||
|
|
||||||
|
@RestController
|
||||||
|
@RequestMapping("/api/v1/ozon")
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class OzonRestApi {
|
||||||
|
|
||||||
|
private final OzonService ozonService;
|
||||||
|
|
||||||
|
@GetMapping("/categories")
|
||||||
|
public ResponseEntity<OzonCategory[]> getOzon() {
|
||||||
|
return ResponseEntity.ok(ozonService.getCategories());
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,5 +1,7 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.converter;
|
package ru.pricepulse.parsingservice.wildberries_parser.converter;
|
||||||
|
|
||||||
|
import java.time.LocalDateTime;
|
||||||
|
|
||||||
import org.springframework.core.convert.converter.Converter;
|
import org.springframework.core.convert.converter.Converter;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
@ -7,8 +9,6 @@ import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
|||||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||||
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
|
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {
|
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {
|
||||||
|
@ -1,5 +1,11 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.time.ZonedDateTime;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import com.fasterxml.jackson.core.type.TypeReference;
|
import com.fasterxml.jackson.core.type.TypeReference;
|
||||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
@ -12,12 +18,6 @@ import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
|||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
|
||||||
import java.time.LocalDateTime;
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@Service("wildberriesParsingService")
|
@Service("wildberriesParsingService")
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class ParsingService {
|
public class ParsingService {
|
||||||
@ -52,7 +52,7 @@ public class ParsingService {
|
|||||||
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
|
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
|
||||||
|
|
||||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||||
.id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
|
.id(new PriceHistoryId(productEntity.getUrl(), ZonedDateTime.now()))
|
||||||
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||||
.build();
|
.build();
|
||||||
|
|
||||||
|
@ -1,5 +1,9 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import org.springframework.transaction.annotation.Transactional;
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
@ -8,10 +12,6 @@ import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
|||||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
@Service("wildberriesProductService")
|
@Service("wildberriesProductService")
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class ProductService {
|
public class ProductService {
|
||||||
|
@ -1,5 +1,8 @@
|
|||||||
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||||
|
|
||||||
|
import java.util.Collections;
|
||||||
|
import java.util.Map;
|
||||||
|
|
||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.springframework.core.ParameterizedTypeReference;
|
import org.springframework.core.ParameterizedTypeReference;
|
||||||
@ -11,9 +14,6 @@ import org.springframework.stereotype.Service;
|
|||||||
import org.springframework.web.client.RestTemplate;
|
import org.springframework.web.client.RestTemplate;
|
||||||
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
import ru.pricepulse.parsingservice.config.MarketplacesConfig;
|
||||||
|
|
||||||
import java.util.Collections;
|
|
||||||
import java.util.Map;
|
|
||||||
|
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
@Service
|
@Service
|
||||||
@Slf4j
|
@Slf4j
|
||||||
|
@ -22,10 +22,10 @@ spring:
|
|||||||
|
|
||||||
marketplace:
|
marketplace:
|
||||||
ozon:
|
ozon:
|
||||||
categories-urls:
|
max-threads: ${OZON_MAX_PROCESSING_THREADS:5}
|
||||||
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
|
max-num-of-pages-on-screen: ${OZON_MAX_NUM_OF_PAGES_ON_SCREEN:100}
|
||||||
wildberries:
|
wildberries:
|
||||||
status: true
|
status: false
|
||||||
base-url: "https://static-basket-01.wbbasket.ru"
|
base-url: "https://static-basket-01.wbbasket.ru"
|
||||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
||||||
@ -35,10 +35,11 @@ marketplace:
|
|||||||
shard: "electronic15"
|
shard: "electronic15"
|
||||||
laptop-url: "/catalog"
|
laptop-url: "/catalog"
|
||||||
|
|
||||||
|
|
||||||
logging:
|
logging:
|
||||||
pattern:
|
pattern:
|
||||||
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
||||||
|
level:
|
||||||
|
sql: debug
|
||||||
# level:
|
# level:
|
||||||
# org:
|
# org:
|
||||||
# springframework:
|
# springframework:
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
package ru.pricepulse.parsingservice;
|
package ru.pricepulse.parsingservice;
|
||||||
|
|
||||||
import org.junit.jupiter.api.Test;
|
import org.junit.jupiter.api.Test;
|
||||||
import org.springframework.boot.test.context.SpringBootTest;
|
|
||||||
|
|
||||||
class ParsingServiceApplicationTests {
|
class ParsingServiceApplicationTests {
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user