Merge branch 'feature/ozon-parser-v0.1' into feature/parsing-service
# Conflicts: # parsing-service/src/main/java/ru/pricepulse/parsingservice/config/DynamicProxyInterceptor.java # parsing-service/src/main/java/ru/pricepulse/parsingservice/config/ProxyProvider.java # parsing-service/src/main/java/ru/pricepulse/parsingservice/wildberries_parser/service/client/ClientImpl.java
This commit is contained in:
commit
4f5dda4dbf
@ -1,6 +1,6 @@
|
||||
<component name="ProjectRunConfigurationManager">
|
||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<option name="ACTIVE_PROFILES" value="dev,headless,postgres_stat" />
|
||||
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
|
||||
<option name="SCHEDULED_DEBUGGER" value="true" />
|
||||
<envs>
|
||||
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />
|
||||
|
@ -38,6 +38,7 @@ dependencies {
|
||||
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
|
||||
implementation 'org.apache.commons:commons-pool2:2.12.0'
|
||||
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
|
||||
implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'
|
||||
|
||||
compileOnly 'org.projectlombok:lombok'
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
import java.time.format.DateTimeFormatter;
|
||||
|
||||
@Configuration
|
||||
public class DateTimeFormatterConfig {
|
||||
|
||||
|
@ -1,10 +1,10 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Random;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Component
|
||||
public class UserAgentProvider {
|
||||
private static final List<String> userAgents = List.of(
|
||||
|
@ -1,5 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.net.InetSocketAddress;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
@ -10,8 +12,6 @@ import org.springframework.web.reactive.function.client.WebClient;
|
||||
import reactor.netty.http.client.HttpClient;
|
||||
import reactor.netty.transport.ProxyProvider;
|
||||
|
||||
import java.net.InetSocketAddress;
|
||||
|
||||
@Slf4j
|
||||
@Configuration
|
||||
@AllArgsConstructor
|
||||
|
@ -1,5 +1,8 @@
|
||||
package ru.pricepulse.parsingservice.config;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import io.github.bonigarcia.wdm.WebDriverManager;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
@ -10,9 +13,6 @@ import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.context.annotation.Scope;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
@Configuration
|
||||
public class WebDriverConfig {
|
||||
|
||||
|
@ -4,11 +4,13 @@ import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@ConfigurationProperties(prefix = "marketplace.ozon")
|
||||
public class OzonConfigProperties {
|
||||
private List<String> categoriesUrls;
|
||||
|
||||
private Integer maxThreads;
|
||||
|
||||
private Integer maxNumOfPagesOnScreen;
|
||||
|
||||
}
|
||||
|
@ -1,5 +1,6 @@
|
||||
package ru.pricepulse.parsingservice.enumeration;
|
||||
|
||||
public enum Category {
|
||||
LAPTOP
|
||||
LAPTOP,
|
||||
SMARTPHONE
|
||||
}
|
||||
|
@ -0,0 +1,31 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
|
||||
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
|
||||
public enum OzonCategory {
|
||||
|
||||
LAPTOP ("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
|
||||
|
||||
SMARTPHONE ("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);
|
||||
|
||||
private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";
|
||||
|
||||
private final String categoryUrl;
|
||||
|
||||
private final Category mappedCategory;
|
||||
|
||||
OzonCategory(String categoryUrl,
|
||||
Category mappedCategory) {
|
||||
this.categoryUrl = categoryUrl;
|
||||
this.mappedCategory = mappedCategory;
|
||||
}
|
||||
|
||||
public String getCategoryUrl() {
|
||||
return BASE_CATEGORY_URL + categoryUrl;
|
||||
}
|
||||
|
||||
public Category getMappedCategory() {
|
||||
return mappedCategory;
|
||||
}
|
||||
|
||||
}
|
@ -1,62 +1,65 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.pool;
|
||||
|
||||
import java.util.NoSuchElementException;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.ConcurrentLinkedQueue;
|
||||
|
||||
import jakarta.annotation.PreDestroy;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
import org.springframework.beans.factory.ObjectFactory;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@Profile("ozon")
|
||||
public class WebDriverPool {
|
||||
|
||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
||||
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>(); // Список занятых драйверов
|
||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();
|
||||
|
||||
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();
|
||||
|
||||
private final ObjectFactory<WebDriver> webDriverFactory;
|
||||
|
||||
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory) {
|
||||
this.webDriverFactory = webDriverFactory;
|
||||
int poolSize = 12;
|
||||
private final OzonConfigProperties ozonConfigProperties;
|
||||
|
||||
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
|
||||
OzonConfigProperties ozonConfigProperties) {
|
||||
this.webDriverFactory = webDriverFactory;
|
||||
this.ozonConfigProperties = ozonConfigProperties;
|
||||
int poolSize = ozonConfigProperties.getMaxThreads();
|
||||
|
||||
// Инициализация пула с указанным количеством драйверов
|
||||
for (int i = 0; i < poolSize; i++) {
|
||||
availableDrivers.add(createNewDriver());
|
||||
}
|
||||
}
|
||||
|
||||
// Метод для создания нового экземпляра WebDriver
|
||||
private WebDriver createNewDriver() {
|
||||
return webDriverFactory.getObject();
|
||||
}
|
||||
|
||||
// Метод для заимствования драйвера
|
||||
public WebDriver borrowDriver() {
|
||||
WebDriver driver = availableDrivers.poll(); // Получаем драйвер из доступных
|
||||
WebDriver driver = availableDrivers.poll();
|
||||
if (driver != null) {
|
||||
busyDrivers.add(driver); // Добавляем драйвер в занятые
|
||||
busyDrivers.add(driver);
|
||||
return driver;
|
||||
}
|
||||
return driver; // Возвращаем драйвер
|
||||
throw new NoSuchElementException("No available driver found");
|
||||
}
|
||||
|
||||
// Метод для возврата драйвера в пул
|
||||
public void returnDriver(WebDriver driver) {
|
||||
busyDrivers.remove(driver); // Убираем драйвер из занятых
|
||||
availableDrivers.add(driver); // Возвращаем драйвер в доступные
|
||||
busyDrivers.remove(driver);
|
||||
availableDrivers.add(driver);
|
||||
}
|
||||
|
||||
// Метод для закрытия всех драйверов в пуле
|
||||
@PreDestroy
|
||||
public void shutdownPool() {
|
||||
// Закрываем доступные драйверы
|
||||
for (WebDriver driver : availableDrivers) {
|
||||
driver.quit();
|
||||
}
|
||||
// Закрываем занятые драйверы
|
||||
|
||||
for (WebDriver driver : busyDrivers) {
|
||||
driver.quit();
|
||||
}
|
||||
|
@ -1,7 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||
|
||||
public interface MarketplaceParsingService {
|
||||
|
||||
void processCategory(String categoryUrl);
|
||||
|
||||
}
|
@ -0,0 +1,17 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class OzonService {
|
||||
|
||||
public OzonCategory[] getCategories() {
|
||||
return OzonCategory.values();
|
||||
}
|
||||
|
||||
}
|
@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
@ -9,6 +10,7 @@ import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Builder
|
||||
public class ParsedData {
|
||||
|
||||
private Marketplace marketplace;
|
||||
|
@ -1,18 +1,14 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
|
||||
@ -63,13 +59,13 @@ public class CategoryPage implements MarketplacePage {
|
||||
|
||||
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
||||
.findElements(By.tagName("span")).getFirst().getText());
|
||||
var parsedData = new ParsedData();
|
||||
/*var parsedData = new ParsedData();
|
||||
parsedData.setUrl(productUrl);
|
||||
parsedData.setBrand(productBrand);
|
||||
parsedData.setProductName(productName);
|
||||
parsedData.setImageUrl(productImageUrl);
|
||||
parsedData.setPrice(productPrice);
|
||||
products.add(parsedData);
|
||||
products.add(parsedData);*/
|
||||
});
|
||||
|
||||
|
||||
|
@ -10,11 +10,11 @@ public class NoContentPage {
|
||||
|
||||
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
|
||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
|
||||
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
|
||||
|
||||
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
|
||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
@ -27,7 +27,7 @@ public class NoContentPage {
|
||||
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return driver.findElement(searchResults) != null
|
||||
return driver.findElement(searchResultsError) != null
|
||||
|| driver.findElement(errorText) != null
|
||||
|| driver.findElement(notFoundText) != null;
|
||||
} catch (Exception e) {
|
||||
|
@ -0,0 +1,228 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.jsoup.select.Elements;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
|
||||
@Slf4j
|
||||
public class OzonCategoryPage {
|
||||
|
||||
private static final String OZON_MAIN_LINK = "https://www.ozon.ru";
|
||||
|
||||
public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";
|
||||
|
||||
public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
|
||||
|
||||
public static final int INDEX_OF_PRODUCT_PRICE = 0;
|
||||
|
||||
public static final int INDEX_OF_PRODUCT_BRAND = 1;
|
||||
|
||||
public static final int INDEX_OF_PRODUCT_NAME = 2;
|
||||
|
||||
private final Document document;
|
||||
|
||||
public OzonCategoryPage(String pageHtml) {
|
||||
this.document = Jsoup.parse(pageHtml);
|
||||
}
|
||||
|
||||
public List<ParsedData> getProducts(Category category) {
|
||||
List<ParsedData> products = new ArrayList<>();
|
||||
|
||||
Elements searchResultsDivs = getSearchResultsDivs();
|
||||
if (searchResultsDivs.isEmpty()) {
|
||||
return List.of();
|
||||
}
|
||||
log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
|
||||
|
||||
for (Element searchResultsDiv : searchResultsDivs) {
|
||||
Elements productsDivs = getProductsDivs(searchResultsDiv);
|
||||
List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
|
||||
List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
|
||||
products.addAll(parsedProductsData);
|
||||
}
|
||||
|
||||
/*try {
|
||||
|
||||
for (Element searchResultsDiv : searchResultsDivs) {
|
||||
var productDivs = searchResultsDiv.select("> div > div");
|
||||
for (Element productDiv : productDivs) {
|
||||
Elements productDataDivs = productDivs.select("> div > *");
|
||||
if (productDataDivs.select("> *").isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
productDataDivs.removeLast();
|
||||
Element productUrlAndImageUrlA = productDataDivs.first();
|
||||
Element productDataDiv = productDataDivs.last();
|
||||
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||
try {
|
||||
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||
.select("span").text().toLowerCase()
|
||||
.contains("осталось")) {
|
||||
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND).select("> span");
|
||||
|
||||
String productUrl = OZON_MAIN_LINK + productUrlAndImageUrlA.attr("href").replaceAll("\\?.*$", "");
|
||||
String productImageUrl = productUrlAndImageUrlA.select("> div > div")
|
||||
.first().getElementsByTag("img")
|
||||
.first().attr("src");
|
||||
|
||||
BigDecimal productPrice;
|
||||
try {
|
||||
productPrice = parseOzonPriceToBigDecimal(
|
||||
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||
.first().text());
|
||||
} catch (Exception e) {
|
||||
log.error("не удалось распарсить цену");
|
||||
continue;
|
||||
}
|
||||
|
||||
String productBrand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||
String productName = productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME).select("> div > span").text();
|
||||
|
||||
ParsedData parsedData = new ParsedData();
|
||||
parsedData.setCategory(category);
|
||||
parsedData.setMarketplace(Marketplace.OZON);
|
||||
parsedData.setUrl(productUrl);
|
||||
parsedData.setImageUrl(productImageUrl);
|
||||
parsedData.setPrice(productPrice);
|
||||
parsedData.setBrand(productBrand);
|
||||
parsedData.setProductName(productName);
|
||||
products.add(parsedData);
|
||||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
}*/
|
||||
return products;
|
||||
}
|
||||
|
||||
private Elements getSearchResultsDivs() {
|
||||
try {
|
||||
return document.select(SEARCH_RESULTS_CSS_SELECTOR);
|
||||
} catch (Exception e) {
|
||||
log.warn("Не удалось достать блоки searchResultsV2");
|
||||
return new Elements();
|
||||
}
|
||||
}
|
||||
|
||||
private Elements getProductsDivs(Element searchResultsDiv) {
|
||||
return searchResultsDiv.select("> div > div");
|
||||
}
|
||||
|
||||
private List<Elements> getAllProductDataDivs(Elements productsDivs) {
|
||||
List<Elements> allProductDataDivs = new ArrayList<>();
|
||||
for (Element productDiv : productsDivs) {
|
||||
Elements productDataDivs = productDiv.select("> div > *");
|
||||
if (productDataDivs.select("> *").isEmpty()) {
|
||||
continue;
|
||||
}
|
||||
removeAddInFavouriteDiv(productDataDivs);
|
||||
allProductDataDivs.add(productDataDivs);
|
||||
}
|
||||
return allProductDataDivs;
|
||||
}
|
||||
|
||||
private void removeAddInFavouriteDiv(Elements productDataDivs) {
|
||||
productDataDivs.removeLast();
|
||||
}
|
||||
|
||||
private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
|
||||
Category category) {
|
||||
List<ParsedData> parsedData = new ArrayList<>();
|
||||
for (Elements productDataDivs : allProductDataDivs) {
|
||||
try {
|
||||
ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
|
||||
parsedData.add(parsedDataItem);
|
||||
} catch (Exception e) {
|
||||
//log.error(e.getMessage(), e);
|
||||
}
|
||||
}
|
||||
return parsedData;
|
||||
}
|
||||
|
||||
private ParsedData getParsedDataItem(Elements productDataDivs,
|
||||
Category category) {
|
||||
removeExtraDivIfExists(productDataDivs);
|
||||
return ParsedData.builder()
|
||||
.category(category)
|
||||
.marketplace(Marketplace.OZON)
|
||||
.url(extractUrl(productDataDivs))
|
||||
.imageUrl(extractImageUrl(productDataDivs))
|
||||
.brand(extractBrand(productDataDivs))
|
||||
.productName(extractProductName(productDataDivs))
|
||||
.price(extractPrice(productDataDivs))
|
||||
.build();
|
||||
}
|
||||
|
||||
private void removeExtraDivIfExists(Elements productDataDivs) {
|
||||
Element productDataDiv = productDataDivs.last();
|
||||
Elements productDataInnerDivs = productDataDiv.select("> *");
|
||||
try {
|
||||
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
|
||||
.select("span").text().toLowerCase()
|
||||
.contains("осталось")) {
|
||||
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
}
|
||||
|
||||
private String extractUrl(Elements productDataDivs) {
|
||||
Element productUrlA = productDataDivs.first();
|
||||
return OZON_MAIN_LINK + productUrlA
|
||||
.attr("href").replaceAll("\\?.*$", "");
|
||||
}
|
||||
|
||||
private String extractImageUrl(Elements productDataDivs) {
|
||||
Element productImageUrlA = productDataDivs.first();
|
||||
return productImageUrlA.select("> div > div")
|
||||
.first().getElementsByTag("img")
|
||||
.first().attr("src");
|
||||
}
|
||||
|
||||
private String extractBrand(Elements productDataDivs) {
|
||||
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||
//log.info(productDataInnerDivs.html());
|
||||
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
|
||||
.select("> span");
|
||||
String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
|
||||
if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
|
||||
return "БРЕНД_НЕ_УКАЗАН";
|
||||
}
|
||||
return brand;
|
||||
}
|
||||
|
||||
private String extractProductName(Elements productDataDivs) {
|
||||
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||
return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
|
||||
.select("> div > span").text();
|
||||
}
|
||||
|
||||
private BigDecimal extractPrice(Elements productDataDivs) {
|
||||
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
|
||||
return parseOzonPriceToBigDecimal(
|
||||
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
|
||||
.first().text());
|
||||
}
|
||||
|
||||
private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
|
||||
return productDataDivs.last().select("> *");
|
||||
}
|
||||
|
||||
private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
|
||||
String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
|
||||
return new BigDecimal(cleanedString);
|
||||
}
|
||||
|
||||
}
|
@ -2,7 +2,6 @@ package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
@ -11,36 +10,30 @@ import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.retry.annotation.Recover;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
||||
import ru.pricepulse.parsingservice.service.ProductService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@Profile("ozon")
|
||||
public class OzonCategoryPageParsingService {
|
||||
public class OzonHtmlFetcher {
|
||||
|
||||
private final WebDriverPool webDriverPool;
|
||||
|
||||
private final ProductService productService;
|
||||
private final PageScroller pageScroller;
|
||||
|
||||
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
|
||||
ProductService productService) {
|
||||
public OzonHtmlFetcher(WebDriverPool webDriverPool,
|
||||
PageScroller pageScroller) {
|
||||
this.webDriverPool = webDriverPool;
|
||||
this.productService = productService;
|
||||
this.pageScroller = pageScroller;
|
||||
}
|
||||
|
||||
@Retryable(maxAttempts = 10, recover = "recover")
|
||||
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
||||
public String fetchPageHtml(String pageUrl,
|
||||
AtomicBoolean lastPageInCategory) {
|
||||
var driver = webDriverPool.borrowDriver();
|
||||
if (driver == null) {
|
||||
throw new RuntimeException();
|
||||
}
|
||||
try {
|
||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||
driver.get(pageUrl);
|
||||
@ -48,64 +41,63 @@ public class OzonCategoryPageParsingService {
|
||||
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
||||
var categoryPage = new CategoryPage(driver, wait);
|
||||
var noContentPage = new NoContentPage(driver, wait);
|
||||
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
|
||||
resolveAccessDeniedPage(accessDeniedPage);
|
||||
log.info("Проблема успешно решена: {}", pageUrl);
|
||||
}
|
||||
if (noContentPage.isLoaded()) {
|
||||
log.info("Страница не найдена");
|
||||
stopFlag.set(true);
|
||||
return;
|
||||
}
|
||||
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
|
||||
checkAceesDeniedAndResolve(accessDeniedPage);
|
||||
|
||||
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
||||
List<ParsedData> parsedData;
|
||||
try {
|
||||
parsedData = categoryPage.getParsedProducts();
|
||||
for (ParsedData data : parsedData) {
|
||||
data.setMarketplace(Marketplace.OZON);
|
||||
data.setCategory(category);
|
||||
}
|
||||
productService.saveBatch(parsedData);
|
||||
} catch (Exception e) {
|
||||
throw new Exception(e);
|
||||
}
|
||||
webDriverPool.returnDriver(driver);
|
||||
} catch (Exception ignored) {
|
||||
throw new RuntimeException(ignored);
|
||||
pageScroller.scrollToEndOfPage(driver);
|
||||
return driver.getPageSource();
|
||||
} catch (Exception e) {
|
||||
log.error(e.getMessage(), e);
|
||||
throw new RuntimeException(e);
|
||||
} finally {
|
||||
webDriverPool.returnDriver(driver);
|
||||
}
|
||||
}
|
||||
|
||||
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
||||
CategoryPage categoryPage) {
|
||||
CategoryPage categoryPage,
|
||||
NoContentPage noContentPage,
|
||||
AtomicBoolean stopFlag) {
|
||||
log.debug("Проверка что страница 'Доступ ограничен'");
|
||||
try {
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
return true;
|
||||
}
|
||||
} catch (Exception e) {
|
||||
log.debug("Ошибка проверки", e);
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
return true;
|
||||
}
|
||||
log.debug("Проверка что страница 'Страница категории'");
|
||||
if (checkCategoryPage(categoryPage)) {
|
||||
return true;
|
||||
}
|
||||
if (checkNoContentPage(noContentPage)) {
|
||||
stopFlag.set(true);
|
||||
return true;
|
||||
}
|
||||
log.debug("Проверка загрузки страницы неудачна");
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||
return accessDeniedPage.isLoaded();
|
||||
}
|
||||
|
||||
private boolean checkCategoryPage(CategoryPage categoryPage) {
|
||||
return categoryPage.isLoaded();
|
||||
}
|
||||
|
||||
private void checkAceesDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
log.info("Доступ ограничен, пробуем решить проблему");
|
||||
resolveAccessDeniedPage(accessDeniedPage);
|
||||
log.info("Проблема успешно решена");
|
||||
}
|
||||
}
|
||||
|
||||
private boolean checkNoContentPage(NoContentPage noContentPage) {
|
||||
if (noContentPage.isLoaded()) {
|
||||
log.info("Страница не найдена");
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||
return accessDeniedPage.isLoaded();
|
||||
}
|
||||
|
||||
private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
|
||||
accessDeniedPage.clickReloadButton();
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
|
||||
|
||||
@Service
|
||||
public class OzonPageParser {
|
||||
|
||||
public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
|
||||
Category category) {
|
||||
OzonCategoryPage categoryPage = new OzonCategoryPage(pageSource);
|
||||
return categoryPage.getProducts(category);
|
||||
}
|
||||
|
||||
}
|
@ -1,65 +1,113 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.ArrayBlockingQueue;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ConcurrentHashMap;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.Semaphore;
|
||||
import java.util.concurrent.ThreadPoolExecutor;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.service.ProductService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@Profile("ozon")
|
||||
public class OzonParsingService implements MarketplaceParsingService {
|
||||
public class OzonParsingService {
|
||||
|
||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
|
||||
private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
|
||||
private final Map<String, Set<String>> urlCache;
|
||||
|
||||
private final OzonCategoryPageParsingService categoryPageParsingService;
|
||||
private final ExecutorService pageExecutorService;
|
||||
|
||||
private final Semaphore semaphore;
|
||||
|
||||
private final OzonHtmlFetcher categoryPageParsingService;
|
||||
|
||||
private final OzonConfigProperties ozonConfigProperties;
|
||||
|
||||
private final OzonPageParser ozonPageParser;
|
||||
|
||||
private final ProductService productService;
|
||||
|
||||
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
|
||||
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
|
||||
ProductService productService) {
|
||||
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
|
||||
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
|
||||
this.urlCache = new ConcurrentHashMap<>();
|
||||
for (OzonCategory category : OzonCategory.values()) {
|
||||
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
|
||||
}
|
||||
|
||||
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
|
||||
this.categoryPageParsingService = categoryPageParsingService;
|
||||
this.ozonConfigProperties = ozonConfigProperties;
|
||||
this.ozonPageParser = ozonPageParser;
|
||||
this.productService = productService;
|
||||
}
|
||||
|
||||
public void processCategory(String url) {
|
||||
int pageIndex = 1;
|
||||
public void startProcessing() {
|
||||
for (OzonCategory category : OzonCategory.values()) {
|
||||
log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", category);
|
||||
processCategory(category);
|
||||
}
|
||||
}
|
||||
|
||||
while (!stopFlag.get()) {
|
||||
private void processCategory(OzonCategory category) {
|
||||
int pageIndex = 1;
|
||||
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
|
||||
while (!lastPageInCategory.get()) {
|
||||
try {
|
||||
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
|
||||
semaphore.acquire();
|
||||
|
||||
int finalPageIndex = pageIndex;
|
||||
String pageUrl = url + "&page=" + finalPageIndex;
|
||||
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
|
||||
|
||||
pageExecutorService.submit(() -> {
|
||||
try {
|
||||
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
|
||||
} finally {
|
||||
semaphore.release(); // Освобождаем разрешение после завершения задачи
|
||||
}
|
||||
});
|
||||
|
||||
++pageIndex;
|
||||
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
|
||||
|
||||
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
|
||||
} catch (InterruptedException e) {
|
||||
Thread.currentThread().interrupt();
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (lastPageInCategory.get()) {
|
||||
log.info("Достигли последней страницы категории");
|
||||
}
|
||||
}
|
||||
|
||||
if (stopFlag.get()) {
|
||||
log.info("Конец парсинга категории");
|
||||
pageExecutorService.shutdownNow();
|
||||
private void processCategoryPage(String pageUrl,
|
||||
OzonCategory category,
|
||||
AtomicBoolean lastPageInCategory) {
|
||||
try {
|
||||
MDC.put("pageUrl", pageUrl);
|
||||
String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
|
||||
List<ParsedData> parsedProducts =
|
||||
ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
|
||||
log.info("""
|
||||
|
||||
КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
|
||||
КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
|
||||
|
||||
""", parsedProducts.size());
|
||||
if (urlCache.size() > 1000000) {
|
||||
urlCache.clear();
|
||||
}
|
||||
Set<String> categoryCachecUrl = urlCache.get(category.getCategoryUrl());
|
||||
List<ParsedData> uniqueData = parsedProducts.stream()
|
||||
.filter(data -> categoryCachecUrl.add(data.getUrl()))
|
||||
.toList();
|
||||
productService.saveBatch(uniqueData);
|
||||
} finally {
|
||||
MDC.clear();
|
||||
semaphore.release();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -0,0 +1,56 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.concurrent.atomic.AtomicLong;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.JavascriptExecutor;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class PageScroller {
|
||||
|
||||
private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
|
||||
|
||||
private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";
|
||||
|
||||
public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
|
||||
JavascriptExecutor js = (JavascriptExecutor) driver;
|
||||
AtomicLong lastHeight = new AtomicLong((long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT));
|
||||
int attemptsLimit = 100;
|
||||
log.info("Начинаем пролистывать страницу до конца");
|
||||
while (true) {
|
||||
js.executeScript(SCROLL_TO_PAGE_HEIGHT);
|
||||
|
||||
long newHeight = (long) js.executeScript(ALL_CONTENT_PAGE_HEIGHT);
|
||||
|
||||
try {
|
||||
var nextPageButtons = driver.findElements(By.cssSelector("div[data-widget='megaPaginator'] > div")).get(1)
|
||||
.findElement(By.cssSelector(":scope > div > div > div"))
|
||||
.findElements(By.tagName("a"));
|
||||
|
||||
if (nextPageButtons != null && newHeight > lastHeight.get()) {
|
||||
log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
|
||||
break;
|
||||
}
|
||||
} catch (Exception ignored) {}
|
||||
|
||||
|
||||
if (newHeight > lastHeight.get()) {
|
||||
attemptsLimit = 100;
|
||||
lastHeight.set(newHeight);
|
||||
} else {
|
||||
attemptsLimit--;
|
||||
Thread.sleep(1000);
|
||||
if (attemptsLimit == 0) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -4,7 +4,6 @@ import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
||||
|
||||
@Service
|
||||
@ -12,13 +11,11 @@ import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingServi
|
||||
@Profile("ozon")
|
||||
public class OzonProductUpdater {
|
||||
|
||||
private final OzonConfigProperties properties;
|
||||
private final OzonParsingService ozonParsingService;
|
||||
|
||||
@Scheduled(fixedRate = 3600000)
|
||||
@Scheduled(cron = "0 0 0,6,12,18 * * *")
|
||||
public void updateOzonProducts() {
|
||||
properties.getCategoriesUrls()
|
||||
.forEach(ozonParsingService::processCategory);
|
||||
ozonParsingService.startProcessing();
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,4 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.task;
|
||||
|
||||
/**
 * Empty class: it declares no fields or methods.
 * NOTE(review): presumably a placeholder for a future parsing-task abstraction; confirm or remove.
 */
public class OzonParsingTask {
}
|
@ -1,17 +1,21 @@
|
||||
package ru.pricepulse.parsingservice.persistence.entity;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.EmbeddedId;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.Table;
|
||||
import lombok.*;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Entity
|
||||
@ -52,7 +56,9 @@ public class PriceHistoryEntity {
|
||||
|
||||
@PrePersist
|
||||
protected void onCreate() {
|
||||
id.setDate(LocalDateTime.now());
|
||||
if (id.getDate() == null) {
|
||||
id.setDate(ZonedDateTime.now());
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,17 +1,17 @@
|
||||
package ru.pricepulse.parsingservice.persistence.entity;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import java.io.Serializable;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Embeddable;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
@ -23,7 +23,7 @@ public class PriceHistoryId implements Serializable {
|
||||
private String productUrl;
|
||||
|
||||
@Column(name = "date", nullable = false)
|
||||
private LocalDateTime date;
|
||||
private ZonedDateTime date;
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
|
@ -1,14 +1,26 @@
|
||||
package ru.pricepulse.parsingservice.persistence.entity;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import lombok.*;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.EnumType;
|
||||
import jakarta.persistence.Enumerated;
|
||||
import jakarta.persistence.GeneratedValue;
|
||||
import jakarta.persistence.GenerationType;
|
||||
import jakarta.persistence.Id;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.Table;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Entity
|
||||
|
@ -1,8 +1,16 @@
|
||||
package ru.pricepulse.parsingservice.persistence.repository;
|
||||
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
|
||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
||||
|
||||
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
|
||||
ZonedDateTime from,
|
||||
ZonedDateTime to);
|
||||
|
||||
}
|
||||
|
@ -1,15 +1,28 @@
|
||||
package ru.pricepulse.parsingservice.persistence.repository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Optional;
|
||||
|
||||
import org.springframework.data.domain.Page;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import org.springframework.data.jpa.repository.Query;
|
||||
import org.springframework.stereotype.Repository;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@Repository
|
||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||
|
||||
boolean existsByUrl(String url);
|
||||
|
||||
ProductEntity findByUrl(String url);
|
||||
List<ProductEntity> findAllByUrlIn(List<String> urls);
|
||||
|
||||
@Query("""
|
||||
select p.url from ProductEntity p where p.url in :urls
|
||||
""")
|
||||
List<String> findSavedUrl(List<String> urls);
|
||||
|
||||
Optional<ProductEntity> findByUrl(String url);
|
||||
|
||||
Page<ProductEntity> findAllByMarketplaceAndCategory(Marketplace marketplace, Category category, Pageable pageable);
|
||||
}
|
||||
|
@ -1,19 +1,28 @@
|
||||
package ru.pricepulse.parsingservice.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.List;
|
||||
|
||||
import jakarta.persistence.EntityNotFoundException;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
|
||||
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@ -24,34 +33,58 @@ public class ProductService {
|
||||
|
||||
private final ProductPriceRepository productPriceRepository;
|
||||
|
||||
private final ProductMapper productMapper;
|
||||
|
||||
private final PriceHistoryMapper priceHistoryMapper;
|
||||
|
||||
@Transactional
|
||||
@Retryable
|
||||
public void saveBatch(List<ParsedData> parsedData) {
|
||||
var products = new ArrayList<ProductEntity>();
|
||||
var prices = new ArrayList<PriceHistoryEntity>();
|
||||
parsedData.forEach(product -> processParsedProduct(product, prices, products));
|
||||
List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
|
||||
List<String> alreadySavedUrls = productRepository.findSavedUrl(productsUrls);
|
||||
List<ProductEntity> products = parsedData.stream()
|
||||
.filter(data -> !alreadySavedUrls.contains(data.getUrl()))
|
||||
.map(this::getProduct)
|
||||
.toList();
|
||||
List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();
|
||||
productRepository.saveAll(products);
|
||||
log.info("Сохранили пачку товаров {}", products.size());
|
||||
productPriceRepository.saveAll(prices);
|
||||
log.info("Сохранили историю цен {}", prices.size());
|
||||
}
|
||||
|
||||
private void processParsedProduct(ParsedData product,
|
||||
ArrayList<PriceHistoryEntity> prices,
|
||||
ArrayList<ProductEntity> products) {
|
||||
var priceHistoryEntity = getPriceHistory(product);
|
||||
prices.add(priceHistoryEntity);
|
||||
if (productRepository.existsByUrl(product.getUrl())) {
|
||||
log.info("Запись {} уже есть", product.getUrl());
|
||||
return;
|
||||
}
|
||||
var productEntity = getProduct(product);
|
||||
products.add(productEntity);
|
||||
@Transactional(readOnly = true)
|
||||
public ProductDto findByUrl(String productUrl) {
|
||||
var product = productRepository.findByUrl(productUrl).orElseThrow(EntityNotFoundException::new);
|
||||
return productMapper.toProductDto(product);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public PriceHistoryDto findPriceHistoryByRange(String productUrl,
|
||||
ZonedDateTime from,
|
||||
ZonedDateTime to) {
|
||||
var priceHistory = productPriceRepository
|
||||
.findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
|
||||
return priceHistoryMapper.toPriceHistoryDto(priceHistory);
|
||||
}
|
||||
|
||||
@Transactional(readOnly = true)
|
||||
public ProductsPageDto findAllProductsByPage(Marketplace marketplace,
|
||||
Category category,
|
||||
Pageable pageable) {
|
||||
var page = productRepository.findAllByMarketplaceAndCategory(marketplace, category, pageable);
|
||||
return new ProductsPageDto(
|
||||
page.getNumberOfElements(),
|
||||
page.getTotalPages(),
|
||||
page.getNumber(),
|
||||
page.getContent().stream().map(productMapper::toProductDto).toList()
|
||||
);
|
||||
}
|
||||
|
||||
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
||||
var priceHistoryId = new PriceHistoryId();
|
||||
priceHistoryId.setProductUrl(product.getUrl());
|
||||
priceHistoryId.setDate(ZonedDateTime.now());
|
||||
var priceHistory = new PriceHistoryEntity();
|
||||
priceHistory.setId(priceHistoryId);
|
||||
priceHistory.setPrice(product.getPrice());
|
||||
@ -68,4 +101,5 @@ public class ProductService {
|
||||
productEntity.setImageUrl(product.getImageUrl());
|
||||
return productEntity;
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.service.dto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
public class PriceHistoryDto {
|
||||
|
||||
private final Map<ZonedDateTime, BigDecimal> priceHistory;
|
||||
|
||||
public PriceHistoryDto() {
|
||||
this.priceHistory = new HashMap<>();
|
||||
}
|
||||
}
|
@ -0,0 +1,28 @@
|
||||
package ru.pricepulse.parsingservice.service.dto;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@RequiredArgsConstructor
|
||||
public class ProductDto {
|
||||
|
||||
private final Long id;
|
||||
|
||||
private final Marketplace marketplace;
|
||||
|
||||
private final Category category;
|
||||
|
||||
private final String brand;
|
||||
|
||||
private final String productName;
|
||||
|
||||
private final String url;
|
||||
|
||||
private final String imageUrl;
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package ru.pricepulse.parsingservice.service.dto;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@AllArgsConstructor
|
||||
public class ProductsPageDto {
|
||||
|
||||
private final int totalItems;
|
||||
|
||||
private final int totalPages;
|
||||
|
||||
private final int currentPage;
|
||||
|
||||
private final List<ProductDto> products;
|
||||
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.service.mapper;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||
|
||||
@Component
|
||||
public class PriceHistoryMapper {
|
||||
|
||||
public PriceHistoryDto toPriceHistoryDto (List<PriceHistoryEntity> priceHistory) {
|
||||
var priceHistoryDto = new PriceHistoryDto();
|
||||
priceHistory.forEach(item ->
|
||||
priceHistoryDto.getPriceHistory().put(item.getId().getDate().withNano(0), item.getPrice()));
|
||||
return priceHistoryDto;
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,22 @@
|
||||
package ru.pricepulse.parsingservice.service.mapper;
|
||||
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||
|
||||
@Component
|
||||
public class ProductMapper {
|
||||
|
||||
public ProductDto toProductDto(ProductEntity product) {
|
||||
return new ProductDto(
|
||||
product.getId(),
|
||||
product.getMarketplace(),
|
||||
product.getCategory(),
|
||||
product.getBrand(),
|
||||
product.getProductName(),
|
||||
product.getUrl(),
|
||||
product.getImageUrl()
|
||||
);
|
||||
}
|
||||
|
||||
}
|
@ -1,4 +1,4 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
||||
package ru.pricepulse.parsingservice.service.scheduler;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.format.DateTimeFormatter;
|
@ -1,5 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.web.handler;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
import jakarta.persistence.EntityNotFoundException;
|
||||
import jakarta.servlet.http.HttpServletRequest;
|
||||
import org.springframework.http.HttpStatus;
|
||||
@ -8,8 +10,6 @@ import org.springframework.web.bind.annotation.ControllerAdvice;
|
||||
import org.springframework.web.bind.annotation.ExceptionHandler;
|
||||
import org.springframework.web.bind.annotation.ResponseStatus;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
@ControllerAdvice
|
||||
public class CommonExceptionHandler {
|
||||
|
||||
|
@ -1,9 +1,9 @@
|
||||
package ru.pricepulse.parsingservice.web.handler;
|
||||
|
||||
import org.springframework.http.HttpStatus;
|
||||
|
||||
import java.net.URI;
|
||||
|
||||
import org.springframework.http.HttpStatus;
|
||||
|
||||
public record ErrorResponse (
|
||||
Integer statusCode,
|
||||
HttpStatus status,
|
||||
|
@ -0,0 +1,27 @@
|
||||
package ru.pricepulse.parsingservice.web.rest;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/categories")
|
||||
@RequiredArgsConstructor
|
||||
public class CategoryApi {
|
||||
|
||||
private final OzonService ozonService;
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<?> getCategories(Marketplace marketplace) {
|
||||
if (Marketplace.OZON.equals(marketplace)) {
|
||||
return ResponseEntity.ok(ozonService.getCategories());
|
||||
}
|
||||
return ResponseEntity.ok(Category.values());
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.web.rest;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/marketplaces")
|
||||
@RequiredArgsConstructor
|
||||
public class MarketplaceApi {
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<Marketplace[]> getMarketplace() {
|
||||
return ResponseEntity.ok(Marketplace.values());
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,52 @@
|
||||
package ru.pricepulse.parsingservice.web.rest;
|
||||
|
||||
import java.time.LocalDate;
|
||||
import java.time.ZoneOffset;
|
||||
import java.time.ZonedDateTime;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.data.domain.Pageable;
|
||||
import org.springframework.format.annotation.DateTimeFormat;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.web.bind.annotation.GetMapping;
|
||||
import org.springframework.web.bind.annotation.RequestMapping;
|
||||
import org.springframework.web.bind.annotation.RequestParam;
|
||||
import org.springframework.web.bind.annotation.RestController;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.service.ProductService;
|
||||
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductDto;
|
||||
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
|
||||
|
||||
@RestController
|
||||
@RequestMapping("/api/v1/products")
|
||||
@RequiredArgsConstructor
|
||||
public class ProductApi {
|
||||
|
||||
private final ProductService productService;
|
||||
|
||||
@GetMapping("/info")
|
||||
public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
|
||||
return ResponseEntity.ok(productService.findByUrl(productUrl));
|
||||
}
|
||||
|
||||
@GetMapping("/price-history")
|
||||
public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(@RequestParam String productUrl,
|
||||
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
|
||||
@RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
|
||||
String zoneOffset) {
|
||||
ZoneOffset zone = ZoneOffset.of(zoneOffset);
|
||||
ZonedDateTime fromDateTime = from.atStartOfDay(zone);
|
||||
ZonedDateTime toDateTime = to.atStartOfDay(zone);
|
||||
return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
|
||||
}
|
||||
|
||||
@GetMapping
|
||||
public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Marketplace marketplace,
|
||||
Category category,
|
||||
Pageable pageable) {
|
||||
return ResponseEntity.ok(productService.findAllProductsByPage(marketplace, category, pageable));
|
||||
}
|
||||
|
||||
}
|
@ -1,5 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.converter;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
import org.springframework.core.convert.converter.Converter;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
@ -7,8 +9,6 @@ import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
import java.time.LocalDateTime;
|
||||
|
||||
|
||||
@Component
|
||||
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {
|
||||
|
@ -1,5 +1,11 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.ZonedDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import lombok.AllArgsConstructor;
|
||||
@ -12,12 +18,6 @@ import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Service("wildberriesParsingService")
|
||||
@AllArgsConstructor
|
||||
public class ParsingService {
|
||||
@ -52,7 +52,7 @@ public class ParsingService {
|
||||
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
|
||||
|
||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||
.id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
|
||||
.id(new PriceHistoryId(productEntity.getUrl(), ZonedDateTime.now()))
|
||||
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||
.build();
|
||||
|
||||
|
@ -1,5 +1,9 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
@ -8,10 +12,6 @@ import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
@Service("wildberriesProductService")
|
||||
@AllArgsConstructor
|
||||
public class ProductService {
|
||||
|
@ -22,10 +22,10 @@ liquibase:
|
||||
|
||||
marketplace:
|
||||
ozon:
|
||||
categories-urls:
|
||||
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
|
||||
max-threads: ${OZON_MAX_PROCESSING_THREADS:5}
|
||||
max-num-of-pages-on-screen: ${OZON_MAX_NUM_OF_PAGES_ON_SCREEN:100}
|
||||
wildberries:
|
||||
status: true
|
||||
status: false
|
||||
base-url: "https://static-basket-01.wbbasket.ru"
|
||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
||||
@ -35,10 +35,11 @@ marketplace:
|
||||
shard: "electronic15"
|
||||
laptop-url: "/catalog"
|
||||
|
||||
|
||||
logging:
|
||||
pattern:
|
||||
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
|
||||
level:
|
||||
sql: debug
|
||||
# level:
|
||||
# org:
|
||||
# springframework:
|
||||
|
@ -1,7 +1,6 @@
|
||||
package ru.pricepulse.parsingservice;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
|
||||
class ParsingServiceApplicationTests {
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user