Merge branch 'feature/ozon-parser-v0.1' into feature/parsing-service

# Conflicts:
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/config/DynamicProxyInterceptor.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/config/ProxyProvider.java
#	parsing-service/src/main/java/ru/pricepulse/parsingservice/wildberries_parser/service/client/ClientImpl.java
This commit is contained in:
danil.markov 2024-11-13 14:00:53 +04:00
commit 4f5dda4dbf
44 changed files with 873 additions and 200 deletions

View File

@ -1,6 +1,6 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev,headless,postgres_stat" />
<option name="ACTIVE_PROFILES" value="dev,ozon,headless,postgres_stat" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<envs>
<env name="POSTGRES_JDBC_PASSWORD" value="postgres" />

View File

@ -38,6 +38,7 @@ dependencies {
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
implementation 'org.apache.commons:commons-pool2:2.12.0'
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
implementation 'org.springdoc:springdoc-openapi-starter-webmvc-ui:2.6.0'
compileOnly 'org.projectlombok:lombok'

View File

@ -1,10 +1,10 @@
package ru.pricepulse.parsingservice.config;
import java.time.format.DateTimeFormatter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import java.time.format.DateTimeFormatter;
@Configuration
public class DateTimeFormatterConfig {

View File

@ -1,10 +1,10 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.stereotype.Component;
import java.util.List;
import java.util.Random;
import org.springframework.stereotype.Component;
@Component
public class UserAgentProvider {
private static final List<String> userAgents = List.of(

View File

@ -1,5 +1,7 @@
package ru.pricepulse.parsingservice.config;
import java.net.InetSocketAddress;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Bean;
@ -10,8 +12,6 @@ import org.springframework.web.reactive.function.client.WebClient;
import reactor.netty.http.client.HttpClient;
import reactor.netty.transport.ProxyProvider;
import java.net.InetSocketAddress;
@Slf4j
@Configuration
@AllArgsConstructor

View File

@ -1,5 +1,8 @@
package ru.pricepulse.parsingservice.config;
import java.util.HashMap;
import java.util.Map;
import io.github.bonigarcia.wdm.WebDriverManager;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
@ -10,9 +13,6 @@ import org.springframework.context.annotation.Configuration;
import org.springframework.context.annotation.Profile;
import org.springframework.context.annotation.Scope;
import java.util.HashMap;
import java.util.Map;
@Configuration
public class WebDriverConfig {

View File

@ -4,11 +4,13 @@ import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import java.util.List;
/**
 * Settings for the Ozon parser, bound from configuration properties under the
 * {@code marketplace.ozon} prefix. Accessors are generated by Lombok.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.ozon")
public class OzonConfigProperties {
// Category URLs supplied through configuration.
private List<String> categoriesUrls;
// Read by WebDriverPool as the number of drivers to create up front and by
// OzonParsingService to size its thread pool and semaphore. Both unbox the
// value to int, so leaving it unset fails with a NullPointerException there.
private Integer maxThreads;
// Step by which OzonParsingService advances the page index between page tasks.
private Integer maxNumOfPagesOnScreen;
}

View File

@ -1,5 +1,6 @@
package ru.pricepulse.parsingservice.enumeration;
public enum Category {
LAPTOP
LAPTOP,
SMARTPHONE
}

View File

@ -0,0 +1,31 @@
package ru.pricepulse.parsingservice.ozon_parser.enumeration;
import ru.pricepulse.parsingservice.enumeration.Category;
/**
 * Ozon catalogue sections handled by the parser.
 *
 * <p>Each constant stores the section path relative to the Ozon category root
 * and the marketplace-independent {@link Category} it corresponds to.
 */
public enum OzonCategory {

    LAPTOP("/noutbuki-15692/?brandcertified=t", Category.LAPTOP),
    SMARTPHONE("/smartfony-15502/?brandcertified=t", Category.SMARTPHONE);

    /** Common prefix of every category page address. */
    private static final String BASE_CATEGORY_URL = "https://www.ozon.ru/category";

    private final String relativePath;
    private final Category target;

    OzonCategory(String relativePath, Category target) {
        this.relativePath = relativePath;
        this.target = target;
    }

    /**
     * @return the absolute category address: the common prefix followed by this
     *         constant's relative path (query string included)
     */
    public String getCategoryUrl() {
        return BASE_CATEGORY_URL.concat(relativePath);
    }

    /**
     * @return the marketplace-independent category this section maps to
     */
    public Category getMappedCategory() {
        return target;
    }
}

View File

@ -1,62 +1,65 @@
package ru.pricepulse.parsingservice.ozon_parser.pool;
import java.util.NoSuchElementException;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import jakarta.annotation.PreDestroy;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.ObjectFactory;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
@Slf4j
@Component
@Profile("ozon")
public class WebDriverPool {
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>(); // Список занятых драйверов
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>();
private final Queue<WebDriver> busyDrivers = new ConcurrentLinkedQueue<>();
private final ObjectFactory<WebDriver> webDriverFactory;
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory) {
this.webDriverFactory = webDriverFactory;
int poolSize = 12;
private final OzonConfigProperties ozonConfigProperties;
public WebDriverPool(ObjectFactory<WebDriver> webDriverFactory,
OzonConfigProperties ozonConfigProperties) {
this.webDriverFactory = webDriverFactory;
this.ozonConfigProperties = ozonConfigProperties;
int poolSize = ozonConfigProperties.getMaxThreads();
// Инициализация пула с указанным количеством драйверов
for (int i = 0; i < poolSize; i++) {
availableDrivers.add(createNewDriver());
}
}
// Метод для создания нового экземпляра WebDriver
private WebDriver createNewDriver() {
return webDriverFactory.getObject();
}
// Метод для заимствования драйвера
public WebDriver borrowDriver() {
WebDriver driver = availableDrivers.poll(); // Получаем драйвер из доступных
WebDriver driver = availableDrivers.poll();
if (driver != null) {
busyDrivers.add(driver); // Добавляем драйвер в занятые
busyDrivers.add(driver);
return driver;
}
return driver; // Возвращаем драйвер
throw new NoSuchElementException("No available driver found");
}
// Метод для возврата драйвера в пул
public void returnDriver(WebDriver driver) {
busyDrivers.remove(driver); // Убираем драйвер из занятых
availableDrivers.add(driver); // Возвращаем драйвер в доступные
busyDrivers.remove(driver);
availableDrivers.add(driver);
}
// Метод для закрытия всех драйверов в пуле
@PreDestroy
public void shutdownPool() {
// Закрываем доступные драйверы
for (WebDriver driver : availableDrivers) {
driver.quit();
}
// Закрываем занятые драйверы
for (WebDriver driver : busyDrivers) {
driver.quit();
}

View File

@ -1,7 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
public interface MarketplaceParsingService {
void processCategory(String categoryUrl);
}

View File

@ -0,0 +1,17 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
/**
 * Service exposing the Ozon categories known to the application.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class OzonService {

    /**
     * @return every {@link OzonCategory} constant, in declaration order
     */
    public OzonCategory[] getCategories() {
        OzonCategory[] allCategories = OzonCategory.values();
        return allCategories;
    }
}

View File

@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.ozon_parser.service.dto;
import java.math.BigDecimal;
import lombok.Builder;
import lombok.Getter;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
@ -9,6 +10,7 @@ import ru.pricepulse.parsingservice.enumeration.Marketplace;
@Getter
@Setter
@Builder
public class ParsedData {
private Marketplace marketplace;

View File

@ -1,18 +1,14 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import java.math.BigDecimal;
import java.util.ArrayList;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@ -63,13 +59,13 @@ public class CategoryPage implements MarketplacePage {
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
.findElements(By.tagName("span")).getFirst().getText());
var parsedData = new ParsedData();
/*var parsedData = new ParsedData();
parsedData.setUrl(productUrl);
parsedData.setBrand(productBrand);
parsedData.setProductName(productName);
parsedData.setImageUrl(productImageUrl);
parsedData.setPrice(productPrice);
products.add(parsedData);
products.add(parsedData);*/
});

View File

@ -10,11 +10,11 @@ public class NoContentPage {
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
private static final String NOT_FOUND_TEXT_XPATH = "\"//*[contains(text(), 'По вашим параметрам ничего не нашлось. Попробуйте сбросить фильтры. ')]\"";
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsError']";
private static final String SEARCH_RESULTS_ERROR = "div[data-widget='searchResultsError']";
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private final By notFoundText = By.xpath(NOT_FOUND_TEXT_XPATH);
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private final By searchResultsError = By.cssSelector(SEARCH_RESULTS_ERROR);
private WebDriver driver;
@ -27,7 +27,7 @@ public class NoContentPage {
public boolean isLoaded() {
try {
return driver.findElement(searchResults) != null
return driver.findElement(searchResultsError) != null
|| driver.findElement(errorText) != null
|| driver.findElement(notFoundText) != null;
} catch (Exception e) {

View File

@ -0,0 +1,228 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@Slf4j
public class OzonCategoryPage {
private static final String OZON_MAIN_LINK = "https://www.ozon.ru";
public static final String SEARCH_RESULTS_CSS_SELECTOR = "div[data-widget='searchResultsV2']";
public static final int INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT = 1;
public static final int INDEX_OF_PRODUCT_PRICE = 0;
public static final int INDEX_OF_PRODUCT_BRAND = 1;
public static final int INDEX_OF_PRODUCT_NAME = 2;
private final Document document;
public OzonCategoryPage(String pageHtml) {
this.document = Jsoup.parse(pageHtml);
}
public List<ParsedData> getProducts(Category category) {
List<ParsedData> products = new ArrayList<>();
Elements searchResultsDivs = getSearchResultsDivs();
if (searchResultsDivs.isEmpty()) {
return List.of();
}
log.info("нашли столько результатов на странице {}", searchResultsDivs.size());
for (Element searchResultsDiv : searchResultsDivs) {
Elements productsDivs = getProductsDivs(searchResultsDiv);
List<Elements> allProductDataDivs = getAllProductDataDivs(productsDivs);
List<ParsedData> parsedProductsData = extractParsedData(allProductDataDivs, category);
products.addAll(parsedProductsData);
}
/*try {
for (Element searchResultsDiv : searchResultsDivs) {
var productDivs = searchResultsDiv.select("> div > div");
for (Element productDiv : productDivs) {
Elements productDataDivs = productDivs.select("> div > *");
if (productDataDivs.select("> *").isEmpty()) {
continue;
}
productDataDivs.removeLast();
Element productUrlAndImageUrlA = productDataDivs.first();
Element productDataDiv = productDataDivs.last();
Elements productDataInnerDivs = productDataDiv.select("> *");
try {
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
.select("span").text().toLowerCase()
.contains("осталось")) {
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
}
} catch (Exception ignored) {}
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND).select("> span");
String productUrl = OZON_MAIN_LINK + productUrlAndImageUrlA.attr("href").replaceAll("\\?.*$", "");
String productImageUrl = productUrlAndImageUrlA.select("> div > div")
.first().getElementsByTag("img")
.first().attr("src");
BigDecimal productPrice;
try {
productPrice = parseOzonPriceToBigDecimal(
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
.first().text());
} catch (Exception e) {
log.error("не удалось распарсить цену");
continue;
}
String productBrand = productBrandBlockSpans.first().selectFirst("> span > b").text();
String productName = productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME).select("> div > span").text();
ParsedData parsedData = new ParsedData();
parsedData.setCategory(category);
parsedData.setMarketplace(Marketplace.OZON);
parsedData.setUrl(productUrl);
parsedData.setImageUrl(productImageUrl);
parsedData.setPrice(productPrice);
parsedData.setBrand(productBrand);
parsedData.setProductName(productName);
products.add(parsedData);
}
}
} catch (Exception e) {
log.error(e.getMessage(), e);
}*/
return products;
}
private Elements getSearchResultsDivs() {
try {
return document.select(SEARCH_RESULTS_CSS_SELECTOR);
} catch (Exception e) {
log.warn("Не удалось достать блоки searchResultsV2");
return new Elements();
}
}
private Elements getProductsDivs(Element searchResultsDiv) {
return searchResultsDiv.select("> div > div");
}
private List<Elements> getAllProductDataDivs(Elements productsDivs) {
List<Elements> allProductDataDivs = new ArrayList<>();
for (Element productDiv : productsDivs) {
Elements productDataDivs = productDiv.select("> div > *");
if (productDataDivs.select("> *").isEmpty()) {
continue;
}
removeAddInFavouriteDiv(productDataDivs);
allProductDataDivs.add(productDataDivs);
}
return allProductDataDivs;
}
private void removeAddInFavouriteDiv(Elements productDataDivs) {
productDataDivs.removeLast();
}
private List<ParsedData> extractParsedData(List<Elements> allProductDataDivs,
Category category) {
List<ParsedData> parsedData = new ArrayList<>();
for (Elements productDataDivs : allProductDataDivs) {
try {
ParsedData parsedDataItem = getParsedDataItem(productDataDivs, category);
parsedData.add(parsedDataItem);
} catch (Exception e) {
//log.error(e.getMessage(), e);
}
}
return parsedData;
}
private ParsedData getParsedDataItem(Elements productDataDivs,
Category category) {
removeExtraDivIfExists(productDataDivs);
return ParsedData.builder()
.category(category)
.marketplace(Marketplace.OZON)
.url(extractUrl(productDataDivs))
.imageUrl(extractImageUrl(productDataDivs))
.brand(extractBrand(productDataDivs))
.productName(extractProductName(productDataDivs))
.price(extractPrice(productDataDivs))
.build();
}
private void removeExtraDivIfExists(Elements productDataDivs) {
Element productDataDiv = productDataDivs.last();
Elements productDataInnerDivs = productDataDiv.select("> *");
try {
if (productDataInnerDivs.get(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT)
.select("span").text().toLowerCase()
.contains("осталось")) {
productDataInnerDivs.remove(INDEX_OF_EXTRA_DIV_IF_SALE_PRODUCT);
}
} catch (Exception ignored) {}
}
private String extractUrl(Elements productDataDivs) {
Element productUrlA = productDataDivs.first();
return OZON_MAIN_LINK + productUrlA
.attr("href").replaceAll("\\?.*$", "");
}
private String extractImageUrl(Elements productDataDivs) {
Element productImageUrlA = productDataDivs.first();
return productImageUrlA.select("> div > div")
.first().getElementsByTag("img")
.first().attr("src");
}
private String extractBrand(Elements productDataDivs) {
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
//log.info(productDataInnerDivs.html());
Elements productBrandBlockSpans = productDataInnerDivs.get(INDEX_OF_PRODUCT_BRAND)
.select("> span");
String brand = productBrandBlockSpans.first().selectFirst("> span > b").text();
if (productBrandBlockSpans.size() == 1 && "Оригинал".equals(brand)) {
return "БРЕНД_НЕ_УКАЗАН";
}
return brand;
}
private String extractProductName(Elements productDataDivs) {
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
return productDataInnerDivs.get(INDEX_OF_PRODUCT_NAME)
.select("> div > span").text();
}
private BigDecimal extractPrice(Elements productDataDivs) {
Elements productDataInnerDivs = getProductMainDataInnerDivs(productDataDivs);
return parseOzonPriceToBigDecimal(
productDataInnerDivs.get(INDEX_OF_PRODUCT_PRICE).select("> div > span")
.first().text());
}
private Elements getProductMainDataInnerDivs(Elements productDataDivs) {
return productDataDivs.last().select("> *");
}
private BigDecimal parseOzonPriceToBigDecimal(String ozonPrice) {
String cleanedString = ozonPrice.replaceAll("[^\\d]", "");
return new BigDecimal(cleanedString);
}
}

View File

@ -2,7 +2,6 @@ package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
@ -11,36 +10,30 @@ import org.springframework.context.annotation.Profile;
import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
import ru.pricepulse.parsingservice.service.ProductService;
@Slf4j
@Service
@Profile("ozon")
public class OzonCategoryPageParsingService {
public class OzonHtmlFetcher {
private final WebDriverPool webDriverPool;
private final ProductService productService;
private final PageScroller pageScroller;
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
ProductService productService) {
public OzonHtmlFetcher(WebDriverPool webDriverPool,
PageScroller pageScroller) {
this.webDriverPool = webDriverPool;
this.productService = productService;
this.pageScroller = pageScroller;
}
@Retryable(maxAttempts = 10, recover = "recover")
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
public String fetchPageHtml(String pageUrl,
AtomicBoolean lastPageInCategory) {
var driver = webDriverPool.borrowDriver();
if (driver == null) {
throw new RuntimeException();
}
try {
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
driver.get(pageUrl);
@ -48,64 +41,63 @@ public class OzonCategoryPageParsingService {
var accessDeniedPage = new AccessDeniedPage(driver, wait);
var categoryPage = new CategoryPage(driver, wait);
var noContentPage = new NoContentPage(driver, wait);
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
if (checkAccessDeniedPage(accessDeniedPage)) {
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
resolveAccessDeniedPage(accessDeniedPage);
log.info("Проблема успешно решена: {}", pageUrl);
}
if (noContentPage.isLoaded()) {
log.info("Страница не найдена");
stopFlag.set(true);
return;
}
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage, noContentPage, lastPageInCategory));
checkAceesDeniedAndResolve(accessDeniedPage);
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
List<ParsedData> parsedData;
try {
parsedData = categoryPage.getParsedProducts();
for (ParsedData data : parsedData) {
data.setMarketplace(Marketplace.OZON);
data.setCategory(category);
}
productService.saveBatch(parsedData);
} catch (Exception e) {
throw new Exception(e);
}
webDriverPool.returnDriver(driver);
} catch (Exception ignored) {
throw new RuntimeException(ignored);
pageScroller.scrollToEndOfPage(driver);
return driver.getPageSource();
} catch (Exception e) {
log.error(e.getMessage(), e);
throw new RuntimeException(e);
} finally {
webDriverPool.returnDriver(driver);
}
}
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
CategoryPage categoryPage) {
CategoryPage categoryPage,
NoContentPage noContentPage,
AtomicBoolean stopFlag) {
log.debug("Проверка что страница 'Доступ ограничен'");
try {
if (checkAccessDeniedPage(accessDeniedPage)) {
return true;
}
} catch (Exception e) {
log.debug("Ошибка проверки", e);
if (checkAccessDeniedPage(accessDeniedPage)) {
return true;
}
log.debug("Проверка что страница 'Страница категории'");
if (checkCategoryPage(categoryPage)) {
return true;
}
if (checkNoContentPage(noContentPage)) {
stopFlag.set(true);
return true;
}
log.debug("Проверка загрузки страницы неудачна");
return false;
}
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
return accessDeniedPage.isLoaded();
}
private boolean checkCategoryPage(CategoryPage categoryPage) {
return categoryPage.isLoaded();
}
private void checkAceesDeniedAndResolve(AccessDeniedPage accessDeniedPage) {
if (checkAccessDeniedPage(accessDeniedPage)) {
log.info("Доступ ограничен, пробуем решить проблему");
resolveAccessDeniedPage(accessDeniedPage);
log.info("Проблема успешно решена");
}
}
private boolean checkNoContentPage(NoContentPage noContentPage) {
if (noContentPage.isLoaded()) {
log.info("Страница не найдена");
return true;
}
return false;
}
private boolean checkAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
return accessDeniedPage.isLoaded();
}
private void resolveAccessDeniedPage(AccessDeniedPage accessDeniedPage) {
accessDeniedPage.clickReloadButton();
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.List;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.page.OzonCategoryPage;
/**
 * Stateless service that turns the HTML of an Ozon category page into parsed products.
 */
@Service
public class OzonPageParser {

    /**
     * Wraps the page source in an {@link OzonCategoryPage} and delegates product extraction to it.
     *
     * @param pageSource HTML source of the category page
     * @param category   category passed on to the page object for the extracted products
     * @return the products returned by {@link OzonCategoryPage#getProducts(Category)}
     */
    public List<ParsedData> parseProductsFromCategoryPage(String pageSource,
                                                          Category category) {
        return new OzonCategoryPage(pageSource).getProducts(category);
    }
}

View File

@ -1,65 +1,113 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.ArrayList;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.slf4j.MDC;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.ozon_parser.enumeration.OzonCategory;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.service.ProductService;
@Slf4j
@Service
@Profile("ozon")
public class OzonParsingService implements MarketplaceParsingService {
public class OzonParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(5);
private final Semaphore semaphore = new Semaphore(5); // Ограничиваем количество активных и ожидающих задач
private final Map<String, Set<String>> urlCache;
private final OzonCategoryPageParsingService categoryPageParsingService;
private final ExecutorService pageExecutorService;
private final Semaphore semaphore;
private final OzonHtmlFetcher categoryPageParsingService;
private final OzonConfigProperties ozonConfigProperties;
private final OzonPageParser ozonPageParser;
private final ProductService productService;
public OzonParsingService(OzonHtmlFetcher categoryPageParsingService,
OzonConfigProperties ozonConfigProperties, OzonPageParser ozonPageParser,
ProductService productService) {
this.pageExecutorService = Executors.newFixedThreadPool(ozonConfigProperties.getMaxThreads());
this.semaphore = new Semaphore(ozonConfigProperties.getMaxThreads());
this.urlCache = new ConcurrentHashMap<>();
for (OzonCategory category : OzonCategory.values()) {
urlCache.put(category.getCategoryUrl(), ConcurrentHashMap.newKeySet());
}
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
this.categoryPageParsingService = categoryPageParsingService;
this.ozonConfigProperties = ozonConfigProperties;
this.ozonPageParser = ozonPageParser;
this.productService = productService;
}
public void processCategory(String url) {
int pageIndex = 1;
/**
 * Processes every {@link OzonCategory} in declaration order, logging the start of each one.
 */
public void startProcessing() {
    OzonCategory[] categories = OzonCategory.values();
    for (int i = 0; i < categories.length; i++) {
        OzonCategory current = categories[i];
        log.info("НАЧАЛО ОБРАБОТКИ КАТЕГОРИИ {}", current);
        processCategory(current);
    }
}
while (!stopFlag.get()) {
private void processCategory(OzonCategory category) {
int pageIndex = 1;
AtomicBoolean lastPageInCategory = new AtomicBoolean(false);
while (!lastPageInCategory.get()) {
try {
semaphore.acquire(); // Получаем разрешение перед созданием новой задачи
semaphore.acquire();
int finalPageIndex = pageIndex;
String pageUrl = url + "&page=" + finalPageIndex;
String pageUrl = category.getCategoryUrl() + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> {
try {
categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag);
} finally {
semaphore.release(); // Освобождаем разрешение после завершения задачи
}
});
++pageIndex;
pageExecutorService.submit(() -> processCategoryPage(pageUrl, category, lastPageInCategory));
pageIndex += ozonConfigProperties.getMaxNumOfPagesOnScreen();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
break;
}
}
if (lastPageInCategory.get()) {
log.info("Достигли последней страницы категории");
}
}
if (stopFlag.get()) {
log.info("Конец парсинга категории");
pageExecutorService.shutdownNow();
/**
 * Fetches one category page, parses its products and saves those whose URL
 * has not been seen before for this category.
 *
 * <p>Runs as a pool task: the MDC is cleared and the semaphore permit taken
 * by the submitting loop is released in all cases.
 *
 * @param pageUrl            address of the page to process
 * @param category           category the page belongs to
 * @param lastPageInCategory flag handed to the fetcher, which sets it when the
 *                           page turns out to have no content
 */
private void processCategoryPage(String pageUrl,
                                 OzonCategory category,
                                 AtomicBoolean lastPageInCategory) {
    // Upper bound on remembered product URLs per category.
    final int maxCachedUrlsPerCategory = 1_000_000;
    try {
        MDC.put("pageUrl", pageUrl);
        String pageSource = categoryPageParsingService.fetchPageHtml(pageUrl, lastPageInCategory);
        List<ParsedData> parsedProducts =
                ozonPageParser.parseProductsFromCategoryPage(pageSource, category.getMappedCategory());
        log.info("""
                КОНЕЦ ПАРСИНГА СТРАНИЦЫ КАТЕГОРИИ
                КОЛИЧЕСТВО НАЙДЕННЫХ ТОВАРОВ НА СТРАНИЦЕ {},
                """, parsedProducts.size());

        // The map itself only ever holds one entry per category, so the bound
        // has to be checked on the category's URL set. Clearing the set (not
        // the map) keeps the entry in place for concurrent page tasks.
        Set<String> seenUrls = urlCache.computeIfAbsent(
                category.getCategoryUrl(), key -> ConcurrentHashMap.<String>newKeySet());
        if (seenUrls.size() > maxCachedUrlsPerCategory) {
            seenUrls.clear();
        }

        // Set.add returns false for URLs already recorded, which filters duplicates.
        List<ParsedData> uniqueData = parsedProducts.stream()
                .filter(data -> seenUrls.add(data.getUrl()))
                .toList();
        productService.saveBatch(uniqueData);
    } finally {
        MDC.clear();
        semaphore.release();
    }
}

View File

@ -0,0 +1,56 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.concurrent.atomic.AtomicLong;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.JavascriptExecutor;
import org.openqa.selenium.WebDriver;
import org.springframework.stereotype.Service;
/**
 * Scrolls a page opened in a {@link WebDriver} to the bottom until no more
 * content is being appended.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PageScroller {

    private static final String ALL_CONTENT_PAGE_HEIGHT = "return document.body.scrollHeight";
    private static final String SCROLL_TO_PAGE_HEIGHT = "window.scrollTo(0, document.body.scrollHeight);";

    private static final String PAGINATOR_BLOCKS_CSS_SELECTOR = "div[data-widget='megaPaginator'] > div";
    private static final String PAGINATOR_LINKS_CONTAINER_CSS_SELECTOR = ":scope > div > div > div";

    /** Consecutive scrolls without growth of the page height after which scrolling stops. */
    private static final int MAX_IDLE_ATTEMPTS = 100;

    /** Pause between scrolls while the page height is not growing. */
    private static final long IDLE_PAUSE_MILLIS = 1000L;

    /**
     * Repeatedly scrolls to the current bottom of the page.
     *
     * <p>Scrolling stops either when the page height grew on the last scroll
     * and the paginator block is present, or after {@value #MAX_IDLE_ATTEMPTS}
     * consecutive scrolls that did not increase the page height.
     *
     * <p>NOTE(review): stopping on "paginator present and page just grew" is
     * kept exactly as it was; confirm that this is the intended stop condition.
     *
     * @param driver driver with the page already loaded; must also implement
     *               {@link JavascriptExecutor}
     * @throws InterruptedException if the thread is interrupted while pausing
     */
    public void scrollToEndOfPage(WebDriver driver) throws InterruptedException {
        JavascriptExecutor js = (JavascriptExecutor) driver;
        long lastHeight = readPageHeight(js);
        int attemptsLeft = MAX_IDLE_ATTEMPTS;
        log.info("Начинаем пролистывать страницу до конца");
        while (true) {
            js.executeScript(SCROLL_TO_PAGE_HEIGHT);
            long newHeight = readPageHeight(js);
            boolean pageGrew = newHeight > lastHeight;

            // The paginator only matters when the page grew, so it is looked up lazily.
            if (pageGrew && isPaginatorRendered(driver)) {
                log.info("ЗАКОНЧИЛИ СКРОЛЛИТЬ");
                break;
            }

            if (pageGrew) {
                attemptsLeft = MAX_IDLE_ATTEMPTS;
                lastHeight = newHeight;
            } else {
                attemptsLeft--;
                Thread.sleep(IDLE_PAUSE_MILLIS);
                if (attemptsLeft == 0) {
                    break;
                }
            }
        }
    }

    /** Reads the document height; accepts any numeric type returned by the driver. */
    private long readPageHeight(JavascriptExecutor js) {
        return ((Number) js.executeScript(ALL_CONTENT_PAGE_HEIGHT)).longValue();
    }

    /** Reports whether the second paginator block and its links container can be located. */
    private boolean isPaginatorRendered(WebDriver driver) {
        try {
            driver.findElements(By.cssSelector(PAGINATOR_BLOCKS_CSS_SELECTOR)).get(1)
                    .findElement(By.cssSelector(PAGINATOR_LINKS_CONTAINER_CSS_SELECTOR))
                    .findElements(By.tagName("a"));
            return true;
        } catch (RuntimeException ignored) {
            // Fewer than two paginator blocks or no links container: the
            // paginator is not on the page, so scrolling continues.
            return false;
        }
    }
}

View File

@ -4,7 +4,6 @@ import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
@Service
@ -12,13 +11,11 @@ import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingServi
@Profile("ozon")
public class OzonProductUpdater {
private final OzonConfigProperties properties;
private final OzonParsingService ozonParsingService;
@Scheduled(fixedRate = 3600000)
@Scheduled(cron = "0 0 0,6,12,18 * * *")
public void updateOzonProducts() {
properties.getCategoriesUrls()
.forEach(ozonParsingService::processCategory);
ozonParsingService.startProcessing();
}
}

View File

@ -0,0 +1,4 @@
package ru.pricepulse.parsingservice.ozon_parser.service.task;
/**
 * Placeholder type: the class currently declares no fields or methods.
 */
public class OzonParsingTask {
}

View File

@ -1,17 +1,21 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.EmbeddedId;
import jakarta.persistence.Entity;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.*;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.Objects;
@Getter
@Setter
@Entity
@ -52,7 +56,9 @@ public class PriceHistoryEntity {
@PrePersist
protected void onCreate() {
id.setDate(LocalDateTime.now());
if (id.getDate() == null) {
id.setDate(ZonedDateTime.now());
}
}
}

View File

@ -1,17 +1,17 @@
package ru.pricepulse.parsingservice.persistence.entity;
import jakarta.persistence.*;
import java.io.Serializable;
import java.time.ZonedDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Embeddable;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
import java.io.Serializable;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.util.Objects;
@Getter
@Setter
@AllArgsConstructor
@ -23,7 +23,7 @@ public class PriceHistoryId implements Serializable {
private String productUrl;
@Column(name = "date", nullable = false)
private LocalDateTime date;
private ZonedDateTime date;
@Override
public final boolean equals(Object o) {

View File

@ -1,14 +1,26 @@
package ru.pricepulse.parsingservice.persistence.entity;
import jakarta.persistence.*;
import lombok.*;
import org.hibernate.proxy.HibernateProxy;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import java.time.LocalDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
@Getter
@Setter
@Entity

View File

@ -1,8 +1,16 @@
package ru.pricepulse.parsingservice.persistence.repository;
import java.time.ZonedDateTime;
import java.util.List;
import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
/**
 * Spring Data JPA repository for {@link PriceHistoryEntity} rows identified by {@link PriceHistoryId}.
 */
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
// Derived query: selects price-history rows by the id's productUrl within a date window,
// ordered by the id's date ascending.
// NOTE(review): the "After"/"Before" keywords are presumably strict comparisons, i.e. rows dated
// exactly `from` or `to` are not returned — confirm against the Spring Data keyword table.
// NOTE(review): the name mixes "IdDate" and "Id_Date"; both presumably resolve to id.date — confirm.
List<PriceHistoryEntity> findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(String productUrl,
ZonedDateTime from,
ZonedDateTime to);
}

View File

@ -1,15 +1,28 @@
package ru.pricepulse.parsingservice.persistence.repository;
import java.util.List;
import java.util.Optional;
import org.springframework.data.domain.Page;
import org.springframework.data.domain.Pageable;
import org.springframework.data.jpa.repository.JpaRepository;
import org.springframework.data.jpa.repository.Query;
import org.springframework.stereotype.Repository;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import java.util.List;
@Repository
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
boolean existsByUrl(String url);
ProductEntity findByUrl(String url);
List<ProductEntity> findAllByUrlIn(List<String> urls);
@Query("""
select p.url from ProductEntity p where p.url in :urls
""")
List<String> findSavedUrl(List<String> urls);
Optional<ProductEntity> findByUrl(String url);
Page<ProductEntity> findAllByMarketplaceAndCategory(Marketplace marketplace, Category category, Pageable pageable);
}

View File

@ -1,19 +1,28 @@
package ru.pricepulse.parsingservice.service;
import java.util.ArrayList;
import java.time.ZonedDateTime;
import java.util.List;
import jakarta.persistence.EntityNotFoundException;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.data.domain.Pageable;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
import ru.pricepulse.parsingservice.service.mapper.PriceHistoryMapper;
import ru.pricepulse.parsingservice.service.mapper.ProductMapper;
@Slf4j
@Service
@ -24,34 +33,58 @@ public class ProductService {
private final ProductPriceRepository productPriceRepository;
private final ProductMapper productMapper;
private final PriceHistoryMapper priceHistoryMapper;
@Transactional
@Retryable
public void saveBatch(List<ParsedData> parsedData) {
var products = new ArrayList<ProductEntity>();
var prices = new ArrayList<PriceHistoryEntity>();
parsedData.forEach(product -> processParsedProduct(product, prices, products));
List<String> productsUrls = parsedData.stream().map(ParsedData::getUrl).toList();
List<String> alreadySavedUrls = productRepository.findSavedUrl(productsUrls);
List<ProductEntity> products = parsedData.stream()
.filter(data -> !alreadySavedUrls.contains(data.getUrl()))
.map(this::getProduct)
.toList();
List<PriceHistoryEntity> prices = parsedData.stream().map(this::getPriceHistory).toList();
productRepository.saveAll(products);
log.info("Сохранили пачку товаров {}", products.size());
productPriceRepository.saveAll(prices);
log.info("Сохранили историю цен {}", prices.size());
}
private void processParsedProduct(ParsedData product,
ArrayList<PriceHistoryEntity> prices,
ArrayList<ProductEntity> products) {
var priceHistoryEntity = getPriceHistory(product);
prices.add(priceHistoryEntity);
if (productRepository.existsByUrl(product.getUrl())) {
log.info("Запись {} уже есть", product.getUrl());
return;
}
var productEntity = getProduct(product);
products.add(productEntity);
/**
 * Looks up a single product by its URL and converts it to a DTO.
 *
 * @param productUrl URL used as the lookup key
 * @return DTO built from the matching product
 * @throws EntityNotFoundException if the repository has no product with that URL
 */
@Transactional(readOnly = true)
public ProductDto findByUrl(String productUrl) {
    return productRepository.findByUrl(productUrl)
            .map(productMapper::toProductDto)
            .orElseThrow(EntityNotFoundException::new);
}
/**
 * Loads the price history of one product inside a date window and maps it to a DTO.
 *
 * @param productUrl URL identifying the product
 * @param from       lower bound of the window, passed to the repository query as-is
 * @param to         upper bound of the window, passed to the repository query as-is
 * @return DTO produced by the price-history mapper from the rows the repository returned
 */
@Transactional(readOnly = true)
public PriceHistoryDto findPriceHistoryByRange(String productUrl, ZonedDateTime from, ZonedDateTime to) {
    List<PriceHistoryEntity> entries = productPriceRepository
            .findAllById_ProductUrlAndIdDateAfterAndId_DateBeforeOrderById_DateAsc(productUrl, from, to);
    return priceHistoryMapper.toPriceHistoryDto(entries);
}
/**
 * Returns one page of products for a marketplace and category.
 *
 * @param marketplace marketplace to filter by
 * @param category    category to filter by
 * @param pageable    page number, size and sort requested by the caller
 * @return page DTO carrying the total item count across all pages, the total page count,
 *         the current page index and the products on the current page
 * @throws ArithmeticException if the total item count does not fit into an {@code int}
 */
@Transactional(readOnly = true)
public ProductsPageDto findAllProductsByPage(Marketplace marketplace,
                                             Category category,
                                             Pageable pageable) {
    var page = productRepository.findAllByMarketplaceAndCategory(marketplace, category, pageable);
    List<ProductDto> products = page.getContent().stream().map(productMapper::toProductDto).toList();
    return new ProductsPageDto(
            // Total across all pages. getNumberOfElements() only counts the current page, which made
            // "totalItems" equal to the page size instead of the size of the whole result set.
            Math.toIntExact(page.getTotalElements()),
            page.getTotalPages(),
            page.getNumber(),
            products
    );
}
private PriceHistoryEntity getPriceHistory(ParsedData product) {
var priceHistoryId = new PriceHistoryId();
priceHistoryId.setProductUrl(product.getUrl());
priceHistoryId.setDate(ZonedDateTime.now());
var priceHistory = new PriceHistoryEntity();
priceHistory.setId(priceHistoryId);
priceHistory.setPrice(product.getPrice());
@ -68,4 +101,5 @@ public class ProductService {
productEntity.setImageUrl(product.getImageUrl());
return productEntity;
}
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.service.dto;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.HashMap;
import java.util.Map;
import lombok.Getter;
import lombok.Setter;
/**
 * Price history of a single product: a map from timestamp to the price recorded at that moment.
 *
 * <p>The map is created empty and is filled by the caller through {@code getPriceHistory()}.
 * Entries keep the order in which they were inserted, so a history that was loaded in
 * chronological order stays chronological when it is iterated.
 */
@Getter
// NOTE(review): the only field is final, so this class-level @Setter presumably generates nothing —
// confirm against the Lombok documentation and drop it if so.
@Setter
public class PriceHistoryDto {

    private final Map<ZonedDateTime, BigDecimal> priceHistory;

    /** Creates a DTO with an empty, insertion-ordered price map. */
    public PriceHistoryDto() {
        // A plain HashMap iterates in hash order, which discarded the date ordering of the source rows.
        this.priceHistory = new java.util.LinkedHashMap<>();
    }
}

View File

@ -0,0 +1,28 @@
package ru.pricepulse.parsingservice.service.dto;
import lombok.Getter;
import lombok.RequiredArgsConstructor;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * Read-only view of a product exposed by the service layer.
 *
 * <p>All fields are final and are supplied through the Lombok-generated constructor, whose
 * argument order follows the field declaration order below — do not reorder the fields.
 */
@Getter
// NOTE(review): every field is final, so this class-level @Setter presumably generates no setters — confirm.
@Setter
@RequiredArgsConstructor
public class ProductDto {
// Identifier of the product.
private final Long id;
// Marketplace the product belongs to.
private final Marketplace marketplace;
// Category the product belongs to.
private final Category category;
// Brand of the product.
private final String brand;
// Name of the product.
private final String productName;
// URL of the product.
private final String url;
// URL of the product image.
private final String imageUrl;
}

View File

@ -0,0 +1,22 @@
package ru.pricepulse.parsingservice.service.dto;
import java.util.List;
import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.Setter;
/**
 * One page of {@link ProductDto} items together with paging counters.
 *
 * <p>All fields are final and are supplied through the Lombok-generated all-args constructor,
 * whose argument order follows the field declaration order below — do not reorder the fields.
 */
@Getter
// NOTE(review): every field is final, so this class-level @Setter presumably generates no setters — confirm.
@Setter
@AllArgsConstructor
public class ProductsPageDto {
// Item count supplied by the caller.
private final int totalItems;
// Number of pages supplied by the caller.
private final int totalPages;
// Index of the page this DTO represents.
private final int currentPage;
// Products contained in this page.
private final List<ProductDto> products;
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.service.mapper;
import java.util.List;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
/**
 * Converts persisted price-history rows into a {@link PriceHistoryDto}.
 */
@Component
public class PriceHistoryMapper {

    /**
     * Builds a DTO whose map holds one entry per distinct timestamp of the given rows.
     *
     * <p>Each timestamp has its nanosecond part set to zero before it is used as a key, so a
     * later row with the same resulting key overwrites the earlier one.
     *
     * @param priceHistory rows to convert
     * @return DTO containing the timestamp-to-price entries
     */
    public PriceHistoryDto toPriceHistoryDto(List<PriceHistoryEntity> priceHistory) {
        var result = new PriceHistoryDto();
        for (PriceHistoryEntity entry : priceHistory) {
            var timestamp = entry.getId().getDate().withNano(0);
            result.getPriceHistory().put(timestamp, entry.getPrice());
        }
        return result;
    }
}

View File

@ -0,0 +1,22 @@
package ru.pricepulse.parsingservice.service.mapper;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
/**
 * Converts {@link ProductEntity} instances into {@link ProductDto} objects.
 */
@Component
public class ProductMapper {

    /**
     * Copies id, marketplace, category, brand, product name, URL and image URL from the entity
     * into a new DTO.
     *
     * @param product entity to convert
     * @return newly created DTO with the copied values
     */
    public ProductDto toProductDto(ProductEntity product) {
        return new ProductDto(
                product.getId(), product.getMarketplace(), product.getCategory(),
                product.getBrand(), product.getProductName(),
                product.getUrl(), product.getImageUrl());
    }
}

View File

@ -1,4 +1,4 @@
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
package ru.pricepulse.parsingservice.service.scheduler;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;

View File

@ -1,5 +1,7 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import jakarta.persistence.EntityNotFoundException;
import jakarta.servlet.http.HttpServletRequest;
import org.springframework.http.HttpStatus;
@ -8,8 +10,6 @@ import org.springframework.web.bind.annotation.ControllerAdvice;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseStatus;
import java.net.URI;
@ControllerAdvice
public class CommonExceptionHandler {

View File

@ -1,9 +1,9 @@
package ru.pricepulse.parsingservice.web.handler;
import org.springframework.http.HttpStatus;
import java.net.URI;
import org.springframework.http.HttpStatus;
public record ErrorResponse (
Integer statusCode,
HttpStatus status,

View File

@ -0,0 +1,27 @@
package ru.pricepulse.parsingservice.web.rest;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.service.OzonService;
/**
 * REST endpoint listing product categories under {@code /api/v1/categories}.
 */
@RestController
@RequestMapping("/api/v1/categories")
@RequiredArgsConstructor
public class CategoryApi {

    private final OzonService ozonService;

    /**
     * Returns the categories for a marketplace.
     *
     * @param marketplace marketplace whose categories are requested; may be {@code null}
     * @return the categories reported by {@link OzonService} when the marketplace is OZON,
     *         otherwise all {@link Category} enum constants
     */
    @GetMapping
    public ResponseEntity<?> getCategories(Marketplace marketplace) {
        // Enum identity comparison is null-safe: a missing marketplace takes the generic branch.
        if (marketplace != Marketplace.OZON) {
            return ResponseEntity.ok(Category.values());
        }
        return ResponseEntity.ok(ozonService.getCategories());
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.web.rest;
import lombok.RequiredArgsConstructor;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
/**
 * REST endpoint listing the known marketplaces under {@code /api/v1/marketplaces}.
 */
@RestController
@RequestMapping("/api/v1/marketplaces")
@RequiredArgsConstructor
public class MarketplaceApi {

    /**
     * Returns every constant of the {@link Marketplace} enum.
     *
     * @return 200 response whose body is the array of marketplaces
     */
    @GetMapping
    public ResponseEntity<Marketplace[]> getMarketplace() {
        Marketplace[] marketplaces = Marketplace.values();
        return ResponseEntity.ok(marketplaces);
    }
}

View File

@ -0,0 +1,52 @@
package ru.pricepulse.parsingservice.web.rest;
import java.time.LocalDate;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import lombok.RequiredArgsConstructor;
import org.springframework.data.domain.Pageable;
import org.springframework.format.annotation.DateTimeFormat;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestParam;
import org.springframework.web.bind.annotation.RestController;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.service.ProductService;
import ru.pricepulse.parsingservice.service.dto.PriceHistoryDto;
import ru.pricepulse.parsingservice.service.dto.ProductDto;
import ru.pricepulse.parsingservice.service.dto.ProductsPageDto;
/**
 * REST endpoints for product data under {@code /api/v1/products}.
 */
@RestController
@RequestMapping("/api/v1/products")
@RequiredArgsConstructor
public class ProductApi {

    private final ProductService productService;

    /**
     * Returns the stored information about one product.
     *
     * @param productUrl URL identifying the product
     * @return 200 response with the product DTO
     */
    @GetMapping("/info")
    public ResponseEntity<ProductDto> getProductInfo(@RequestParam String productUrl) {
        return ResponseEntity.ok(productService.findByUrl(productUrl));
    }

    /**
     * Returns the price history of one product between two calendar dates.
     *
     * <p>Both dates are converted to the start of the day at the given offset before querying.
     * NOTE(review): because {@code to} becomes the start of that day, prices recorded during the
     * {@code to} date itself are presumably not returned — confirm this is intended.
     *
     * @param productUrl URL identifying the product
     * @param from       first date of the range, ISO format ({@code yyyy-MM-dd})
     * @param to         last date of the range, ISO format ({@code yyyy-MM-dd})
     * @param zoneOffset offset id understood by {@link ZoneOffset#of(String)}, e.g. {@code +03:00};
     *                   optional, UTC is used when it is missing or blank
     * @return 200 response with the price history DTO
     */
    @GetMapping("/price-history")
    public ResponseEntity<PriceHistoryDto> getProductPriceHistoryByRange(
            @RequestParam String productUrl,
            @RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate from,
            @RequestParam @DateTimeFormat(iso = DateTimeFormat.ISO.DATE) LocalDate to,
            String zoneOffset) {
        ZoneOffset zone = resolveZone(zoneOffset);
        ZonedDateTime fromDateTime = from.atStartOfDay(zone);
        ZonedDateTime toDateTime = to.atStartOfDay(zone);
        return ResponseEntity.ok(productService.findPriceHistoryByRange(productUrl, fromDateTime, toDateTime));
    }

    /**
     * Returns one page of products for a marketplace and category.
     *
     * @param marketplace marketplace to filter by
     * @param category    category to filter by
     * @param pageable    paging parameters of the request
     * @return 200 response with the page DTO
     */
    @GetMapping
    public ResponseEntity<ProductsPageDto> getAllProductsByCategoryAndPage(Marketplace marketplace,
                                                                           Category category,
                                                                           Pageable pageable) {
        return ResponseEntity.ok(productService.findAllProductsByPage(marketplace, category, pageable));
    }

    /**
     * Parses the optional offset parameter, falling back to UTC when it is absent.
     * Previously a missing parameter reached {@code ZoneOffset.of(null)} and failed with a
     * NullPointerException.
     */
    private static ZoneOffset resolveZone(String zoneOffset) {
        if (zoneOffset == null || zoneOffset.isBlank()) {
            return ZoneOffset.UTC;
        }
        return ZoneOffset.of(zoneOffset);
    }
}

View File

@ -1,5 +1,7 @@
package ru.pricepulse.parsingservice.wildberries_parser.converter;
import java.time.LocalDateTime;
import org.springframework.core.convert.converter.Converter;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.enumeration.Category;
@ -7,8 +9,6 @@ import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
import java.time.LocalDateTime;
@Component
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {

View File

@ -1,5 +1,11 @@
package ru.pricepulse.parsingservice.wildberries_parser.service;
import java.math.BigDecimal;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import lombok.AllArgsConstructor;
@ -12,12 +18,6 @@ import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@Service("wildberriesParsingService")
@AllArgsConstructor
public class ParsingService {
@ -52,7 +52,7 @@ public class ParsingService {
productEntity.setUrl("https://www.wildberries.ru/catalog/" + dto.getId() + "/detail.aspx?targetUrl=BP");
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
.id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
.id(new PriceHistoryId(productEntity.getUrl(), ZonedDateTime.now()))
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
.build();

View File

@ -1,5 +1,9 @@
package ru.pricepulse.parsingservice.wildberries_parser.service;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import lombok.AllArgsConstructor;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
@ -8,10 +12,6 @@ import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
@Service("wildberriesProductService")
@AllArgsConstructor
public class ProductService {

View File

@ -22,10 +22,10 @@ liquibase:
marketplace:
ozon:
categories-urls:
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t
max-threads: ${OZON_MAX_PROCESSING_THREADS:5}
max-num-of-pages-on-screen: ${OZON_MAX_NUM_OF_PAGES_ON_SCREEN:100}
wildberries:
status: true
status: false
base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
@ -35,10 +35,11 @@ marketplace:
shard: "electronic15"
laptop-url: "/catalog"
logging:
pattern:
console: "%d{yyyy-MM-dd HH:mm:ss} [%thread] %-5level %logger{36} - %msg %X%n"
level:
sql: debug
# level:
# org:
# springframework:

View File

@ -1,7 +1,6 @@
package ru.pricepulse.parsingservice;
import org.junit.jupiter.api.Test;
import org.springframework.boot.test.context.SpringBootTest;
class ParsingServiceApplicationTests {