Feature/parsing-service: save commit

This commit is contained in:
danil.markov 2024-10-14 21:43:57 +04:00
parent ae8ac061bc
commit 9895aaff33
27 changed files with 336 additions and 301 deletions

View File

@ -37,6 +37,7 @@ dependencies {
implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}" implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0' implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
implementation 'org.apache.commons:commons-pool2:2.12.0' implementation 'org.apache.commons:commons-pool2:2.12.0'
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
compileOnly 'org.projectlombok:lombok' compileOnly 'org.projectlombok:lombok'

View File

@ -8,10 +8,12 @@ import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver; import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions; import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.ObjectFactory; import org.springframework.beans.factory.ObjectFactory;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
@Slf4j @Slf4j
@Component @Component
@Profile("ozon")
public class WebDriverPool { public class WebDriverPool {
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов

View File

@ -1,37 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.messaging.ParsedDataProducer;
@Slf4j
@Service
@RequiredArgsConstructor
public class DataParser {
private final ParsedDataProducer queueProducer;
public boolean pageHasData(String html) {
Document doc = Jsoup.parse(html);
return doc.select("div[data-widget=searchResultsError]").isEmpty();
}
public void parseAndQueueData(String html) {
Document doc = Jsoup.parse(html);
for (Element item : doc.select(".item-class")) {
String title = item.select(".item-title").text();
String price = item.select(".item-price").text();
ParsedData parsedData = new ParsedData();
log.info("Попытка отправить данные в очередь");
queueProducer.sendToQueue(parsedData);
log.info("Данные успешно отправлены в очередь");
}
}
}

View File

@ -1,25 +1,27 @@
package ru.pricepulse.parsingservice.ozon_parser.service; package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.jdbc.core.JdbcTemplate; import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionService { public class PartitionService {
private final JdbcTemplate jdbcTemplate; private final JdbcTemplate postgresDataSource;
public boolean checkPartitionExists(String partitionName) { public boolean checkPartitionExists(String partitionName) {
String query = "SELECT to_regclass('public." + partitionName + "')"; String query = "SELECT to_regclass('public." + partitionName + "')";
String result = jdbcTemplate.queryForObject(query, String.class); String result = postgresDataSource.queryForObject(query, String.class);
return result != null; return result != null;
} }
public void createPartition(String partitionName, String startDate, String endDate) { public void createPartition(String partitionName, String startDate, String endDate) {
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName + String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')"; " PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
jdbcTemplate.execute(createPartitionSQL); postgresDataSource.execute(createPartitionSQL);
} }
} }

View File

@ -1,7 +1,10 @@
package ru.pricepulse.parsingservice.ozon_parser.service.dto; package ru.pricepulse.parsingservice.ozon_parser.service.dto;
import java.math.BigDecimal;
import lombok.Getter; import lombok.Getter;
import lombok.Setter; import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace; import ru.pricepulse.parsingservice.enumeration.Marketplace;
@Getter @Getter
@ -10,7 +13,7 @@ public class ParsedData {
private Marketplace marketplace; private Marketplace marketplace;
private String category; private Category category;
private String brand; private String brand;
@ -20,4 +23,6 @@ public class ParsedData {
private String imageUrl; private String imageUrl;
private BigDecimal price;
} }

View File

@ -1,7 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon;
public interface MarketplacePage {
boolean isLoaded();
}

View File

@ -1,73 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
@Slf4j
public class CategoryPage implements MarketplacePage {
private static final int PAGE_SIZE = 12;
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private WebDriver driver;
private WebDriverWait wait;
public CategoryPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public Set<String> getProductsLinks() {
wait.until(visibilityOfElementLocated(searchResults));
var searchResultsElement = driver.findElement(searchResults);
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
return searchProductsLinks(innerDivs, driver);
}
private Set<String> searchProductsLinks(List<WebElement> innerDivs, WebDriver driver) {
return innerDivs.stream()
.map(div -> {
waitVisibility(div);
List<WebElement> linkTags = null;
try {
linkTags = div.findElements(By.tagName("a"));
} catch (Exception ignored) {}
return linkTags != null && !linkTags.isEmpty()
? linkTags.getFirst().getAttribute("href")
: null;
})
.filter(href -> href != null && !href.isEmpty())
.collect(Collectors.toSet());
}
private void waitVisibility(WebElement outerElement) {
wait.until(driver -> !outerElement.findElements(By.tagName("a")).isEmpty());
}
@Override
public boolean isLoaded() {
try {
return driver.findElement(searchResults) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -1,47 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
@Slf4j
@Service
public class ParsingService implements MarketplaceParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService categoryExecutor = Executors.newFixedThreadPool(1);
private final CategoryPageParsingService categoryPageParsingService;
public ParsingService(CategoryPageParsingService categoryPageParsingService) {
this.categoryPageParsingService = categoryPageParsingService;
}
public void processCategory(String url) {
var startTime = System.currentTimeMillis();
log.info("Начало обработки категории: {}", url);
int pageIndex = 1;
var errors = new ArrayList<String>();
while (!stopFlag.get()) {
int finalPageIndex = pageIndex;
try {
categoryPageParsingService.parseCategoryPage(finalPageIndex, url, errors);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
++pageIndex;
if (pageIndex > 5) {
stopFlag.set(true);
}
}
log.info("Время выполнения {} ", (System.currentTimeMillis() - startTime) / 1000);
}
}

View File

@ -1,18 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.messaging;
import lombok.RequiredArgsConstructor;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@Service
@RequiredArgsConstructor
public class ParsedDataProducer {
private final KafkaTemplate<String, ParsedData> kafkaTemplate;
public void sendToQueue(ParsedData data) {
kafkaTemplate.send("parsed-data-queue", data);
}
}

View File

@ -1,10 +1,9 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page; package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By; import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver; import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait; import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
@Slf4j @Slf4j
public class AccessDeniedPage implements MarketplacePage { public class AccessDeniedPage implements MarketplacePage {

View File

@ -0,0 +1,94 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@Slf4j
public class CategoryPage implements MarketplacePage {
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private WebDriver driver;
private WebDriverWait wait;
public CategoryPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public ArrayList<ParsedData> getParsedProducts() {
wait.until(visibilityOfElementLocated(searchResults));
log.info("Нашли SearchResultsV2");
var searchResultsElement = driver.findElement(searchResults);
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
log.info("Нашли внешний блок списка");
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
log.info("Нашли элементы списка");
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
var products = new ArrayList<ParsedData>();
innerDivs.forEach(innerDiv -> {
var productDataDivs = innerDiv.findElements(By.cssSelector(":scope > div"));
var productImageUrl = productDataDivs.get(0)
.findElement(By.cssSelector(":scope > a > div"))
.findElements(By.cssSelector(":scope > div")).getFirst()
.findElement(By.tagName("img")).getAttribute("src");
var productBrand = productDataDivs.get(1).findElement(By.cssSelector(":scope > div"))
.findElements(By.cssSelector(":scope > div")).getFirst()
.findElement(By.tagName("b")).getText();
var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
var productUrl = productNameLink.getAttribute("href");
var productName = productNameLink.findElement(By.tagName("span")).getText();
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
.findElements(By.tagName("span")).getFirst().getText());
var parsedData = new ParsedData();
parsedData.setUrl(productUrl);
parsedData.setBrand(productBrand);
parsedData.setProductName(productName);
parsedData.setImageUrl(productImageUrl);
parsedData.setPrice(productPrice);
products.add(parsedData);
});
return products;
}
private BigDecimal parseCurrency(String currencyStr) {
String cleanedString = currencyStr.replaceAll("[^\\d]", "");
return new BigDecimal(cleanedString);
}
@Override
public boolean isLoaded() {
try {
return driver.findElement(searchResults) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
public interface MarketplacePage {
boolean isLoaded();
}

View File

@ -0,0 +1,32 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
@Slf4j
public class NoContentPage {
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private WebDriver driver;
private WebDriverWait wait;
public NoContentPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public boolean isLoaded() {
try {
return driver.findElement(errorText) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -1,103 +1,82 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing; package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.time.Duration; import java.time.Duration;
import java.time.temporal.ChronoUnit; import java.time.temporal.ChronoUnit;
import java.util.ArrayList; import java.util.List;
import java.util.Set; import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.support.ui.WebDriverWait; import org.openqa.selenium.support.ui.WebDriverWait;
import org.slf4j.MDC; import org.springframework.context.annotation.Profile;
import org.springframework.retry.annotation.Recover; import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable; import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool; import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.AccessDeniedPage; import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.CategoryPage; import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
import ru.pricepulse.parsingservice.service.ProductService;
@Slf4j @Slf4j
@Service @Service
public class CategoryPageParsingService { @Profile("ozon")
public class OzonCategoryPageParsingService {
private final ExecutorService productPageExecutor = Executors.newFixedThreadPool(3);
private final WebDriverPool webDriverPool; private final WebDriverPool webDriverPool;
public CategoryPageParsingService(WebDriverPool webDriverPool) { private final ProductService productService;
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
ProductService productService) {
this.webDriverPool = webDriverPool; this.webDriverPool = webDriverPool;
this.productService = productService;
} }
@Retryable(maxAttempts = 10, recover = "recover") @Retryable(maxAttempts = 10, recover = "recover")
public void parseCategoryPage(int finalPageIndex, String url, ArrayList<String> errors) throws InterruptedException { public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
MDC.put("pageIndex", String.valueOf(finalPageIndex));
String pageUrl = url + "/?page=" + finalPageIndex;
var driver = webDriverPool.borrowDriver(); var driver = webDriverPool.borrowDriver();
try { try {
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS)); driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
driver.get(pageUrl); driver.get(pageUrl);
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS)); WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
var accessDeniedPage = new AccessDeniedPage(driver, wait); // TODO подумать как не создавать кучу PageObject var accessDeniedPage = new AccessDeniedPage(driver, wait);
var categoryPage = new CategoryPage(driver, wait); var categoryPage = new CategoryPage(driver, wait);
var noContentPage = new NoContentPage(driver, wait);
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage)); wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
if (checkAccessDeniedPage(accessDeniedPage)) { if (checkAccessDeniedPage(accessDeniedPage)) {
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl); log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
resolveAccessDeniedPage(accessDeniedPage); resolveAccessDeniedPage(accessDeniedPage);
log.info("Проблема успешно решена: {}", pageUrl); log.info("Проблема успешно решена: {}", pageUrl);
} }
log.info("Получаем список ссылок на товары на текущей странице: {}", pageUrl); if (noContentPage.isLoaded()) {
Set<String> hrefs = Set.of(); log.info("Страница не найдена");
stopFlag.set(true);
return;
}
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
List<ParsedData> parsedData;
try { try {
hrefs = categoryPage.getProductsLinks(); parsedData = categoryPage.getParsedProducts();
for (ParsedData data : parsedData) {
data.setMarketplace(Marketplace.OZON);
data.setCategory(category);
}
productService.saveBatch(parsedData);
} catch (Exception e) { } catch (Exception e) {
throw new Exception(e); throw new Exception(e);
} }
webDriverPool.returnDriver(driver); webDriverPool.returnDriver(driver);
log.info("Страница {} Получены ссылки на товары: {}", finalPageIndex, hrefs.size());
hrefs.forEach(href -> {
MDC.put("pageIndex", String.valueOf(finalPageIndex));
try {
processPage(href);
errors.add(href);
log.error(String.valueOf(errors.size()));
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
});
/*hrefs.forEach(href -> productPageExecutor.submit(() -> {
MDC.put("pageIndex", String.valueOf(finalPageIndex));
try {
processPage(href);
errors.add(href);
log.error(String.valueOf(errors.size()));
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}));*/
productPageExecutor.awaitTermination(10, TimeUnit.SECONDS);
} catch (Exception ignored) { } catch (Exception ignored) {
throw new RuntimeException(ignored); throw new RuntimeException(ignored);
} finally { } finally {
webDriverPool.returnDriver(driver); // Завершаем работу драйвера webDriverPool.returnDriver(driver);
} }
} }
private String processPage(String href) throws InterruptedException {
var driver = webDriverPool.borrowDriver();
try {
driver.get(href);
log.info("Страница обработана");
} catch (Throwable ignored) {
} finally {
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
}
return href;
}
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage, private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
CategoryPage categoryPage) { CategoryPage categoryPage) {
log.debug("Проверка что страница 'Доступ ограничен'"); log.debug("Проверка что страница 'Доступ ограничен'");

View File

@ -0,0 +1,43 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
@Slf4j
@Service
@Profile("ozon")
public class OzonParsingService implements MarketplaceParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
private final OzonCategoryPageParsingService categoryPageParsingService;
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
this.categoryPageParsingService = categoryPageParsingService;
}
public void processCategory(String url) {
int pageIndex = 1;
while (!stopFlag.get()) {
int finalPageIndex = pageIndex;
String pageUrl = url + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
++pageIndex;
}
if (stopFlag.get()) {
pageExecutorService.shutdownNow();
}
}
}

View File

@ -1,20 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.request;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
@Slf4j
@Service
@RequiredArgsConstructor
public class PageFetcher {
private final RestTemplate restTemplate;
public String fetchPage(String url) {
log.info("Поолучение страницы {}", url);
return restTemplate.getForObject(url, String.class);
}
}

View File

@ -1,17 +1,19 @@
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler; package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled; import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties; import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing.ParsingService; import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
@Profile("ozon")
public class OzonProductUpdater { public class OzonProductUpdater {
private final OzonConfigProperties properties; private final OzonConfigProperties properties;
private final ParsingService ozonParsingService; private final OzonParsingService ozonParsingService;
@Scheduled(fixedRate = 3600000) @Scheduled(fixedRate = 3600000)
public void updateOzonProducts() { public void updateOzonProducts() {

View File

@ -6,6 +6,7 @@ import java.time.format.DateTimeFormatter;
import jakarta.annotation.PostConstruct; import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor; import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j; import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled; import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service; import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService; import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
@ -13,6 +14,7 @@ import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
@Slf4j @Slf4j
@Service @Service
@RequiredArgsConstructor @RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionScheduler { public class PartitionScheduler {
private final PartitionService partitionService; private final PartitionService partitionService;

View File

@ -3,11 +3,13 @@ package ru.pricepulse.parsingservice.persistence.entity;
import jakarta.persistence.Column; import jakarta.persistence.Column;
import jakarta.persistence.EmbeddedId; import jakarta.persistence.EmbeddedId;
import jakarta.persistence.Entity; import jakarta.persistence.Entity;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table; import jakarta.persistence.Table;
import lombok.*; import lombok.*;
import org.hibernate.proxy.HibernateProxy; import org.hibernate.proxy.HibernateProxy;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.Objects; import java.util.Objects;
@Getter @Getter
@ -48,4 +50,9 @@ public class PriceHistoryEntity {
return Objects.hash(id); return Objects.hash(id);
} }
@PrePersist
protected void onCreate() {
id.setDate(LocalDateTime.now());
}
} }

View File

@ -5,11 +5,10 @@ import lombok.AllArgsConstructor;
import lombok.Getter; import lombok.Getter;
import lombok.NoArgsConstructor; import lombok.NoArgsConstructor;
import lombok.Setter; import lombok.Setter;
import org.hibernate.annotations.OnDelete;
import org.hibernate.annotations.OnDeleteAction;
import org.hibernate.proxy.HibernateProxy; import org.hibernate.proxy.HibernateProxy;
import java.io.Serializable; import java.io.Serializable;
import java.time.LocalDateTime;
import java.time.OffsetDateTime; import java.time.OffsetDateTime;
import java.util.Objects; import java.util.Objects;
@ -20,13 +19,11 @@ import java.util.Objects;
@Embeddable @Embeddable
public class PriceHistoryId implements Serializable { public class PriceHistoryId implements Serializable {
@ManyToOne(fetch = FetchType.LAZY, optional = false) @Column(name = "product_url", nullable = false, unique = true)
@OnDelete(action = OnDeleteAction.CASCADE) private String productUrl;
@JoinColumn(name = "product_id", nullable = false)
private ProductEntity product;
@Column(name = "date", nullable = false) @Column(name = "date", nullable = false)
private OffsetDateTime date; private LocalDateTime date;
@Override @Override
public final boolean equals(Object o) { public final boolean equals(Object o) {

View File

@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.persistence.repository;
import org.springframework.data.jpa.repository.JpaRepository; import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity; import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, Long> { public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
} }

View File

@ -4,4 +4,7 @@ import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity; import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
public interface ProductRepository extends JpaRepository<ProductEntity, Long> { public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
boolean existsByUrl(String url);
} }

View File

@ -0,0 +1,69 @@
package ru.pricepulse.parsingservice.service;
import java.util.ArrayList;
import java.util.List;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
@Slf4j
@Service
@RequiredArgsConstructor
public class ProductService {
private final ProductRepository productRepository;
private final ProductPriceRepository productPriceRepository;
@Transactional
public void saveBatch(List<ParsedData> parsedData) {
var products = new ArrayList<ProductEntity>();
var prices = new ArrayList<PriceHistoryEntity>();
parsedData.forEach(product -> processParsedProduct(product, prices, products));
productRepository.saveAll(products);
log.info("Сохранили пачку товаров {}", products.size());
productPriceRepository.saveAll(prices);
log.info("Сохранили историю цен {}", prices.size());
}
private void processParsedProduct(ParsedData product,
ArrayList<PriceHistoryEntity> prices,
ArrayList<ProductEntity> products) {
var priceHistoryEntity = getPriceHistory(product);
prices.add(priceHistoryEntity);
if (productRepository.existsByUrl(product.getUrl())) {
log.debug("Запись {} уже есть", product.getUrl());
return;
}
var productEntity = getProduct(product);
products.add(productEntity);
}
private PriceHistoryEntity getPriceHistory(ParsedData product) {
var priceHistoryId = new PriceHistoryId();
priceHistoryId.setProductUrl(product.getUrl());
var priceHistory = new PriceHistoryEntity();
priceHistory.setId(priceHistoryId);
priceHistory.setPrice(product.getPrice());
return priceHistory;
}
private ProductEntity getProduct(ParsedData product) {
var productEntity = new ProductEntity();
productEntity.setCategory(product.getCategory());
productEntity.setBrand(product.getBrand());
productEntity.setProductName(product.getProductName());
productEntity.setUrl(product.getUrl());
productEntity.setMarketplace(product.getMarketplace());
productEntity.setImageUrl(product.getImageUrl());
return productEntity;
}
}

View File

@ -3,17 +3,17 @@ package ru.pricepulse.parsingservice.wildberries_parser;
import lombok.AllArgsConstructor; import lombok.AllArgsConstructor;
import org.springframework.boot.CommandLineRunner; import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component; import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService; import ru.pricepulse.parsingservice.wildberries_parser.service.WildberriesParsingService;
@Component @Component
@AllArgsConstructor @AllArgsConstructor
public class DebugRunner implements CommandLineRunner { public class DebugRunner implements CommandLineRunner {
private final ParsingService parsingService; private final WildberriesParsingService parsingService;
@Override @Override
public void run(String... args){ public void run(String... args){
System.out.println("Начинаем отладку..."); /*System.out.println("Начинаем отладку...");
parsingService.parse(); parsingService.parse();
System.out.println("Заканчиваем отладку..."); System.out.println("Заканчиваем отладку...");*/
} }
} }

View File

@ -16,14 +16,14 @@ import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto; import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
import java.math.BigDecimal; import java.math.BigDecimal;
import java.time.OffsetDateTime; import java.time.LocalDateTime;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Map; import java.util.Map;
@Service @Service
@AllArgsConstructor @AllArgsConstructor
public class ParsingService { public class WildberriesParsingService {
private final Client client; private final Client client;
private final ObjectMapper objectMapper; private final ObjectMapper objectMapper;
private final ConversionService conversionService; private final ConversionService conversionService;
@ -56,7 +56,7 @@ public class ParsingService {
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class); ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder() PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
.id(new PriceHistoryId(productEntity, OffsetDateTime.now())) .id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0)) .price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
.build(); .build();

View File

@ -9,18 +9,21 @@ spring:
database: postgresql database: postgresql
datasource: datasource:
driver-class-name: org.postgresql.Driver driver-class-name: org.postgresql.Driver
url: jdbc:postgresql://${JDBC_URL} url: jdbc:postgresql://${POSTGRES_JDBC_URL}
username: ${JDBC_USERNAME} username: ${POSTGRES_JDBC_USERNAME}
password: ${JDBC_PASSWORD} password: ${POSTGRES_JDBC_PASSWORD}
clickhouse:
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
username: ${CLICKHOUSE_JDBC_USERNAME}
password: ${CLICKHOUSE_JDBC_PASSWORD}
liquibase: liquibase:
change-log: classpath:/db/changelog/master.yml change-log: classpath:/db/changelog/master.yml
kafka:
selenium:
marketplace: marketplace:
ozon: ozon:
categories-urls: categories-urls:
- https://www.ozon.ru/category/noutbuki-15692 - https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
wildberries: wildberries:
base-url: "https://static-basket-01.wbbasket.ru" base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json" catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"

View File

@ -7,7 +7,7 @@
<changeSet id="20240926_create_product_table.xml" author="danil"> <changeSet id="20240926_create_product_table.xml" author="danil">
<addColumn tableName="product"> <addColumn tableName="product">
<column name="url" type="varchar" remarks="Ссылка на товар"> <column name="url" type="varchar" remarks="Ссылка на товар">
<constraints nullable="false" /> <constraints nullable="false" unique="true" />
</column> </column>
</addColumn> </addColumn>
<addColumn tableName="product"> <addColumn tableName="product">
@ -15,25 +15,14 @@
<constraints nullable="false" /> <constraints nullable="false" />
</column> </column>
</addColumn> </addColumn>
<addColumn tableName="product">
<column name="article" type="varchar" remarks="Артикул товара">
<constraints nullable="false" />
</column>
</addColumn>
<dropTable tableName="price_history" cascadeConstraints="true" /> <dropTable tableName="price_history" cascadeConstraints="true" />
<sql> <sql>
CREATE TABLE if not exists price_history( CREATE TABLE if not exists price_history(
product_id bigint NOT NULL, product_url varchar NOT NULL,
price numeric(10, 2) NOT NULL, price numeric(10, 2) NOT NULL,
date timestamptz NOT NULL, date timestamptz NOT NULL,
PRIMARY KEY (product_id, date) PRIMARY KEY (product_url, date)
) PARTITION BY RANGE (date); ) PARTITION BY RANGE (date);
</sql> </sql>
<addForeignKeyConstraint baseTableName="price_history"
baseColumnNames="product_id"
constraintName="fk_product_price_history"
referencedTableName="product"
referencedColumnNames="id"
onDelete="CASCADE"/>
</changeSet> </changeSet>
</databaseChangeLog> </databaseChangeLog>