Feature/parsing-service: save commit

This commit is contained in:
danil.markov 2024-10-14 21:43:57 +04:00
parent ae8ac061bc
commit 9895aaff33
27 changed files with 336 additions and 301 deletions

View File

@ -37,6 +37,7 @@ dependencies {
implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
implementation 'org.apache.commons:commons-pool2:2.12.0'
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
compileOnly 'org.projectlombok:lombok'

View File

@ -8,10 +8,12 @@ import org.openqa.selenium.WebDriver;
import org.openqa.selenium.chrome.ChromeDriver;
import org.openqa.selenium.chrome.ChromeOptions;
import org.springframework.beans.factory.ObjectFactory;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Component;
@Slf4j
@Component
@Profile("ozon")
public class WebDriverPool {
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов

View File

@ -1,37 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.messaging.ParsedDataProducer;
@Slf4j
@Service
@RequiredArgsConstructor
public class DataParser {
private final ParsedDataProducer queueProducer;
public boolean pageHasData(String html) {
Document doc = Jsoup.parse(html);
return doc.select("div[data-widget=searchResultsError]").isEmpty();
}
public void parseAndQueueData(String html) {
Document doc = Jsoup.parse(html);
for (Element item : doc.select(".item-class")) {
String title = item.select(".item-title").text();
String price = item.select(".item-price").text();
ParsedData parsedData = new ParsedData();
log.info("Попытка отправить данные в очередь");
queueProducer.sendToQueue(parsedData);
log.info("Данные успешно отправлены в очередь");
}
}
}

View File

@ -1,25 +1,27 @@
package ru.pricepulse.parsingservice.ozon_parser.service;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
@Service
@RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionService {
private final JdbcTemplate jdbcTemplate;
private final JdbcTemplate postgresDataSource;
public boolean checkPartitionExists(String partitionName) {
String query = "SELECT to_regclass('public." + partitionName + "')";
String result = jdbcTemplate.queryForObject(query, String.class);
String result = postgresDataSource.queryForObject(query, String.class);
return result != null;
}
public void createPartition(String partitionName, String startDate, String endDate) {
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
jdbcTemplate.execute(createPartitionSQL);
postgresDataSource.execute(createPartitionSQL);
}
}

View File

@ -1,7 +1,10 @@
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
import java.math.BigDecimal;
import lombok.Getter;
import lombok.Setter;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
@Getter
@ -10,7 +13,7 @@ public class ParsedData {
private Marketplace marketplace;
private String category;
private Category category;
private String brand;
@ -20,4 +23,6 @@ public class ParsedData {
private String imageUrl;
private BigDecimal price;
}

View File

@ -1,7 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon;
public interface MarketplacePage {
boolean isLoaded();
}

View File

@ -1,73 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
@Slf4j
public class CategoryPage implements MarketplacePage {
private static final int PAGE_SIZE = 12;
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private WebDriver driver;
private WebDriverWait wait;
public CategoryPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public Set<String> getProductsLinks() {
wait.until(visibilityOfElementLocated(searchResults));
var searchResultsElement = driver.findElement(searchResults);
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
return searchProductsLinks(innerDivs, driver);
}
private Set<String> searchProductsLinks(List<WebElement> innerDivs, WebDriver driver) {
return innerDivs.stream()
.map(div -> {
waitVisibility(div);
List<WebElement> linkTags = null;
try {
linkTags = div.findElements(By.tagName("a"));
} catch (Exception ignored) {}
return linkTags != null && !linkTags.isEmpty()
? linkTags.getFirst().getAttribute("href")
: null;
})
.filter(href -> href != null && !href.isEmpty())
.collect(Collectors.toSet());
}
private void waitVisibility(WebElement outerElement) {
wait.until(driver -> !outerElement.findElements(By.tagName("a")).isEmpty());
}
@Override
public boolean isLoaded() {
try {
return driver.findElement(searchResults) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -1,47 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
@Slf4j
@Service
public class ParsingService implements MarketplaceParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService categoryExecutor = Executors.newFixedThreadPool(1);
private final CategoryPageParsingService categoryPageParsingService;
public ParsingService(CategoryPageParsingService categoryPageParsingService) {
this.categoryPageParsingService = categoryPageParsingService;
}
public void processCategory(String url) {
var startTime = System.currentTimeMillis();
log.info("Начало обработки категории: {}", url);
int pageIndex = 1;
var errors = new ArrayList<String>();
while (!stopFlag.get()) {
int finalPageIndex = pageIndex;
try {
categoryPageParsingService.parseCategoryPage(finalPageIndex, url, errors);
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
++pageIndex;
if (pageIndex > 5) {
stopFlag.set(true);
}
}
log.info("Время выполнения {} ", (System.currentTimeMillis() - startTime) / 1000);
}
}

View File

@ -1,18 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.messaging;
import lombok.RequiredArgsConstructor;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@Service
@RequiredArgsConstructor
public class ParsedDataProducer {
private final KafkaTemplate<String, ParsedData> kafkaTemplate;
public void sendToQueue(ParsedData data) {
kafkaTemplate.send("parsed-data-queue", data);
}
}

View File

@ -1,10 +1,9 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
@Slf4j
public class AccessDeniedPage implements MarketplacePage {

View File

@ -0,0 +1,94 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.support.ui.WebDriverWait;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
@Slf4j
public class CategoryPage implements MarketplacePage {
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
private WebDriver driver;
private WebDriverWait wait;
public CategoryPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public ArrayList<ParsedData> getParsedProducts() {
wait.until(visibilityOfElementLocated(searchResults));
log.info("Нашли SearchResultsV2");
var searchResultsElement = driver.findElement(searchResults);
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
log.info("Нашли внешний блок списка");
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
log.info("Нашли элементы списка");
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
var products = new ArrayList<ParsedData>();
innerDivs.forEach(innerDiv -> {
var productDataDivs = innerDiv.findElements(By.cssSelector(":scope > div"));
var productImageUrl = productDataDivs.get(0)
.findElement(By.cssSelector(":scope > a > div"))
.findElements(By.cssSelector(":scope > div")).getFirst()
.findElement(By.tagName("img")).getAttribute("src");
var productBrand = productDataDivs.get(1).findElement(By.cssSelector(":scope > div"))
.findElements(By.cssSelector(":scope > div")).getFirst()
.findElement(By.tagName("b")).getText();
var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
var productUrl = productNameLink.getAttribute("href");
var productName = productNameLink.findElement(By.tagName("span")).getText();
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
.findElements(By.tagName("span")).getFirst().getText());
var parsedData = new ParsedData();
parsedData.setUrl(productUrl);
parsedData.setBrand(productBrand);
parsedData.setProductName(productName);
parsedData.setImageUrl(productImageUrl);
parsedData.setPrice(productPrice);
products.add(parsedData);
});
return products;
}
private BigDecimal parseCurrency(String currencyStr) {
String cleanedString = currencyStr.replaceAll("[^\\d]", "");
return new BigDecimal(cleanedString);
}
@Override
public boolean isLoaded() {
try {
return driver.findElement(searchResults) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
public interface MarketplacePage {
boolean isLoaded();
}

View File

@ -0,0 +1,32 @@
package ru.pricepulse.parsingservice.ozon_parser.service.page;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.support.ui.WebDriverWait;
@Slf4j
public class NoContentPage {
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
private WebDriver driver;
private WebDriverWait wait;
public NoContentPage(WebDriver driver, WebDriverWait wait) {
this.driver = driver;
this.wait = wait;
}
public boolean isLoaded() {
try {
return driver.findElement(errorText) != null;
} catch (Exception e) {
return false;
}
}
}

View File

@ -1,103 +1,82 @@
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.time.Duration;
import java.time.temporal.ChronoUnit;
import java.util.ArrayList;
import java.util.Set;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.openqa.selenium.support.ui.WebDriverWait;
import org.slf4j.MDC;
import org.springframework.context.annotation.Profile;
import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.AccessDeniedPage;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.CategoryPage;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
import ru.pricepulse.parsingservice.service.ProductService;
@Slf4j
@Service
public class CategoryPageParsingService {
private final ExecutorService productPageExecutor = Executors.newFixedThreadPool(3);
@Profile("ozon")
public class OzonCategoryPageParsingService {
private final WebDriverPool webDriverPool;
public CategoryPageParsingService(WebDriverPool webDriverPool) {
private final ProductService productService;
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
ProductService productService) {
this.webDriverPool = webDriverPool;
this.productService = productService;
}
@Retryable(maxAttempts = 10, recover = "recover")
public void parseCategoryPage(int finalPageIndex, String url, ArrayList<String> errors) throws InterruptedException {
MDC.put("pageIndex", String.valueOf(finalPageIndex));
String pageUrl = url + "/?page=" + finalPageIndex;
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
var driver = webDriverPool.borrowDriver();
try {
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
driver.get(pageUrl);
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
var accessDeniedPage = new AccessDeniedPage(driver, wait); // TODO подумать как не создавать кучу PageObject
var accessDeniedPage = new AccessDeniedPage(driver, wait);
var categoryPage = new CategoryPage(driver, wait);
var noContentPage = new NoContentPage(driver, wait);
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
if (checkAccessDeniedPage(accessDeniedPage)) {
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
resolveAccessDeniedPage(accessDeniedPage);
log.info("Проблема успешно решена: {}", pageUrl);
}
log.info("Получаем список ссылок на товары на текущей странице: {}", pageUrl);
Set<String> hrefs = Set.of();
if (noContentPage.isLoaded()) {
log.info("Страница не найдена");
stopFlag.set(true);
return;
}
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
List<ParsedData> parsedData;
try {
hrefs = categoryPage.getProductsLinks();
parsedData = categoryPage.getParsedProducts();
for (ParsedData data : parsedData) {
data.setMarketplace(Marketplace.OZON);
data.setCategory(category);
}
productService.saveBatch(parsedData);
} catch (Exception e) {
throw new Exception(e);
}
webDriverPool.returnDriver(driver);
log.info("Страница {} Получены ссылки на товары: {}", finalPageIndex, hrefs.size());
hrefs.forEach(href -> {
MDC.put("pageIndex", String.valueOf(finalPageIndex));
try {
processPage(href);
errors.add(href);
log.error(String.valueOf(errors.size()));
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
});
/*hrefs.forEach(href -> productPageExecutor.submit(() -> {
MDC.put("pageIndex", String.valueOf(finalPageIndex));
try {
processPage(href);
errors.add(href);
log.error(String.valueOf(errors.size()));
} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}));*/
productPageExecutor.awaitTermination(10, TimeUnit.SECONDS);
} catch (Exception ignored) {
throw new RuntimeException(ignored);
} finally {
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
webDriverPool.returnDriver(driver);
}
}
private String processPage(String href) throws InterruptedException {
var driver = webDriverPool.borrowDriver();
try {
driver.get(href);
log.info("Страница обработана");
} catch (Throwable ignored) {
} finally {
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
}
return href;
}
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
CategoryPage categoryPage) {
log.debug("Проверка что страница 'Доступ ограничен'");

View File

@ -0,0 +1,43 @@
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
import java.util.ArrayList;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Profile;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
@Slf4j
@Service
@Profile("ozon")
public class OzonParsingService implements MarketplaceParsingService {
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
private final OzonCategoryPageParsingService categoryPageParsingService;
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
this.categoryPageParsingService = categoryPageParsingService;
}
public void processCategory(String url) {
int pageIndex = 1;
while (!stopFlag.get()) {
int finalPageIndex = pageIndex;
String pageUrl = url + "&page=" + finalPageIndex;
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
++pageIndex;
}
if (stopFlag.get()) {
pageExecutorService.shutdownNow();
}
}
}

View File

@ -1,20 +0,0 @@
package ru.pricepulse.parsingservice.ozon_parser.service.request;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
@Slf4j
@Service
@RequiredArgsConstructor
public class PageFetcher {
private final RestTemplate restTemplate;
public String fetchPage(String url) {
log.info("Поолучение страницы {}", url);
return restTemplate.getForObject(url, String.class);
}
}

View File

@ -1,17 +1,19 @@
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
import lombok.RequiredArgsConstructor;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing.ParsingService;
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
@Service
@RequiredArgsConstructor
@Profile("ozon")
public class OzonProductUpdater {
private final OzonConfigProperties properties;
private final ParsingService ozonParsingService;
private final OzonParsingService ozonParsingService;
@Scheduled(fixedRate = 3600000)
public void updateOzonProducts() {

View File

@ -6,6 +6,7 @@ import java.time.format.DateTimeFormatter;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.context.annotation.Profile;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
@ -13,6 +14,7 @@ import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
@Slf4j
@Service
@RequiredArgsConstructor
@Profile("postgres_stat")
public class PartitionScheduler {
private final PartitionService partitionService;

View File

@ -3,11 +3,13 @@ package ru.pricepulse.parsingservice.persistence.entity;
import jakarta.persistence.Column;
import jakarta.persistence.EmbeddedId;
import jakarta.persistence.Entity;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.*;
import org.hibernate.proxy.HibernateProxy;
import java.math.BigDecimal;
import java.time.LocalDateTime;
import java.util.Objects;
@Getter
@ -48,4 +50,9 @@ public class PriceHistoryEntity {
return Objects.hash(id);
}
@PrePersist
protected void onCreate() {
id.setDate(LocalDateTime.now());
}
}

View File

@ -5,11 +5,10 @@ import lombok.AllArgsConstructor;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
import org.hibernate.annotations.OnDelete;
import org.hibernate.annotations.OnDeleteAction;
import org.hibernate.proxy.HibernateProxy;
import java.io.Serializable;
import java.time.LocalDateTime;
import java.time.OffsetDateTime;
import java.util.Objects;
@ -20,13 +19,11 @@ import java.util.Objects;
@Embeddable
public class PriceHistoryId implements Serializable {
@ManyToOne(fetch = FetchType.LAZY, optional = false)
@OnDelete(action = OnDeleteAction.CASCADE)
@JoinColumn(name = "product_id", nullable = false)
private ProductEntity product;
@Column(name = "product_url", nullable = false, unique = true)
private String productUrl;
@Column(name = "date", nullable = false)
private OffsetDateTime date;
private LocalDateTime date;
@Override
public final boolean equals(Object o) {

View File

@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.persistence.repository;
import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, Long> {
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
}

View File

@ -4,4 +4,7 @@ import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
boolean existsByUrl(String url);
}

View File

@ -0,0 +1,69 @@
package ru.pricepulse.parsingservice.service;
import java.util.ArrayList;
import java.util.List;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
@Slf4j
@Service
@RequiredArgsConstructor
public class ProductService {
private final ProductRepository productRepository;
private final ProductPriceRepository productPriceRepository;
@Transactional
public void saveBatch(List<ParsedData> parsedData) {
var products = new ArrayList<ProductEntity>();
var prices = new ArrayList<PriceHistoryEntity>();
parsedData.forEach(product -> processParsedProduct(product, prices, products));
productRepository.saveAll(products);
log.info("Сохранили пачку товаров {}", products.size());
productPriceRepository.saveAll(prices);
log.info("Сохранили историю цен {}", prices.size());
}
private void processParsedProduct(ParsedData product,
ArrayList<PriceHistoryEntity> prices,
ArrayList<ProductEntity> products) {
var priceHistoryEntity = getPriceHistory(product);
prices.add(priceHistoryEntity);
if (productRepository.existsByUrl(product.getUrl())) {
log.debug("Запись {} уже есть", product.getUrl());
return;
}
var productEntity = getProduct(product);
products.add(productEntity);
}
private PriceHistoryEntity getPriceHistory(ParsedData product) {
var priceHistoryId = new PriceHistoryId();
priceHistoryId.setProductUrl(product.getUrl());
var priceHistory = new PriceHistoryEntity();
priceHistory.setId(priceHistoryId);
priceHistory.setPrice(product.getPrice());
return priceHistory;
}
private ProductEntity getProduct(ParsedData product) {
var productEntity = new ProductEntity();
productEntity.setCategory(product.getCategory());
productEntity.setBrand(product.getBrand());
productEntity.setProductName(product.getProductName());
productEntity.setUrl(product.getUrl());
productEntity.setMarketplace(product.getMarketplace());
productEntity.setImageUrl(product.getImageUrl());
return productEntity;
}
}

View File

@ -3,17 +3,17 @@ package ru.pricepulse.parsingservice.wildberries_parser;
import lombok.AllArgsConstructor;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
import ru.pricepulse.parsingservice.wildberries_parser.service.WildberriesParsingService;
@Component
@AllArgsConstructor
public class DebugRunner implements CommandLineRunner {
private final ParsingService parsingService;
private final WildberriesParsingService parsingService;
@Override
public void run(String... args){
System.out.println("Начинаем отладку...");
/*System.out.println("Начинаем отладку...");
parsingService.parse();
System.out.println("Заканчиваем отладку...");
System.out.println("Заканчиваем отладку...");*/
}
}

View File

@ -16,14 +16,14 @@ import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
import java.math.BigDecimal;
import java.time.OffsetDateTime;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
@Service
@AllArgsConstructor
public class ParsingService {
public class WildberriesParsingService {
private final Client client;
private final ObjectMapper objectMapper;
private final ConversionService conversionService;
@ -56,7 +56,7 @@ public class ParsingService {
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
.id(new PriceHistoryId(productEntity, OffsetDateTime.now()))
.id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
.build();

View File

@ -9,18 +9,21 @@ spring:
database: postgresql
datasource:
driver-class-name: org.postgresql.Driver
url: jdbc:postgresql://${JDBC_URL}
username: ${JDBC_USERNAME}
password: ${JDBC_PASSWORD}
url: jdbc:postgresql://${POSTGRES_JDBC_URL}
username: ${POSTGRES_JDBC_USERNAME}
password: ${POSTGRES_JDBC_PASSWORD}
clickhouse:
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
username: ${CLICKHOUSE_JDBC_USERNAME}
password: ${CLICKHOUSE_JDBC_PASSWORD}
liquibase:
change-log: classpath:/db/changelog/master.yml
kafka:
selenium:
marketplace:
ozon:
categories-urls:
- https://www.ozon.ru/category/noutbuki-15692
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
wildberries:
base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"

View File

@ -7,7 +7,7 @@
<changeSet id="20240926_create_product_table.xml" author="danil">
<addColumn tableName="product">
<column name="url" type="varchar" remarks="Ссылка на товар">
<constraints nullable="false" />
<constraints nullable="false" unique="true" />
</column>
</addColumn>
<addColumn tableName="product">
@ -15,25 +15,14 @@
<constraints nullable="false" />
</column>
</addColumn>
<addColumn tableName="product">
<column name="article" type="varchar" remarks="Артикул товара">
<constraints nullable="false" />
</column>
</addColumn>
<dropTable tableName="price_history" cascadeConstraints="true" />
<sql>
CREATE TABLE if not exists price_history(
product_id bigint NOT NULL,
product_url varchar NOT NULL,
price numeric(10, 2) NOT NULL,
date timestamptz NOT NULL,
PRIMARY KEY (product_id, date)
PRIMARY KEY (product_url, date)
) PARTITION BY RANGE (date);
</sql>
<addForeignKeyConstraint baseTableName="price_history"
baseColumnNames="product_id"
constraintName="fk_product_price_history"
referencedTableName="product"
referencedColumnNames="id"
onDelete="CASCADE"/>
</changeSet>
</databaseChangeLog>