Feature/parsing-service: save commit
This commit is contained in:
parent
ae8ac061bc
commit
9895aaff33
@ -37,6 +37,7 @@ dependencies {
|
||||
implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
|
||||
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
|
||||
implementation 'org.apache.commons:commons-pool2:2.12.0'
|
||||
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
|
||||
|
||||
compileOnly 'org.projectlombok:lombok'
|
||||
|
||||
|
@ -8,10 +8,12 @@ import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.chrome.ChromeDriver;
|
||||
import org.openqa.selenium.chrome.ChromeOptions;
|
||||
import org.springframework.beans.factory.ObjectFactory;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.stereotype.Component;
|
||||
|
||||
@Slf4j
|
||||
@Component
|
||||
@Profile("ozon")
|
||||
public class WebDriverPool {
|
||||
|
||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
||||
|
@ -1,37 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.jsoup.Jsoup;
|
||||
import org.jsoup.nodes.Document;
|
||||
import org.jsoup.nodes.Element;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.messaging.ParsedDataProducer;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class DataParser {
|
||||
|
||||
private final ParsedDataProducer queueProducer;
|
||||
|
||||
public boolean pageHasData(String html) {
|
||||
Document doc = Jsoup.parse(html);
|
||||
return doc.select("div[data-widget=searchResultsError]").isEmpty();
|
||||
}
|
||||
|
||||
public void parseAndQueueData(String html) {
|
||||
Document doc = Jsoup.parse(html);
|
||||
for (Element item : doc.select(".item-class")) {
|
||||
String title = item.select(".item-title").text();
|
||||
String price = item.select(".item-price").text();
|
||||
|
||||
ParsedData parsedData = new ParsedData();
|
||||
log.info("Попытка отправить данные в очередь");
|
||||
queueProducer.sendToQueue(parsedData);
|
||||
log.info("Данные успешно отправлены в очередь");
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,25 +1,27 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Profile("postgres_stat")
|
||||
public class PartitionService {
|
||||
|
||||
private final JdbcTemplate jdbcTemplate;
|
||||
private final JdbcTemplate postgresDataSource;
|
||||
|
||||
public boolean checkPartitionExists(String partitionName) {
|
||||
String query = "SELECT to_regclass('public." + partitionName + "')";
|
||||
String result = jdbcTemplate.queryForObject(query, String.class);
|
||||
String result = postgresDataSource.queryForObject(query, String.class);
|
||||
return result != null;
|
||||
}
|
||||
|
||||
public void createPartition(String partitionName, String startDate, String endDate) {
|
||||
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
|
||||
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
|
||||
jdbcTemplate.execute(createPartitionSQL);
|
||||
postgresDataSource.execute(createPartitionSQL);
|
||||
}
|
||||
|
||||
}
|
||||
|
@ -1,7 +1,10 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
@Getter
|
||||
@ -10,7 +13,7 @@ public class ParsedData {
|
||||
|
||||
private Marketplace marketplace;
|
||||
|
||||
private String category;
|
||||
private Category category;
|
||||
|
||||
private String brand;
|
||||
|
||||
@ -20,4 +23,6 @@ public class ParsedData {
|
||||
|
||||
private String imageUrl;
|
||||
|
||||
private BigDecimal price;
|
||||
|
||||
}
|
||||
|
@ -1,7 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon;
|
||||
|
||||
/**
 * Common contract for the page objects of this parser
 * (implemented, for example, by {@code CategoryPage}).
 */
public interface MarketplacePage {
|
||||
|
||||
/**
 * Reports whether the page represented by this object is currently loaded.
 *
 * @return {@code true} if the implementation considers its page loaded
 */
boolean isLoaded();
|
||||
|
||||
}
|
@ -1,73 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
|
||||
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
|
||||
|
||||
@Slf4j
|
||||
public class CategoryPage implements MarketplacePage {
|
||||
|
||||
private static final int PAGE_SIZE = 12;
|
||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
|
||||
|
||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
private WebDriverWait wait;
|
||||
|
||||
public CategoryPage(WebDriver driver, WebDriverWait wait) {
|
||||
this.driver = driver;
|
||||
this.wait = wait;
|
||||
}
|
||||
|
||||
public Set<String> getProductsLinks() {
|
||||
wait.until(visibilityOfElementLocated(searchResults));
|
||||
var searchResultsElement = driver.findElement(searchResults);
|
||||
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
|
||||
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
|
||||
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
|
||||
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
|
||||
return searchProductsLinks(innerDivs, driver);
|
||||
}
|
||||
|
||||
private Set<String> searchProductsLinks(List<WebElement> innerDivs, WebDriver driver) {
|
||||
return innerDivs.stream()
|
||||
.map(div -> {
|
||||
waitVisibility(div);
|
||||
List<WebElement> linkTags = null;
|
||||
try {
|
||||
linkTags = div.findElements(By.tagName("a"));
|
||||
} catch (Exception ignored) {}
|
||||
return linkTags != null && !linkTags.isEmpty()
|
||||
? linkTags.getFirst().getAttribute("href")
|
||||
: null;
|
||||
})
|
||||
.filter(href -> href != null && !href.isEmpty())
|
||||
.collect(Collectors.toSet());
|
||||
}
|
||||
|
||||
private void waitVisibility(WebElement outerElement) {
|
||||
wait.until(driver -> !outerElement.findElements(By.tagName("a")).isEmpty());
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return driver.findElement(searchResults) != null;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,47 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
public class ParsingService implements MarketplaceParsingService {
|
||||
|
||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||
|
||||
private final ExecutorService categoryExecutor = Executors.newFixedThreadPool(1);
|
||||
|
||||
private final CategoryPageParsingService categoryPageParsingService;
|
||||
|
||||
public ParsingService(CategoryPageParsingService categoryPageParsingService) {
|
||||
this.categoryPageParsingService = categoryPageParsingService;
|
||||
}
|
||||
|
||||
public void processCategory(String url) {
|
||||
var startTime = System.currentTimeMillis();
|
||||
log.info("Начало обработки категории: {}", url);
|
||||
int pageIndex = 1;
|
||||
var errors = new ArrayList<String>();
|
||||
|
||||
while (!stopFlag.get()) {
|
||||
int finalPageIndex = pageIndex;
|
||||
try {
|
||||
categoryPageParsingService.parseCategoryPage(finalPageIndex, url, errors);
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
++pageIndex;
|
||||
if (pageIndex > 5) {
|
||||
stopFlag.set(true);
|
||||
}
|
||||
}
|
||||
log.info("Время выполнения {} ", (System.currentTimeMillis() - startTime) / 1000);
|
||||
}
|
||||
|
||||
}
|
@ -1,18 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.messaging;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.kafka.core.KafkaTemplate;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ParsedDataProducer {
|
||||
|
||||
private final KafkaTemplate<String, ParsedData> kafkaTemplate;
|
||||
|
||||
public void sendToQueue(ParsedData data) {
|
||||
kafkaTemplate.send("parsed-data-queue", data);
|
||||
}
|
||||
|
||||
}
|
@ -1,10 +1,9 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
|
||||
|
||||
@Slf4j
|
||||
public class AccessDeniedPage implements MarketplacePage {
|
@ -0,0 +1,94 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Set;
|
||||
import java.util.stream.Collectors;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.WebElement;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
|
||||
@Slf4j
|
||||
public class CategoryPage implements MarketplacePage {
|
||||
|
||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
|
||||
|
||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
private WebDriverWait wait;
|
||||
|
||||
public CategoryPage(WebDriver driver, WebDriverWait wait) {
|
||||
this.driver = driver;
|
||||
this.wait = wait;
|
||||
}
|
||||
|
||||
public ArrayList<ParsedData> getParsedProducts() {
|
||||
wait.until(visibilityOfElementLocated(searchResults));
|
||||
log.info("Нашли SearchResultsV2");
|
||||
var searchResultsElement = driver.findElement(searchResults);
|
||||
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
|
||||
log.info("Нашли внешний блок списка");
|
||||
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
|
||||
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
|
||||
log.info("Нашли элементы списка");
|
||||
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
|
||||
|
||||
var products = new ArrayList<ParsedData>();
|
||||
innerDivs.forEach(innerDiv -> {
|
||||
var productDataDivs = innerDiv.findElements(By.cssSelector(":scope > div"));
|
||||
var productImageUrl = productDataDivs.get(0)
|
||||
.findElement(By.cssSelector(":scope > a > div"))
|
||||
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||
.findElement(By.tagName("img")).getAttribute("src");
|
||||
|
||||
var productBrand = productDataDivs.get(1).findElement(By.cssSelector(":scope > div"))
|
||||
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||
.findElement(By.tagName("b")).getText();
|
||||
|
||||
var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
|
||||
|
||||
var productUrl = productNameLink.getAttribute("href");
|
||||
|
||||
var productName = productNameLink.findElement(By.tagName("span")).getText();
|
||||
|
||||
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
||||
.findElements(By.tagName("span")).getFirst().getText());
|
||||
var parsedData = new ParsedData();
|
||||
parsedData.setUrl(productUrl);
|
||||
parsedData.setBrand(productBrand);
|
||||
parsedData.setProductName(productName);
|
||||
parsedData.setImageUrl(productImageUrl);
|
||||
parsedData.setPrice(productPrice);
|
||||
products.add(parsedData);
|
||||
});
|
||||
|
||||
|
||||
return products;
|
||||
}
|
||||
|
||||
private BigDecimal parseCurrency(String currencyStr) {
|
||||
String cleanedString = currencyStr.replaceAll("[^\\d]", "");
|
||||
|
||||
return new BigDecimal(cleanedString);
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return driver.findElement(searchResults) != null;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
/**
 * Common contract for the page objects of this parser
 * (implemented, for example, by {@code CategoryPage} and {@code AccessDeniedPage}).
 */
public interface MarketplacePage {
|
||||
|
||||
/**
 * Reports whether the page represented by this object is currently loaded.
 *
 * @return {@code true} if the implementation considers its page loaded
 */
boolean isLoaded();
|
||||
|
||||
}
|
@ -0,0 +1,32 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.By;
|
||||
import org.openqa.selenium.WebDriver;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
|
||||
@Slf4j
|
||||
public class NoContentPage {
|
||||
|
||||
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||
|
||||
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||
|
||||
private WebDriver driver;
|
||||
|
||||
private WebDriverWait wait;
|
||||
|
||||
public NoContentPage(WebDriver driver, WebDriverWait wait) {
|
||||
this.driver = driver;
|
||||
this.wait = wait;
|
||||
}
|
||||
|
||||
public boolean isLoaded() {
|
||||
try {
|
||||
return driver.findElement(errorText) != null;
|
||||
} catch (Exception e) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,103 +1,82 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.time.Duration;
|
||||
import java.time.temporal.ChronoUnit;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Set;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.List;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||
import org.slf4j.MDC;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.retry.annotation.Recover;
|
||||
import org.springframework.retry.annotation.Retryable;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.AccessDeniedPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.CategoryPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
||||
import ru.pricepulse.parsingservice.service.ProductService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
public class CategoryPageParsingService {
|
||||
|
||||
private final ExecutorService productPageExecutor = Executors.newFixedThreadPool(3);
|
||||
@Profile("ozon")
|
||||
public class OzonCategoryPageParsingService {
|
||||
|
||||
private final WebDriverPool webDriverPool;
|
||||
|
||||
public CategoryPageParsingService(WebDriverPool webDriverPool) {
|
||||
private final ProductService productService;
|
||||
|
||||
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
|
||||
ProductService productService) {
|
||||
this.webDriverPool = webDriverPool;
|
||||
this.productService = productService;
|
||||
}
|
||||
|
||||
@Retryable(maxAttempts = 10, recover = "recover")
|
||||
public void parseCategoryPage(int finalPageIndex, String url, ArrayList<String> errors) throws InterruptedException {
|
||||
MDC.put("pageIndex", String.valueOf(finalPageIndex));
|
||||
String pageUrl = url + "/?page=" + finalPageIndex;
|
||||
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
||||
var driver = webDriverPool.borrowDriver();
|
||||
|
||||
try {
|
||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||
driver.get(pageUrl);
|
||||
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
|
||||
var accessDeniedPage = new AccessDeniedPage(driver, wait); // TODO подумать как не создавать кучу PageObject
|
||||
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
||||
var categoryPage = new CategoryPage(driver, wait);
|
||||
var noContentPage = new NoContentPage(driver, wait);
|
||||
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
|
||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
|
||||
resolveAccessDeniedPage(accessDeniedPage);
|
||||
log.info("Проблема успешно решена: {}", pageUrl);
|
||||
}
|
||||
log.info("Получаем список ссылок на товары на текущей странице: {}", pageUrl);
|
||||
Set<String> hrefs = Set.of();
|
||||
if (noContentPage.isLoaded()) {
|
||||
log.info("Страница не найдена");
|
||||
stopFlag.set(true);
|
||||
return;
|
||||
}
|
||||
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
||||
List<ParsedData> parsedData;
|
||||
try {
|
||||
hrefs = categoryPage.getProductsLinks();
|
||||
parsedData = categoryPage.getParsedProducts();
|
||||
for (ParsedData data : parsedData) {
|
||||
data.setMarketplace(Marketplace.OZON);
|
||||
data.setCategory(category);
|
||||
}
|
||||
productService.saveBatch(parsedData);
|
||||
} catch (Exception e) {
|
||||
throw new Exception(e);
|
||||
}
|
||||
webDriverPool.returnDriver(driver);
|
||||
log.info("Страница {} Получены ссылки на товары: {}", finalPageIndex, hrefs.size());
|
||||
hrefs.forEach(href -> {
|
||||
MDC.put("pageIndex", String.valueOf(finalPageIndex));
|
||||
try {
|
||||
processPage(href);
|
||||
errors.add(href);
|
||||
log.error(String.valueOf(errors.size()));
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
});
|
||||
/*hrefs.forEach(href -> productPageExecutor.submit(() -> {
|
||||
MDC.put("pageIndex", String.valueOf(finalPageIndex));
|
||||
try {
|
||||
processPage(href);
|
||||
errors.add(href);
|
||||
log.error(String.valueOf(errors.size()));
|
||||
} catch (InterruptedException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}));*/
|
||||
productPageExecutor.awaitTermination(10, TimeUnit.SECONDS);
|
||||
} catch (Exception ignored) {
|
||||
throw new RuntimeException(ignored);
|
||||
} finally {
|
||||
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
|
||||
webDriverPool.returnDriver(driver);
|
||||
}
|
||||
}
|
||||
|
||||
private String processPage(String href) throws InterruptedException {
|
||||
var driver = webDriverPool.borrowDriver();
|
||||
try {
|
||||
driver.get(href);
|
||||
log.info("Страница обработана");
|
||||
} catch (Throwable ignored) {
|
||||
|
||||
} finally {
|
||||
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
|
||||
}
|
||||
return href;
|
||||
}
|
||||
|
||||
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
||||
CategoryPage categoryPage) {
|
||||
log.debug("Проверка что страница 'Доступ ограничен'");
|
@ -0,0 +1,43 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.concurrent.ExecutorService;
|
||||
import java.util.concurrent.Executors;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@Profile("ozon")
|
||||
public class OzonParsingService implements MarketplaceParsingService {
|
||||
|
||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||
|
||||
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
|
||||
|
||||
private final OzonCategoryPageParsingService categoryPageParsingService;
|
||||
|
||||
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
|
||||
this.categoryPageParsingService = categoryPageParsingService;
|
||||
}
|
||||
|
||||
public void processCategory(String url) {
|
||||
int pageIndex = 1;
|
||||
|
||||
while (!stopFlag.get()) {
|
||||
int finalPageIndex = pageIndex;
|
||||
String pageUrl = url + "&page=" + finalPageIndex;
|
||||
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
|
||||
++pageIndex;
|
||||
}
|
||||
if (stopFlag.get()) {
|
||||
pageExecutorService.shutdownNow();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
@ -1,20 +0,0 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.request;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.client.RestTemplate;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class PageFetcher {
|
||||
|
||||
private final RestTemplate restTemplate;
|
||||
|
||||
public String fetchPage(String url) {
|
||||
log.info("Поолучение страницы {}", url);
|
||||
return restTemplate.getForObject(url, String.class);
|
||||
}
|
||||
|
||||
}
|
@ -1,17 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing.ParsingService;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
||||
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Profile("ozon")
|
||||
public class OzonProductUpdater {
|
||||
|
||||
private final OzonConfigProperties properties;
|
||||
private final ParsingService ozonParsingService;
|
||||
private final OzonParsingService ozonParsingService;
|
||||
|
||||
@Scheduled(fixedRate = 3600000)
|
||||
public void updateOzonProducts() {
|
||||
|
@ -6,6 +6,7 @@ import java.time.format.DateTimeFormatter;
|
||||
import jakarta.annotation.PostConstruct;
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.context.annotation.Profile;
|
||||
import org.springframework.scheduling.annotation.Scheduled;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
||||
@ -13,6 +14,7 @@ import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
@Profile("postgres_stat")
|
||||
public class PartitionScheduler {
|
||||
|
||||
private final PartitionService partitionService;
|
||||
|
@ -3,11 +3,13 @@ package ru.pricepulse.parsingservice.persistence.entity;
|
||||
import jakarta.persistence.Column;
|
||||
import jakarta.persistence.EmbeddedId;
|
||||
import jakarta.persistence.Entity;
|
||||
import jakarta.persistence.PrePersist;
|
||||
import jakarta.persistence.Table;
|
||||
import lombok.*;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
@Getter
|
||||
@ -48,4 +50,9 @@ public class PriceHistoryEntity {
|
||||
return Objects.hash(id);
|
||||
}
|
||||
|
||||
@PrePersist
|
||||
protected void onCreate() {
|
||||
id.setDate(LocalDateTime.now());
|
||||
}
|
||||
|
||||
}
|
@ -5,11 +5,10 @@ import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
import lombok.NoArgsConstructor;
|
||||
import lombok.Setter;
|
||||
import org.hibernate.annotations.OnDelete;
|
||||
import org.hibernate.annotations.OnDeleteAction;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
import java.io.Serializable;
|
||||
import java.time.LocalDateTime;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
@ -20,13 +19,11 @@ import java.util.Objects;
|
||||
@Embeddable
|
||||
public class PriceHistoryId implements Serializable {
|
||||
|
||||
@ManyToOne(fetch = FetchType.LAZY, optional = false)
|
||||
@OnDelete(action = OnDeleteAction.CASCADE)
|
||||
@JoinColumn(name = "product_id", nullable = false)
|
||||
private ProductEntity product;
|
||||
@Column(name = "product_url", nullable = false, unique = true)
|
||||
private String productUrl;
|
||||
|
||||
@Column(name = "date", nullable = false)
|
||||
private OffsetDateTime date;
|
||||
private LocalDateTime date;
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
|
@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.persistence.repository;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
|
||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, Long> {
|
||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
||||
}
|
||||
|
@ -4,4 +4,7 @@ import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
|
||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||
|
||||
boolean existsByUrl(String url);
|
||||
|
||||
}
|
||||
|
@ -0,0 +1,69 @@
|
||||
package ru.pricepulse.parsingservice.service;
|
||||
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import lombok.RequiredArgsConstructor;
|
||||
import lombok.extern.slf4j.Slf4j;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.transaction.annotation.Transactional;
|
||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||
|
||||
@Slf4j
|
||||
@Service
|
||||
@RequiredArgsConstructor
|
||||
public class ProductService {
|
||||
|
||||
private final ProductRepository productRepository;
|
||||
|
||||
private final ProductPriceRepository productPriceRepository;
|
||||
|
||||
@Transactional
|
||||
public void saveBatch(List<ParsedData> parsedData) {
|
||||
var products = new ArrayList<ProductEntity>();
|
||||
var prices = new ArrayList<PriceHistoryEntity>();
|
||||
parsedData.forEach(product -> processParsedProduct(product, prices, products));
|
||||
productRepository.saveAll(products);
|
||||
log.info("Сохранили пачку товаров {}", products.size());
|
||||
productPriceRepository.saveAll(prices);
|
||||
log.info("Сохранили историю цен {}", prices.size());
|
||||
}
|
||||
|
||||
private void processParsedProduct(ParsedData product,
|
||||
ArrayList<PriceHistoryEntity> prices,
|
||||
ArrayList<ProductEntity> products) {
|
||||
var priceHistoryEntity = getPriceHistory(product);
|
||||
prices.add(priceHistoryEntity);
|
||||
if (productRepository.existsByUrl(product.getUrl())) {
|
||||
log.debug("Запись {} уже есть", product.getUrl());
|
||||
return;
|
||||
}
|
||||
var productEntity = getProduct(product);
|
||||
products.add(productEntity);
|
||||
}
|
||||
|
||||
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
||||
var priceHistoryId = new PriceHistoryId();
|
||||
priceHistoryId.setProductUrl(product.getUrl());
|
||||
var priceHistory = new PriceHistoryEntity();
|
||||
priceHistory.setId(priceHistoryId);
|
||||
priceHistory.setPrice(product.getPrice());
|
||||
return priceHistory;
|
||||
}
|
||||
|
||||
private ProductEntity getProduct(ParsedData product) {
|
||||
var productEntity = new ProductEntity();
|
||||
productEntity.setCategory(product.getCategory());
|
||||
productEntity.setBrand(product.getBrand());
|
||||
productEntity.setProductName(product.getProductName());
|
||||
productEntity.setUrl(product.getUrl());
|
||||
productEntity.setMarketplace(product.getMarketplace());
|
||||
productEntity.setImageUrl(product.getImageUrl());
|
||||
return productEntity;
|
||||
}
|
||||
}
|
@ -3,17 +3,17 @@ package ru.pricepulse.parsingservice.wildberries_parser;
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.boot.CommandLineRunner;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.WildberriesParsingService;
|
||||
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
public class DebugRunner implements CommandLineRunner {
|
||||
private final ParsingService parsingService;
|
||||
private final WildberriesParsingService parsingService;
|
||||
|
||||
@Override
|
||||
public void run(String... args){
|
||||
System.out.println("Начинаем отладку...");
|
||||
/*System.out.println("Начинаем отладку...");
|
||||
parsingService.parse();
|
||||
System.out.println("Заканчиваем отладку...");
|
||||
System.out.println("Заканчиваем отладку...");*/
|
||||
}
|
||||
}
|
||||
|
@ -16,14 +16,14 @@ import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.time.LocalDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Service
|
||||
@AllArgsConstructor
|
||||
public class ParsingService {
|
||||
public class WildberriesParsingService {
|
||||
private final Client client;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ConversionService conversionService;
|
||||
@ -56,7 +56,7 @@ public class ParsingService {
|
||||
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
||||
|
||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||
.id(new PriceHistoryId(productEntity, OffsetDateTime.now()))
|
||||
.id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
|
||||
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||
.build();
|
||||
|
@ -9,18 +9,21 @@ spring:
|
||||
database: postgresql
|
||||
datasource:
|
||||
driver-class-name: org.postgresql.Driver
|
||||
url: jdbc:postgresql://${JDBC_URL}
|
||||
username: ${JDBC_USERNAME}
|
||||
password: ${JDBC_PASSWORD}
|
||||
url: jdbc:postgresql://${POSTGRES_JDBC_URL}
|
||||
username: ${POSTGRES_JDBC_USERNAME}
|
||||
password: ${POSTGRES_JDBC_PASSWORD}
|
||||
clickhouse:
|
||||
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
|
||||
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
|
||||
username: ${CLICKHOUSE_JDBC_USERNAME}
|
||||
password: ${CLICKHOUSE_JDBC_PASSWORD}
|
||||
liquibase:
|
||||
change-log: classpath:/db/changelog/master.yml
|
||||
kafka:
|
||||
selenium:
|
||||
|
||||
marketplace:
|
||||
ozon:
|
||||
categories-urls:
|
||||
- https://www.ozon.ru/category/noutbuki-15692
|
||||
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
|
||||
wildberries:
|
||||
base-url: "https://static-basket-01.wbbasket.ru"
|
||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||
|
@ -7,7 +7,7 @@
|
||||
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||
<addColumn tableName="product">
|
||||
<column name="url" type="varchar" remarks="Ссылка на товар">
|
||||
<constraints nullable="false" />
|
||||
<constraints nullable="false" unique="true" />
|
||||
</column>
|
||||
</addColumn>
|
||||
<addColumn tableName="product">
|
||||
@ -15,25 +15,14 @@
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
</addColumn>
|
||||
<addColumn tableName="product">
|
||||
<column name="article" type="varchar" remarks="Артикул товара">
|
||||
<constraints nullable="false" />
|
||||
</column>
|
||||
</addColumn>
|
||||
<dropTable tableName="price_history" cascadeConstraints="true" />
|
||||
<sql>
|
||||
CREATE TABLE if not exists price_history(
|
||||
product_id bigint NOT NULL,
|
||||
product_url varchar NOT NULL,
|
||||
price numeric(10, 2) NOT NULL,
|
||||
date timestamptz NOT NULL,
|
||||
PRIMARY KEY (product_id, date)
|
||||
PRIMARY KEY (product_url, date)
|
||||
) PARTITION BY RANGE (date);
|
||||
</sql>
|
||||
<addForeignKeyConstraint baseTableName="price_history"
|
||||
baseColumnNames="product_id"
|
||||
constraintName="fk_product_price_history"
|
||||
referencedTableName="product"
|
||||
referencedColumnNames="id"
|
||||
onDelete="CASCADE"/>
|
||||
</changeSet>
|
||||
</databaseChangeLog>
|
Loading…
Reference in New Issue
Block a user