Feature/parsing-service: save commit
This commit is contained in:
parent
ae8ac061bc
commit
9895aaff33
@ -37,6 +37,7 @@ dependencies {
|
|||||||
implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
|
implementation "org.seleniumhq.selenium:selenium-java:${seleniumVersion}"
|
||||||
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
|
implementation 'io.github.bonigarcia:webdrivermanager:5.5.0'
|
||||||
implementation 'org.apache.commons:commons-pool2:2.12.0'
|
implementation 'org.apache.commons:commons-pool2:2.12.0'
|
||||||
|
implementation 'com.clickhouse:clickhouse-jdbc:0.6.5'
|
||||||
|
|
||||||
compileOnly 'org.projectlombok:lombok'
|
compileOnly 'org.projectlombok:lombok'
|
||||||
|
|
||||||
|
@ -8,10 +8,12 @@ import org.openqa.selenium.WebDriver;
|
|||||||
import org.openqa.selenium.chrome.ChromeDriver;
|
import org.openqa.selenium.chrome.ChromeDriver;
|
||||||
import org.openqa.selenium.chrome.ChromeOptions;
|
import org.openqa.selenium.chrome.ChromeOptions;
|
||||||
import org.springframework.beans.factory.ObjectFactory;
|
import org.springframework.beans.factory.ObjectFactory;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Component
|
@Component
|
||||||
|
@Profile("ozon")
|
||||||
public class WebDriverPool {
|
public class WebDriverPool {
|
||||||
|
|
||||||
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
private final Queue<WebDriver> availableDrivers = new ConcurrentLinkedQueue<>(); // Список доступных драйверов
|
||||||
|
@ -1,37 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.jsoup.Jsoup;
|
|
||||||
import org.jsoup.nodes.Document;
|
|
||||||
import org.jsoup.nodes.Element;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.messaging.ParsedDataProducer;
|
|
||||||
|
|
||||||
@Slf4j
|
|
||||||
@Service
|
|
||||||
@RequiredArgsConstructor
|
|
||||||
public class DataParser {
|
|
||||||
|
|
||||||
private final ParsedDataProducer queueProducer;
|
|
||||||
|
|
||||||
public boolean pageHasData(String html) {
|
|
||||||
Document doc = Jsoup.parse(html);
|
|
||||||
return doc.select("div[data-widget=searchResultsError]").isEmpty();
|
|
||||||
}
|
|
||||||
|
|
||||||
public void parseAndQueueData(String html) {
|
|
||||||
Document doc = Jsoup.parse(html);
|
|
||||||
for (Element item : doc.select(".item-class")) {
|
|
||||||
String title = item.select(".item-title").text();
|
|
||||||
String price = item.select(".item-price").text();
|
|
||||||
|
|
||||||
ParsedData parsedData = new ParsedData();
|
|
||||||
log.info("Попытка отправить данные в очередь");
|
|
||||||
queueProducer.sendToQueue(parsedData);
|
|
||||||
log.info("Данные успешно отправлены в очередь");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,25 +1,27 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service;
|
package ru.pricepulse.parsingservice.ozon_parser.service;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.jdbc.core.JdbcTemplate;
|
import org.springframework.jdbc.core.JdbcTemplate;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@Profile("postgres_stat")
|
||||||
public class PartitionService {
|
public class PartitionService {
|
||||||
|
|
||||||
private final JdbcTemplate jdbcTemplate;
|
private final JdbcTemplate postgresDataSource;
|
||||||
|
|
||||||
public boolean checkPartitionExists(String partitionName) {
|
public boolean checkPartitionExists(String partitionName) {
|
||||||
String query = "SELECT to_regclass('public." + partitionName + "')";
|
String query = "SELECT to_regclass('public." + partitionName + "')";
|
||||||
String result = jdbcTemplate.queryForObject(query, String.class);
|
String result = postgresDataSource.queryForObject(query, String.class);
|
||||||
return result != null;
|
return result != null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public void createPartition(String partitionName, String startDate, String endDate) {
|
public void createPartition(String partitionName, String startDate, String endDate) {
|
||||||
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
|
String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName +
|
||||||
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
|
" PARTITION OF price_history FOR VALUES FROM ('" + startDate + "') TO ('" + endDate + "')";
|
||||||
jdbcTemplate.execute(createPartitionSQL);
|
postgresDataSource.execute(createPartitionSQL);
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
package ru.pricepulse.parsingservice.ozon_parser.service.dto;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
|
||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@ -10,7 +13,7 @@ public class ParsedData {
|
|||||||
|
|
||||||
private Marketplace marketplace;
|
private Marketplace marketplace;
|
||||||
|
|
||||||
private String category;
|
private Category category;
|
||||||
|
|
||||||
private String brand;
|
private String brand;
|
||||||
|
|
||||||
@ -20,4 +23,6 @@ public class ParsedData {
|
|||||||
|
|
||||||
private String imageUrl;
|
private String imageUrl;
|
||||||
|
|
||||||
|
private BigDecimal price;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -1,7 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon;
|
|
||||||
|
|
||||||
public interface MarketplacePage {
|
|
||||||
|
|
||||||
boolean isLoaded();
|
|
||||||
|
|
||||||
}
|
|
@ -1,73 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
|
|
||||||
|
|
||||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
|
||||||
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Set;
|
|
||||||
import java.util.stream.Collectors;
|
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.openqa.selenium.By;
|
|
||||||
import org.openqa.selenium.WebDriver;
|
|
||||||
import org.openqa.selenium.WebElement;
|
|
||||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
|
|
||||||
|
|
||||||
@Slf4j
|
|
||||||
public class CategoryPage implements MarketplacePage {
|
|
||||||
|
|
||||||
private static final int PAGE_SIZE = 12;
|
|
||||||
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
|
|
||||||
|
|
||||||
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
|
||||||
|
|
||||||
private WebDriver driver;
|
|
||||||
|
|
||||||
private WebDriverWait wait;
|
|
||||||
|
|
||||||
public CategoryPage(WebDriver driver, WebDriverWait wait) {
|
|
||||||
this.driver = driver;
|
|
||||||
this.wait = wait;
|
|
||||||
}
|
|
||||||
|
|
||||||
public Set<String> getProductsLinks() {
|
|
||||||
wait.until(visibilityOfElementLocated(searchResults));
|
|
||||||
var searchResultsElement = driver.findElement(searchResults);
|
|
||||||
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
|
|
||||||
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
|
|
||||||
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
|
|
||||||
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
|
|
||||||
return searchProductsLinks(innerDivs, driver);
|
|
||||||
}
|
|
||||||
|
|
||||||
private Set<String> searchProductsLinks(List<WebElement> innerDivs, WebDriver driver) {
|
|
||||||
return innerDivs.stream()
|
|
||||||
.map(div -> {
|
|
||||||
waitVisibility(div);
|
|
||||||
List<WebElement> linkTags = null;
|
|
||||||
try {
|
|
||||||
linkTags = div.findElements(By.tagName("a"));
|
|
||||||
} catch (Exception ignored) {}
|
|
||||||
return linkTags != null && !linkTags.isEmpty()
|
|
||||||
? linkTags.getFirst().getAttribute("href")
|
|
||||||
: null;
|
|
||||||
})
|
|
||||||
.filter(href -> href != null && !href.isEmpty())
|
|
||||||
.collect(Collectors.toSet());
|
|
||||||
}
|
|
||||||
|
|
||||||
private void waitVisibility(WebElement outerElement) {
|
|
||||||
wait.until(driver -> !outerElement.findElements(By.tagName("a")).isEmpty());
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public boolean isLoaded() {
|
|
||||||
try {
|
|
||||||
return driver.findElement(searchResults) != null;
|
|
||||||
} catch (Exception e) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,47 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.concurrent.ExecutorService;
|
|
||||||
import java.util.concurrent.Executors;
|
|
||||||
import java.util.concurrent.atomic.AtomicBoolean;
|
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
|
||||||
|
|
||||||
@Slf4j
|
|
||||||
@Service
|
|
||||||
public class ParsingService implements MarketplaceParsingService {
|
|
||||||
|
|
||||||
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
|
||||||
|
|
||||||
private final ExecutorService categoryExecutor = Executors.newFixedThreadPool(1);
|
|
||||||
|
|
||||||
private final CategoryPageParsingService categoryPageParsingService;
|
|
||||||
|
|
||||||
public ParsingService(CategoryPageParsingService categoryPageParsingService) {
|
|
||||||
this.categoryPageParsingService = categoryPageParsingService;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void processCategory(String url) {
|
|
||||||
var startTime = System.currentTimeMillis();
|
|
||||||
log.info("Начало обработки категории: {}", url);
|
|
||||||
int pageIndex = 1;
|
|
||||||
var errors = new ArrayList<String>();
|
|
||||||
|
|
||||||
while (!stopFlag.get()) {
|
|
||||||
int finalPageIndex = pageIndex;
|
|
||||||
try {
|
|
||||||
categoryPageParsingService.parseCategoryPage(finalPageIndex, url, errors);
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
++pageIndex;
|
|
||||||
if (pageIndex > 5) {
|
|
||||||
stopFlag.set(true);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
log.info("Время выполнения {} ", (System.currentTimeMillis() - startTime) / 1000);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,18 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.messaging;
|
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import org.springframework.kafka.core.KafkaTemplate;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
|
||||||
|
|
||||||
@Service
|
|
||||||
@RequiredArgsConstructor
|
|
||||||
public class ParsedDataProducer {
|
|
||||||
|
|
||||||
private final KafkaTemplate<String, ParsedData> kafkaTemplate;
|
|
||||||
|
|
||||||
public void sendToQueue(ParsedData data) {
|
|
||||||
kafkaTemplate.send("parsed-data-queue", data);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,10 +1,9 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page;
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.By;
|
import org.openqa.selenium.By;
|
||||||
import org.openqa.selenium.WebDriver;
|
import org.openqa.selenium.WebDriver;
|
||||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.MarketplacePage;
|
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
public class AccessDeniedPage implements MarketplacePage {
|
public class AccessDeniedPage implements MarketplacePage {
|
@ -0,0 +1,94 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfAllElements;
|
||||||
|
import static org.openqa.selenium.support.ui.ExpectedConditions.visibilityOfElementLocated;
|
||||||
|
|
||||||
|
import java.math.BigDecimal;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
import java.util.Set;
|
||||||
|
import java.util.stream.Collectors;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.WebElement;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class CategoryPage implements MarketplacePage {
|
||||||
|
|
||||||
|
private static final String SEARCH_RESULTS = "div[data-widget='searchResultsV2']";
|
||||||
|
|
||||||
|
private final By searchResults = By.cssSelector(SEARCH_RESULTS);
|
||||||
|
|
||||||
|
private WebDriver driver;
|
||||||
|
|
||||||
|
private WebDriverWait wait;
|
||||||
|
|
||||||
|
public CategoryPage(WebDriver driver, WebDriverWait wait) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = wait;
|
||||||
|
}
|
||||||
|
|
||||||
|
public ArrayList<ParsedData> getParsedProducts() {
|
||||||
|
wait.until(visibilityOfElementLocated(searchResults));
|
||||||
|
log.info("Нашли SearchResultsV2");
|
||||||
|
var searchResultsElement = driver.findElement(searchResults);
|
||||||
|
wait.until(driver -> visibilityOfElementLocated(By.cssSelector(":scope > div")));
|
||||||
|
log.info("Нашли внешний блок списка");
|
||||||
|
var outerDiv = searchResultsElement.findElement(By.cssSelector(":scope > div")); // Внешний блок со списком товаров
|
||||||
|
wait.until(driver -> visibilityOfAllElements(outerDiv.findElements(By.cssSelector(":scope > div"))));
|
||||||
|
log.info("Нашли элементы списка");
|
||||||
|
var innerDivs = outerDiv.findElements(By.cssSelector(":scope > div")); // Блок карточки товара
|
||||||
|
|
||||||
|
var products = new ArrayList<ParsedData>();
|
||||||
|
innerDivs.forEach(innerDiv -> {
|
||||||
|
var productDataDivs = innerDiv.findElements(By.cssSelector(":scope > div"));
|
||||||
|
var productImageUrl = productDataDivs.get(0)
|
||||||
|
.findElement(By.cssSelector(":scope > a > div"))
|
||||||
|
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||||
|
.findElement(By.tagName("img")).getAttribute("src");
|
||||||
|
|
||||||
|
var productBrand = productDataDivs.get(1).findElement(By.cssSelector(":scope > div"))
|
||||||
|
.findElements(By.cssSelector(":scope > div")).getFirst()
|
||||||
|
.findElement(By.tagName("b")).getText();
|
||||||
|
|
||||||
|
var productNameLink = productDataDivs.get(1).findElement(By.cssSelector(":scope > div > a"));
|
||||||
|
|
||||||
|
var productUrl = productNameLink.getAttribute("href");
|
||||||
|
|
||||||
|
var productName = productNameLink.findElement(By.tagName("span")).getText();
|
||||||
|
|
||||||
|
var productPrice = parseCurrency(productDataDivs.get(2).findElement(By.cssSelector(":scope > div > div"))
|
||||||
|
.findElements(By.tagName("span")).getFirst().getText());
|
||||||
|
var parsedData = new ParsedData();
|
||||||
|
parsedData.setUrl(productUrl);
|
||||||
|
parsedData.setBrand(productBrand);
|
||||||
|
parsedData.setProductName(productName);
|
||||||
|
parsedData.setImageUrl(productImageUrl);
|
||||||
|
parsedData.setPrice(productPrice);
|
||||||
|
products.add(parsedData);
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
return products;
|
||||||
|
}
|
||||||
|
|
||||||
|
private BigDecimal parseCurrency(String currencyStr) {
|
||||||
|
String cleanedString = currencyStr.replaceAll("[^\\d]", "");
|
||||||
|
|
||||||
|
return new BigDecimal(cleanedString);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean isLoaded() {
|
||||||
|
try {
|
||||||
|
return driver.findElement(searchResults) != null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,7 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
public interface MarketplacePage {
|
||||||
|
|
||||||
|
boolean isLoaded();
|
||||||
|
|
||||||
|
}
|
@ -0,0 +1,32 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.page;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.openqa.selenium.By;
|
||||||
|
import org.openqa.selenium.WebDriver;
|
||||||
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
public class NoContentPage {
|
||||||
|
|
||||||
|
private static final String ERROR_TEXT_XPATH = "\"//*[contains(text(), 'Простите, произошла ошибка. Попробуйте обновить страницу или вернуться на шаг назад.')]\"";
|
||||||
|
|
||||||
|
private final By errorText = By.xpath(ERROR_TEXT_XPATH);
|
||||||
|
|
||||||
|
private WebDriver driver;
|
||||||
|
|
||||||
|
private WebDriverWait wait;
|
||||||
|
|
||||||
|
public NoContentPage(WebDriver driver, WebDriverWait wait) {
|
||||||
|
this.driver = driver;
|
||||||
|
this.wait = wait;
|
||||||
|
}
|
||||||
|
|
||||||
|
public boolean isLoaded() {
|
||||||
|
try {
|
||||||
|
return driver.findElement(errorText) != null;
|
||||||
|
} catch (Exception e) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,103 +1,82 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing;
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
import java.time.Duration;
|
import java.time.Duration;
|
||||||
import java.time.temporal.ChronoUnit;
|
import java.time.temporal.ChronoUnit;
|
||||||
import java.util.ArrayList;
|
import java.util.List;
|
||||||
import java.util.Set;
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
import java.util.concurrent.ExecutorService;
|
|
||||||
import java.util.concurrent.Executors;
|
|
||||||
import java.util.concurrent.TimeUnit;
|
|
||||||
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
import org.openqa.selenium.support.ui.WebDriverWait;
|
import org.openqa.selenium.support.ui.WebDriverWait;
|
||||||
import org.slf4j.MDC;
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.retry.annotation.Recover;
|
import org.springframework.retry.annotation.Recover;
|
||||||
import org.springframework.retry.annotation.Retryable;
|
import org.springframework.retry.annotation.Retryable;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
import ru.pricepulse.parsingservice.ozon_parser.pool.WebDriverPool;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.AccessDeniedPage;
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.page.CategoryPage;
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.AccessDeniedPage;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.CategoryPage;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.page.NoContentPage;
|
||||||
|
import ru.pricepulse.parsingservice.service.ProductService;
|
||||||
|
|
||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
public class CategoryPageParsingService {
|
@Profile("ozon")
|
||||||
|
public class OzonCategoryPageParsingService {
|
||||||
private final ExecutorService productPageExecutor = Executors.newFixedThreadPool(3);
|
|
||||||
|
|
||||||
private final WebDriverPool webDriverPool;
|
private final WebDriverPool webDriverPool;
|
||||||
|
|
||||||
public CategoryPageParsingService(WebDriverPool webDriverPool) {
|
private final ProductService productService;
|
||||||
|
|
||||||
|
public OzonCategoryPageParsingService(WebDriverPool webDriverPool,
|
||||||
|
ProductService productService) {
|
||||||
this.webDriverPool = webDriverPool;
|
this.webDriverPool = webDriverPool;
|
||||||
|
this.productService = productService;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Retryable(maxAttempts = 10, recover = "recover")
|
@Retryable(maxAttempts = 10, recover = "recover")
|
||||||
public void parseCategoryPage(int finalPageIndex, String url, ArrayList<String> errors) throws InterruptedException {
|
public void parseCategoryPage(String pageUrl, Category category, AtomicBoolean stopFlag) {
|
||||||
MDC.put("pageIndex", String.valueOf(finalPageIndex));
|
|
||||||
String pageUrl = url + "/?page=" + finalPageIndex;
|
|
||||||
var driver = webDriverPool.borrowDriver();
|
var driver = webDriverPool.borrowDriver();
|
||||||
|
|
||||||
try {
|
try {
|
||||||
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
driver.manage().timeouts().pageLoadTimeout(Duration.of(10, ChronoUnit.SECONDS));
|
||||||
driver.get(pageUrl);
|
driver.get(pageUrl);
|
||||||
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
|
WebDriverWait wait = new WebDriverWait(driver, Duration.of(10, ChronoUnit.SECONDS));
|
||||||
var accessDeniedPage = new AccessDeniedPage(driver, wait); // TODO подумать как не создавать кучу PageObject
|
var accessDeniedPage = new AccessDeniedPage(driver, wait);
|
||||||
var categoryPage = new CategoryPage(driver, wait);
|
var categoryPage = new CategoryPage(driver, wait);
|
||||||
|
var noContentPage = new NoContentPage(driver, wait);
|
||||||
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
|
wait.until(d -> checkForWaitingPageLoading(accessDeniedPage, categoryPage));
|
||||||
if (checkAccessDeniedPage(accessDeniedPage)) {
|
if (checkAccessDeniedPage(accessDeniedPage)) {
|
||||||
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
|
log.info("Доступ ограничен, пробуем решить проблему: {}", pageUrl);
|
||||||
resolveAccessDeniedPage(accessDeniedPage);
|
resolveAccessDeniedPage(accessDeniedPage);
|
||||||
log.info("Проблема успешно решена: {}", pageUrl);
|
log.info("Проблема успешно решена: {}", pageUrl);
|
||||||
}
|
}
|
||||||
log.info("Получаем список ссылок на товары на текущей странице: {}", pageUrl);
|
if (noContentPage.isLoaded()) {
|
||||||
Set<String> hrefs = Set.of();
|
log.info("Страница не найдена");
|
||||||
|
stopFlag.set(true);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
log.info("Получаем список товаров на текущей странице: {}", pageUrl);
|
||||||
|
List<ParsedData> parsedData;
|
||||||
try {
|
try {
|
||||||
hrefs = categoryPage.getProductsLinks();
|
parsedData = categoryPage.getParsedProducts();
|
||||||
|
for (ParsedData data : parsedData) {
|
||||||
|
data.setMarketplace(Marketplace.OZON);
|
||||||
|
data.setCategory(category);
|
||||||
|
}
|
||||||
|
productService.saveBatch(parsedData);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
throw new Exception(e);
|
throw new Exception(e);
|
||||||
}
|
}
|
||||||
webDriverPool.returnDriver(driver);
|
webDriverPool.returnDriver(driver);
|
||||||
log.info("Страница {} Получены ссылки на товары: {}", finalPageIndex, hrefs.size());
|
|
||||||
hrefs.forEach(href -> {
|
|
||||||
MDC.put("pageIndex", String.valueOf(finalPageIndex));
|
|
||||||
try {
|
|
||||||
processPage(href);
|
|
||||||
errors.add(href);
|
|
||||||
log.error(String.valueOf(errors.size()));
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
/*hrefs.forEach(href -> productPageExecutor.submit(() -> {
|
|
||||||
MDC.put("pageIndex", String.valueOf(finalPageIndex));
|
|
||||||
try {
|
|
||||||
processPage(href);
|
|
||||||
errors.add(href);
|
|
||||||
log.error(String.valueOf(errors.size()));
|
|
||||||
} catch (InterruptedException e) {
|
|
||||||
throw new RuntimeException(e);
|
|
||||||
}
|
|
||||||
}));*/
|
|
||||||
productPageExecutor.awaitTermination(10, TimeUnit.SECONDS);
|
|
||||||
} catch (Exception ignored) {
|
} catch (Exception ignored) {
|
||||||
throw new RuntimeException(ignored);
|
throw new RuntimeException(ignored);
|
||||||
} finally {
|
} finally {
|
||||||
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
|
webDriverPool.returnDriver(driver);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private String processPage(String href) throws InterruptedException {
|
|
||||||
var driver = webDriverPool.borrowDriver();
|
|
||||||
try {
|
|
||||||
driver.get(href);
|
|
||||||
log.info("Страница обработана");
|
|
||||||
} catch (Throwable ignored) {
|
|
||||||
|
|
||||||
} finally {
|
|
||||||
webDriverPool.returnDriver(driver); // Завершаем работу драйвера
|
|
||||||
}
|
|
||||||
return href;
|
|
||||||
}
|
|
||||||
|
|
||||||
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
private boolean checkForWaitingPageLoading(AccessDeniedPage accessDeniedPage,
|
||||||
CategoryPage categoryPage) {
|
CategoryPage categoryPage) {
|
||||||
log.debug("Проверка что страница 'Доступ ограничен'");
|
log.debug("Проверка что страница 'Доступ ограничен'");
|
@ -0,0 +1,43 @@
|
|||||||
|
package ru.pricepulse.parsingservice.ozon_parser.service.parsing;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.atomic.AtomicBoolean;
|
||||||
|
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.MarketplaceParsingService;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@Profile("ozon")
|
||||||
|
public class OzonParsingService implements MarketplaceParsingService {
|
||||||
|
|
||||||
|
private final AtomicBoolean stopFlag = new AtomicBoolean(false);
|
||||||
|
|
||||||
|
private final ExecutorService pageExecutorService = Executors.newFixedThreadPool(12);
|
||||||
|
|
||||||
|
private final OzonCategoryPageParsingService categoryPageParsingService;
|
||||||
|
|
||||||
|
public OzonParsingService(OzonCategoryPageParsingService categoryPageParsingService) {
|
||||||
|
this.categoryPageParsingService = categoryPageParsingService;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void processCategory(String url) {
|
||||||
|
int pageIndex = 1;
|
||||||
|
|
||||||
|
while (!stopFlag.get()) {
|
||||||
|
int finalPageIndex = pageIndex;
|
||||||
|
String pageUrl = url + "&page=" + finalPageIndex;
|
||||||
|
pageExecutorService.submit(() -> categoryPageParsingService.parseCategoryPage(pageUrl, Category.LAPTOP, stopFlag));
|
||||||
|
++pageIndex;
|
||||||
|
}
|
||||||
|
if (stopFlag.get()) {
|
||||||
|
pageExecutorService.shutdownNow();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
@ -1,20 +0,0 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.request;
|
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
|
||||||
import lombok.extern.slf4j.Slf4j;
|
|
||||||
import org.springframework.stereotype.Service;
|
|
||||||
import org.springframework.web.client.RestTemplate;
|
|
||||||
|
|
||||||
@Slf4j
|
|
||||||
@Service
|
|
||||||
@RequiredArgsConstructor
|
|
||||||
public class PageFetcher {
|
|
||||||
|
|
||||||
private final RestTemplate restTemplate;
|
|
||||||
|
|
||||||
public String fetchPage(String url) {
|
|
||||||
log.info("Поолучение страницы {}", url);
|
|
||||||
return restTemplate.getForObject(url, String.class);
|
|
||||||
}
|
|
||||||
|
|
||||||
}
|
|
@ -1,17 +1,19 @@
|
|||||||
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
package ru.pricepulse.parsingservice.ozon_parser.service.scheduler;
|
||||||
|
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.marketplace.ozon.parsing.ParsingService;
|
import ru.pricepulse.parsingservice.ozon_parser.service.parsing.OzonParsingService;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@Profile("ozon")
|
||||||
public class OzonProductUpdater {
|
public class OzonProductUpdater {
|
||||||
|
|
||||||
private final OzonConfigProperties properties;
|
private final OzonConfigProperties properties;
|
||||||
private final ParsingService ozonParsingService;
|
private final OzonParsingService ozonParsingService;
|
||||||
|
|
||||||
@Scheduled(fixedRate = 3600000)
|
@Scheduled(fixedRate = 3600000)
|
||||||
public void updateOzonProducts() {
|
public void updateOzonProducts() {
|
||||||
|
@ -6,6 +6,7 @@ import java.time.format.DateTimeFormatter;
|
|||||||
import jakarta.annotation.PostConstruct;
|
import jakarta.annotation.PostConstruct;
|
||||||
import lombok.RequiredArgsConstructor;
|
import lombok.RequiredArgsConstructor;
|
||||||
import lombok.extern.slf4j.Slf4j;
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.context.annotation.Profile;
|
||||||
import org.springframework.scheduling.annotation.Scheduled;
|
import org.springframework.scheduling.annotation.Scheduled;
|
||||||
import org.springframework.stereotype.Service;
|
import org.springframework.stereotype.Service;
|
||||||
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
||||||
@ -13,6 +14,7 @@ import ru.pricepulse.parsingservice.ozon_parser.service.PartitionService;
|
|||||||
@Slf4j
|
@Slf4j
|
||||||
@Service
|
@Service
|
||||||
@RequiredArgsConstructor
|
@RequiredArgsConstructor
|
||||||
|
@Profile("postgres_stat")
|
||||||
public class PartitionScheduler {
|
public class PartitionScheduler {
|
||||||
|
|
||||||
private final PartitionService partitionService;
|
private final PartitionService partitionService;
|
||||||
|
@ -3,11 +3,13 @@ package ru.pricepulse.parsingservice.persistence.entity;
|
|||||||
import jakarta.persistence.Column;
|
import jakarta.persistence.Column;
|
||||||
import jakarta.persistence.EmbeddedId;
|
import jakarta.persistence.EmbeddedId;
|
||||||
import jakarta.persistence.Entity;
|
import jakarta.persistence.Entity;
|
||||||
|
import jakarta.persistence.PrePersist;
|
||||||
import jakarta.persistence.Table;
|
import jakarta.persistence.Table;
|
||||||
import lombok.*;
|
import lombok.*;
|
||||||
import org.hibernate.proxy.HibernateProxy;
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
@Getter
|
@Getter
|
||||||
@ -48,4 +50,9 @@ public class PriceHistoryEntity {
|
|||||||
return Objects.hash(id);
|
return Objects.hash(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@PrePersist
|
||||||
|
protected void onCreate() {
|
||||||
|
id.setDate(LocalDateTime.now());
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -5,11 +5,10 @@ import lombok.AllArgsConstructor;
|
|||||||
import lombok.Getter;
|
import lombok.Getter;
|
||||||
import lombok.NoArgsConstructor;
|
import lombok.NoArgsConstructor;
|
||||||
import lombok.Setter;
|
import lombok.Setter;
|
||||||
import org.hibernate.annotations.OnDelete;
|
|
||||||
import org.hibernate.annotations.OnDeleteAction;
|
|
||||||
import org.hibernate.proxy.HibernateProxy;
|
import org.hibernate.proxy.HibernateProxy;
|
||||||
|
|
||||||
import java.io.Serializable;
|
import java.io.Serializable;
|
||||||
|
import java.time.LocalDateTime;
|
||||||
import java.time.OffsetDateTime;
|
import java.time.OffsetDateTime;
|
||||||
import java.util.Objects;
|
import java.util.Objects;
|
||||||
|
|
||||||
@ -20,13 +19,11 @@ import java.util.Objects;
|
|||||||
@Embeddable
|
@Embeddable
|
||||||
public class PriceHistoryId implements Serializable {
|
public class PriceHistoryId implements Serializable {
|
||||||
|
|
||||||
@ManyToOne(fetch = FetchType.LAZY, optional = false)
|
@Column(name = "product_url", nullable = false, unique = true)
|
||||||
@OnDelete(action = OnDeleteAction.CASCADE)
|
private String productUrl;
|
||||||
@JoinColumn(name = "product_id", nullable = false)
|
|
||||||
private ProductEntity product;
|
|
||||||
|
|
||||||
@Column(name = "date", nullable = false)
|
@Column(name = "date", nullable = false)
|
||||||
private OffsetDateTime date;
|
private LocalDateTime date;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public final boolean equals(Object o) {
|
public final boolean equals(Object o) {
|
||||||
|
@ -2,6 +2,7 @@ package ru.pricepulse.parsingservice.persistence.repository;
|
|||||||
|
|
||||||
import org.springframework.data.jpa.repository.JpaRepository;
|
import org.springframework.data.jpa.repository.JpaRepository;
|
||||||
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
|
|
||||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, Long> {
|
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, PriceHistoryId> {
|
||||||
}
|
}
|
||||||
|
@ -4,4 +4,7 @@ import org.springframework.data.jpa.repository.JpaRepository;
|
|||||||
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
|
||||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||||
|
|
||||||
|
boolean existsByUrl(String url);
|
||||||
|
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,69 @@
|
|||||||
|
package ru.pricepulse.parsingservice.service;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import lombok.RequiredArgsConstructor;
|
||||||
|
import lombok.extern.slf4j.Slf4j;
|
||||||
|
import org.springframework.stereotype.Service;
|
||||||
|
import org.springframework.transaction.annotation.Transactional;
|
||||||
|
import ru.pricepulse.parsingservice.ozon_parser.service.dto.ParsedData;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.PriceHistoryId;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.entity.ProductEntity;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductPriceRepository;
|
||||||
|
import ru.pricepulse.parsingservice.persistence.repository.ProductRepository;
|
||||||
|
|
||||||
|
@Slf4j
|
||||||
|
@Service
|
||||||
|
@RequiredArgsConstructor
|
||||||
|
public class ProductService {
|
||||||
|
|
||||||
|
private final ProductRepository productRepository;
|
||||||
|
|
||||||
|
private final ProductPriceRepository productPriceRepository;
|
||||||
|
|
||||||
|
@Transactional
|
||||||
|
public void saveBatch(List<ParsedData> parsedData) {
|
||||||
|
var products = new ArrayList<ProductEntity>();
|
||||||
|
var prices = new ArrayList<PriceHistoryEntity>();
|
||||||
|
parsedData.forEach(product -> processParsedProduct(product, prices, products));
|
||||||
|
productRepository.saveAll(products);
|
||||||
|
log.info("Сохранили пачку товаров {}", products.size());
|
||||||
|
productPriceRepository.saveAll(prices);
|
||||||
|
log.info("Сохранили историю цен {}", prices.size());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void processParsedProduct(ParsedData product,
|
||||||
|
ArrayList<PriceHistoryEntity> prices,
|
||||||
|
ArrayList<ProductEntity> products) {
|
||||||
|
var priceHistoryEntity = getPriceHistory(product);
|
||||||
|
prices.add(priceHistoryEntity);
|
||||||
|
if (productRepository.existsByUrl(product.getUrl())) {
|
||||||
|
log.debug("Запись {} уже есть", product.getUrl());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
var productEntity = getProduct(product);
|
||||||
|
products.add(productEntity);
|
||||||
|
}
|
||||||
|
|
||||||
|
private PriceHistoryEntity getPriceHistory(ParsedData product) {
|
||||||
|
var priceHistoryId = new PriceHistoryId();
|
||||||
|
priceHistoryId.setProductUrl(product.getUrl());
|
||||||
|
var priceHistory = new PriceHistoryEntity();
|
||||||
|
priceHistory.setId(priceHistoryId);
|
||||||
|
priceHistory.setPrice(product.getPrice());
|
||||||
|
return priceHistory;
|
||||||
|
}
|
||||||
|
|
||||||
|
private ProductEntity getProduct(ParsedData product) {
|
||||||
|
var productEntity = new ProductEntity();
|
||||||
|
productEntity.setCategory(product.getCategory());
|
||||||
|
productEntity.setBrand(product.getBrand());
|
||||||
|
productEntity.setProductName(product.getProductName());
|
||||||
|
productEntity.setUrl(product.getUrl());
|
||||||
|
productEntity.setMarketplace(product.getMarketplace());
|
||||||
|
productEntity.setImageUrl(product.getImageUrl());
|
||||||
|
return productEntity;
|
||||||
|
}
|
||||||
|
}
|
@ -3,17 +3,17 @@ package ru.pricepulse.parsingservice.wildberries_parser;
|
|||||||
import lombok.AllArgsConstructor;
|
import lombok.AllArgsConstructor;
|
||||||
import org.springframework.boot.CommandLineRunner;
|
import org.springframework.boot.CommandLineRunner;
|
||||||
import org.springframework.stereotype.Component;
|
import org.springframework.stereotype.Component;
|
||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.WildberriesParsingService;
|
||||||
|
|
||||||
@Component
|
@Component
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class DebugRunner implements CommandLineRunner {
|
public class DebugRunner implements CommandLineRunner {
|
||||||
private final ParsingService parsingService;
|
private final WildberriesParsingService parsingService;
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void run(String... args){
|
public void run(String... args){
|
||||||
System.out.println("Начинаем отладку...");
|
/*System.out.println("Начинаем отладку...");
|
||||||
parsingService.parse();
|
parsingService.parse();
|
||||||
System.out.println("Заканчиваем отладку...");
|
System.out.println("Заканчиваем отладку...");*/
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -16,14 +16,14 @@ import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
|||||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||||
|
|
||||||
import java.math.BigDecimal;
|
import java.math.BigDecimal;
|
||||||
import java.time.OffsetDateTime;
|
import java.time.LocalDateTime;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
@Service
|
@Service
|
||||||
@AllArgsConstructor
|
@AllArgsConstructor
|
||||||
public class ParsingService {
|
public class WildberriesParsingService {
|
||||||
private final Client client;
|
private final Client client;
|
||||||
private final ObjectMapper objectMapper;
|
private final ObjectMapper objectMapper;
|
||||||
private final ConversionService conversionService;
|
private final ConversionService conversionService;
|
||||||
@ -56,7 +56,7 @@ public class ParsingService {
|
|||||||
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
||||||
|
|
||||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||||
.id(new PriceHistoryId(productEntity, OffsetDateTime.now()))
|
.id(new PriceHistoryId(productEntity.getUrl(), LocalDateTime.now()))
|
||||||
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||||
.build();
|
.build();
|
||||||
|
|
@ -9,18 +9,21 @@ spring:
|
|||||||
database: postgresql
|
database: postgresql
|
||||||
datasource:
|
datasource:
|
||||||
driver-class-name: org.postgresql.Driver
|
driver-class-name: org.postgresql.Driver
|
||||||
url: jdbc:postgresql://${JDBC_URL}
|
url: jdbc:postgresql://${POSTGRES_JDBC_URL}
|
||||||
username: ${JDBC_USERNAME}
|
username: ${POSTGRES_JDBC_USERNAME}
|
||||||
password: ${JDBC_PASSWORD}
|
password: ${POSTGRES_JDBC_PASSWORD}
|
||||||
|
clickhouse:
|
||||||
|
driver-class-name: com.clickhouse.jdbc.ClickHouseDriver
|
||||||
|
url: jdbc:clickhouse://${CLICKHOUSE_JDBC_URL}
|
||||||
|
username: ${CLICKHOUSE_JDBC_USERNAME}
|
||||||
|
password: ${CLICKHOUSE_JDBC_PASSWORD}
|
||||||
liquibase:
|
liquibase:
|
||||||
change-log: classpath:/db/changelog/master.yml
|
change-log: classpath:/db/changelog/master.yml
|
||||||
kafka:
|
|
||||||
selenium:
|
|
||||||
|
|
||||||
marketplace:
|
marketplace:
|
||||||
ozon:
|
ozon:
|
||||||
categories-urls:
|
categories-urls:
|
||||||
- https://www.ozon.ru/category/noutbuki-15692
|
- https://www.ozon.ru/category/noutbuki-15692/?brandcertified=t&is_high_rating=t
|
||||||
wildberries:
|
wildberries:
|
||||||
base-url: "https://static-basket-01.wbbasket.ru"
|
base-url: "https://static-basket-01.wbbasket.ru"
|
||||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
<changeSet id="20240926_create_product_table.xml" author="danil">
|
<changeSet id="20240926_create_product_table.xml" author="danil">
|
||||||
<addColumn tableName="product">
|
<addColumn tableName="product">
|
||||||
<column name="url" type="varchar" remarks="Ссылка на товар">
|
<column name="url" type="varchar" remarks="Ссылка на товар">
|
||||||
<constraints nullable="false" />
|
<constraints nullable="false" unique="true" />
|
||||||
</column>
|
</column>
|
||||||
</addColumn>
|
</addColumn>
|
||||||
<addColumn tableName="product">
|
<addColumn tableName="product">
|
||||||
@ -15,25 +15,14 @@
|
|||||||
<constraints nullable="false" />
|
<constraints nullable="false" />
|
||||||
</column>
|
</column>
|
||||||
</addColumn>
|
</addColumn>
|
||||||
<addColumn tableName="product">
|
|
||||||
<column name="article" type="varchar" remarks="Артикул товара">
|
|
||||||
<constraints nullable="false" />
|
|
||||||
</column>
|
|
||||||
</addColumn>
|
|
||||||
<dropTable tableName="price_history" cascadeConstraints="true" />
|
<dropTable tableName="price_history" cascadeConstraints="true" />
|
||||||
<sql>
|
<sql>
|
||||||
CREATE TABLE if not exists price_history(
|
CREATE TABLE if not exists price_history(
|
||||||
product_id bigint NOT NULL,
|
product_url varchar NOT NULL,
|
||||||
price numeric(10, 2) NOT NULL,
|
price numeric(10, 2) NOT NULL,
|
||||||
date timestamptz NOT NULL,
|
date timestamptz NOT NULL,
|
||||||
PRIMARY KEY (product_id, date)
|
PRIMARY KEY (product_url, date)
|
||||||
) PARTITION BY RANGE (date);
|
) PARTITION BY RANGE (date);
|
||||||
</sql>
|
</sql>
|
||||||
<addForeignKeyConstraint baseTableName="price_history"
|
|
||||||
baseColumnNames="product_id"
|
|
||||||
constraintName="fk_product_price_history"
|
|
||||||
referencedTableName="product"
|
|
||||||
referencedColumnNames="id"
|
|
||||||
onDelete="CASCADE"/>
|
|
||||||
</changeSet>
|
</changeSet>
|
||||||
</databaseChangeLog>
|
</databaseChangeLog>
|
Loading…
Reference in New Issue
Block a user