Feature/parsing-service intermediate commit

This commit is contained in:
danil 2024-10-12 13:51:32 +04:00
parent ffe6920b29
commit f58b0a4a02
32 changed files with 641 additions and 46 deletions

View File

@ -1,6 +1,7 @@
<component name="ProjectRunConfigurationManager">
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev" />
<option name="SCHEDULED_DEBUGGER" value="true" />
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_USERNAME" value="postgres" />

View File

@ -23,16 +23,26 @@ repositories {
mavenCentral()
}
// Explicitly pinned dependency versions, referenced from the dependencies block below.
ext {
    jsoupVersion = '1.18.1'
}
dependencies {
    implementation 'org.springframework.boot:spring-boot-starter-data-jpa'
    implementation 'org.springframework.boot:spring-boot-starter-web'
    implementation 'org.liquibase:liquibase-core'
    implementation 'org.springframework.kafka:spring-kafka'
    // HTML parser used by the page-parsing code (org.jsoup.Jsoup).
    implementation "org.jsoup:jsoup:${jsoupVersion}"
    compileOnly 'org.projectlombok:lombok'
    runtimeOnly 'org.postgresql:postgresql'
    annotationProcessor 'org.projectlombok:lombok'
    testImplementation 'org.springframework.boot:spring-boot-starter-test'
    testImplementation 'org.springframework.kafka:spring-kafka-test'
    testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
}

View File

@ -0,0 +1,16 @@
package ru.pricepulse.parsingservice.config;
import java.time.format.DateTimeFormatter;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
/**
 * Provides the {@link DateTimeFormatter} bean named {@code partitionDateTimeFormatter}.
 */
@Configuration
public class DateTimeFormatterConfig {

    /** Year and month separated by an underscore, e.g. {@code 2024_10}. */
    private static final String PARTITION_SUFFIX_PATTERN = "yyyy_MM";

    /**
     * Builds the formatter from {@link #PARTITION_SUFFIX_PATTERN}.
     *
     * @return a formatter producing {@code yyyy_MM} strings
     */
    @Bean
    public DateTimeFormatter partitionDateTimeFormatter() {
        DateTimeFormatter formatter = DateTimeFormatter.ofPattern(PARTITION_SUFFIX_PATTERN);
        return formatter;
    }
}

View File

@ -0,0 +1,11 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.boot.autoconfigure.kafka.KafkaProperties;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Kafka-related Spring configuration. It declares no beans of its own and only
 * enables configuration-properties binding for Spring Boot's {@link KafkaProperties}.
 *
 * <p>NOTE(review): Spring Boot's Kafka auto-configuration presumably enables
 * {@code KafkaProperties} already; confirm whether this annotation is still needed.
 */
@Configuration
@EnableConfigurationProperties(KafkaProperties.class)
public class KafkaConfig {
}

View File

@ -0,0 +1,11 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Configuration;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
/**
 * Registers the marketplace {@code @ConfigurationProperties} classes
 * (currently only {@link OzonConfigProperties}) for binding.
 */
@Configuration
@EnableConfigurationProperties(OzonConfigProperties.class)
public class MarketplacesConfig {
}

View File

@ -0,0 +1,15 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.client.RestTemplate;
@Configuration
public class RestTemplateConfig {
@Bean
public RestTemplate restTemplate() {
return new RestTemplate();
}
}

View File

@ -0,0 +1,12 @@
package ru.pricepulse.parsingservice.config;
import org.springframework.context.annotation.Configuration;
import org.springframework.scheduling.annotation.EnableScheduling;
/**
 * Switches on Spring's scheduled-task support through {@link EnableScheduling}.
 * The class declares no beans of its own.
 */
@Configuration
@EnableScheduling
public class SchedulerConfig {
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.config.properties;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Holder for application-specific Kafka settings under the
 * {@code application.kafka} configuration prefix. It declares no properties yet.
 */
@ConfigurationProperties(prefix = "application.kafka")
public class KafkaConfigProperties {
}

View File

@ -0,0 +1,14 @@
package ru.pricepulse.parsingservice.config.properties;
import java.util.List;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Ozon settings bound from the {@code marketplace.ozon} configuration prefix.
 */
@Getter
@Setter
@ConfigurationProperties(prefix = "marketplace.ozon")
public class OzonConfigProperties {
// Category URLs to process.
// NOTE(review): presumably remains null when the property is not configured; confirm.
private List<String> categoriesUrls;
}

View File

@ -1,32 +0,0 @@
package ru.pricepulse.parsingservice.persistance.entity;
import jakarta.persistence.*;
import lombok.Getter;
import lombok.Setter;
import org.hibernate.annotations.OnDelete;
import org.hibernate.annotations.OnDeleteAction;
import java.math.BigDecimal;
import java.time.OffsetDateTime;
/**
 * JPA entity mapped to the {@code price_history} table: one price of a product at a
 * given moment, identified by a database-generated surrogate {@code id}.
 */
@Getter
@Setter
@Entity
@Table(name = "price_history")
public class PriceHistoryEntity {
// Surrogate primary key generated by the database (IDENTITY strategy).
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@Column(name = "id", nullable = false)
private Long id;
// Owning product; loaded lazily and mandatory.
@ManyToOne(fetch = FetchType.LAZY, optional = false)
@OnDelete(action = OnDeleteAction.CASCADE)
@JoinColumn(name = "product_id", nullable = false)
private ProductEntity product;
// Price value with precision 10 and scale 2.
@Column(name = "price", nullable = false, precision = 10, scale = 2)
private BigDecimal price;
// Timestamp stored in the "date" column.
@Column(name = "date", nullable = false)
private OffsetDateTime date;
}

View File

@ -0,0 +1,49 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.math.BigDecimal;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.EmbeddedId;
import jakarta.persistence.Entity;
import jakarta.persistence.Table;
import lombok.Getter;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
/**
 * JPA entity mapped to the {@code price_history} table. A row is identified by the
 * embedded {@link PriceHistoryId} and carries the price value.
 */
@Getter
@Setter
@Entity
@Table(name = "price_history")
public class PriceHistoryEntity {

    @EmbeddedId
    private PriceHistoryId id;

    @Column(name = "price", nullable = false, precision = 10, scale = 2)
    private BigDecimal price;

    /**
     * Two entities are equal when their persistent classes match (Hibernate proxies are
     * unwrapped first) and both carry the same non-null identifier.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null) {
            return false;
        }
        Class<?> otherClass = persistentClassOf(o);
        Class<?> ownClass = persistentClassOf(this);
        if (ownClass != otherClass) {
            return false;
        }
        PriceHistoryEntity other = (PriceHistoryEntity) o;
        if (getId() == null) {
            return false;
        }
        return Objects.equals(getId(), other.getId());
    }

    @Override
    public final int hashCode() {
        return Objects.hash(id);
    }

    /** Returns the mapped class behind a Hibernate proxy, or the runtime class otherwise. */
    private static Class<?> persistentClassOf(Object candidate) {
        if (candidate instanceof HibernateProxy) {
            return ((HibernateProxy) candidate).getHibernateLazyInitializer().getPersistentClass();
        }
        return candidate.getClass();
    }
}

View File

@ -0,0 +1,54 @@
package ru.pricepulse.parsingservice.persistence.entity;
import java.io.Serializable;
import java.time.OffsetDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Embeddable;
import jakarta.persistence.FetchType;
import jakarta.persistence.JoinColumn;
import jakarta.persistence.ManyToOne;
import lombok.Getter;
import lombok.Setter;
import org.hibernate.annotations.OnDelete;
import org.hibernate.annotations.OnDeleteAction;
import org.hibernate.proxy.HibernateProxy;
/**
 * Composite identifier of a {@code price_history} row: the owning product together
 * with the timestamp of the price.
 */
@Getter
@Setter
@Embeddable
public class PriceHistoryId implements Serializable {

    @ManyToOne(fetch = FetchType.LAZY, optional = false)
    @OnDelete(action = OnDeleteAction.CASCADE)
    @JoinColumn(name = "product_id", nullable = false)
    private ProductEntity product;

    @Column(name = "date", nullable = false)
    private OffsetDateTime date;

    /**
     * Two identifiers are equal only when BOTH components match. Comparing the date
     * alone would make the ids of different products recorded at the same instant
     * indistinguishable.
     *
     * <p>As before, an identifier with a missing component is equal only to itself.
     */
    @Override
    public final boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null) {
            return false;
        }
        Class<?> oEffectiveClass = o instanceof HibernateProxy
                ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass()
                : o.getClass();
        Class<?> thisEffectiveClass = this instanceof HibernateProxy
                ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass()
                : this.getClass();
        if (thisEffectiveClass != oEffectiveClass) {
            return false;
        }
        PriceHistoryId that = (PriceHistoryId) o;
        return getProduct() != null
                && getDate() != null
                && Objects.equals(getProduct(), that.getProduct())
                && Objects.equals(getDate(), that.getDate());
    }

    /**
     * Hashes on the date only. This stays consistent with {@link #equals(Object)}
     * because equal identifiers always have equal dates, and it avoids touching the
     * lazily loaded product.
     */
    @Override
    public final int hashCode() {
        return Objects.hash(date);
    }
}

View File

@ -1,14 +1,21 @@
package ru.pricepulse.parsingservice.persistance.entity;
package ru.pricepulse.parsingservice.persistence.entity;
import jakarta.persistence.*;
import java.time.LocalDateTime;
import java.util.Objects;
import jakarta.persistence.Column;
import jakarta.persistence.Entity;
import jakarta.persistence.EnumType;
import jakarta.persistence.Enumerated;
import jakarta.persistence.GeneratedValue;
import jakarta.persistence.GenerationType;
import jakarta.persistence.Id;
import jakarta.persistence.PrePersist;
import jakarta.persistence.Table;
import lombok.Getter;
import lombok.Setter;
import org.hibernate.proxy.HibernateProxy;
import java.time.OffsetDateTime;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import ru.pricepulse.parsingservice.persistence.enums.MarketplaceEnum;
@Getter
@Setter
@ -20,8 +27,9 @@ public class ProductEntity {
@Column(name = "id", nullable = false)
private Long id;
@Enumerated(EnumType.STRING)
@Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
private String marketplace;
private MarketplaceEnum marketplace;
@Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
private String category;
@ -33,10 +41,13 @@ public class ProductEntity {
private String productName;
@Column(name = "created_at", nullable = false)
private OffsetDateTime createdAt;
private LocalDateTime createdAt;
@OneToMany(mappedBy = "product")
private Set<PriceHistoryEntity> priceHistories = new LinkedHashSet<>();
@Column(name = "url", nullable = false)
private String url;
@Column(name = "image-url", nullable = false)
private String imageUrl;
@Override
public final boolean equals(Object o) {
@ -53,4 +64,9 @@ public class ProductEntity {
public final int hashCode() {
return this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass().hashCode() : getClass().hashCode();
}
@PrePersist
protected void onCreate() {
createdAt = LocalDateTime.now();
}
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.persistence.enums;
/**
 * Marketplaces known to the parsing service. Only Ozon is defined at the moment.
 */
public enum MarketplaceEnum {
OZON
}

View File

@ -0,0 +1,37 @@
package ru.pricepulse.parsingservice.service;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.service.dto.ParsedData;
import ru.pricepulse.parsingservice.service.messaging.ParsedDataProducer;
/**
 * Inspects fetched HTML pages and publishes the items found on them through
 * {@link ParsedDataProducer}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class DataParser {

    private final ParsedDataProducer queueProducer;

    /**
     * Tells whether a page contains results, i.e. it has content and does not include the
     * {@code searchResultsError} widget.
     *
     * @param html raw page markup; {@code null} or blank is treated as "no data"
     * @return {@code true} when the page has content and no error widget
     */
    public boolean pageHasData(String html) {
        // An empty response must end pagination instead of failing inside Jsoup or
        // being reported as a page with data.
        if (html == null || html.isBlank()) {
            return false;
        }
        Document doc = Jsoup.parse(html);
        return doc.select("div[data-widget=searchResultsError]").isEmpty();
    }

    /**
     * Extracts every {@code .item-class} element from the page and sends one
     * {@link ParsedData} message per element.
     *
     * @param html raw page markup; {@code null} or blank input is ignored
     */
    public void parseAndQueueData(String html) {
        if (html == null || html.isBlank()) {
            return;
        }
        Document doc = Jsoup.parse(html);
        for (Element item : doc.select(".item-class")) {
            ParsedData parsedData = new ParsedData();
            // Previously the title was read but never copied into the message, so an
            // empty object was published.
            parsedData.setProductName(item.select(".item-title").text());
            // TODO: the ".item-price" text is not forwarded because ParsedData has no price field yet.
            log.info("Попытка отправить данные в очередь");
            queueProducer.sendToQueue(parsedData);
            log.info("Данные успешно отправлены в очередь");
        }
    }
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.service;
/**
 * Contract for marketplace-specific parsing services.
 */
public interface MarketplaceParsingService {
/**
 * Processes one marketplace category.
 *
 * @param categoryUrl URL of the category to process
 */
void processCategory(String categoryUrl);
}

View File

@ -0,0 +1,25 @@
package ru.pricepulse.parsingservice.service;
import lombok.RequiredArgsConstructor;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
/**
 * Looks up and creates partitions of the {@code price_history} table through
 * {@link JdbcTemplate}.
 */
@Service
@RequiredArgsConstructor
public class PartitionService {

    /** Plain unquoted SQL identifier: letters, digits and underscores, not starting with a digit. */
    private static final String SAFE_IDENTIFIER_REGEX = "[A-Za-z_][A-Za-z0-9_]*";

    private final JdbcTemplate jdbcTemplate;

    /**
     * Checks whether a relation with the given name exists in the {@code public} schema.
     *
     * @param partitionName unqualified relation name
     * @return {@code true} when {@code to_regclass} resolves the name
     */
    public boolean checkPartitionExists(String partitionName) {
        // The name is passed as a bind parameter rather than concatenated into the statement.
        String result = jdbcTemplate.queryForObject(
                "SELECT CAST(to_regclass(CAST(? AS text)) AS text)",
                String.class,
                "public." + partitionName);
        return result != null;
    }

    /**
     * Creates a range partition of {@code price_history} if it does not exist yet.
     *
     * @param partitionName name of the partition table; must be a plain identifier
     * @param startDate inclusive lower bound, as accepted by the database
     * @param endDate exclusive upper bound, as accepted by the database
     * @throws IllegalArgumentException if the name is not a plain identifier or a bound is {@code null}
     */
    public void createPartition(String partitionName, String startDate, String endDate) {
        // DDL cannot take bind parameters, so the inputs are validated or escaped before concatenation.
        requireSafeIdentifier(partitionName);
        String createPartitionSQL = "CREATE TABLE IF NOT EXISTS " + partitionName
                + " PARTITION OF price_history FOR VALUES FROM (" + quoteLiteral(startDate)
                + ") TO (" + quoteLiteral(endDate) + ")";
        jdbcTemplate.execute(createPartitionSQL);
    }

    /** Rejects anything that is not a plain SQL identifier. */
    private static void requireSafeIdentifier(String identifier) {
        if (identifier == null || !identifier.matches(SAFE_IDENTIFIER_REGEX)) {
            throw new IllegalArgumentException("Unsafe SQL identifier: " + identifier);
        }
    }

    /**
     * Wraps a value in single quotes, doubling embedded single quotes.
     * NOTE(review): assumes the server uses standard-conforming strings (no backslash escapes); confirm.
     */
    private static String quoteLiteral(String value) {
        if (value == null) {
            throw new IllegalArgumentException("Partition bound must not be null");
        }
        return "'" + value.replace("'", "''") + "'";
    }
}

View File

@ -0,0 +1,23 @@
package ru.pricepulse.parsingservice.service.dto;
import lombok.Getter;
import lombok.Setter;
import ru.pricepulse.parsingservice.persistence.enums.MarketplaceEnum;
/**
 * Mutable data carrier describing one parsed product. Getters and setters are
 * generated by Lombok.
 */
@Getter
@Setter
public class ParsedData {
// Marketplace the product belongs to.
private MarketplaceEnum marketplace;
private String category;
private String brand;
private String productName;
// Link to the product.
private String url;
// Link to the product image.
private String imageUrl;
}

View File

@ -0,0 +1,67 @@
package ru.pricepulse.parsingservice.service.impl.parsing;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicBoolean;
import lombok.extern.slf4j.Slf4j;
import org.springframework.retry.annotation.Recover;
import org.springframework.retry.annotation.Retryable;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.service.DataParser;
import ru.pricepulse.parsingservice.service.MarketplaceParsingService;
import ru.pricepulse.parsingservice.service.request.PageFetcher;
@Slf4j
@Service
public class OzonParsingService implements MarketplaceParsingService {
private final PageFetcher pageFetcher;
private final DataParser dataParser;
private final ExecutorService executorService;
public OzonParsingService(PageFetcher pageFetcher, DataParser dataParser) {
this.pageFetcher = pageFetcher;
this.dataParser = dataParser;
this.executorService = Executors.newFixedThreadPool(1);
}
@Override
public void processCategory(String categoryUrl) {
int pageNumber = 1;
AtomicBoolean hasMoreData = new AtomicBoolean(true);
while (hasMoreData.get()) {
int finalPageNumber = pageNumber;
executorService.submit(() -> processTask(categoryUrl, finalPageNumber, hasMoreData));
pageNumber++;
}
}
@Retryable
private void processTask(String categoryUrl, int pageNumber, AtomicBoolean hasMoreData) {
String pageUrl = categoryUrl + "?page=" + pageNumber;
String pageContent;
try {
log.info("Получение страницы {}", pageUrl);
pageContent = pageFetcher.fetchPage(pageUrl);
} catch (Exception e) {
log.error("Ошибка получения страницы - {} \n {}", pageUrl, e.getMessage(), e);
throw new RuntimeException(e);
}
if (!dataParser.pageHasData(pageContent)) {
log.warn("Данные не найдены - {}", pageUrl);
hasMoreData.set(false);
return;
}
dataParser.parseAndQueueData(pageContent);
}
@Recover
private void recover(Exception e, String categoryUrl, int pageNumber, AtomicBoolean hasMoreData) {
log.error(e.getMessage(), e);
}
}

View File

@ -0,0 +1,18 @@
package ru.pricepulse.parsingservice.service.messaging;
import lombok.RequiredArgsConstructor;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.service.dto.ParsedData;
/**
 * Publishes {@link ParsedData} messages to Kafka.
 */
@Service
@RequiredArgsConstructor
public class ParsedDataProducer {

    /** Name of the topic that receives parsed items. */
    private static final String PARSED_DATA_TOPIC = "parsed-data-queue";

    private final KafkaTemplate<String, ParsedData> kafkaTemplate;

    /**
     * Sends one message, without a key, to {@link #PARSED_DATA_TOPIC}.
     *
     * @param data payload to publish
     */
    public void sendToQueue(ParsedData data) {
        kafkaTemplate.send(PARSED_DATA_TOPIC, data);
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.service.request;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Service;
import org.springframework.web.client.RestTemplate;
/**
 * Downloads pages over HTTP with the shared {@link RestTemplate}.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PageFetcher {

    private final RestTemplate restTemplate;

    /**
     * Performs a GET request and returns the response body as text.
     *
     * @param url address of the page to download
     * @return the response body as returned by {@code RestTemplate.getForObject}
     */
    public String fetchPage(String url) {
        // Spelling of the log message corrected ("Поолучение" -> "Получение").
        log.info("Получение страницы {}", url);
        return restTemplate.getForObject(url, String.class);
    }
}

View File

@ -0,0 +1,23 @@
package ru.pricepulse.parsingservice.service.scheduler;
import lombok.RequiredArgsConstructor;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.config.properties.OzonConfigProperties;
import ru.pricepulse.parsingservice.service.impl.parsing.OzonParsingService;
@Service
@RequiredArgsConstructor
public class OzonProductUpdater {
private final OzonConfigProperties properties;
private final OzonParsingService parsingService;
@Scheduled(fixedRate = 3600000)
public void updateOzonProducts() {
properties.getCategoriesUrls().forEach(
parsingService::processCategory);
}
}

View File

@ -0,0 +1,56 @@
package ru.pricepulse.parsingservice.service.scheduler;
import java.time.LocalDate;
import java.time.format.DateTimeFormatter;
import jakarta.annotation.PostConstruct;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.service.PartitionService;
/**
 * Makes sure the {@code price_history} partitions for the current and the following
 * month exist, both at startup and on a monthly schedule.
 */
@Slf4j
@Service
@RequiredArgsConstructor
public class PartitionScheduler {

    private final PartitionService partitionService;
    private final DateTimeFormatter partitionDateTimeFormatter;

    /** Runs the partition check once after the bean has been constructed. */
    @PostConstruct
    public void init() {
        checkAndCreateMonthlyPartitions();
    }

    /** Runs the partition check on the {@code @monthly} cron schedule. */
    @Scheduled(cron = "@monthly")
    public void checkAndCreatePartitionsMonthly() {
        checkAndCreateMonthlyPartitions();
    }

    /**
     * Checks, and creates when missing, the partitions that start on the first day of
     * the current month and of the next month.
     */
    public void checkAndCreateMonthlyPartitions() {
        LocalDate firstDayOfThisMonth = LocalDate.now().withDayOfMonth(1);
        LocalDate firstDayOfNextMonth = firstDayOfThisMonth.plusMonths(1);
        String thisMonthName = getPartitionName(firstDayOfThisMonth);
        String nextMonthName = getPartitionName(firstDayOfNextMonth);
        checkAndCreatePartition(thisMonthName, firstDayOfThisMonth);
        checkAndCreatePartition(nextMonthName, firstDayOfNextMonth);
    }

    /** Builds {@code price_history_<formatted date>} using the injected formatter. */
    private String getPartitionName(LocalDate date) {
        return "price_history_" + date.format(partitionDateTimeFormatter);
    }

    /** Creates a one-month partition starting at {@code startDate} unless it already exists. */
    private void checkAndCreatePartition(String partitionName, LocalDate startDate) {
        if (partitionService.checkPartitionExists(partitionName)) {
            log.info("Партиция {} уже существует.", partitionName);
            return;
        }
        LocalDate endDate = startDate.plusMonths(1);
        partitionService.createPartition(partitionName, startDate.toString(), endDate.toString());
        log.info("Партиция {} создана для диапазона: {} - {} ", partitionName, startDate, endDate);
    }
}

View File

@ -0,0 +1,60 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import jakarta.persistence.EntityNotFoundException;
import jakarta.servlet.http.HttpServletRequest;
import org.springframework.http.HttpStatus;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.ControllerAdvice;
import org.springframework.web.bind.annotation.ExceptionHandler;
import org.springframework.web.bind.annotation.ResponseStatus;
/**
 * Application-wide exception handler that turns exceptions into {@link ErrorResponse}
 * bodies with a matching HTTP status.
 */
@ControllerAdvice
public class CommonExceptionHandler {

    /** Maps {@link IllegalArgumentException} to 400 Bad Request. */
    @ExceptionHandler(IllegalArgumentException.class)
    @ResponseStatus(HttpStatus.BAD_REQUEST)
    public ResponseEntity<ErrorResponse> exceptionHandler(Exception ex,
                                                          HttpServletRequest request) {
        return toResponse(HttpStatus.BAD_REQUEST, request, ex);
    }

    /** Maps {@link EntityNotFoundException} to 404 Not Found. */
    @ExceptionHandler(EntityNotFoundException.class)
    @ResponseStatus(HttpStatus.NOT_FOUND)
    public ResponseEntity<ErrorResponse> handleNotFoundException(Exception ex,
                                                                 HttpServletRequest request) {
        return toResponse(HttpStatus.NOT_FOUND, request, ex);
    }

    /** Maps any other {@link Exception} to 500 Internal Server Error. */
    @ExceptionHandler(Exception.class)
    @ResponseStatus(HttpStatus.INTERNAL_SERVER_ERROR)
    public ResponseEntity<ErrorResponse> handleInternalServerErrorException(Exception ex,
                                                                            HttpServletRequest request) {
        return toResponse(HttpStatus.INTERNAL_SERVER_ERROR, request, ex);
    }

    // TODO: handlers for AccessDeniedException (403 FORBIDDEN) and AuthenticationException
    // (401 UNAUTHORIZED) are not enabled yet; they would follow the same pattern as above.

    /**
     * Builds the response entity: numeric code, status, request URI and the exception message.
     */
    private ResponseEntity<ErrorResponse> toResponse(HttpStatus status, HttpServletRequest request, Exception ex) {
        URI requestUri = URI.create(request.getRequestURI());
        ErrorResponse body = new ErrorResponse(status.value(), status, requestUri, ex.getMessage());
        return ResponseEntity.status(status).body(body);
    }
}

View File

@ -0,0 +1,13 @@
package ru.pricepulse.parsingservice.web.handler;
import java.net.URI;
import org.springframework.http.HttpStatus;
/**
 * Body returned to the client when a request fails.
 *
 * @param statusCode numeric HTTP status code
 * @param status HTTP status
 * @param requestURI URI of the failed request
 * @param message message of the exception that caused the failure
 */
public record ErrorResponse(Integer statusCode, HttpStatus status, URI requestURI, String message) {
}

View File

@ -14,3 +14,11 @@ spring:
password: ${JDBC_PASSWORD}
liquibase:
change-log: classpath:/db/changelog/master.yml
kafka:
marketplace:
ozon:
categories-urls:
- https://www.ozon.ru/category/noutbuki-15692

View File

@ -1,7 +1,7 @@
databaseChangeLog:
- include:
file: 20240926_create_product_table.xml
file: 20240926_001_create_product_table.xml
relativeToChangelogFile: true
- include:
file: 20240926_create_price_history_table.xml
file: 20240926_002_create_price_history_table.xml
relativeToChangelogFile: true

View File

@ -0,0 +1,39 @@
<?xml version="1.0" encoding="UTF-8"?>
<databaseChangeLog
xmlns="http://www.liquibase.org/xml/ns/dbchangelog"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://www.liquibase.org/xml/ns/dbchangelog
http://www.liquibase.org/xml/ns/dbchangelog/dbchangelog-4.9.xsd">
<!--
Adds the NOT NULL columns "url", "image-url" and "article" to "product", then drops
"price_history" and recreates it as a table range-partitioned by "date" with the
composite primary key (product_id, date) and a cascading foreign key to "product".
NOTE(review): the changeSet id repeats the name of the product-table changelog and looks
copy-pasted; confirm before this changeSet is applied anywhere.
NOTE(review): the new columns are NOT NULL with no default value; presumably this fails
when "product" already contains rows. Confirm.
-->
<changeSet id="20240926_create_product_table.xml" author="danil">
<addColumn tableName="product">
<column name="url" type="varchar" remarks="Ссылка на товар">
<constraints nullable="false" />
</column>
</addColumn>
<addColumn tableName="product">
<column name="image-url" type="varchar" remarks="Ссылка на изображение товара">
<constraints nullable="false" />
</column>
</addColumn>
<addColumn tableName="product">
<column name="article" type="varchar" remarks="Артикул товара">
<constraints nullable="false" />
</column>
</addColumn>
<!-- The existing table is dropped, so rows already stored in price_history are not kept. -->
<dropTable tableName="price_history" cascadeConstraints="true" />
<!-- Raw SQL is used for the partitioned parent table. -->
<sql>
CREATE TABLE if not exists price_history(
product_id bigint NOT NULL,
price numeric(10, 2) NOT NULL,
date timestamptz NOT NULL,
PRIMARY KEY (product_id, date)
) PARTITION BY RANGE (date);
</sql>
<!-- Deleting a product also deletes its price history rows. -->
<addForeignKeyConstraint baseTableName="price_history"
baseColumnNames="product_id"
constraintName="fk_product_price_history"
referencedTableName="product"
referencedColumnNames="id"
onDelete="CASCADE"/>
</changeSet>
</databaseChangeLog>

View File

@ -0,0 +1,4 @@
databaseChangeLog:
- include:
file: 20241006_001_add_columns_in_tables.xml
relativeToChangelogFile: true

View File

@ -2,3 +2,6 @@ databaseChangeLog:
- include:
file: 20240926/master.yml
relativeToChangelogFile: true
- include:
file: 20241006/master.yml
relativeToChangelogFile: true