feature: wb parser

This commit is contained in:
Emelyanov535 2024-10-13 18:34:11 +04:00
parent ffe6920b29
commit 5ae300389c
17 changed files with 316 additions and 8 deletions

View File

@ -13,4 +13,18 @@
<option name="Make" enabled="true" />
</method>
</configuration>
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
<option name="ACTIVE_PROFILES" value="dev" />
<envs>
<env name="JDBC_PASSWORD" value="postgres" />
<env name="JDBC_USERNAME" value="postgres" />
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
<env name="SERVER_PORT" value="8080" />
</envs>
<module name="parsing-service.main" />
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
<method v="2">
<option name="Make" enabled="true" />
</method>
</configuration>
</component>

View File

@ -34,6 +34,7 @@ dependencies {
testImplementation 'org.springframework.boot:spring-boot-starter-test'
testImplementation 'org.springframework.kafka:spring-kafka-test'
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
implementation 'org.springframework.boot:spring-boot-starter-webflux'
}
tasks.named('test') {

View File

@ -0,0 +1,5 @@
package ru.pricepulse.parsingservice.enumeration;
/**
 * Product categories tracked by the parsing service.
 * Stored on the product entity as its {@code category} value.
 */
public enum Category {
// Currently the only category assigned to parsed products.
LAPTOP
}

View File

@ -0,0 +1,8 @@
package ru.pricepulse.parsingservice.enumeration;
/**
 * Marketplaces a product can originate from.
 * Stored on the product entity as its {@code marketplace} value.
 */
public enum Marketplace {
WILDBERRIES,
OZON,
DNS
}

View File

@ -1,18 +1,22 @@
package ru.pricepulse.parsingservice.persistance.entity;
import jakarta.persistence.*;
import lombok.Getter;
import lombok.Setter;
import lombok.*;
import org.hibernate.annotations.OnDelete;
import org.hibernate.annotations.OnDeleteAction;
import org.hibernate.proxy.HibernateProxy;
import java.math.BigDecimal;
import java.time.OffsetDateTime;
import java.util.Objects;
@Getter
@Setter
@Entity
@Table(name = "price_history")
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class PriceHistoryEntity {
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@ -29,4 +33,20 @@ public class PriceHistoryEntity {
@Column(name = "date", nullable = false)
private OffsetDateTime date;
/**
 * Compares two price-history rows by persistent identity.
 *
 * <p>Hibernate proxies are unwrapped to their persistent class before the class
 * comparison, so a proxy and a plain instance of the same entity can be equal.
 * An entity whose id is still {@code null} is only equal to itself.
 *
 * @param o the object to compare with
 * @return {@code true} when both objects are the same instance, or share the same
 *         effective entity class and the same non-null id
 */
@Override
public final boolean equals(Object o) {
    if (this == o) {
        return true;
    }
    if (o == null) {
        return false;
    }

    Class<?> otherType;
    if (o instanceof HibernateProxy) {
        otherType = ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass();
    } else {
        otherType = o.getClass();
    }

    Class<?> ownType;
    if (this instanceof HibernateProxy) {
        ownType = ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass();
    } else {
        ownType = this.getClass();
    }

    if (ownType != otherType) {
        return false;
    }

    PriceHistoryEntity other = (PriceHistoryEntity) o;
    if (getId() == null) {
        return false;
    }
    return Objects.equals(getId(), other.getId());
}
/**
 * Returns a hash derived from the effective entity class only.
 *
 * <p>The id is deliberately not part of the hash, so the value does not change
 * when the id is assigned; a Hibernate proxy hashes like the class it stands for.
 *
 * @return the hash code of the persistent class (proxy) or of the runtime class
 */
@Override
public final int hashCode() {
    if (this instanceof HibernateProxy) {
        return ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass().hashCode();
    }
    return getClass().hashCode();
}
}

View File

@ -1,19 +1,23 @@
package ru.pricepulse.parsingservice.persistance.entity;
import jakarta.persistence.*;
import lombok.Getter;
import lombok.Setter;
import lombok.*;
import org.hibernate.proxy.HibernateProxy;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import java.time.OffsetDateTime;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
@Entity
@Getter
@Setter
@Entity
@Table(name = "product")
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ProductEntity {
@Id
@GeneratedValue(strategy = GenerationType.IDENTITY)
@ -21,10 +25,12 @@ public class ProductEntity {
private Long id;
@Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
private String marketplace;
@Enumerated(EnumType.STRING)
private Marketplace marketplace;
@Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
private String category;
@Enumerated(EnumType.STRING)
private Category category;
@Column(name = "brand", nullable = false, length = Integer.MAX_VALUE)
private String brand;

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.persistance.repository;
import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistance.entity.PriceHistoryEntity;
/**
 * Spring Data JPA repository for {@link PriceHistoryEntity} rows, keyed by {@code Long} id.
 * Declares no custom queries; only the operations inherited from {@link JpaRepository} are available.
 */
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, Long> {
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.persistance.repository;
import org.springframework.data.jpa.repository.JpaRepository;
import ru.pricepulse.parsingservice.persistance.entity.ProductEntity;
/**
 * Spring Data JPA repository for {@link ProductEntity} rows, keyed by {@code Long} id.
 * Declares no custom queries; only the operations inherited from {@link JpaRepository} are available.
 */
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
}

View File

@ -0,0 +1,19 @@
package ru.pricepulse.parsingservice.wildberries_parser;
import lombok.AllArgsConstructor;
import org.springframework.boot.CommandLineRunner;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
/**
 * Startup hook that triggers one full parsing run as soon as the application has started.
 *
 * <p>Being a {@link CommandLineRunner} component, it runs on every application start.
 * Progress markers are written to standard output before and after the run.
 */
@Component
public class DebugRunner implements CommandLineRunner {

    private final ParsingService parsingService;

    /**
     * @param parsingService service whose {@code parse()} method is invoked at startup
     */
    public DebugRunner(ParsingService parsingService) {
        this.parsingService = parsingService;
    }

    /**
     * Runs the parser once, framed by start/finish messages on standard output.
     *
     * @param args command-line arguments (ignored)
     */
    @Override
    public void run(String... args) {
        final String startMessage = "Начинаем отладку...";
        final String finishMessage = "Заканчиваем отладку...";

        System.out.println(startMessage);
        parsingService.parse();
        System.out.println(finishMessage);
    }
}

View File

@ -0,0 +1,21 @@
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
import lombok.Getter;
import lombok.Setter;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.context.annotation.Configuration;
/**
 * Settings for the Wildberries parser, bound from configuration properties
 * under the {@code rest.wildberries} prefix. Getters and setters are generated by Lombok.
 */
@Configuration
@ConfigurationProperties(prefix = "rest.wildberries")
@Getter
@Setter
public class WbProperties {
// Not read by any code visible in this change — presumably reserved for fetching the site menu; confirm.
private String baseUrl;
// Not read by any code visible in this change — TODO confirm intended use.
private String catalogUrl;
// NOTE(review): not read in the visible code; WebClientConfig sets its own User-Agent literal instead.
private String userAgent;
// URL prefix of the catalog API; ClientImpl prepends it to the shard and query path.
private String catalogWbUrl;
// NOTE(review): not read in the visible code; ClientImpl retries a fixed 50 times instead.
private int retryAttempts;
// NOTE(review): not read in the visible code — presumably milliseconds between retries; confirm.
private long retryDelay;
// Path passed by ParsingService as the "query" argument of Client.scrapPage.
private String laptopUrl;
// Catalog shard passed by ParsingService as the "shard" argument of Client.scrapPage.
private String shard;
}

View File

@ -0,0 +1,18 @@
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.web.reactive.function.client.WebClient;
@Configuration
public class WebClientConfig {
@Bean
public WebClient webClient() {
return WebClient.builder()
.defaultHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)")
.codecs(configurer -> configurer
.defaultCodecs()
.maxInMemorySize(10 * 1024 * 1024))
.build();
}
}

View File

@ -0,0 +1,25 @@
package ru.pricepulse.parsingservice.wildberries_parser.converter;
import org.springframework.core.convert.converter.Converter;
import org.springframework.stereotype.Component;
import ru.pricepulse.parsingservice.enumeration.Category;
import ru.pricepulse.parsingservice.enumeration.Marketplace;
import ru.pricepulse.parsingservice.persistance.entity.ProductEntity;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
import java.time.OffsetDateTime;
/**
 * Converts a product parsed from a marketplace response into a new, not yet persisted
 * {@link ProductEntity}.
 *
 * <p>Every converted product is tagged as {@link Marketplace#WILDBERRIES} /
 * {@link Category#LAPTOP} and stamped with the current time as its creation date.
 */
@Component
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {

    /**
     * @param source parsed product; its brand and name are copied to the entity
     * @return a new entity built from {@code source}
     */
    @Override
    public ProductEntity convert(ProductInfoDto source) {
        var entityBuilder = ProductEntity.builder();

        // Fixed classification: this converter only handles Wildberries laptops.
        entityBuilder.marketplace(Marketplace.WILDBERRIES);
        entityBuilder.category(Category.LAPTOP);

        // Data copied from the parsed product.
        entityBuilder.brand(source.getBrand());
        entityBuilder.productName(source.getName());

        entityBuilder.createdAt(OffsetDateTime.now());
        return entityBuilder.build();
    }
}

View File

@ -0,0 +1,85 @@
package ru.pricepulse.parsingservice.wildberries_parser.service;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.ObjectMapper;
import jakarta.transaction.Transactional;
import lombok.AllArgsConstructor;
import org.springframework.core.convert.ConversionService;
import org.springframework.stereotype.Service;
import ru.pricepulse.parsingservice.persistance.entity.PriceHistoryEntity;
import ru.pricepulse.parsingservice.persistance.entity.ProductEntity;
import ru.pricepulse.parsingservice.persistance.repository.ProductPriceRepository;
import ru.pricepulse.parsingservice.persistance.repository.ProductRepository;
import ru.pricepulse.parsingservice.wildberries_parser.configuration.WbProperties;
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
import java.math.BigDecimal;
import java.time.OffsetDateTime;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Scrapes the configured catalog page by page and stores every product found
 * together with a price snapshot taken at parsing time.
 *
 * <p>NOTE(review): each run converts every parsed product into a brand-new
 * {@link ProductEntity}, so repeated runs appear to insert the same products again —
 * confirm whether de-duplication is expected.
 */
@Service
@AllArgsConstructor
public class ParsingService {

    /** Page size used to derive the number of pages from the {@code data.total} counter. */
    private static final int ELEMENTS_IN_PAGE = 100;

    /** {@code salePriceU} is the price multiplied by 100, i.e. it carries two implied decimals. */
    private static final int PRICE_SCALE = 2;

    private final Client client;
    private final ObjectMapper objectMapper;
    private final ConversionService conversionService;
    private final ProductRepository productRepository;
    private final WbProperties wbProperties;
    private final ProductPriceRepository productPriceRepository;

    /**
     * Downloads all catalog pages, converts the products and saves products and prices.
     *
     * <p>The first page is always requested; the page count is taken from its
     * {@code data.total} field. Nothing is saved until every page has been processed.
     *
     * <p>NOTE(review): the transaction stays open while all pages are downloaded.
     *
     * @throws IllegalStateException if a response has no {@code data} object, the first page
     *                               has no numeric {@code data.total}, or a product has no price
     */
    @Transactional
    public void parse() {
        List<ProductEntity> productEntities = new ArrayList<>();
        List<PriceHistoryEntity> priceHistories = new ArrayList<>();

        int page = 1;
        int totalPages = 1;
        do {
            Map<String, Object> data = fetchPageData(page);
            if (page == 1) {
                totalPages = resolveTotalPages(data);
            }
            for (ProductInfoDto dto : toProductInfoDtos(data)) {
                ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
                productEntities.add(productEntity);
                priceHistories.add(toPriceHistory(productEntity, dto));
            }
            page++;
        } while (page <= totalPages);

        // Products first: the price rows reference them.
        productRepository.saveAll(productEntities);
        productPriceRepository.saveAll(priceHistories);
    }

    /** Requests one catalog page and returns its {@code data} object, failing clearly when it is absent. */
    private Map<String, Object> fetchPageData(int page) {
        Map<String, Object> pageData =
                client.scrapPage(page, wbProperties.getShard(), wbProperties.getLaptopUrl());
        Object data = pageData == null ? null : pageData.get("data");
        if (!(data instanceof Map)) {
            throw new IllegalStateException("Response for page " + page + " has no 'data' object");
        }
        // Safe: the value was deserialized from a JSON object, whose keys are always strings.
        @SuppressWarnings("unchecked")
        Map<String, Object> dataMap = (Map<String, Object>) data;
        return dataMap;
    }

    /** Derives the page count (rounded up) from {@code data.total}, accepting any numeric JSON type. */
    private static int resolveTotalPages(Map<String, Object> data) {
        Object total = data.get("total");
        if (!(total instanceof Number)) {
            throw new IllegalStateException("Response has no numeric 'data.total': " + total);
        }
        long totalElements = ((Number) total).longValue();
        return (int) ((totalElements + ELEMENTS_IN_PAGE - 1) / ELEMENTS_IN_PAGE);
    }

    /** Maps the {@code data.products} array to DTOs; a missing array yields an empty list. */
    private List<ProductInfoDto> toProductInfoDtos(Map<String, Object> data) {
        List<ProductInfoDto> products = objectMapper.convertValue(
                data.get("products"),
                new TypeReference<List<ProductInfoDto>>() {}
        );
        // convertValue returns null for a null input (page without a products array).
        return products == null ? List.of() : products;
    }

    /** Builds the price snapshot for a product, converting {@code salePriceU} without floating point. */
    private static PriceHistoryEntity toPriceHistory(ProductEntity product, ProductInfoDto dto) {
        Integer salePriceU = dto.getSalePriceU();
        if (salePriceU == null) {
            throw new IllegalStateException("Product " + dto.getId() + " has no 'salePriceU'");
        }
        return PriceHistoryEntity.builder()
                .product(product)
                .price(BigDecimal.valueOf(salePriceU, PRICE_SCALE))
                .date(OffsetDateTime.now())
                .build();
    }
}

View File

@ -0,0 +1,7 @@
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
import java.util.Map;
/**
 * Fetches raw catalog pages from a marketplace.
 */
public interface Client {
/**
 * Downloads one catalog page and returns the response body as a generic map.
 *
 * @param page  page number to request (callers in this change start at 1)
 * @param shard catalog shard identifier, used as part of the request URL
 * @param query path segment appended after the shard in the request URL
 * @return the deserialized response body
 */
Map<String, Object> scrapPage(int page, String shard, String query);
}

View File

@ -0,0 +1,33 @@
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
import lombok.AllArgsConstructor;
import org.springframework.core.ParameterizedTypeReference;
import org.springframework.stereotype.Service;
import org.springframework.web.reactive.function.client.WebClient;
import ru.pricepulse.parsingservice.wildberries_parser.configuration.WbProperties;
import java.util.Map;
/**
 * {@link Client} implementation that calls the catalog API through a blocking
 * {@link WebClient} request.
 */
@AllArgsConstructor
@Service
public class ClientImpl implements Client {

    /** Target type for deserializing the JSON response body. */
    private static final ParameterizedTypeReference<Map<String, Object>> RESPONSE_TYPE =
            new ParameterizedTypeReference<Map<String, Object>>() {
            };

    private final WebClient webClient;
    private final WbProperties wbProperties;

    /**
     * Requests one catalog page and blocks until the body has been read.
     *
     * <p>The URL is the configured catalog prefix followed by {@code shard}, {@code query}
     * and a fixed query string carrying the page number.
     *
     * <p>NOTE(review): failures are retried a fixed 50 times with no delay, while
     * {@code WbProperties} exposes {@code retryAttempts}/{@code retryDelay} that are not used here.
     *
     * @param page  page number placed into the {@code page} query parameter
     * @param shard catalog shard appended right after the catalog prefix
     * @param query path appended after the shard
     * @return the deserialized response body
     */
    @Override
    public Map<String, Object> scrapPage(int page, String shard, String query) {
        String requestUrl = new StringBuilder()
                .append(wbProperties.getCatalogWbUrl())
                .append(shard)
                .append(query)
                .append("?dest=-1257786&page=")
                .append(page)
                .append("&subject=2290")
                .toString();

        var response = webClient.get()
                .uri(requestUrl)
                .retrieve()
                .bodyToMono(RESPONSE_TYPE)
                .retry(50);
        return response.block();
    }
}

View File

@ -0,0 +1,20 @@
package ru.pricepulse.parsingservice.wildberries_parser.service.dto;
import lombok.AllArgsConstructor;
import lombok.Builder;
import lombok.Data;
import lombok.NoArgsConstructor;
/**
 * One product entry as parsed from the {@code products} array of a catalog response.
 * Accessors, constructors and the builder are generated by Lombok.
 */
@Data
@AllArgsConstructor
@NoArgsConstructor
@Builder
public class ProductInfoDto {
// Included in the parsed payload; not copied to the entity by the visible converter.
private Long id;
// Copied to the product entity's brand.
private String brand;
// Copied to the product entity's product name.
private String name;
// Not read by the code visible in this change.
private String supplier;
// Not read by the code visible in this change.
private Double supplierRating;
// Sale price multiplied by 100; ParsingService divides it by 100 to obtain the stored price.
private Integer salePriceU;
// Not read by the code visible in this change.
private Integer reviewRating;
}

View File

@ -13,4 +13,16 @@ spring:
username: ${JDBC_USERNAME}
password: ${JDBC_PASSWORD}
liquibase:
change-log: classpath:/db/changelog/master.yml
change-log: classpath:/db/changelog/master.yml
# Wildberries parser settings, bound to WbProperties (prefix "rest.wildberries").
rest:
wildberries:
# base-url, catalog-url and user-agent are bound but not read by the code in this change.
base-url: "https://static-basket-01.wbbasket.ru"
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
# Request URL = catalog-wb-url + shard + laptop-url + fixed query string (see ClientImpl).
catalog-wb-url: "https://catalog.wb.ru/catalog/"
# NOTE(review): retry-attempts / retry-delay are bound but ClientImpl currently retries a fixed 50 times.
retry-attempts: 5
retry-delay: 1000
shard: "electronic15"
laptop-url: "/catalog"