feature: wb parser
This commit is contained in:
parent
ffe6920b29
commit
5ae300389c
@ -13,4 +13,18 @@
|
||||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
<configuration default="false" name="ParsingService [local]" type="SpringBootApplicationConfigurationType" factoryName="Spring Boot">
|
||||
<option name="ACTIVE_PROFILES" value="dev" />
|
||||
<envs>
|
||||
<env name="JDBC_PASSWORD" value="postgres" />
|
||||
<env name="JDBC_USERNAME" value="postgres" />
|
||||
<env name="JDBC_URL" value="localhost:5432/parsed_data" />
|
||||
<env name="SERVER_PORT" value="8080" />
|
||||
</envs>
|
||||
<module name="parsing-service.main" />
|
||||
<option name="SPRING_BOOT_MAIN_CLASS" value="ru.pricepulse.parsingservice.ParsingServiceApplication" />
|
||||
<method v="2">
|
||||
<option name="Make" enabled="true" />
|
||||
</method>
|
||||
</configuration>
|
||||
</component>
|
@ -34,6 +34,7 @@ dependencies {
|
||||
testImplementation 'org.springframework.boot:spring-boot-starter-test'
|
||||
testImplementation 'org.springframework.kafka:spring-kafka-test'
|
||||
testRuntimeOnly 'org.junit.platform:junit-platform-launcher'
|
||||
implementation 'org.springframework.boot:spring-boot-starter-webflux'
|
||||
}
|
||||
|
||||
tasks.named('test') {
|
||||
|
@ -0,0 +1,5 @@
|
||||
package ru.pricepulse.parsingservice.enumeration;
|
||||
|
||||
/**
 * Product categories recognised by the parsing service.
 *
 * <p>Only a single category is defined at the moment.
 */
public enum Category {

    /** Laptop computers. */
    LAPTOP
}
|
@ -0,0 +1,8 @@
|
||||
package ru.pricepulse.parsingservice.enumeration;
|
||||
|
||||
/**
 * Marketplaces a product record can originate from.
 *
 * <p>NOTE(review): entities appear to persist this enum by constant name
 * ({@code EnumType.STRING}), so renaming a constant would likely require a
 * data migration — confirm before changing names.
 */
public enum Marketplace {

    /** Wildberries marketplace. */
    WILDBERRIES,

    /** Ozon marketplace. */
    OZON,

    /** DNS retail chain. */
    DNS
}
|
||||
|
@ -1,18 +1,22 @@
|
||||
package ru.pricepulse.parsingservice.persistance.entity;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.*;
|
||||
import org.hibernate.annotations.OnDelete;
|
||||
import org.hibernate.annotations.OnDeleteAction;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.Objects;
|
||||
|
||||
@Getter
|
||||
@Setter
|
||||
@Entity
|
||||
@Table(name = "price_history")
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Builder
|
||||
public class PriceHistoryEntity {
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
@ -29,4 +33,20 @@ public class PriceHistoryEntity {
|
||||
|
||||
@Column(name = "date", nullable = false)
|
||||
private OffsetDateTime date;
|
||||
|
||||
@Override
|
||||
public final boolean equals(Object o) {
|
||||
if (this == o) return true;
|
||||
if (o == null) return false;
|
||||
Class<?> oEffectiveClass = o instanceof HibernateProxy ? ((HibernateProxy) o).getHibernateLazyInitializer().getPersistentClass() : o.getClass();
|
||||
Class<?> thisEffectiveClass = this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass() : this.getClass();
|
||||
if (thisEffectiveClass != oEffectiveClass) return false;
|
||||
PriceHistoryEntity that = (PriceHistoryEntity) o;
|
||||
return getId() != null && Objects.equals(getId(), that.getId());
|
||||
}
|
||||
|
||||
@Override
|
||||
public final int hashCode() {
|
||||
return this instanceof HibernateProxy ? ((HibernateProxy) this).getHibernateLazyInitializer().getPersistentClass().hashCode() : getClass().hashCode();
|
||||
}
|
||||
}
|
@ -1,19 +1,23 @@
|
||||
package ru.pricepulse.parsingservice.persistance.entity;
|
||||
|
||||
import jakarta.persistence.*;
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import lombok.*;
|
||||
import org.hibernate.proxy.HibernateProxy;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.LinkedHashSet;
|
||||
import java.util.Objects;
|
||||
import java.util.Set;
|
||||
|
||||
@Entity
|
||||
@Getter
|
||||
@Setter
|
||||
@Entity
|
||||
@Table(name = "product")
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Builder
|
||||
public class ProductEntity {
|
||||
@Id
|
||||
@GeneratedValue(strategy = GenerationType.IDENTITY)
|
||||
@ -21,10 +25,12 @@ public class ProductEntity {
|
||||
private Long id;
|
||||
|
||||
@Column(name = "marketplace", nullable = false, length = Integer.MAX_VALUE)
|
||||
private String marketplace;
|
||||
@Enumerated(EnumType.STRING)
|
||||
private Marketplace marketplace;
|
||||
|
||||
@Column(name = "category", nullable = false, length = Integer.MAX_VALUE)
|
||||
private String category;
|
||||
@Enumerated(EnumType.STRING)
|
||||
private Category category;
|
||||
|
||||
@Column(name = "brand", nullable = false, length = Integer.MAX_VALUE)
|
||||
private String brand;
|
||||
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.persistance.repository;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import ru.pricepulse.parsingservice.persistance.entity.PriceHistoryEntity;
|
||||
|
||||
public interface ProductPriceRepository extends JpaRepository<PriceHistoryEntity, Long> {
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.persistance.repository;
|
||||
|
||||
import org.springframework.data.jpa.repository.JpaRepository;
|
||||
import ru.pricepulse.parsingservice.persistance.entity.ProductEntity;
|
||||
|
||||
public interface ProductRepository extends JpaRepository<ProductEntity, Long> {
|
||||
}
|
@ -0,0 +1,19 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.boot.CommandLineRunner;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.ParsingService;
|
||||
|
||||
@Component
|
||||
@AllArgsConstructor
|
||||
public class DebugRunner implements CommandLineRunner {
|
||||
private final ParsingService parsingService;
|
||||
|
||||
@Override
|
||||
public void run(String... args){
|
||||
System.out.println("Начинаем отладку...");
|
||||
parsingService.parse();
|
||||
System.out.println("Заканчиваем отладку...");
|
||||
}
|
||||
}
|
@ -0,0 +1,21 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||
|
||||
import lombok.Getter;
|
||||
import lombok.Setter;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
|
||||
@Configuration
|
||||
@ConfigurationProperties(prefix = "rest.wildberries")
|
||||
@Getter
|
||||
@Setter
|
||||
public class WbProperties {
|
||||
private String baseUrl;
|
||||
private String catalogUrl;
|
||||
private String userAgent;
|
||||
private String catalogWbUrl;
|
||||
private int retryAttempts;
|
||||
private long retryDelay;
|
||||
private String laptopUrl;
|
||||
private String shard;
|
||||
}
|
@ -0,0 +1,18 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.configuration;
|
||||
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Configuration;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
@Configuration
|
||||
public class WebClientConfig {
|
||||
@Bean
|
||||
public WebClient webClient() {
|
||||
return WebClient.builder()
|
||||
.defaultHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)")
|
||||
.codecs(configurer -> configurer
|
||||
.defaultCodecs()
|
||||
.maxInMemorySize(10 * 1024 * 1024))
|
||||
.build();
|
||||
}
|
||||
}
|
@ -0,0 +1,25 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.converter;
|
||||
|
||||
import org.springframework.core.convert.converter.Converter;
|
||||
import org.springframework.stereotype.Component;
|
||||
import ru.pricepulse.parsingservice.enumeration.Category;
|
||||
import ru.pricepulse.parsingservice.enumeration.Marketplace;
|
||||
import ru.pricepulse.parsingservice.persistance.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
import java.time.OffsetDateTime;
|
||||
|
||||
|
||||
@Component
|
||||
public class ProductInfoDto2ProductEntity implements Converter<ProductInfoDto, ProductEntity> {
|
||||
@Override
|
||||
public ProductEntity convert(ProductInfoDto source) {
|
||||
return ProductEntity.builder()
|
||||
.marketplace(Marketplace.WILDBERRIES)
|
||||
.category(Category.LAPTOP)
|
||||
.brand(source.getBrand())
|
||||
.productName(source.getName())
|
||||
.createdAt(OffsetDateTime.now())
|
||||
.build();
|
||||
}
|
||||
}
|
@ -0,0 +1,85 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service;
|
||||
|
||||
import com.fasterxml.jackson.core.type.TypeReference;
|
||||
import com.fasterxml.jackson.databind.ObjectMapper;
|
||||
import jakarta.transaction.Transactional;
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.core.convert.ConversionService;
|
||||
import org.springframework.stereotype.Service;
|
||||
import ru.pricepulse.parsingservice.persistance.entity.PriceHistoryEntity;
|
||||
import ru.pricepulse.parsingservice.persistance.entity.ProductEntity;
|
||||
import ru.pricepulse.parsingservice.persistance.repository.ProductPriceRepository;
|
||||
import ru.pricepulse.parsingservice.persistance.repository.ProductRepository;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.configuration.WbProperties;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.client.Client;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.service.dto.ProductInfoDto;
|
||||
|
||||
import java.math.BigDecimal;
|
||||
import java.time.OffsetDateTime;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
@Service
|
||||
@AllArgsConstructor
|
||||
public class ParsingService {
|
||||
private final Client client;
|
||||
private final ObjectMapper objectMapper;
|
||||
private final ConversionService conversionService;
|
||||
private final ProductRepository productRepository;
|
||||
private final WbProperties wbProperties;
|
||||
private final ProductPriceRepository productPriceRepository;
|
||||
|
||||
@Transactional
|
||||
public void parse() {
|
||||
List<ProductEntity> productEntities = new ArrayList<>();
|
||||
List<PriceHistoryEntity> priceHistories = new ArrayList<>();
|
||||
|
||||
final int elementsInPage = 100;
|
||||
int page = 1;
|
||||
Integer totalPages = null;
|
||||
|
||||
do {
|
||||
var pageData = client.scrapPage(page, wbProperties.getShard(), wbProperties.getLaptopUrl());
|
||||
|
||||
if (totalPages == null) {
|
||||
Map<String, Object> dataMap = (Map<String, Object>) pageData.get("data");
|
||||
int totalElements = (int) dataMap.get("total");
|
||||
totalPages = (int) Math.ceil((double) totalElements / elementsInPage);
|
||||
}
|
||||
|
||||
List<ProductInfoDto> productInfoDtoList = convertMapObjectToListProductInfoDto(pageData);
|
||||
|
||||
productInfoDtoList.forEach(dto -> {
|
||||
|
||||
ProductEntity productEntity = conversionService.convert(dto, ProductEntity.class);
|
||||
|
||||
PriceHistoryEntity priceHistory = PriceHistoryEntity.builder()
|
||||
.product(productEntity)
|
||||
.price(BigDecimal.valueOf(dto.getSalePriceU() / 100.0))
|
||||
.date(OffsetDateTime.now())
|
||||
.build();
|
||||
|
||||
productEntities.add(productEntity);
|
||||
priceHistories.add(priceHistory);
|
||||
});
|
||||
|
||||
page++;
|
||||
} while (page <= totalPages);
|
||||
|
||||
productRepository.saveAll(productEntities);
|
||||
productPriceRepository.saveAll(priceHistories);
|
||||
}
|
||||
|
||||
private List<ProductInfoDto> convertMapObjectToListProductInfoDto(Map<String, Object> map) {
|
||||
Map<String, ArrayList<Object>> dataMap = (Map<String, ArrayList<Object>>) map.get("data");
|
||||
return getProductInfoDtos(dataMap);
|
||||
}
|
||||
|
||||
private List<ProductInfoDto> getProductInfoDtos(Map<String, ArrayList<Object>> dataMap) {
|
||||
return objectMapper.convertValue(
|
||||
dataMap.get("products"),
|
||||
new TypeReference<>() {}
|
||||
);
|
||||
}
|
||||
}
|
@ -0,0 +1,7 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
/**
 * Client for reading catalog listing pages from a remote source.
 */
public interface Client {

    /**
     * Fetches one page of a catalog listing.
     *
     * @param page  number of the page to fetch
     * @param shard catalog shard identifier
     * @param query query path identifying the listing
     * @return the response body as a map of top-level keys to values
     */
    Map<String, Object> scrapPage(int page, String shard, String query);
}
|
@ -0,0 +1,33 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service.client;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import org.springframework.core.ParameterizedTypeReference;
|
||||
import org.springframework.stereotype.Service;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
import ru.pricepulse.parsingservice.wildberries_parser.configuration.WbProperties;
|
||||
|
||||
import java.util.Map;
|
||||
|
||||
@AllArgsConstructor
|
||||
@Service
|
||||
public class ClientImpl implements Client {
|
||||
|
||||
private final WebClient webClient;
|
||||
private final WbProperties wbProperties;
|
||||
|
||||
@Override
|
||||
public Map<String, Object> scrapPage(int page, String shard, String query) {
|
||||
String url = wbProperties.getCatalogWbUrl() +
|
||||
shard +
|
||||
query +
|
||||
"?dest=-1257786&page=" + page + "&subject=2290";
|
||||
|
||||
return webClient.get()
|
||||
.uri(url)
|
||||
.retrieve()
|
||||
.bodyToMono(new ParameterizedTypeReference<Map<String, Object>>() {
|
||||
})
|
||||
.retry(50)
|
||||
.block();
|
||||
}
|
||||
}
|
@ -0,0 +1,20 @@
|
||||
package ru.pricepulse.parsingservice.wildberries_parser.service.dto;
|
||||
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Builder;
|
||||
import lombok.Data;
|
||||
import lombok.NoArgsConstructor;
|
||||
|
||||
@Data
|
||||
@AllArgsConstructor
|
||||
@NoArgsConstructor
|
||||
@Builder
|
||||
public class ProductInfoDto {
|
||||
private Long id;
|
||||
private String brand;
|
||||
private String name;
|
||||
private String supplier;
|
||||
private Double supplierRating;
|
||||
private Integer salePriceU;
|
||||
private Integer reviewRating;
|
||||
}
|
@ -13,4 +13,16 @@ spring:
|
||||
username: ${JDBC_USERNAME}
|
||||
password: ${JDBC_PASSWORD}
|
||||
liquibase:
|
||||
change-log: classpath:/db/changelog/master.yml
|
||||
change-log: classpath:/db/changelog/master.yml
|
||||
|
||||
rest:
|
||||
wildberries:
|
||||
base-url: "https://static-basket-01.wbbasket.ru"
|
||||
catalog-url: "/vol0/data/main-menu-ru-ru-v3.json"
|
||||
user-agent: "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:109.0)"
|
||||
catalog-wb-url: "https://catalog.wb.ru/catalog/"
|
||||
retry-attempts: 5
|
||||
retry-delay: 1000
|
||||
shard: "electronic15"
|
||||
laptop-url: "/catalog"
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user